{ "cells": [ { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "%matplotlib inline\n", "\n", "from datetime import datetime, date\n", "plt.style.use('ggplot')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Loading the Customer Demographics Data from the excel file\n", "\n", "cust_demo = pd.read_excel('Raw_data.xlsx' , sheet_name='CustomerDemographic')" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>gender</th>\n", " <th>past_3_years_bike_related_purchases</th>\n", " <th>DOB</th>\n", " <th>job_title</th>\n", " <th>job_industry_category</th>\n", " <th>wealth_segment</th>\n", " <th>deceased_indicator</th>\n", " <th>default</th>\n", " <th>owns_car</th>\n", " <th>tenure</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Laraine</td>\n", " <td>Medendorp</td>\n", " <td>F</td>\n", " <td>93</td>\n", " <td>1953-10-12</td>\n", " <td>Executive Secretary</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>\"'</td>\n", " <td>Yes</td>\n", " <td>11.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>Eli</td>\n", " <td>Bockman</td>\n", " <td>Male</td>\n", " <td>81</td>\n", " <td>1980-12-16</td>\n", " <td>Administrative Officer</td>\n", " <td>Financial 
Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td><script>alert('hi')</script></td>\n", " <td>Yes</td>\n", " <td>16.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>Arlin</td>\n", " <td>Dearle</td>\n", " <td>Male</td>\n", " <td>61</td>\n", " <td>1954-01-20</td>\n", " <td>Recruiting Manager</td>\n", " <td>Property</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>2018-02-01 00:00:00</td>\n", " <td>Yes</td>\n", " <td>15.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Talbot</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>33</td>\n", " <td>1961-10-03</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>() { _; } >_[$($())] { touch /tmp/blns.shellsh...</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>Sheila-kathryn</td>\n", " <td>Calton</td>\n", " <td>Female</td>\n", " <td>56</td>\n", " <td>1977-05-13</td>\n", " <td>Senior Editor</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>NIL</td>\n", " <td>Yes</td>\n", " <td>8.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " customer_id first_name last_name gender \\\n", "0 1 Laraine Medendorp F \n", "1 2 Eli Bockman Male \n", "2 3 Arlin Dearle Male \n", "3 4 Talbot NaN Male \n", "4 5 Sheila-kathryn Calton Female \n", "\n", " past_3_years_bike_related_purchases DOB job_title \\\n", "0 93 1953-10-12 Executive Secretary \n", "1 81 1980-12-16 Administrative Officer \n", "2 61 1954-01-20 Recruiting Manager \n", "3 33 1961-10-03 NaN \n", "4 56 1977-05-13 Senior Editor \n", "\n", " job_industry_category wealth_segment deceased_indicator \\\n", "0 Health Mass Customer N \n", "1 Financial Services Mass Customer N \n", "2 Property Mass Customer N \n", "3 IT Mass Customer N \n", "4 NaN Affluent Customer N \n", "\n", " default owns_car tenure \n", "0 \"' Yes 11.0 \n", "1 
<script>alert('hi')</script> Yes 16.0 \n", "2 2018-02-01 00:00:00 Yes 15.0 \n", "3 () { _; } >_[$($())] { touch /tmp/blns.shellsh... No 7.0 \n", "4 NIL Yes 8.0 " ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Checking first 5 records from Customer Demographics Data\n", "\n", "cust_demo.head(5)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "<class 'pandas.core.frame.DataFrame'>\n", "RangeIndex: 4000 entries, 0 to 3999\n", "Data columns (total 13 columns):\n", "customer_id 4000 non-null int64\n", "first_name 4000 non-null object\n", "last_name 3875 non-null object\n", "gender 4000 non-null object\n", "past_3_years_bike_related_purchases 4000 non-null int64\n", "DOB 3913 non-null datetime64[ns]\n", "job_title 3494 non-null object\n", "job_industry_category 3344 non-null object\n", "wealth_segment 4000 non-null object\n", "deceased_indicator 4000 non-null object\n", "default 3698 non-null object\n", "owns_car 4000 non-null object\n", "tenure 3913 non-null float64\n", "dtypes: datetime64[ns](1), float64(1), int64(2), object(9)\n", "memory usage: 406.3+ KB\n" ] } ], "source": [ "# Information of columns and data-types of Customer Demographics Data.\n", "\n", "cust_demo.info()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The data-type of columns looks fine. However here <b>default</b> is an irrelevent column which should be dropped / deleted from the dataset. Let's check for the data quality and apply data cleaning process where ever applicable to clean our dataset before performing any analysis." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Total Records" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total records (rows) in the dataset : 4000\n", "Total columns (features) in the dataset : 13\n" ] } ], "source": [ "print(\"Total records (rows) in the dataset : {}\".format(cust_demo.shape[0]))\n", "print(\"Total columns (features) in the dataset : {}\".format(cust_demo.shape[1]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Numeric Columns and Non-Numeric Columns" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The numeric columns are : ['customer_id' 'past_3_years_bike_related_purchases' 'tenure']\n", "The non-numeric columns are : ['first_name' 'last_name' 'gender' 'DOB' 'job_title'\n", " 'job_industry_category' 'wealth_segment' 'deceased_indicator' 'default'\n", " 'owns_car']\n" ] } ], "source": [ "# select numeric columns\n", "df_numeric = cust_demo.select_dtypes(include=[np.number])\n", "numeric_cols = df_numeric.columns.values\n", "print(\"The numeric columns are : {}\".format(numeric_cols))\n", "\n", "\n", "# select non-numeric columns\n", "df_non_numeric = cust_demo.select_dtypes(exclude=[np.number])\n", "non_numeric_cols = df_non_numeric.columns.values\n", "print(\"The non-numeric columns are : {}\".format(non_numeric_cols))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Dropping Irrelevant Columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>The default column is irrelevant to the analysis. Hence it should be dropped.</b>" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "# Dropping the default column\n", "\n", "cust_demo.drop(labels={'default'}, axis=1 , inplace=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. 
Missing Values Check" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Checking for the presence of any missing values in the dataset. If missing values are present for a particular feature, then depending upon the situation the feature may either be dropped (when a major share of its data is missing) or an appropriate value will be imputed for the missing entries in that feature column." ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "customer_id 0\n", "first_name 0\n", "last_name 125\n", "gender 0\n", "past_3_years_bike_related_purchases 0\n", "DOB 87\n", "job_title 506\n", "job_industry_category 656\n", "wealth_segment 0\n", "deceased_indicator 0\n", "owns_car 0\n", "tenure 87\n", "dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Total number of missing values\n", "\n", "cust_demo.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "customer_id 0.000\n", "first_name 0.000\n", "last_name 3.125\n", "gender 0.000\n", "past_3_years_bike_related_purchases 0.000\n", "DOB 2.175\n", "job_title 12.650\n", "job_industry_category 16.400\n", "wealth_segment 0.000\n", "deceased_indicator 0.000\n", "owns_car 0.000\n", "tenure 2.175\n", "dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Percentage of missing values\n", "\n", "cust_demo.isnull().mean()*100" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here it is observed that the columns last_name, DOB, job_title, job_industry_category and tenure have missing values." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.1 Last Name" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "first_name 0\n", "customer_id 0\n", "dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Checking for the presence of first name and customer id in records where last name is missing.\n", "\n", "cust_demo[cust_demo['last_name'].isnull()][['first_name', 'customer_id']].isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Since all customers have a customer_id and a first name, every customer is identifiable. <b>Hence it is okay for a customer to not have a last name. Filling null last names with \"None\"</b>." ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>gender</th>\n", " <th>past_3_years_bike_related_purchases</th>\n", " <th>DOB</th>\n", " <th>job_title</th>\n", " <th>job_industry_category</th>\n", " <th>wealth_segment</th>\n", " <th>deceased_indicator</th>\n", " <th>owns_car</th>\n", " <th>tenure</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Talbot</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>33</td>\n", " <td>1961-10-03</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " </tr>\n", " <tr>\n", " <th>66</th>\n", " <td>67</td>\n", " <td>Vernon</td>\n", 
" <td>NaN</td>\n", " <td>Male</td>\n", " <td>67</td>\n", " <td>1960-06-14</td>\n", " <td>Web Developer II</td>\n", " <td>Retail</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>18.0</td>\n", " </tr>\n", " <tr>\n", " <th>105</th>\n", " <td>106</td>\n", " <td>Glyn</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>54</td>\n", " <td>1966-07-03</td>\n", " <td>Software Test Engineer III</td>\n", " <td>Health</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>18.0</td>\n", " </tr>\n", " <tr>\n", " <th>138</th>\n", " <td>139</td>\n", " <td>Gar</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>1</td>\n", " <td>1964-07-28</td>\n", " <td>Operator</td>\n", " <td>Telecommunications</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", " <th>196</th>\n", " <td>197</td>\n", " <td>Avis</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>32</td>\n", " <td>1977-01-27</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>5.0</td>\n", " </tr>\n", " <tr>\n", " <th>210</th>\n", " <td>211</td>\n", " <td>Beitris</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>6</td>\n", " <td>1974-03-04</td>\n", " <td>VP Marketing</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>5.0</td>\n", " </tr>\n", " <tr>\n", " <th>249</th>\n", " <td>250</td>\n", " <td>Kristofer</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>53</td>\n", " <td>1988-04-15</td>\n", " <td>Legal Assistant</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", " <th>250</th>\n", " <td>251</td>\n", " <td>Mala</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>88</td>\n", " <td>1977-12-24</td>\n", " <td>VP Sales</td>\n", " <td>Financial Services</td>\n", " <td>Affluent 
Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " </tr>\n", " <tr>\n", " <th>256</th>\n", " <td>257</td>\n", " <td>Marissa</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>70</td>\n", " <td>1966-02-08</td>\n", " <td>Sales Associate</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>19.0</td>\n", " </tr>\n", " <tr>\n", " <th>274</th>\n", " <td>275</td>\n", " <td>Dud</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>7</td>\n", " <td>1955-07-27</td>\n", " <td>VP Sales</td>\n", " <td>Health</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", " <th>355</th>\n", " <td>356</td>\n", " <td>Nichole</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>10</td>\n", " <td>1975-03-30</td>\n", " <td>Librarian</td>\n", " <td>Entertainment</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>5.0</td>\n", " </tr>\n", " <tr>\n", " <th>459</th>\n", " <td>460</td>\n", " <td>Illa</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>0</td>\n", " <td>1986-01-23</td>\n", " <td>Electrical Engineer</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>16.0</td>\n", " </tr>\n", " <tr>\n", " <th>474</th>\n", " <td>475</td>\n", " <td>Vernor</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>0</td>\n", " <td>1996-11-14</td>\n", " <td>Nuclear Power Engineer</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>493</th>\n", " <td>494</td>\n", " <td>Gaby</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>33</td>\n", " <td>1975-06-02</td>\n", " <td>Design Engineer</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>9.0</td>\n", " </tr>\n", " <tr>\n", " <th>513</th>\n", " <td>514</td>\n", " 
<td>Trent</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>9</td>\n", " <td>1996-06-20</td>\n", " <td>Associate Professor</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", " <th>525</th>\n", " <td>526</td>\n", " <td>Ardelle</td>\n", " <td>NaN</td>\n", " <td>U</td>\n", " <td>9</td>\n", " <td>NaT</td>\n", " <td>Social Worker</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>656</th>\n", " <td>657</td>\n", " <td>Hoyt</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>66</td>\n", " <td>1993-02-18</td>\n", " <td>Safety Technician II</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>10.0</td>\n", " </tr>\n", " <tr>\n", " <th>659</th>\n", " <td>660</td>\n", " <td>Stormi</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>82</td>\n", " <td>1995-07-29</td>\n", " <td>Geological Engineer</td>\n", " <td>Manufacturing</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>6.0</td>\n", " </tr>\n", " <tr>\n", " <th>675</th>\n", " <td>676</td>\n", " <td>Curtis</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>51</td>\n", " <td>1968-05-19</td>\n", " <td>Senior Editor</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>14.0</td>\n", " </tr>\n", " <tr>\n", " <th>683</th>\n", " <td>684</td>\n", " <td>Malvin</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>88</td>\n", " <td>1987-07-03</td>\n", " <td>Desktop Support Technician</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>14.0</td>\n", " </tr>\n", " <tr>\n", " <th>689</th>\n", " <td>690</td>\n", " <td>Lindsey</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>95</td>\n", " <td>1987-03-27</td>\n", " <td>Assistant 
Professor</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>17.0</td>\n", " </tr>\n", " <tr>\n", " <th>702</th>\n", " <td>703</td>\n", " <td>Ethelda</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>66</td>\n", " <td>1966-10-31</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>15.0</td>\n", " </tr>\n", " <tr>\n", " <th>743</th>\n", " <td>744</td>\n", " <td>Heinrik</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>54</td>\n", " <td>1977-08-30</td>\n", " <td>Graphic Designer</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>14.0</td>\n", " </tr>\n", " <tr>\n", " <th>779</th>\n", " <td>780</td>\n", " <td>Kim</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>24</td>\n", " <td>1973-10-12</td>\n", " <td>Professor</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>20.0</td>\n", " </tr>\n", " <tr>\n", " <th>789</th>\n", " <td>790</td>\n", " <td>Yvonne</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>22</td>\n", " <td>1968-03-24</td>\n", " <td>Senior Editor</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>15.0</td>\n", " </tr>\n", " <tr>\n", " <th>856</th>\n", " <td>857</td>\n", " <td>Theo</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>15</td>\n", " <td>1964-08-14</td>\n", " <td>General Manager</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", " <th>859</th>\n", " <td>860</td>\n", " <td>Ida</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>80</td>\n", " <td>1980-08-12</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>7.0</td>\n", " </tr>\n", " <tr>\n", " <th>915</th>\n", " 
<td>916</td>\n", " <td>Joycelin</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>18</td>\n", " <td>1991-06-18</td>\n", " <td>Recruiter</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>8.0</td>\n", " </tr>\n", " <tr>\n", " <th>926</th>\n", " <td>927</td>\n", " <td>Jarret</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>25</td>\n", " <td>1966-02-19</td>\n", " <td>Cost Accountant</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>18.0</td>\n", " </tr>\n", " <tr>\n", " <th>937</th>\n", " <td>938</td>\n", " <td>Corabelle</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>18</td>\n", " <td>1996-04-06</td>\n", " <td>Technical Writer</td>\n", " <td>Retail</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>3179</th>\n", " <td>3180</td>\n", " <td>Gage</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>96</td>\n", " <td>1974-06-14</td>\n", " <td>Business Systems Development Analyst</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>19.0</td>\n", " </tr>\n", " <tr>\n", " <th>3187</th>\n", " <td>3188</td>\n", " <td>Boyd</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>94</td>\n", " <td>1999-07-07</td>\n", " <td>Actuary</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>3199</th>\n", " <td>3200</td>\n", " <td>Marna</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>51</td>\n", " <td>1995-11-03</td>\n", " <td>Environmental Tech</td>\n", " 
<td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>3258</th>\n", " <td>3259</td>\n", " <td>Rabi</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>74</td>\n", " <td>1953-11-04</td>\n", " <td>Quality Control Specialist</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>10.0</td>\n", " </tr>\n", " <tr>\n", " <th>3318</th>\n", " <td>3319</td>\n", " <td>Erda</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>67</td>\n", " <td>1966-04-04</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>19.0</td>\n", " </tr>\n", " <tr>\n", " <th>3320</th>\n", " <td>3321</td>\n", " <td>Ives</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>38</td>\n", " <td>1980-05-10</td>\n", " <td>Software Test Engineer I</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>14.0</td>\n", " </tr>\n", " <tr>\n", " <th>3323</th>\n", " <td>3324</td>\n", " <td>Sholom</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>32</td>\n", " <td>1973-07-11</td>\n", " <td>Research Nurse</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " </tr>\n", " <tr>\n", " <th>3324</th>\n", " <td>3325</td>\n", " <td>Sylas</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>80</td>\n", " <td>1996-10-08</td>\n", " <td>Database Administrator IV</td>\n", " <td>Manufacturing</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>1.0</td>\n", " </tr>\n", " <tr>\n", " <th>3346</th>\n", " <td>3347</td>\n", " <td>Nichols</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>99</td>\n", " <td>1985-11-08</td>\n", " <td>Computer Systems Analyst II</td>\n", " <td>Entertainment</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>18.0</td>\n", " 
</tr>\n", " <tr>\n", " <th>3363</th>\n", " <td>3364</td>\n", " <td>Trueman</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>77</td>\n", " <td>1993-08-19</td>\n", " <td>Engineer IV</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>3.0</td>\n", " </tr>\n", " <tr>\n", " <th>3384</th>\n", " <td>3385</td>\n", " <td>Ronda</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>23</td>\n", " <td>1975-02-10</td>\n", " <td>Systems Administrator III</td>\n", " <td>Argiculture</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>9.0</td>\n", " </tr>\n", " <tr>\n", " <th>3396</th>\n", " <td>3397</td>\n", " <td>Melisande</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>70</td>\n", " <td>1985-08-19</td>\n", " <td>Product Engineer</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>11.0</td>\n", " </tr>\n", " <tr>\n", " <th>3400</th>\n", " <td>3401</td>\n", " <td>Cristie</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>92</td>\n", " <td>1993-07-28</td>\n", " <td>Tax Accountant</td>\n", " <td>Telecommunications</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>4.0</td>\n", " </tr>\n", " <tr>\n", " <th>3442</th>\n", " <td>3443</td>\n", " <td>Fran</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>11</td>\n", " <td>1995-04-12</td>\n", " <td>Technical Writer</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>5.0</td>\n", " </tr>\n", " <tr>\n", " <th>3444</th>\n", " <td>3445</td>\n", " <td>Craggy</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>62</td>\n", " <td>1966-06-23</td>\n", " <td>Database Administrator I</td>\n", " <td>Financial Services</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>11.0</td>\n", " </tr>\n", " <tr>\n", " <th>3446</th>\n", " <td>3447</td>\n", " <td>Linell</td>\n", " <td>NaN</td>\n", " 
<td>Female</td>\n", " <td>43</td>\n", " <td>1977-11-23</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>17.0</td>\n", " </tr>\n", " <tr>\n", " <th>3479</th>\n", " <td>3480</td>\n", " <td>Jarib</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>30</td>\n", " <td>1959-06-24</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>20.0</td>\n", " </tr>\n", " <tr>\n", " <th>3554</th>\n", " <td>3555</td>\n", " <td>Latashia</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>96</td>\n", " <td>1976-02-26</td>\n", " <td>Programmer Analyst II</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>21.0</td>\n", " </tr>\n", " <tr>\n", " <th>3596</th>\n", " <td>3597</td>\n", " <td>Giorgi</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>71</td>\n", " <td>1954-06-16</td>\n", " <td>Analog Circuit Design manager</td>\n", " <td>Property</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>16.0</td>\n", " </tr>\n", " <tr>\n", " <th>3623</th>\n", " <td>3624</td>\n", " <td>Lenka</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>54</td>\n", " <td>1984-10-16</td>\n", " <td>Cost Accountant</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>7.0</td>\n", " </tr>\n", " <tr>\n", " <th>3634</th>\n", " <td>3635</td>\n", " <td>Elset</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>51</td>\n", " <td>1977-07-06</td>\n", " <td>VP Marketing</td>\n", " <td>Retail</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>9.0</td>\n", " </tr>\n", " <tr>\n", " <th>3650</th>\n", " <td>3651</td>\n", " <td>Baxie</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>91</td>\n", " <td>1999-11-15</td>\n", " <td>Human Resources Assistant I</td>\n", " <td>Manufacturing</td>\n", 
" <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>2.0</td>\n", " </tr>\n", " <tr>\n", " <th>3717</th>\n", " <td>3718</td>\n", " <td>Damiano</td>\n", " <td>NaN</td>\n", " <td>U</td>\n", " <td>22</td>\n", " <td>NaT</td>\n", " <td>Geologist IV</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3755</th>\n", " <td>3756</td>\n", " <td>Barry</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>22</td>\n", " <td>1977-07-08</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>10.0</td>\n", " </tr>\n", " <tr>\n", " <th>3816</th>\n", " <td>3817</td>\n", " <td>Tuckie</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>65</td>\n", " <td>1957-05-02</td>\n", " <td>VP Product Management</td>\n", " <td>Manufacturing</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>13.0</td>\n", " </tr>\n", " <tr>\n", " <th>3884</th>\n", " <td>3885</td>\n", " <td>Asher</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>55</td>\n", " <td>1978-06-17</td>\n", " <td>Actuary</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>8.0</td>\n", " </tr>\n", " <tr>\n", " <th>3915</th>\n", " <td>3916</td>\n", " <td>Myrtia</td>\n", " <td>NaN</td>\n", " <td>Female</td>\n", " <td>31</td>\n", " <td>1958-10-17</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>17.0</td>\n", " </tr>\n", " <tr>\n", " <th>3926</th>\n", " <td>3927</td>\n", " <td>Conway</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>29</td>\n", " <td>1978-01-07</td>\n", " <td>Electrical Engineer</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>7.0</td>\n", " </tr>\n", " <tr>\n", " <th>3961</th>\n", " <td>3962</td>\n", " <td>Benoit</td>\n", " 
<td>NaN</td>\n", " <td>Male</td>\n", " <td>17</td>\n", " <td>1977-10-06</td>\n", " <td>Project Manager</td>\n", " <td>Argiculture</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>14.0</td>\n", " </tr>\n", " <tr>\n", " <th>3998</th>\n", " <td>3999</td>\n", " <td>Patrizius</td>\n", " <td>NaN</td>\n", " <td>Male</td>\n", " <td>11</td>\n", " <td>1973-10-24</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>125 rows × 12 columns</p>\n", "</div>" ], "text/plain": [ " customer_id first_name last_name gender \\\n", "3 4 Talbot NaN Male \n", "66 67 Vernon NaN Male \n", "105 106 Glyn NaN Male \n", "138 139 Gar NaN Male \n", "196 197 Avis NaN Female \n", "210 211 Beitris NaN Female \n", "249 250 Kristofer NaN Male \n", "250 251 Mala NaN Female \n", "256 257 Marissa NaN Female \n", "274 275 Dud NaN Male \n", "355 356 Nichole NaN Female \n", "459 460 Illa NaN Female \n", "474 475 Vernor NaN Male \n", "493 494 Gaby NaN Male \n", "513 514 Trent NaN Male \n", "525 526 Ardelle NaN U \n", "656 657 Hoyt NaN Male \n", "659 660 Stormi NaN Female \n", "675 676 Curtis NaN Male \n", "683 684 Malvin NaN Male \n", "689 690 Lindsey NaN Male \n", "702 703 Ethelda NaN Female \n", "743 744 Heinrik NaN Male \n", "779 780 Kim NaN Female \n", "789 790 Yvonne NaN Female \n", "856 857 Theo NaN Female \n", "859 860 Ida NaN Female \n", "915 916 Joycelin NaN Female \n", "926 927 Jarret NaN Male \n", "937 938 Corabelle NaN Female \n", "... ... ... ... ... 
\n", "3179 3180 Gage NaN Male \n", "3187 3188 Boyd NaN Male \n", "3199 3200 Marna NaN Female \n", "3258 3259 Rabi NaN Male \n", "3318 3319 Erda NaN Female \n", "3320 3321 Ives NaN Male \n", "3323 3324 Sholom NaN Male \n", "3324 3325 Sylas NaN Male \n", "3346 3347 Nichols NaN Male \n", "3363 3364 Trueman NaN Male \n", "3384 3385 Ronda NaN Female \n", "3396 3397 Melisande NaN Female \n", "3400 3401 Cristie NaN Female \n", "3442 3443 Fran NaN Male \n", "3444 3445 Craggy NaN Male \n", "3446 3447 Linell NaN Female \n", "3479 3480 Jarib NaN Male \n", "3554 3555 Latashia NaN Female \n", "3596 3597 Giorgi NaN Male \n", "3623 3624 Lenka NaN Female \n", "3634 3635 Elset NaN Female \n", "3650 3651 Baxie NaN Male \n", "3717 3718 Damiano NaN U \n", "3755 3756 Barry NaN Male \n", "3816 3817 Tuckie NaN Male \n", "3884 3885 Asher NaN Male \n", "3915 3916 Myrtia NaN Female \n", "3926 3927 Conway NaN Male \n", "3961 3962 Benoit NaN Male \n", "3998 3999 Patrizius NaN Male \n", "\n", " past_3_years_bike_related_purchases DOB \\\n", "3 33 1961-10-03 \n", "66 67 1960-06-14 \n", "105 54 1966-07-03 \n", "138 1 1964-07-28 \n", "196 32 1977-01-27 \n", "210 6 1974-03-04 \n", "249 53 1988-04-15 \n", "250 88 1977-12-24 \n", "256 70 1966-02-08 \n", "274 7 1955-07-27 \n", "355 10 1975-03-30 \n", "459 0 1986-01-23 \n", "474 0 1996-11-14 \n", "493 33 1975-06-02 \n", "513 9 1996-06-20 \n", "525 9 NaT \n", "656 66 1993-02-18 \n", "659 82 1995-07-29 \n", "675 51 1968-05-19 \n", "683 88 1987-07-03 \n", "689 95 1987-03-27 \n", "702 66 1966-10-31 \n", "743 54 1977-08-30 \n", "779 24 1973-10-12 \n", "789 22 1968-03-24 \n", "856 15 1964-08-14 \n", "859 80 1980-08-12 \n", "915 18 1991-06-18 \n", "926 25 1966-02-19 \n", "937 18 1996-04-06 \n", "... ... ... 
\n", "3179 96 1974-06-14 \n", "3187 94 1999-07-07 \n", "3199 51 1995-11-03 \n", "3258 74 1953-11-04 \n", "3318 67 1966-04-04 \n", "3320 38 1980-05-10 \n", "3323 32 1973-07-11 \n", "3324 80 1996-10-08 \n", "3346 99 1985-11-08 \n", "3363 77 1993-08-19 \n", "3384 23 1975-02-10 \n", "3396 70 1985-08-19 \n", "3400 92 1993-07-28 \n", "3442 11 1995-04-12 \n", "3444 62 1966-06-23 \n", "3446 43 1977-11-23 \n", "3479 30 1959-06-24 \n", "3554 96 1976-02-26 \n", "3596 71 1954-06-16 \n", "3623 54 1984-10-16 \n", "3634 51 1977-07-06 \n", "3650 91 1999-11-15 \n", "3717 22 NaT \n", "3755 22 1977-07-08 \n", "3816 65 1957-05-02 \n", "3884 55 1978-06-17 \n", "3915 31 1958-10-17 \n", "3926 29 1978-01-07 \n", "3961 17 1977-10-06 \n", "3998 11 1973-10-24 \n", "\n", " job_title job_industry_category \\\n", "3 NaN IT \n", "66 Web Developer II Retail \n", "105 Software Test Engineer III Health \n", "138 Operator Telecommunications \n", "196 NaN NaN \n", "210 VP Marketing Manufacturing \n", "249 Legal Assistant Health \n", "250 VP Sales Financial Services \n", "256 Sales Associate Manufacturing \n", "274 VP Sales Health \n", "355 Librarian Entertainment \n", "459 Electrical Engineer Manufacturing \n", "474 Nuclear Power Engineer Manufacturing \n", "493 Design Engineer Manufacturing \n", "513 Associate Professor Financial Services \n", "525 Social Worker Health \n", "656 Safety Technician II Manufacturing \n", "659 Geological Engineer Manufacturing \n", "675 Senior Editor NaN \n", "683 Desktop Support Technician Financial Services \n", "689 Assistant Professor NaN \n", "702 NaN Property \n", "743 Graphic Designer Manufacturing \n", "779 Professor Financial Services \n", "789 Senior Editor NaN \n", "856 General Manager NaN \n", "859 NaN NaN \n", "915 Recruiter NaN \n", "926 Cost Accountant Financial Services \n", "937 Technical Writer Retail \n", "... ... ... 
\n", "3179 Business Systems Development Analyst IT \n", "3187 Actuary Financial Services \n", "3199 Environmental Tech Manufacturing \n", "3258 Quality Control Specialist NaN \n", "3318 NaN Financial Services \n", "3320 Software Test Engineer I NaN \n", "3323 Research Nurse Health \n", "3324 Database Administrator IV Manufacturing \n", "3346 Computer Systems Analyst II Entertainment \n", "3363 Engineer IV Manufacturing \n", "3384 Systems Administrator III Argiculture \n", "3396 Product Engineer IT \n", "3400 Tax Accountant Telecommunications \n", "3442 Technical Writer NaN \n", "3444 Database Administrator I Financial Services \n", "3446 NaN Financial Services \n", "3479 NaN NaN \n", "3554 Programmer Analyst II Manufacturing \n", "3596 Analog Circuit Design manager Property \n", "3623 Cost Accountant Financial Services \n", "3634 VP Marketing Retail \n", "3650 Human Resources Assistant I Manufacturing \n", "3717 Geologist IV IT \n", "3755 NaN NaN \n", "3816 VP Product Management Manufacturing \n", "3884 Actuary Financial Services \n", "3915 NaN Retail \n", "3926 Electrical Engineer Manufacturing \n", "3961 Project Manager Argiculture \n", "3998 NaN Manufacturing \n", "\n", " wealth_segment deceased_indicator owns_car tenure \n", "3 Mass Customer N No 7.0 \n", "66 Mass Customer N No 18.0 \n", "105 High Net Worth N Yes 18.0 \n", "138 Affluent Customer N No 4.0 \n", "196 High Net Worth N No 5.0 \n", "210 Mass Customer N Yes 5.0 \n", "249 Mass Customer N Yes 13.0 \n", "250 Affluent Customer N Yes 10.0 \n", "256 Affluent Customer N Yes 19.0 \n", "274 High Net Worth N No 13.0 \n", "355 High Net Worth N No 5.0 \n", "459 Affluent Customer N Yes 16.0 \n", "474 Affluent Customer N No 1.0 \n", "493 Mass Customer N No 9.0 \n", "513 Mass Customer N Yes 4.0 \n", "525 Mass Customer N Yes NaN \n", "656 Affluent Customer N No 10.0 \n", "659 High Net Worth N No 6.0 \n", "675 High Net Worth N Yes 14.0 \n", "683 Mass Customer N No 14.0 \n", "689 Affluent Customer N Yes 17.0 \n", "702 
Mass Customer N No 15.0 \n", "743 Affluent Customer N Yes 14.0 \n", "779 Mass Customer N No 20.0 \n", "789 Affluent Customer N No 15.0 \n", "856 High Net Worth N No 4.0 \n", "859 High Net Worth N Yes 7.0 \n", "915 Affluent Customer N No 8.0 \n", "926 Mass Customer N Yes 18.0 \n", "937 Mass Customer N No 7.0 \n", "... ... ... ... ... \n", "3179 Mass Customer N Yes 19.0 \n", "3187 Mass Customer N No 1.0 \n", "3199 Mass Customer N No 1.0 \n", "3258 High Net Worth N No 10.0 \n", "3318 Affluent Customer N Yes 19.0 \n", "3320 High Net Worth N Yes 14.0 \n", "3323 Mass Customer N Yes 10.0 \n", "3324 High Net Worth N No 1.0 \n", "3346 High Net Worth N Yes 18.0 \n", "3363 Mass Customer N Yes 3.0 \n", "3384 Mass Customer N No 9.0 \n", "3396 Mass Customer N No 11.0 \n", "3400 Mass Customer N Yes 4.0 \n", "3442 Mass Customer N Yes 5.0 \n", "3444 Affluent Customer N Yes 11.0 \n", "3446 High Net Worth N No 17.0 \n", "3479 Mass Customer N No 20.0 \n", "3554 Mass Customer N No 21.0 \n", "3596 Affluent Customer N Yes 16.0 \n", "3623 Mass Customer N Yes 7.0 \n", "3634 High Net Worth N No 9.0 \n", "3650 Mass Customer N No 2.0 \n", "3717 Mass Customer N Yes NaN \n", "3755 Affluent Customer N No 10.0 \n", "3816 High Net Worth N No 13.0 \n", "3884 Mass Customer N Yes 8.0 \n", "3915 Affluent Customer N Yes 17.0 \n", "3926 Mass Customer N Yes 7.0 \n", "3961 High Net Worth N Yes 14.0 \n", "3998 Affluent Customer N Yes 10.0 \n", "\n", "[125 rows x 12 columns]" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fetching records where last name is missing.\n", "\n", "cust_demo[cust_demo['last_name'].isnull()]" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "cust_demo['last_name'].fillna('None',axis=0, inplace=True)" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } 
], "source": [ "cust_demo['last_name'].isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Currently there are no missing values for last name column." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.2 Date of Birth" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>gender</th>\n", " <th>past_3_years_bike_related_purchases</th>\n", " <th>DOB</th>\n", " <th>job_title</th>\n", " <th>job_industry_category</th>\n", " <th>wealth_segment</th>\n", " <th>deceased_indicator</th>\n", " <th>owns_car</th>\n", " <th>tenure</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>143</th>\n", " <td>144</td>\n", " <td>Jory</td>\n", " <td>Barrabeale</td>\n", " <td>U</td>\n", " <td>71</td>\n", " <td>NaT</td>\n", " <td>Environmental Tech</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>167</th>\n", " <td>168</td>\n", " <td>Reggie</td>\n", " <td>Broggetti</td>\n", " <td>U</td>\n", " <td>8</td>\n", " <td>NaT</td>\n", " <td>General Manager</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>266</th>\n", " <td>267</td>\n", " <td>Edgar</td>\n", " <td>Buckler</td>\n", " <td>U</td>\n", " <td>53</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " 
</tr>\n", " <tr>\n", " <th>289</th>\n", " <td>290</td>\n", " <td>Giorgio</td>\n", " <td>Kevane</td>\n", " <td>U</td>\n", " <td>42</td>\n", " <td>NaT</td>\n", " <td>Senior Sales Associate</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>450</th>\n", " <td>451</td>\n", " <td>Marlow</td>\n", " <td>Flowerdew</td>\n", " <td>U</td>\n", " <td>37</td>\n", " <td>NaT</td>\n", " <td>Quality Control Specialist</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>452</th>\n", " <td>453</td>\n", " <td>Cornelius</td>\n", " <td>Yarmouth</td>\n", " <td>U</td>\n", " <td>81</td>\n", " <td>NaT</td>\n", " <td>Assistant Professor</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>453</th>\n", " <td>454</td>\n", " <td>Eugenie</td>\n", " <td>Domenc</td>\n", " <td>U</td>\n", " <td>58</td>\n", " <td>NaT</td>\n", " <td>Research Nurse</td>\n", " <td>Health</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>479</th>\n", " <td>480</td>\n", " <td>Darelle</td>\n", " <td>Ive</td>\n", " <td>U</td>\n", " <td>67</td>\n", " <td>NaT</td>\n", " <td>Registered Nurse</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>512</th>\n", " <td>513</td>\n", " <td>Kienan</td>\n", " <td>Soar</td>\n", " <td>U</td>\n", " <td>30</td>\n", " <td>NaT</td>\n", " <td>Tax Accountant</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>525</th>\n", " <td>526</td>\n", " <td>Ardelle</td>\n", " <td>None</td>\n", " <td>U</td>\n", " <td>9</td>\n", " <td>NaT</td>\n", " <td>Social Worker</td>\n", " <td>Health</td>\n", " 
<td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>547</th>\n", " <td>548</td>\n", " <td>Georgie</td>\n", " <td>Cudbertson</td>\n", " <td>U</td>\n", " <td>84</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>581</th>\n", " <td>582</td>\n", " <td>Rhoda</td>\n", " <td>McKeown</td>\n", " <td>U</td>\n", " <td>21</td>\n", " <td>NaT</td>\n", " <td>Staff Scientist</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>598</th>\n", " <td>599</td>\n", " <td>Ernestus</td>\n", " <td>Cruden</td>\n", " <td>U</td>\n", " <td>48</td>\n", " <td>NaT</td>\n", " <td>Senior Financial Analyst</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>679</th>\n", " <td>680</td>\n", " <td>Gay</td>\n", " <td>Pickersgill</td>\n", " <td>U</td>\n", " <td>22</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>684</th>\n", " <td>685</td>\n", " <td>Booth</td>\n", " <td>Birkin</td>\n", " <td>U</td>\n", " <td>28</td>\n", " <td>NaT</td>\n", " <td>Senior Developer</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>798</th>\n", " <td>799</td>\n", " <td>Harland</td>\n", " <td>Spilisy</td>\n", " <td>U</td>\n", " <td>39</td>\n", " <td>NaT</td>\n", " <td>Programmer I</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>838</th>\n", " <td>839</td>\n", " <td>Charis</td>\n", " <td>Greaves</td>\n", " <td>U</td>\n", " <td>14</td>\n", " 
<td>NaT</td>\n", " <td>Structural Analysis Engineer</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>882</th>\n", " <td>883</td>\n", " <td>Lolita</td>\n", " <td>Bennie</td>\n", " <td>U</td>\n", " <td>73</td>\n", " <td>NaT</td>\n", " <td>Recruiter</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>891</th>\n", " <td>892</td>\n", " <td>Conroy</td>\n", " <td>Healy</td>\n", " <td>U</td>\n", " <td>22</td>\n", " <td>NaT</td>\n", " <td>Office Assistant II</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>949</th>\n", " <td>950</td>\n", " <td>Bret</td>\n", " <td>Ivakhnov</td>\n", " <td>U</td>\n", " <td>24</td>\n", " <td>NaT</td>\n", " <td>Recruiter</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>974</th>\n", " <td>975</td>\n", " <td>Goldarina</td>\n", " <td>Rzehorz</td>\n", " <td>U</td>\n", " <td>26</td>\n", " <td>NaT</td>\n", " <td>Automation Specialist IV</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>982</th>\n", " <td>983</td>\n", " <td>Shaylyn</td>\n", " <td>Riggs</td>\n", " <td>U</td>\n", " <td>49</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>995</th>\n", " <td>996</td>\n", " <td>Aura</td>\n", " <td>Bemlott</td>\n", " <td>U</td>\n", " <td>67</td>\n", " <td>NaT</td>\n", " <td>Assistant Manager</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1037</th>\n", " <td>1038</td>\n", " <td>Fraser</td>\n", " 
<td>Acome</td>\n", " <td>U</td>\n", " <td>57</td>\n", " <td>NaT</td>\n", " <td>Engineer I</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1043</th>\n", " <td>1044</td>\n", " <td>Frederico</td>\n", " <td>Whilder</td>\n", " <td>U</td>\n", " <td>4</td>\n", " <td>NaT</td>\n", " <td>Food Chemist</td>\n", " <td>Health</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1081</th>\n", " <td>1082</td>\n", " <td>Guinevere</td>\n", " <td>Kelby</td>\n", " <td>U</td>\n", " <td>90</td>\n", " <td>NaT</td>\n", " <td>Financial Analyst</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1173</th>\n", " <td>1174</td>\n", " <td>Shellysheldon</td>\n", " <td>Gooderridge</td>\n", " <td>U</td>\n", " <td>9</td>\n", " <td>NaT</td>\n", " <td>Executive Secretary</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1209</th>\n", " <td>1210</td>\n", " <td>Shandie</td>\n", " <td>Sprigg</td>\n", " <td>U</td>\n", " <td>81</td>\n", " <td>NaT</td>\n", " <td>Programmer II</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1243</th>\n", " <td>1244</td>\n", " <td>Glenn</td>\n", " <td>Tinham</td>\n", " <td>U</td>\n", " <td>80</td>\n", " <td>NaT</td>\n", " <td>Financial Analyst</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>1350</th>\n", " <td>1351</td>\n", " <td>Lorettalorna</td>\n", " <td>None</td>\n", " <td>U</td>\n", " <td>32</td>\n", " <td>NaT</td>\n", " <td>Office Assistant IV</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", 
" <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>2695</th>\n", " <td>2696</td>\n", " <td>Isabelle</td>\n", " <td>Bursnoll</td>\n", " <td>U</td>\n", " <td>42</td>\n", " <td>NaT</td>\n", " <td>Social Worker</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2696</th>\n", " <td>2697</td>\n", " <td>Klarika</td>\n", " <td>Yerby</td>\n", " <td>U</td>\n", " <td>70</td>\n", " <td>NaT</td>\n", " <td>Legal Assistant</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2853</th>\n", " <td>2854</td>\n", " <td>Vikky</td>\n", " <td>Dyde</td>\n", " <td>U</td>\n", " <td>49</td>\n", " <td>NaT</td>\n", " <td>Project Manager</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2919</th>\n", " <td>2920</td>\n", " <td>Casar</td>\n", " <td>Ritchley</td>\n", " <td>U</td>\n", " <td>0</td>\n", " <td>NaT</td>\n", " <td>Business Systems Development Analyst</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2962</th>\n", " <td>2963</td>\n", " <td>Christin</td>\n", " <td>Fricke</td>\n", " <td>U</td>\n", " <td>17</td>\n", " <td>NaT</td>\n", " <td>Safety Technician II</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2998</th>\n", " <td>2999</td>\n", " <td>Rinaldo</td>\n", " <td>Diggin</td>\n", " <td>U</td>\n", " <td>28</td>\n", " <td>NaT</td>\n", " <td>Business Systems 
Development Analyst</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3011</th>\n", " <td>3012</td>\n", " <td>Devland</td>\n", " <td>Probart</td>\n", " <td>U</td>\n", " <td>81</td>\n", " <td>NaT</td>\n", " <td>Technical Writer</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3085</th>\n", " <td>3086</td>\n", " <td>Pieter</td>\n", " <td>Gadesby</td>\n", " <td>U</td>\n", " <td>18</td>\n", " <td>NaT</td>\n", " <td>Biostatistician I</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3150</th>\n", " <td>3151</td>\n", " <td>Thorn</td>\n", " <td>Choffin</td>\n", " <td>U</td>\n", " <td>20</td>\n", " <td>NaT</td>\n", " <td>Senior Developer</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3221</th>\n", " <td>3222</td>\n", " <td>Caralie</td>\n", " <td>Sellors</td>\n", " <td>U</td>\n", " <td>40</td>\n", " <td>NaT</td>\n", " <td>Senior Editor</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3222</th>\n", " <td>3223</td>\n", " <td>Tiffi</td>\n", " <td>Wortt</td>\n", " <td>U</td>\n", " <td>44</td>\n", " <td>NaT</td>\n", " <td>Database Administrator III</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3254</th>\n", " <td>3255</td>\n", " <td>Sutherlan</td>\n", " <td>Truin</td>\n", " <td>U</td>\n", " <td>47</td>\n", " <td>NaT</td>\n", " <td>Engineer IV</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3287</th>\n", " <td>3288</td>\n", " 
<td>Fair</td>\n", " <td>Dewen</td>\n", " <td>U</td>\n", " <td>47</td>\n", " <td>NaT</td>\n", " <td>Engineer III</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3297</th>\n", " <td>3298</td>\n", " <td>Christine</td>\n", " <td>Baignard</td>\n", " <td>U</td>\n", " <td>1</td>\n", " <td>NaT</td>\n", " <td>VP Quality Control</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3311</th>\n", " <td>3312</td>\n", " <td>Franky</td>\n", " <td>Nanninini</td>\n", " <td>U</td>\n", " <td>49</td>\n", " <td>NaT</td>\n", " <td>Administrative Officer</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3321</th>\n", " <td>3322</td>\n", " <td>Hew</td>\n", " <td>Sworder</td>\n", " <td>U</td>\n", " <td>24</td>\n", " <td>NaT</td>\n", " <td>Financial Analyst</td>\n", " <td>Financial Services</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3342</th>\n", " <td>3343</td>\n", " <td>Cristabel</td>\n", " <td>Bim</td>\n", " <td>U</td>\n", " <td>3</td>\n", " <td>NaT</td>\n", " <td>Recruiter</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3364</th>\n", " <td>3365</td>\n", " <td>Karlens</td>\n", " <td>Chaffyn</td>\n", " <td>U</td>\n", " <td>29</td>\n", " <td>NaT</td>\n", " <td>Engineer III</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3472</th>\n", " <td>3473</td>\n", " <td>Sanderson</td>\n", " <td>Alloway</td>\n", " <td>U</td>\n", " <td>34</td>\n", " <td>NaT</td>\n", " <td>Analog Circuit Design manager</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", 
" <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3509</th>\n", " <td>3510</td>\n", " <td>Jemima</td>\n", " <td>Izaac</td>\n", " <td>U</td>\n", " <td>48</td>\n", " <td>NaT</td>\n", " <td>Safety Technician II</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3512</th>\n", " <td>3513</td>\n", " <td>Enriqueta</td>\n", " <td>Waterhowse</td>\n", " <td>U</td>\n", " <td>80</td>\n", " <td>NaT</td>\n", " <td>Internal Auditor</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3564</th>\n", " <td>3565</td>\n", " <td>Charyl</td>\n", " <td>Pottiphar</td>\n", " <td>U</td>\n", " <td>14</td>\n", " <td>NaT</td>\n", " <td>Structural Engineer</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3653</th>\n", " <td>3654</td>\n", " <td>Kenyon</td>\n", " <td>Paddefield</td>\n", " <td>U</td>\n", " <td>78</td>\n", " <td>NaT</td>\n", " <td>Electrical Engineer</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3717</th>\n", " <td>3718</td>\n", " <td>Damiano</td>\n", " <td>None</td>\n", " <td>U</td>\n", " <td>22</td>\n", " <td>NaT</td>\n", " <td>Geologist IV</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3726</th>\n", " <td>3727</td>\n", " <td>Eba</td>\n", " <td>Youle</td>\n", " <td>U</td>\n", " <td>65</td>\n", " <td>NaT</td>\n", " <td>Assistant Professor</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3778</th>\n", " <td>3779</td>\n", " <td>Ulick</td>\n", " <td>Daspar</td>\n", " <td>U</td>\n", " <td>68</td>\n", " 
<td>NaT</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3882</th>\n", " <td>3883</td>\n", " <td>Nissa</td>\n", " <td>Conrad</td>\n", " <td>U</td>\n", " <td>35</td>\n", " <td>NaT</td>\n", " <td>Legal Assistant</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3930</th>\n", " <td>3931</td>\n", " <td>Kylie</td>\n", " <td>Epine</td>\n", " <td>U</td>\n", " <td>19</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3934</th>\n", " <td>3935</td>\n", " <td>Teodor</td>\n", " <td>Alfonsini</td>\n", " <td>U</td>\n", " <td>72</td>\n", " <td>NaT</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>3997</th>\n", " <td>3998</td>\n", " <td>Sarene</td>\n", " <td>Woolley</td>\n", " <td>U</td>\n", " <td>60</td>\n", " <td>NaT</td>\n", " <td>Assistant Manager</td>\n", " <td>IT</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>NaN</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>87 rows × 12 columns</p>\n", "</div>" ], "text/plain": [ " customer_id first_name last_name gender \\\n", "143 144 Jory Barrabeale U \n", "167 168 Reggie Broggetti U \n", "266 267 Edgar Buckler U \n", "289 290 Giorgio Kevane U \n", "450 451 Marlow Flowerdew U \n", "452 453 Cornelius Yarmouth U \n", "453 454 Eugenie Domenc U \n", "479 480 Darelle Ive U \n", "512 513 Kienan Soar U \n", "525 526 Ardelle None U \n", "547 548 Georgie Cudbertson U \n", "581 582 Rhoda McKeown U \n", "598 599 Ernestus Cruden U \n", "679 680 Gay Pickersgill U \n", "684 685 Booth Birkin U \n", "798 799 Harland Spilisy U \n", "838 839 Charis Greaves U \n", "882 883 Lolita 
Bennie U \n", "891 892 Conroy Healy U \n", "949 950 Bret Ivakhnov U \n", "974 975 Goldarina Rzehorz U \n", "982 983 Shaylyn Riggs U \n", "995 996 Aura Bemlott U \n", "1037 1038 Fraser Acome U \n", "1043 1044 Frederico Whilder U \n", "1081 1082 Guinevere Kelby U \n", "1173 1174 Shellysheldon Gooderridge U \n", "1209 1210 Shandie Sprigg U \n", "1243 1244 Glenn Tinham U \n", "1350 1351 Lorettalorna None U \n", "... ... ... ... ... \n", "2695 2696 Isabelle Bursnoll U \n", "2696 2697 Klarika Yerby U \n", "2853 2854 Vikky Dyde U \n", "2919 2920 Casar Ritchley U \n", "2962 2963 Christin Fricke U \n", "2998 2999 Rinaldo Diggin U \n", "3011 3012 Devland Probart U \n", "3085 3086 Pieter Gadesby U \n", "3150 3151 Thorn Choffin U \n", "3221 3222 Caralie Sellors U \n", "3222 3223 Tiffi Wortt U \n", "3254 3255 Sutherlan Truin U \n", "3287 3288 Fair Dewen U \n", "3297 3298 Christine Baignard U \n", "3311 3312 Franky Nanninini U \n", "3321 3322 Hew Sworder U \n", "3342 3343 Cristabel Bim U \n", "3364 3365 Karlens Chaffyn U \n", "3472 3473 Sanderson Alloway U \n", "3509 3510 Jemima Izaac U \n", "3512 3513 Enriqueta Waterhowse U \n", "3564 3565 Charyl Pottiphar U \n", "3653 3654 Kenyon Paddefield U \n", "3717 3718 Damiano None U \n", "3726 3727 Eba Youle U \n", "3778 3779 Ulick Daspar U \n", "3882 3883 Nissa Conrad U \n", "3930 3931 Kylie Epine U \n", "3934 3935 Teodor Alfonsini U \n", "3997 3998 Sarene Woolley U \n", "\n", " past_3_years_bike_related_purchases DOB \\\n", "143 71 NaT \n", "167 8 NaT \n", "266 53 NaT \n", "289 42 NaT \n", "450 37 NaT \n", "452 81 NaT \n", "453 58 NaT \n", "479 67 NaT \n", "512 30 NaT \n", "525 9 NaT \n", "547 84 NaT \n", "581 21 NaT \n", "598 48 NaT \n", "679 22 NaT \n", "684 28 NaT \n", "798 39 NaT \n", "838 14 NaT \n", "882 73 NaT \n", "891 22 NaT \n", "949 24 NaT \n", "974 26 NaT \n", "982 49 NaT \n", "995 67 NaT \n", "1037 57 NaT \n", "1043 4 NaT \n", "1081 90 NaT \n", "1173 9 NaT \n", "1209 81 NaT \n", "1243 80 NaT \n", "1350 32 NaT \n", "... 
... .. \n", "2695 42 NaT \n", "2696 70 NaT \n", "2853 49 NaT \n", "2919 0 NaT \n", "2962 17 NaT \n", "2998 28 NaT \n", "3011 81 NaT \n", "3085 18 NaT \n", "3150 20 NaT \n", "3221 40 NaT \n", "3222 44 NaT \n", "3254 47 NaT \n", "3287 47 NaT \n", "3297 1 NaT \n", "3311 49 NaT \n", "3321 24 NaT \n", "3342 3 NaT \n", "3364 29 NaT \n", "3472 34 NaT \n", "3509 48 NaT \n", "3512 80 NaT \n", "3564 14 NaT \n", "3653 78 NaT \n", "3717 22 NaT \n", "3726 65 NaT \n", "3778 68 NaT \n", "3882 35 NaT \n", "3930 19 NaT \n", "3934 72 NaT \n", "3997 60 NaT \n", "\n", " job_title job_industry_category \\\n", "143 Environmental Tech IT \n", "167 General Manager IT \n", "266 NaN IT \n", "289 Senior Sales Associate IT \n", "450 Quality Control Specialist IT \n", "452 Assistant Professor IT \n", "453 Research Nurse Health \n", "479 Registered Nurse Health \n", "512 Tax Accountant IT \n", "525 Social Worker Health \n", "547 NaN IT \n", "581 Staff Scientist IT \n", "598 Senior Financial Analyst Financial Services \n", "679 NaN IT \n", "684 Senior Developer IT \n", "798 Programmer I IT \n", "838 Structural Analysis Engineer IT \n", "882 Recruiter IT \n", "891 Office Assistant II IT \n", "949 Recruiter IT \n", "974 Automation Specialist IV IT \n", "982 NaN IT \n", "995 Assistant Manager IT \n", "1037 Engineer I Manufacturing \n", "1043 Food Chemist Health \n", "1081 Financial Analyst Financial Services \n", "1173 Executive Secretary IT \n", "1209 Programmer II IT \n", "1243 Financial Analyst Financial Services \n", "1350 Office Assistant IV IT \n", "... ... ... 
\n", "2695 Social Worker Health \n", "2696 Legal Assistant IT \n", "2853 Project Manager IT \n", "2919 Business Systems Development Analyst IT \n", "2962 Safety Technician II IT \n", "2998 Business Systems Development Analyst IT \n", "3011 Technical Writer IT \n", "3085 Biostatistician I IT \n", "3150 Senior Developer IT \n", "3221 Senior Editor IT \n", "3222 Database Administrator III IT \n", "3254 Engineer IV IT \n", "3287 Engineer III IT \n", "3297 VP Quality Control IT \n", "3311 Administrative Officer IT \n", "3321 Financial Analyst Financial Services \n", "3342 Recruiter IT \n", "3364 Engineer III IT \n", "3472 Analog Circuit Design manager IT \n", "3509 Safety Technician II IT \n", "3512 Internal Auditor IT \n", "3564 Structural Engineer IT \n", "3653 Electrical Engineer Manufacturing \n", "3717 Geologist IV IT \n", "3726 Assistant Professor IT \n", "3778 NaN IT \n", "3882 Legal Assistant IT \n", "3930 NaN IT \n", "3934 NaN IT \n", "3997 Assistant Manager IT \n", "\n", " wealth_segment deceased_indicator owns_car tenure \n", "143 Mass Customer N No NaN \n", "167 Affluent Customer N Yes NaN \n", "266 High Net Worth N No NaN \n", "289 Mass Customer N No NaN \n", "450 High Net Worth N No NaN \n", "452 High Net Worth N No NaN \n", "453 Affluent Customer N Yes NaN \n", "479 Mass Customer N Yes NaN \n", "512 Mass Customer N No NaN \n", "525 Mass Customer N Yes NaN \n", "547 High Net Worth N Yes NaN \n", "581 Affluent Customer N No NaN \n", "598 Mass Customer N Yes NaN \n", "679 High Net Worth N Yes NaN \n", "684 Mass Customer N No NaN \n", "798 Mass Customer N Yes NaN \n", "838 Mass Customer N Yes NaN \n", "882 Mass Customer N Yes NaN \n", "891 Mass Customer N Yes NaN \n", "949 High Net Worth N Yes NaN \n", "974 Mass Customer N No NaN \n", "982 Affluent Customer N No NaN \n", "995 Mass Customer N Yes NaN \n", "1037 Mass Customer N Yes NaN \n", "1043 High Net Worth N No NaN \n", "1081 Mass Customer N Yes NaN \n", "1173 Mass Customer N No NaN \n", "1209 Mass 
Customer N No NaN \n", "1243 Mass Customer N Yes NaN \n", "1350 High Net Worth N No NaN \n", "... ... ... ... ... \n", "2695 Mass Customer N Yes NaN \n", "2696 High Net Worth N No NaN \n", "2853 High Net Worth N Yes NaN \n", "2919 Mass Customer N Yes NaN \n", "2962 Affluent Customer N Yes NaN \n", "2998 Affluent Customer N Yes NaN \n", "3011 Mass Customer N Yes NaN \n", "3085 High Net Worth N No NaN \n", "3150 Affluent Customer N Yes NaN \n", "3221 Affluent Customer N No NaN \n", "3222 Mass Customer N Yes NaN \n", "3254 High Net Worth N No NaN \n", "3287 High Net Worth N No NaN \n", "3297 Affluent Customer N Yes NaN \n", "3311 High Net Worth N No NaN \n", "3321 Affluent Customer N Yes NaN \n", "3342 Mass Customer N Yes NaN \n", "3364 Mass Customer N No NaN \n", "3472 Mass Customer N No NaN \n", "3509 Affluent Customer N Yes NaN \n", "3512 Affluent Customer N Yes NaN \n", "3564 High Net Worth N Yes NaN \n", "3653 Mass Customer N No NaN \n", "3717 Mass Customer N Yes NaN \n", "3726 Mass Customer N No NaN \n", "3778 Affluent Customer N No NaN \n", "3882 Mass Customer N No NaN \n", "3930 High Net Worth N Yes NaN \n", "3934 High Net Worth N Yes NaN \n", "3997 High Net Worth N No NaN \n", "\n", "[87 rows x 12 columns]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo[cust_demo['DOB'].isnull()]" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "2.0" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "round(cust_demo['DOB'].isnull().mean()*100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>Since less than 5 % of data has null date of birth. 
we can remove the records where date of birth is null.</b>" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Int64Index([ 143, 167, 266, 289, 450, 452, 453, 479, 512, 525, 547,\n", " 581, 598, 679, 684, 798, 838, 882, 891, 949, 974, 982,\n", " 995, 1037, 1043, 1081, 1173, 1209, 1243, 1350, 1476, 1508, 1582,\n", " 1627, 1682, 1739, 1772, 1779, 1805, 1917, 1937, 1989, 1999, 2020,\n", " 2068, 2164, 2204, 2251, 2294, 2334, 2340, 2413, 2425, 2468, 2539,\n", " 2641, 2646, 2695, 2696, 2853, 2919, 2962, 2998, 3011, 3085, 3150,\n", " 3221, 3222, 3254, 3287, 3297, 3311, 3321, 3342, 3364, 3472, 3509,\n", " 3512, 3564, 3653, 3717, 3726, 3778, 3882, 3930, 3934, 3997],\n", " dtype='int64')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Index labels of the rows whose DOB is null.\n", "dob_index_drop = cust_demo.loc[cust_demo['DOB'].isnull()].index\n", "dob_index_drop" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "# Remove those rows from cust_demo in place.\n", "cust_demo.drop(index=dob_index_drop, inplace=True)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['DOB'].isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Currently there are no missing values for DOB column."
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Creating Age Column for checking further discrepancies in data" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# Age of the customer in completed years, derived from the DOB.\n", "# The reference date defaults to today; pass `today` explicitly to pin it so the\n", "# Age column is reproducible no matter when the notebook is re-run.\n", "\n", "def age(born, today=None):\n", "    \"\"\"Return the number of completed years between `born` and `today`.\n", "\n", "    born  -- date of birth; any object exposing .year, .month and .day.\n", "    today -- reference date with the same attributes; defaults to date.today().\n", "    \"\"\"\n", "    if today is None:\n", "        today = date.today()\n", "\n", "    # The tuple comparison is True (i.e. 1) when the birthday has not yet been\n", "    # reached in the reference year, in which case one year is subtracted.\n", "    return today.year - born.year - ((today.month, today.day) < (born.month, born.day))\n", "\n", "cust_demo['Age'] = cust_demo['DOB'].apply(age)" ] }, { "cell_type": "code", "execution_count": 58, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "<matplotlib.axes._subplots.AxesSubplot at 0x1ef4c9deb00>" ] }, "execution_count": 58, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "<Figure size 1440x576 with 1 Axes>" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Viz to find out the Age Distribution\n", "plt.figure(figsize=(20,8))\n", "sns.distplot(cust_demo['Age'], kde=False, bins=50)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>Statistics of the Age column</b>" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 3913.000000\n", "mean 43.346026\n", "std 12.803129\n", "min 19.000000\n", "25% 34.000000\n", "50% 43.000000\n", "75% 53.000000\n", "max 177.000000\n", "Name: Age, dtype: float64" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['Age'].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here we find there is only 1 customer with an age of 177. Clearly this is an outlier since the 75th percentile of Age is 53."
] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>gender</th>\n", " <th>past_3_years_bike_related_purchases</th>\n", " <th>DOB</th>\n", " <th>job_title</th>\n", " <th>job_industry_category</th>\n", " <th>wealth_segment</th>\n", " <th>deceased_indicator</th>\n", " <th>owns_car</th>\n", " <th>tenure</th>\n", " <th>Age</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>33</th>\n", " <td>34</td>\n", " <td>Jephthah</td>\n", " <td>Bachmann</td>\n", " <td>U</td>\n", " <td>59</td>\n", " <td>1843-12-21</td>\n", " <td>Legal Assistant</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>20.0</td>\n", " <td>177</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " customer_id first_name last_name gender \\\n", "33 34 Jephthah Bachmann U \n", "\n", " past_3_years_bike_related_purchases DOB job_title \\\n", "33 59 1843-12-21 Legal Assistant \n", "\n", " job_industry_category wealth_segment deceased_indicator owns_car \\\n", "33 IT Affluent Customer N No \n", "\n", " tenure Age \n", "33 20.0 177 " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo[cust_demo['Age'] > 100]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>Here we see a customer with age 177 which is an outlier. 
Hence we need to remove this record.</b>" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "age_index_drop = cust_demo[cust_demo['Age']>100].index\n", "\n", "cust_demo.drop(index=age_index_drop, inplace=True , axis=0)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.3 Tenure" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>When Date of Birth was Null the Tenure was also Null. Hence after removing null DOBs from the dataframe, null tenures were also removed.</b>" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['tenure'].isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There are no missing values for Tenure column." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.4 Job Title" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>gender</th>\n", " <th>past_3_years_bike_related_purchases</th>\n", " <th>DOB</th>\n", " <th>job_title</th>\n", " <th>job_industry_category</th>\n", " <th>wealth_segment</th>\n", " <th>deceased_indicator</th>\n", " <th>owns_car</th>\n", " <th>tenure</th>\n", " <th>Age</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Talbot</td>\n", " <td>None</td>\n", " <td>Male</td>\n", " <td>33</td>\n", " 
<td>1961-10-03</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>59</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>6</td>\n", " <td>Curr</td>\n", " <td>Duckhouse</td>\n", " <td>Male</td>\n", " <td>35</td>\n", " <td>1966-09-16</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " <td>54</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>7</td>\n", " <td>Fina</td>\n", " <td>Merali</td>\n", " <td>Female</td>\n", " <td>6</td>\n", " <td>1976-02-23</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>11.0</td>\n", " <td>45</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>11</td>\n", " <td>Uriah</td>\n", " <td>Bisatt</td>\n", " <td>Male</td>\n", " <td>99</td>\n", " <td>1954-04-30</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>9.0</td>\n", " <td>67</td>\n", " </tr>\n", " <tr>\n", " <th>21</th>\n", " <td>22</td>\n", " <td>Deeanne</td>\n", " <td>Durtnell</td>\n", " <td>Female</td>\n", " <td>79</td>\n", " <td>1962-12-10</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>11.0</td>\n", " <td>58</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>23</td>\n", " <td>Olav</td>\n", " <td>Polak</td>\n", " <td>Male</td>\n", " <td>43</td>\n", " <td>1995-02-10</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>1.0</td>\n", " <td>26</td>\n", " </tr>\n", " <tr>\n", " <th>29</th>\n", " <td>30</td>\n", " <td>Darrick</td>\n", " <td>Helleckas</td>\n", " <td>Male</td>\n", " <td>18</td>\n", " <td>1961-10-18</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", 
" <td>6.0</td>\n", " <td>59</td>\n", " </tr>\n", " <tr>\n", " <th>45</th>\n", " <td>46</td>\n", " <td>Kaila</td>\n", " <td>Allin</td>\n", " <td>Female</td>\n", " <td>98</td>\n", " <td>1972-02-26</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>15.0</td>\n", " <td>49</td>\n", " </tr>\n", " <tr>\n", " <th>51</th>\n", " <td>52</td>\n", " <td>Curran</td>\n", " <td>Bentson</td>\n", " <td>Male</td>\n", " <td>57</td>\n", " <td>1988-06-22</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " <td>32</td>\n", " </tr>\n", " <tr>\n", " <th>59</th>\n", " <td>60</td>\n", " <td>Nadiya</td>\n", " <td>Champerlen</td>\n", " <td>Female</td>\n", " <td>18</td>\n", " <td>1970-02-04</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>10.0</td>\n", " <td>51</td>\n", " </tr>\n", " <tr>\n", " <th>61</th>\n", " <td>62</td>\n", " <td>Sorcha</td>\n", " <td>Roggers</td>\n", " <td>Female</td>\n", " <td>38</td>\n", " <td>1979-07-06</td>\n", " <td>NaN</td>\n", " <td>IT</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>22.0</td>\n", " <td>41</td>\n", " </tr>\n", " <tr>\n", " <th>73</th>\n", " <td>74</td>\n", " <td>Pansy</td>\n", " <td>Kiddie</td>\n", " <td>Female</td>\n", " <td>94</td>\n", " <td>1969-06-19</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>6.0</td>\n", " <td>51</td>\n", " </tr>\n", " <tr>\n", " <th>80</th>\n", " <td>81</td>\n", " <td>Bee</td>\n", " <td>Blazewicz</td>\n", " <td>Female</td>\n", " <td>58</td>\n", " <td>1986-09-04</td>\n", " <td>NaN</td>\n", " <td>Health</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>13.0</td>\n", " <td>34</td>\n", " </tr>\n", " <tr>\n", " <th>107</th>\n", " <td>108</td>\n", " 
<td>Kayle</td>\n", " <td>Mingaud</td>\n", " <td>Female</td>\n", " <td>4</td>\n", " <td>1994-03-14</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>3.0</td>\n", " <td>27</td>\n", " </tr>\n", " <tr>\n", " <th>109</th>\n", " <td>110</td>\n", " <td>Sascha</td>\n", " <td>St. Quintin</td>\n", " <td>Male</td>\n", " <td>23</td>\n", " <td>2000-07-31</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>1.0</td>\n", " <td>20</td>\n", " </tr>\n", " <tr>\n", " <th>160</th>\n", " <td>161</td>\n", " <td>Tadd</td>\n", " <td>Bloss</td>\n", " <td>Male</td>\n", " <td>49</td>\n", " <td>1976-01-21</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>16.0</td>\n", " <td>45</td>\n", " </tr>\n", " <tr>\n", " <th>166</th>\n", " <td>167</td>\n", " <td>Nathalie</td>\n", " <td>Tideswell</td>\n", " <td>Female</td>\n", " <td>95</td>\n", " <td>1969-10-27</td>\n", " <td>NaN</td>\n", " <td>Health</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>17.0</td>\n", " <td>51</td>\n", " </tr>\n", " <tr>\n", " <th>177</th>\n", " <td>178</td>\n", " <td>Matthieu</td>\n", " <td>Bertelmot</td>\n", " <td>Male</td>\n", " <td>2</td>\n", " <td>1967-04-03</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>8.0</td>\n", " <td>54</td>\n", " </tr>\n", " <tr>\n", " <th>184</th>\n", " <td>185</td>\n", " <td>Crosby</td>\n", " <td>Walcot</td>\n", " <td>Male</td>\n", " <td>80</td>\n", " <td>1979-12-13</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " <td>41</td>\n", " </tr>\n", " <tr>\n", " <th>196</th>\n", " <td>197</td>\n", " <td>Avis</td>\n", " <td>None</td>\n", " <td>Female</td>\n", " <td>32</td>\n", " 
<td>1977-01-27</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>5.0</td>\n", " <td>44</td>\n", " </tr>\n", " <tr>\n", " <th>206</th>\n", " <td>207</td>\n", " <td>Adena</td>\n", " <td>Whyman</td>\n", " <td>Female</td>\n", " <td>9</td>\n", " <td>1994-08-10</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>26</td>\n", " </tr>\n", " <tr>\n", " <th>216</th>\n", " <td>217</td>\n", " <td>Jeralee</td>\n", " <td>Quartly</td>\n", " <td>Female</td>\n", " <td>63</td>\n", " <td>1979-12-09</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>16.0</td>\n", " <td>41</td>\n", " </tr>\n", " <tr>\n", " <th>228</th>\n", " <td>229</td>\n", " <td>Vaughn</td>\n", " <td>Lambis</td>\n", " <td>Male</td>\n", " <td>30</td>\n", " <td>1966-03-06</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>19.0</td>\n", " <td>55</td>\n", " </tr>\n", " <tr>\n", " <th>243</th>\n", " <td>244</td>\n", " <td>Germayne</td>\n", " <td>Sperry</td>\n", " <td>Male</td>\n", " <td>57</td>\n", " <td>1974-11-25</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>8.0</td>\n", " <td>46</td>\n", " </tr>\n", " <tr>\n", " <th>261</th>\n", " <td>262</td>\n", " <td>Cordie</td>\n", " <td>Petrelli</td>\n", " <td>Male</td>\n", " <td>97</td>\n", " <td>1977-12-23</td>\n", " <td>NaN</td>\n", " <td>Health</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>43</td>\n", " </tr>\n", " <tr>\n", " <th>275</th>\n", " <td>276</td>\n", " <td>Goldi</td>\n", " <td>Dwine</td>\n", " <td>Female</td>\n", " <td>47</td>\n", " <td>1990-03-25</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " 
<td>N</td>\n", " <td>No</td>\n", " <td>22.0</td>\n", " <td>31</td>\n", " </tr>\n", " <tr>\n", " <th>287</th>\n", " <td>288</td>\n", " <td>Ebenezer</td>\n", " <td>Seedman</td>\n", " <td>Male</td>\n", " <td>71</td>\n", " <td>1985-09-08</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>9.0</td>\n", " <td>35</td>\n", " </tr>\n", " <tr>\n", " <th>295</th>\n", " <td>296</td>\n", " <td>Marshal</td>\n", " <td>Rathbone</td>\n", " <td>Male</td>\n", " <td>34</td>\n", " <td>1972-06-19</td>\n", " <td>NaN</td>\n", " <td>Health</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>17.0</td>\n", " <td>48</td>\n", " </tr>\n", " <tr>\n", " <th>301</th>\n", " <td>302</td>\n", " <td>Laurice</td>\n", " <td>Colgrave</td>\n", " <td>Female</td>\n", " <td>32</td>\n", " <td>1977-03-27</td>\n", " <td>NaN</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>13.0</td>\n", " <td>44</td>\n", " </tr>\n", " <tr>\n", " <th>318</th>\n", " <td>319</td>\n", " <td>Madelle</td>\n", " <td>Matteris</td>\n", " <td>Female</td>\n", " <td>32</td>\n", " <td>1971-10-11</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>14.0</td>\n", " <td>49</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>3797</th>\n", " <td>3798</td>\n", " <td>Yorker</td>\n", " <td>Dennison</td>\n", " <td>Male</td>\n", " <td>13</td>\n", " <td>1968-02-22</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>17.0</td>\n", " <td>53</td>\n", " </tr>\n", " <tr>\n", " 
<th>3803</th>\n", " <td>3804</td>\n", " <td>Andria</td>\n", " <td>Keays</td>\n", " <td>Female</td>\n", " <td>23</td>\n", " <td>1986-08-21</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>4.0</td>\n", " <td>34</td>\n", " </tr>\n", " <tr>\n", " <th>3805</th>\n", " <td>3806</td>\n", " <td>Ado</td>\n", " <td>Gailor</td>\n", " <td>Male</td>\n", " <td>1</td>\n", " <td>1954-02-08</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>67</td>\n", " </tr>\n", " <tr>\n", " <th>3810</th>\n", " <td>3811</td>\n", " <td>Etta</td>\n", " <td>Leele</td>\n", " <td>Female</td>\n", " <td>60</td>\n", " <td>1997-03-19</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>4.0</td>\n", " <td>24</td>\n", " </tr>\n", " <tr>\n", " <th>3821</th>\n", " <td>3822</td>\n", " <td>Conny</td>\n", " <td>Speechley</td>\n", " <td>Male</td>\n", " <td>37</td>\n", " <td>1959-03-09</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>18.0</td>\n", " <td>62</td>\n", " </tr>\n", " <tr>\n", " <th>3823</th>\n", " <td>3824</td>\n", " <td>Giffard</td>\n", " <td>Stollman</td>\n", " <td>Male</td>\n", " <td>33</td>\n", " <td>1994-11-21</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>3.0</td>\n", " <td>26</td>\n", " </tr>\n", " <tr>\n", " <th>3825</th>\n", " <td>3826</td>\n", " <td>Marlow</td>\n", " <td>Balffye</td>\n", " <td>Male</td>\n", " <td>33</td>\n", " <td>1978-09-25</td>\n", " <td>NaN</td>\n", " <td>Health</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>42</td>\n", " </tr>\n", " <tr>\n", " <th>3826</th>\n", " <td>3827</td>\n", " <td>Cherida</td>\n", " <td>Whyffen</td>\n", 
" <td>Female</td>\n", " <td>10</td>\n", " <td>1976-09-05</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>8.0</td>\n", " <td>44</td>\n", " </tr>\n", " <tr>\n", " <th>3839</th>\n", " <td>3840</td>\n", " <td>Marc</td>\n", " <td>Torrans</td>\n", " <td>Male</td>\n", " <td>27</td>\n", " <td>1962-09-30</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>5.0</td>\n", " <td>58</td>\n", " </tr>\n", " <tr>\n", " <th>3843</th>\n", " <td>3844</td>\n", " <td>Clotilda</td>\n", " <td>Oret</td>\n", " <td>Female</td>\n", " <td>87</td>\n", " <td>1987-12-06</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>15.0</td>\n", " <td>33</td>\n", " </tr>\n", " <tr>\n", " <th>3864</th>\n", " <td>3865</td>\n", " <td>Urbanus</td>\n", " <td>Fuxman</td>\n", " <td>Male</td>\n", " <td>49</td>\n", " <td>1978-03-15</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>11.0</td>\n", " <td>43</td>\n", " </tr>\n", " <tr>\n", " <th>3880</th>\n", " <td>3881</td>\n", " <td>Olivie</td>\n", " <td>Nazair</td>\n", " <td>Female</td>\n", " <td>50</td>\n", " <td>1971-01-12</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>18.0</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>3892</th>\n", " <td>3893</td>\n", " <td>Hadria</td>\n", " <td>Moles</td>\n", " <td>Female</td>\n", " <td>7</td>\n", " <td>1996-11-18</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>4.0</td>\n", " <td>24</td>\n", " </tr>\n", " <tr>\n", " <th>3908</th>\n", " <td>3909</td>\n", " <td>Micheil</td>\n", " <td>McGeorge</td>\n", " <td>Male</td>\n", " <td>1</td>\n", " <td>1987-10-04</td>\n", " 
<td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>18.0</td>\n", " <td>33</td>\n", " </tr>\n", " <tr>\n", " <th>3915</th>\n", " <td>3916</td>\n", " <td>Myrtia</td>\n", " <td>None</td>\n", " <td>Female</td>\n", " <td>31</td>\n", " <td>1958-10-17</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>17.0</td>\n", " <td>62</td>\n", " </tr>\n", " <tr>\n", " <th>3927</th>\n", " <td>3928</td>\n", " <td>Kristin</td>\n", " <td>Way</td>\n", " <td>Female</td>\n", " <td>71</td>\n", " <td>1982-04-16</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>6.0</td>\n", " <td>39</td>\n", " </tr>\n", " <tr>\n", " <th>3928</th>\n", " <td>3929</td>\n", " <td>Jacqui</td>\n", " <td>Fortnam</td>\n", " <td>Female</td>\n", " <td>50</td>\n", " <td>1989-10-18</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>31</td>\n", " </tr>\n", " <tr>\n", " <th>3929</th>\n", " <td>3930</td>\n", " <td>Blancha</td>\n", " <td>Baldi</td>\n", " <td>Female</td>\n", " <td>43</td>\n", " <td>1988-01-06</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>22.0</td>\n", " <td>33</td>\n", " </tr>\n", " <tr>\n", " <th>3932</th>\n", " <td>3933</td>\n", " <td>Chiarra</td>\n", " <td>Cops</td>\n", " <td>Female</td>\n", " <td>65</td>\n", " <td>1983-07-05</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>37</td>\n", " </tr>\n", " <tr>\n", " <th>3938</th>\n", " <td>3939</td>\n", " <td>Georges</td>\n", " <td>Dumbelton</td>\n", " <td>Male</td>\n", " <td>67</td>\n", " <td>1981-06-25</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent 
Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>15.0</td>\n", " <td>39</td>\n", " </tr>\n", " <tr>\n", " <th>3944</th>\n", " <td>3945</td>\n", " <td>Lazarus</td>\n", " <td>Donaghy</td>\n", " <td>Male</td>\n", " <td>77</td>\n", " <td>1994-10-21</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>26</td>\n", " </tr>\n", " <tr>\n", " <th>3945</th>\n", " <td>3946</td>\n", " <td>Wylie</td>\n", " <td>FitzGilbert</td>\n", " <td>Male</td>\n", " <td>85</td>\n", " <td>1960-06-23</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>60</td>\n", " </tr>\n", " <tr>\n", " <th>3951</th>\n", " <td>3952</td>\n", " <td>Di</td>\n", " <td>Borsnall</td>\n", " <td>Female</td>\n", " <td>96</td>\n", " <td>1968-05-09</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>10.0</td>\n", " <td>53</td>\n", " </tr>\n", " <tr>\n", " <th>3958</th>\n", " <td>3959</td>\n", " <td>Dannie</td>\n", " <td>Sowray</td>\n", " <td>Male</td>\n", " <td>76</td>\n", " <td>1992-12-07</td>\n", " <td>NaN</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>3.0</td>\n", " <td>28</td>\n", " </tr>\n", " <tr>\n", " <th>3959</th>\n", " <td>3960</td>\n", " <td>Hobart</td>\n", " <td>Burgan</td>\n", " <td>Male</td>\n", " <td>6</td>\n", " <td>2000-03-16</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>1.0</td>\n", " <td>21</td>\n", " </tr>\n", " <tr>\n", " <th>3967</th>\n", " <td>3968</td>\n", " <td>Alexandra</td>\n", " <td>Kroch</td>\n", " <td>Female</td>\n", " <td>99</td>\n", " <td>1977-12-22</td>\n", " <td>NaN</td>\n", " <td>Property</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>22.0</td>\n", " <td>43</td>\n", " 
</tr>\n", " <tr>\n", " <th>3971</th>\n", " <td>3972</td>\n", " <td>Maribelle</td>\n", " <td>Schaffel</td>\n", " <td>Female</td>\n", " <td>6</td>\n", " <td>1979-03-28</td>\n", " <td>NaN</td>\n", " <td>Retail</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>8.0</td>\n", " <td>42</td>\n", " </tr>\n", " <tr>\n", " <th>3978</th>\n", " <td>3979</td>\n", " <td>Kleon</td>\n", " <td>Adam</td>\n", " <td>Male</td>\n", " <td>67</td>\n", " <td>1974-07-13</td>\n", " <td>NaN</td>\n", " <td>Financial Services</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>18.0</td>\n", " <td>46</td>\n", " </tr>\n", " <tr>\n", " <th>3986</th>\n", " <td>3987</td>\n", " <td>Beckie</td>\n", " <td>Wakeham</td>\n", " <td>Female</td>\n", " <td>18</td>\n", " <td>1964-05-29</td>\n", " <td>NaN</td>\n", " <td>Argiculture</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>56</td>\n", " </tr>\n", " <tr>\n", " <th>3998</th>\n", " <td>3999</td>\n", " <td>Patrizius</td>\n", " <td>None</td>\n", " <td>Male</td>\n", " <td>11</td>\n", " <td>1973-10-24</td>\n", " <td>NaN</td>\n", " <td>Manufacturing</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>47</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>497 rows × 13 columns</p>\n", "</div>" ], "text/plain": [ " customer_id first_name last_name gender \\\n", "3 4 Talbot None Male \n", "5 6 Curr Duckhouse Male \n", "6 7 Fina Merali Female \n", "10 11 Uriah Bisatt Male \n", "21 22 Deeanne Durtnell Female \n", "22 23 Olav Polak Male \n", "29 30 Darrick Helleckas Male \n", "45 46 Kaila Allin Female \n", "51 52 Curran Bentson Male \n", "59 60 Nadiya Champerlen Female \n", "61 62 Sorcha Roggers Female \n", "73 74 Pansy Kiddie Female \n", "80 81 Bee Blazewicz Female \n", "107 108 Kayle Mingaud Female \n", "109 110 Sascha St. 
Quintin Male \n", "160 161 Tadd Bloss Male \n", "166 167 Nathalie Tideswell Female \n", "177 178 Matthieu Bertelmot Male \n", "184 185 Crosby Walcot Male \n", "196 197 Avis None Female \n", "206 207 Adena Whyman Female \n", "216 217 Jeralee Quartly Female \n", "228 229 Vaughn Lambis Male \n", "243 244 Germayne Sperry Male \n", "261 262 Cordie Petrelli Male \n", "275 276 Goldi Dwine Female \n", "287 288 Ebenezer Seedman Male \n", "295 296 Marshal Rathbone Male \n", "301 302 Laurice Colgrave Female \n", "318 319 Madelle Matteris Female \n", "... ... ... ... ... \n", "3797 3798 Yorker Dennison Male \n", "3803 3804 Andria Keays Female \n", "3805 3806 Ado Gailor Male \n", "3810 3811 Etta Leele Female \n", "3821 3822 Conny Speechley Male \n", "3823 3824 Giffard Stollman Male \n", "3825 3826 Marlow Balffye Male \n", "3826 3827 Cherida Whyffen Female \n", "3839 3840 Marc Torrans Male \n", "3843 3844 Clotilda Oret Female \n", "3864 3865 Urbanus Fuxman Male \n", "3880 3881 Olivie Nazair Female \n", "3892 3893 Hadria Moles Female \n", "3908 3909 Micheil McGeorge Male \n", "3915 3916 Myrtia None Female \n", "3927 3928 Kristin Way Female \n", "3928 3929 Jacqui Fortnam Female \n", "3929 3930 Blancha Baldi Female \n", "3932 3933 Chiarra Cops Female \n", "3938 3939 Georges Dumbelton Male \n", "3944 3945 Lazarus Donaghy Male \n", "3945 3946 Wylie FitzGilbert Male \n", "3951 3952 Di Borsnall Female \n", "3958 3959 Dannie Sowray Male \n", "3959 3960 Hobart Burgan Male \n", "3967 3968 Alexandra Kroch Female \n", "3971 3972 Maribelle Schaffel Female \n", "3978 3979 Kleon Adam Male \n", "3986 3987 Beckie Wakeham Female \n", "3998 3999 Patrizius None Male \n", "\n", " past_3_years_bike_related_purchases DOB job_title \\\n", "3 33 1961-10-03 NaN \n", "5 35 1966-09-16 NaN \n", "6 6 1976-02-23 NaN \n", "10 99 1954-04-30 NaN \n", "21 79 1962-12-10 NaN \n", "22 43 1995-02-10 NaN \n", "29 18 1961-10-18 NaN \n", "45 98 1972-02-26 NaN \n", "51 57 1988-06-22 NaN \n", "59 18 1970-02-04 NaN \n", 
"61 38 1979-07-06 NaN \n", "73 94 1969-06-19 NaN \n", "80 58 1986-09-04 NaN \n", "107 4 1994-03-14 NaN \n", "109 23 2000-07-31 NaN \n", "160 49 1976-01-21 NaN \n", "166 95 1969-10-27 NaN \n", "177 2 1967-04-03 NaN \n", "184 80 1979-12-13 NaN \n", "196 32 1977-01-27 NaN \n", "206 9 1994-08-10 NaN \n", "216 63 1979-12-09 NaN \n", "228 30 1966-03-06 NaN \n", "243 57 1974-11-25 NaN \n", "261 97 1977-12-23 NaN \n", "275 47 1990-03-25 NaN \n", "287 71 1985-09-08 NaN \n", "295 34 1972-06-19 NaN \n", "301 32 1977-03-27 NaN \n", "318 32 1971-10-11 NaN \n", "... ... ... ... \n", "3797 13 1968-02-22 NaN \n", "3803 23 1986-08-21 NaN \n", "3805 1 1954-02-08 NaN \n", "3810 60 1997-03-19 NaN \n", "3821 37 1959-03-09 NaN \n", "3823 33 1994-11-21 NaN \n", "3825 33 1978-09-25 NaN \n", "3826 10 1976-09-05 NaN \n", "3839 27 1962-09-30 NaN \n", "3843 87 1987-12-06 NaN \n", "3864 49 1978-03-15 NaN \n", "3880 50 1971-01-12 NaN \n", "3892 7 1996-11-18 NaN \n", "3908 1 1987-10-04 NaN \n", "3915 31 1958-10-17 NaN \n", "3927 71 1982-04-16 NaN \n", "3928 50 1989-10-18 NaN \n", "3929 43 1988-01-06 NaN \n", "3932 65 1983-07-05 NaN \n", "3938 67 1981-06-25 NaN \n", "3944 77 1994-10-21 NaN \n", "3945 85 1960-06-23 NaN \n", "3951 96 1968-05-09 NaN \n", "3958 76 1992-12-07 NaN \n", "3959 6 2000-03-16 NaN \n", "3967 99 1977-12-22 NaN \n", "3971 6 1979-03-28 NaN \n", "3978 67 1974-07-13 NaN \n", "3986 18 1964-05-29 NaN \n", "3998 11 1973-10-24 NaN \n", "\n", " job_industry_category wealth_segment deceased_indicator owns_car \\\n", "3 IT Mass Customer N No \n", "5 Retail High Net Worth N Yes \n", "6 Financial Services Affluent Customer N Yes \n", "10 Property Mass Customer N No \n", "21 IT Mass Customer N No \n", "22 NaN High Net Worth N Yes \n", "29 IT Affluent Customer N Yes \n", "45 NaN Affluent Customer N Yes \n", "51 Financial Services Mass Customer N Yes \n", "59 Manufacturing Mass Customer N No \n", "61 IT Mass Customer N Yes \n", "73 NaN Mass Customer N Yes \n", "80 Health High Net Worth N No 
\n", "107 NaN High Net Worth N No \n", "109 Financial Services Affluent Customer N No \n", "160 NaN Mass Customer N No \n", "166 Health High Net Worth N Yes \n", "177 NaN Affluent Customer N No \n", "184 Property Mass Customer N Yes \n", "196 NaN High Net Worth N No \n", "206 NaN Mass Customer N No \n", "216 Manufacturing High Net Worth N No \n", "228 Property High Net Worth N No \n", "243 Retail Affluent Customer N No \n", "261 Health High Net Worth N Yes \n", "275 Financial Services Mass Customer N No \n", "287 Manufacturing High Net Worth N No \n", "295 Health High Net Worth N Yes \n", "301 Health Mass Customer N No \n", "318 Retail Mass Customer N Yes \n", "... ... ... ... ... \n", "3797 Manufacturing Mass Customer N Yes \n", "3803 Manufacturing Mass Customer N Yes \n", "3805 Property Mass Customer N No \n", "3810 Financial Services High Net Worth N No \n", "3821 Manufacturing High Net Worth N Yes \n", "3823 Property Mass Customer N No \n", "3825 Health Mass Customer N No \n", "3826 Retail Affluent Customer N No \n", "3839 NaN High Net Worth N No \n", "3843 Manufacturing Affluent Customer N No \n", "3864 Manufacturing Mass Customer N Yes \n", "3880 Financial Services Affluent Customer N No \n", "3892 NaN High Net Worth N Yes \n", "3908 Manufacturing High Net Worth N Yes \n", "3915 Retail Affluent Customer N Yes \n", "3927 Property Affluent Customer N Yes \n", "3928 NaN Affluent Customer N Yes \n", "3929 Financial Services High Net Worth N No \n", "3932 NaN High Net Worth N Yes \n", "3938 Manufacturing Affluent Customer N No \n", "3944 Retail High Net Worth N No \n", "3945 Retail High Net Worth N Yes \n", "3951 Manufacturing Affluent Customer N No \n", "3958 NaN Mass Customer N No \n", "3959 Property Mass Customer N No \n", "3967 Property High Net Worth N No \n", "3971 Retail Mass Customer N No \n", "3978 Financial Services Mass Customer N Yes \n", "3986 Argiculture Mass Customer N No \n", "3998 Manufacturing Affluent Customer N Yes \n", "\n", " tenure Age \n", 
"3 7.0 59 \n", "5 13.0 54 \n", "6 11.0 45 \n", "10 9.0 67 \n", "21 11.0 58 \n", "22 1.0 26 \n", "29 6.0 59 \n", "45 15.0 49 \n", "51 13.0 32 \n", "59 10.0 51 \n", "61 22.0 41 \n", "73 6.0 51 \n", "80 13.0 34 \n", "107 3.0 27 \n", "109 1.0 20 \n", "160 16.0 45 \n", "166 17.0 51 \n", "177 8.0 54 \n", "184 13.0 41 \n", "196 5.0 44 \n", "206 7.0 26 \n", "216 16.0 41 \n", "228 19.0 55 \n", "243 8.0 46 \n", "261 10.0 43 \n", "275 22.0 31 \n", "287 9.0 35 \n", "295 17.0 48 \n", "301 13.0 44 \n", "318 14.0 49 \n", "... ... ... \n", "3797 17.0 53 \n", "3803 4.0 34 \n", "3805 7.0 67 \n", "3810 4.0 24 \n", "3821 18.0 62 \n", "3823 3.0 26 \n", "3825 7.0 42 \n", "3826 8.0 44 \n", "3839 5.0 58 \n", "3843 15.0 33 \n", "3864 11.0 43 \n", "3880 18.0 50 \n", "3892 4.0 24 \n", "3908 18.0 33 \n", "3915 17.0 62 \n", "3927 6.0 39 \n", "3928 10.0 31 \n", "3929 22.0 33 \n", "3932 10.0 37 \n", "3938 15.0 39 \n", "3944 7.0 26 \n", "3945 10.0 60 \n", "3951 10.0 53 \n", "3958 3.0 28 \n", "3959 1.0 21 \n", "3967 22.0 43 \n", "3971 8.0 42 \n", "3978 18.0 46 \n", "3986 7.0 56 \n", "3998 10.0 47 \n", "\n", "[497 rows x 13 columns]" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Fetching records where Job Title is missing.\n", "\n", "cust_demo[cust_demo['job_title'].isnull()]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>Since the percentage of missing job titles is 13%, we will replace null values with 'Missing'.</b>" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "cust_demo['job_title'].fillna('Missing', inplace=True, axis=0)" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['job_title'].isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Currently there are no missing values for the job_title column." 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2.5 Job Industry Category" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>gender</th>\n", " <th>past_3_years_bike_related_purchases</th>\n", " <th>DOB</th>\n", " <th>job_title</th>\n", " <th>job_industry_category</th>\n", " <th>wealth_segment</th>\n", " <th>deceased_indicator</th>\n", " <th>owns_car</th>\n", " <th>tenure</th>\n", " <th>Age</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>Sheila-kathryn</td>\n", " <td>Calton</td>\n", " <td>Female</td>\n", " <td>56</td>\n", " <td>1977-05-13</td>\n", " <td>Senior Editor</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>8.0</td>\n", " <td>44</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>8</td>\n", " <td>Rod</td>\n", " <td>Inder</td>\n", " <td>Male</td>\n", " <td>31</td>\n", " <td>1962-03-30</td>\n", " <td>Media Manager I</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>59</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>16</td>\n", " <td>Harlin</td>\n", " <td>Parr</td>\n", " <td>Male</td>\n", " <td>38</td>\n", " <td>1977-02-27</td>\n", " <td>Media Manager IV</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>18.0</td>\n", " <td>44</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>17</td>\n", " 
<td>Heath</td>\n", " <td>Faraday</td>\n", " <td>Male</td>\n", " <td>57</td>\n", " <td>1962-03-19</td>\n", " <td>Sales Associate</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>15.0</td>\n", " <td>59</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>18</td>\n", " <td>Marjie</td>\n", " <td>Neasham</td>\n", " <td>Female</td>\n", " <td>79</td>\n", " <td>1967-07-06</td>\n", " <td>Professor</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>11.0</td>\n", " <td>53</td>\n", " </tr>\n", " <tr>\n", " <th>22</th>\n", " <td>23</td>\n", " <td>Olav</td>\n", " <td>Polak</td>\n", " <td>Male</td>\n", " <td>43</td>\n", " <td>1995-02-10</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>1.0</td>\n", " <td>26</td>\n", " </tr>\n", " <tr>\n", " <th>32</th>\n", " <td>33</td>\n", " <td>Ernst</td>\n", " <td>Hacon</td>\n", " <td>Male</td>\n", " <td>44</td>\n", " <td>1957-06-25</td>\n", " <td>Product Engineer</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>11.0</td>\n", " <td>63</td>\n", " </tr>\n", " <tr>\n", " <th>35</th>\n", " <td>36</td>\n", " <td>Lurette</td>\n", " <td>Stonnell</td>\n", " <td>Female</td>\n", " <td>33</td>\n", " <td>1977-11-09</td>\n", " <td>VP Quality Control</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>22.0</td>\n", " <td>43</td>\n", " </tr>\n", " <tr>\n", " <th>45</th>\n", " <td>46</td>\n", " <td>Kaila</td>\n", " <td>Allin</td>\n", " <td>Female</td>\n", " <td>98</td>\n", " <td>1972-02-26</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>15.0</td>\n", " <td>49</td>\n", " </tr>\n", " <tr>\n", " <th>47</th>\n", " <td>48</td>\n", " <td>Rebbecca</td>\n", " <td>Casone</td>\n", " <td>Female</td>\n", " 
<td>46</td>\n", " <td>1975-08-15</td>\n", " <td>Biostatistician II</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>8.0</td>\n", " <td>45</td>\n", " </tr>\n", " <tr>\n", " <th>48</th>\n", " <td>49</td>\n", " <td>Nolly</td>\n", " <td>Ownsworth</td>\n", " <td>Male</td>\n", " <td>63</td>\n", " <td>1994-01-26</td>\n", " <td>VP Quality Control</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>1.0</td>\n", " <td>27</td>\n", " </tr>\n", " <tr>\n", " <th>56</th>\n", " <td>57</td>\n", " <td>Abba</td>\n", " <td>Masedon</td>\n", " <td>M</td>\n", " <td>87</td>\n", " <td>1988-06-13</td>\n", " <td>Chief Design Engineer</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " <td>32</td>\n", " </tr>\n", " <tr>\n", " <th>58</th>\n", " <td>59</td>\n", " <td>Niki</td>\n", " <td>Heathcote</td>\n", " <td>Male</td>\n", " <td>60</td>\n", " <td>2000-02-08</td>\n", " <td>Physical Therapy Assistant</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>3.0</td>\n", " <td>21</td>\n", " </tr>\n", " <tr>\n", " <th>67</th>\n", " <td>68</td>\n", " <td>Dahlia</td>\n", " <td>Eddoes</td>\n", " <td>Female</td>\n", " <td>37</td>\n", " <td>1974-04-21</td>\n", " <td>Information Systems Manager</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>9.0</td>\n", " <td>47</td>\n", " </tr>\n", " <tr>\n", " <th>68</th>\n", " <td>69</td>\n", " <td>Heidi</td>\n", " <td>Milner</td>\n", " <td>Female</td>\n", " <td>16</td>\n", " <td>1969-06-22</td>\n", " <td>Web Developer II</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>6.0</td>\n", " <td>51</td>\n", " </tr>\n", " <tr>\n", " <th>72</th>\n", " <td>73</td>\n", " <td>Minette</td>\n", " <td>Worters</td>\n", " <td>Female</td>\n", " <td>16</td>\n", " 
<td>1960-05-27</td>\n", " <td>Teacher</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>5.0</td>\n", " <td>60</td>\n", " </tr>\n", " <tr>\n", " <th>73</th>\n", " <td>74</td>\n", " <td>Pansy</td>\n", " <td>Kiddie</td>\n", " <td>Female</td>\n", " <td>94</td>\n", " <td>1969-06-19</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>6.0</td>\n", " <td>51</td>\n", " </tr>\n", " <tr>\n", " <th>83</th>\n", " <td>84</td>\n", " <td>Rich</td>\n", " <td>Mathiasen</td>\n", " <td>Male</td>\n", " <td>78</td>\n", " <td>1958-02-07</td>\n", " <td>Accountant III</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>14.0</td>\n", " <td>63</td>\n", " </tr>\n", " <tr>\n", " <th>84</th>\n", " <td>85</td>\n", " <td>Kane</td>\n", " <td>Tixall</td>\n", " <td>Male</td>\n", " <td>1</td>\n", " <td>1958-05-21</td>\n", " <td>Analyst Programmer</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>8.0</td>\n", " <td>62</td>\n", " </tr>\n", " <tr>\n", " <th>107</th>\n", " <td>108</td>\n", " <td>Kayle</td>\n", " <td>Mingaud</td>\n", " <td>Female</td>\n", " <td>4</td>\n", " <td>1994-03-14</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>3.0</td>\n", " <td>27</td>\n", " </tr>\n", " <tr>\n", " <th>108</th>\n", " <td>109</td>\n", " <td>Cody</td>\n", " <td>Blabey</td>\n", " <td>Male</td>\n", " <td>16</td>\n", " <td>1978-12-11</td>\n", " <td>Marketing Assistant</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>4.0</td>\n", " <td>42</td>\n", " </tr>\n", " <tr>\n", " <th>110</th>\n", " <td>111</td>\n", " <td>Cele</td>\n", " <td>Evason</td>\n", " <td>Female</td>\n", " <td>65</td>\n", " <td>1993-08-29</td>\n", " <td>Analyst Programmer</td>\n", " <td>NaN</td>\n", " <td>Mass 
Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>2.0</td>\n", " <td>27</td>\n", " </tr>\n", " <tr>\n", " <th>112</th>\n", " <td>113</td>\n", " <td>Gage</td>\n", " <td>Nickless</td>\n", " <td>Male</td>\n", " <td>67</td>\n", " <td>1956-05-06</td>\n", " <td>Staff Scientist</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>20.0</td>\n", " <td>65</td>\n", " </tr>\n", " <tr>\n", " <th>117</th>\n", " <td>118</td>\n", " <td>Prentice</td>\n", " <td>Pearmain</td>\n", " <td>Male</td>\n", " <td>43</td>\n", " <td>1959-11-12</td>\n", " <td>Budget/Accounting Analyst IV</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>19.0</td>\n", " <td>61</td>\n", " </tr>\n", " <tr>\n", " <th>118</th>\n", " <td>119</td>\n", " <td>Willey</td>\n", " <td>Chastanet</td>\n", " <td>Male</td>\n", " <td>9</td>\n", " <td>1981-12-04</td>\n", " <td>Associate Professor</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>9.0</td>\n", " <td>39</td>\n", " </tr>\n", " <tr>\n", " <th>147</th>\n", " <td>148</td>\n", " <td>Jaquith</td>\n", " <td>Maffey</td>\n", " <td>Female</td>\n", " <td>69</td>\n", " <td>1981-05-08</td>\n", " <td>Programmer Analyst III</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>5.0</td>\n", " <td>40</td>\n", " </tr>\n", " <tr>\n", " <th>153</th>\n", " <td>154</td>\n", " <td>Faydra</td>\n", " <td>Dulieu</td>\n", " <td>Female</td>\n", " <td>90</td>\n", " <td>1958-02-13</td>\n", " <td>Junior Executive</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>11.0</td>\n", " <td>63</td>\n", " </tr>\n", " <tr>\n", " <th>157</th>\n", " <td>158</td>\n", " <td>Hamlin</td>\n", " <td>Odams</td>\n", " <td>Male</td>\n", " <td>99</td>\n", " <td>1984-09-03</td>\n", " <td>Internal Auditor</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " 
<td>N</td>\n", " <td>No</td>\n", " <td>5.0</td>\n", " <td>36</td>\n", " </tr>\n", " <tr>\n", " <th>160</th>\n", " <td>161</td>\n", " <td>Tadd</td>\n", " <td>Bloss</td>\n", " <td>Male</td>\n", " <td>49</td>\n", " <td>1976-01-21</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>16.0</td>\n", " <td>45</td>\n", " </tr>\n", " <tr>\n", " <th>177</th>\n", " <td>178</td>\n", " <td>Matthieu</td>\n", " <td>Bertelmot</td>\n", " <td>Male</td>\n", " <td>2</td>\n", " <td>1967-04-03</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>8.0</td>\n", " <td>54</td>\n", " </tr>\n", " <tr>\n", " <th>...</th>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " <td>...</td>\n", " </tr>\n", " <tr>\n", " <th>3851</th>\n", " <td>3852</td>\n", " <td>Zerk</td>\n", " <td>Merrien</td>\n", " <td>Male</td>\n", " <td>44</td>\n", " <td>1982-02-04</td>\n", " <td>Help Desk Operator</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>4.0</td>\n", " <td>39</td>\n", " </tr>\n", " <tr>\n", " <th>3852</th>\n", " <td>3853</td>\n", " <td>Kerri</td>\n", " <td>Marrington</td>\n", " <td>Female</td>\n", " <td>91</td>\n", " <td>1975-06-26</td>\n", " <td>Accounting Assistant IV</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>19.0</td>\n", " <td>45</td>\n", " </tr>\n", " <tr>\n", " <th>3854</th>\n", " <td>3855</td>\n", " <td>Brnaby</td>\n", " <td>Doughtery</td>\n", " <td>Male</td>\n", " <td>89</td>\n", " <td>1965-02-26</td>\n", " <td>General Manager</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>16.0</td>\n", " <td>56</td>\n", " </tr>\n", " <tr>\n", 
" <th>3859</th>\n", " <td>3860</td>\n", " <td>Sheila-kathryn</td>\n", " <td>Conklin</td>\n", " <td>Female</td>\n", " <td>14</td>\n", " <td>1986-04-05</td>\n", " <td>Mechanical Systems Engineer</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " <td>35</td>\n", " </tr>\n", " <tr>\n", " <th>3863</th>\n", " <td>3864</td>\n", " <td>Ilyssa</td>\n", " <td>Piaggia</td>\n", " <td>Female</td>\n", " <td>23</td>\n", " <td>1963-08-27</td>\n", " <td>Help Desk Technician</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>57</td>\n", " </tr>\n", " <tr>\n", " <th>3870</th>\n", " <td>3871</td>\n", " <td>Magda</td>\n", " <td>Shugg</td>\n", " <td>Female</td>\n", " <td>80</td>\n", " <td>1983-11-13</td>\n", " <td>Recruiting Manager</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>4.0</td>\n", " <td>37</td>\n", " </tr>\n", " <tr>\n", " <th>3876</th>\n", " <td>3877</td>\n", " <td>Georgine</td>\n", " <td>Poutress</td>\n", " <td>Female</td>\n", " <td>55</td>\n", " <td>1971-01-28</td>\n", " <td>Account Coordinator</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>11.0</td>\n", " <td>50</td>\n", " </tr>\n", " <tr>\n", " <th>3877</th>\n", " <td>3878</td>\n", " <td>Waldon</td>\n", " <td>Digges</td>\n", " <td>Male</td>\n", " <td>99</td>\n", " <td>1978-02-24</td>\n", " <td>Programmer III</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>9.0</td>\n", " <td>43</td>\n", " </tr>\n", " <tr>\n", " <th>3878</th>\n", " <td>3879</td>\n", " <td>Vin</td>\n", " <td>Attack</td>\n", " <td>Male</td>\n", " <td>74</td>\n", " <td>1979-08-28</td>\n", " <td>Payment Adjustment Coordinator</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>19.0</td>\n", " <td>41</td>\n", " </tr>\n", " <tr>\n", 
" <th>3886</th>\n", " <td>3887</td>\n", " <td>Dulcie</td>\n", " <td>Nealon</td>\n", " <td>Female</td>\n", " <td>66</td>\n", " <td>1964-07-16</td>\n", " <td>Computer Systems Analyst IV</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>56</td>\n", " </tr>\n", " <tr>\n", " <th>3891</th>\n", " <td>3892</td>\n", " <td>Roma</td>\n", " <td>Finlater</td>\n", " <td>Male</td>\n", " <td>19</td>\n", " <td>1978-01-29</td>\n", " <td>Staff Scientist</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>15.0</td>\n", " <td>43</td>\n", " </tr>\n", " <tr>\n", " <th>3892</th>\n", " <td>3893</td>\n", " <td>Hadria</td>\n", " <td>Moles</td>\n", " <td>Female</td>\n", " <td>7</td>\n", " <td>1996-11-18</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>4.0</td>\n", " <td>24</td>\n", " </tr>\n", " <tr>\n", " <th>3895</th>\n", " <td>3896</td>\n", " <td>Perla</td>\n", " <td>Blakiston</td>\n", " <td>Female</td>\n", " <td>3</td>\n", " <td>1979-10-15</td>\n", " <td>Tax Accountant</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " <td>41</td>\n", " </tr>\n", " <tr>\n", " <th>3902</th>\n", " <td>3903</td>\n", " <td>Dayna</td>\n", " <td>Cawthera</td>\n", " <td>Female</td>\n", " <td>69</td>\n", " <td>1981-02-13</td>\n", " <td>Research Assistant III</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>17.0</td>\n", " <td>40</td>\n", " </tr>\n", " <tr>\n", " <th>3906</th>\n", " <td>3907</td>\n", " <td>Adriana</td>\n", " <td>Heam</td>\n", " <td>Female</td>\n", " <td>8</td>\n", " <td>1996-01-11</td>\n", " <td>Technical Writer</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>5.0</td>\n", " <td>25</td>\n", " </tr>\n", " <tr>\n", " <th>3910</th>\n", " 
<td>3911</td>\n", " <td>Valeda</td>\n", " <td>Ezele</td>\n", " <td>Female</td>\n", " <td>81</td>\n", " <td>1954-05-25</td>\n", " <td>Recruiting Manager</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>5.0</td>\n", " <td>66</td>\n", " </tr>\n", " <tr>\n", " <th>3917</th>\n", " <td>3918</td>\n", " <td>Rosalia</td>\n", " <td>Skedge</td>\n", " <td>Female</td>\n", " <td>52</td>\n", " <td>1977-07-05</td>\n", " <td>Junior Executive</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>18.0</td>\n", " <td>43</td>\n", " </tr>\n", " <tr>\n", " <th>3924</th>\n", " <td>3925</td>\n", " <td>Cally</td>\n", " <td>Chaim</td>\n", " <td>Female</td>\n", " <td>81</td>\n", " <td>1978-11-25</td>\n", " <td>Statistician I</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>7.0</td>\n", " <td>42</td>\n", " </tr>\n", " <tr>\n", " <th>3928</th>\n", " <td>3929</td>\n", " <td>Jacqui</td>\n", " <td>Fortnam</td>\n", " <td>Female</td>\n", " <td>50</td>\n", " <td>1989-10-18</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>31</td>\n", " </tr>\n", " <tr>\n", " <th>3932</th>\n", " <td>3933</td>\n", " <td>Chiarra</td>\n", " <td>Cops</td>\n", " <td>Female</td>\n", " <td>65</td>\n", " <td>1983-07-05</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>10.0</td>\n", " <td>37</td>\n", " </tr>\n", " <tr>\n", " <th>3946</th>\n", " <td>3947</td>\n", " <td>Tanitansy</td>\n", " <td>McTrustam</td>\n", " <td>Female</td>\n", " <td>26</td>\n", " <td>1970-05-12</td>\n", " <td>GIS Technical Architect</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>12.0</td>\n", " <td>51</td>\n", " </tr>\n", " <tr>\n", " <th>3950</th>\n", " <td>3951</td>\n", " <td>Ephrem</td>\n", " 
<td>Hollerin</td>\n", " <td>Male</td>\n", " <td>39</td>\n", " <td>1975-02-10</td>\n", " <td>Quality Control Specialist</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>9.0</td>\n", " <td>46</td>\n", " </tr>\n", " <tr>\n", " <th>3956</th>\n", " <td>3957</td>\n", " <td>Bernice</td>\n", " <td>Scotchforth</td>\n", " <td>Female</td>\n", " <td>4</td>\n", " <td>1978-07-20</td>\n", " <td>Business Systems Development Analyst</td>\n", " <td>NaN</td>\n", " <td>High Net Worth</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>14.0</td>\n", " <td>42</td>\n", " </tr>\n", " <tr>\n", " <th>3958</th>\n", " <td>3959</td>\n", " <td>Dannie</td>\n", " <td>Sowray</td>\n", " <td>Male</td>\n", " <td>76</td>\n", " <td>1992-12-07</td>\n", " <td>Missing</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>3.0</td>\n", " <td>28</td>\n", " </tr>\n", " <tr>\n", " <th>3962</th>\n", " <td>3963</td>\n", " <td>Ardelle</td>\n", " <td>Dasent</td>\n", " <td>Female</td>\n", " <td>10</td>\n", " <td>1954-08-22</td>\n", " <td>Software Test Engineer II</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>13.0</td>\n", " <td>66</td>\n", " </tr>\n", " <tr>\n", " <th>3965</th>\n", " <td>3966</td>\n", " <td>Astrix</td>\n", " <td>Sigward</td>\n", " <td>Female</td>\n", " <td>53</td>\n", " <td>1968-09-15</td>\n", " <td>Geologist I</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>11.0</td>\n", " <td>52</td>\n", " </tr>\n", " <tr>\n", " <th>3973</th>\n", " <td>3974</td>\n", " <td>Misha</td>\n", " <td>Ranklin</td>\n", " <td>Female</td>\n", " <td>82</td>\n", " <td>1961-02-11</td>\n", " <td>Technical Writer</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>9.0</td>\n", " <td>60</td>\n", " </tr>\n", " <tr>\n", " <th>3975</th>\n", " <td>3976</td>\n", " <td>Gretel</td>\n", " 
<td>Chrystal</td>\n", " <td>Female</td>\n", " <td>0</td>\n", " <td>1957-11-20</td>\n", " <td>Internal Auditor</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>13.0</td>\n", " <td>63</td>\n", " </tr>\n", " <tr>\n", " <th>3982</th>\n", " <td>3983</td>\n", " <td>Jarred</td>\n", " <td>Lyste</td>\n", " <td>Male</td>\n", " <td>19</td>\n", " <td>1965-04-21</td>\n", " <td>Graphic Designer</td>\n", " <td>NaN</td>\n", " <td>Mass Customer</td>\n", " <td>N</td>\n", " <td>Yes</td>\n", " <td>9.0</td>\n", " <td>56</td>\n", " </tr>\n", " <tr>\n", " <th>3999</th>\n", " <td>4000</td>\n", " <td>Kippy</td>\n", " <td>Oldland</td>\n", " <td>Male</td>\n", " <td>76</td>\n", " <td>1991-11-05</td>\n", " <td>Software Engineer IV</td>\n", " <td>NaN</td>\n", " <td>Affluent Customer</td>\n", " <td>N</td>\n", " <td>No</td>\n", " <td>11.0</td>\n", " <td>29</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "<p>656 rows × 13 columns</p>\n", "</div>" ], "text/plain": [ " customer_id first_name last_name gender \\\n", "4 5 Sheila-kathryn Calton Female \n", "7 8 Rod Inder Male \n", "15 16 Harlin Parr Male \n", "16 17 Heath Faraday Male \n", "17 18 Marjie Neasham Female \n", "22 23 Olav Polak Male \n", "32 33 Ernst Hacon Male \n", "35 36 Lurette Stonnell Female \n", "45 46 Kaila Allin Female \n", "47 48 Rebbecca Casone Female \n", "48 49 Nolly Ownsworth Male \n", "56 57 Abba Masedon M \n", "58 59 Niki Heathcote Male \n", "67 68 Dahlia Eddoes Female \n", "68 69 Heidi Milner Female \n", "72 73 Minette Worters Female \n", "73 74 Pansy Kiddie Female \n", "83 84 Rich Mathiasen Male \n", "84 85 Kane Tixall Male \n", "107 108 Kayle Mingaud Female \n", "108 109 Cody Blabey Male \n", "110 111 Cele Evason Female \n", "112 113 Gage Nickless Male \n", "117 118 Prentice Pearmain Male \n", "118 119 Willey Chastanet Male \n", "147 148 Jaquith Maffey Female \n", "153 154 Faydra Dulieu Female \n", "157 158 Hamlin Odams Male \n", "160 161 Tadd Bloss Male \n", "177 
178 Matthieu Bertelmot Male \n", "... ... ... ... ... \n", "3851 3852 Zerk Merrien Male \n", "3852 3853 Kerri Marrington Female \n", "3854 3855 Brnaby Doughtery Male \n", "3859 3860 Sheila-kathryn Conklin Female \n", "3863 3864 Ilyssa Piaggia Female \n", "3870 3871 Magda Shugg Female \n", "3876 3877 Georgine Poutress Female \n", "3877 3878 Waldon Digges Male \n", "3878 3879 Vin Attack Male \n", "3886 3887 Dulcie Nealon Female \n", "3891 3892 Roma Finlater Male \n", "3892 3893 Hadria Moles Female \n", "3895 3896 Perla Blakiston Female \n", "3902 3903 Dayna Cawthera Female \n", "3906 3907 Adriana Heam Female \n", "3910 3911 Valeda Ezele Female \n", "3917 3918 Rosalia Skedge Female \n", "3924 3925 Cally Chaim Female \n", "3928 3929 Jacqui Fortnam Female \n", "3932 3933 Chiarra Cops Female \n", "3946 3947 Tanitansy McTrustam Female \n", "3950 3951 Ephrem Hollerin Male \n", "3956 3957 Bernice Scotchforth Female \n", "3958 3959 Dannie Sowray Male \n", "3962 3963 Ardelle Dasent Female \n", "3965 3966 Astrix Sigward Female \n", "3973 3974 Misha Ranklin Female \n", "3975 3976 Gretel Chrystal Female \n", "3982 3983 Jarred Lyste Male \n", "3999 4000 Kippy Oldland Male \n", "\n", " past_3_years_bike_related_purchases DOB \\\n", "4 56 1977-05-13 \n", "7 31 1962-03-30 \n", "15 38 1977-02-27 \n", "16 57 1962-03-19 \n", "17 79 1967-07-06 \n", "22 43 1995-02-10 \n", "32 44 1957-06-25 \n", "35 33 1977-11-09 \n", "45 98 1972-02-26 \n", "47 46 1975-08-15 \n", "48 63 1994-01-26 \n", "56 87 1988-06-13 \n", "58 60 2000-02-08 \n", "67 37 1974-04-21 \n", "68 16 1969-06-22 \n", "72 16 1960-05-27 \n", "73 94 1969-06-19 \n", "83 78 1958-02-07 \n", "84 1 1958-05-21 \n", "107 4 1994-03-14 \n", "108 16 1978-12-11 \n", "110 65 1993-08-29 \n", "112 67 1956-05-06 \n", "117 43 1959-11-12 \n", "118 9 1981-12-04 \n", "147 69 1981-05-08 \n", "153 90 1958-02-13 \n", "157 99 1984-09-03 \n", "160 49 1976-01-21 \n", "177 2 1967-04-03 \n", "... ... ... 
\n", "3851 44 1982-02-04 \n", "3852 91 1975-06-26 \n", "3854 89 1965-02-26 \n", "3859 14 1986-04-05 \n", "3863 23 1963-08-27 \n", "3870 80 1983-11-13 \n", "3876 55 1971-01-28 \n", "3877 99 1978-02-24 \n", "3878 74 1979-08-28 \n", "3886 66 1964-07-16 \n", "3891 19 1978-01-29 \n", "3892 7 1996-11-18 \n", "3895 3 1979-10-15 \n", "3902 69 1981-02-13 \n", "3906 8 1996-01-11 \n", "3910 81 1954-05-25 \n", "3917 52 1977-07-05 \n", "3924 81 1978-11-25 \n", "3928 50 1989-10-18 \n", "3932 65 1983-07-05 \n", "3946 26 1970-05-12 \n", "3950 39 1975-02-10 \n", "3956 4 1978-07-20 \n", "3958 76 1992-12-07 \n", "3962 10 1954-08-22 \n", "3965 53 1968-09-15 \n", "3973 82 1961-02-11 \n", "3975 0 1957-11-20 \n", "3982 19 1965-04-21 \n", "3999 76 1991-11-05 \n", "\n", " job_title job_industry_category \\\n", "4 Senior Editor NaN \n", "7 Media Manager I NaN \n", "15 Media Manager IV NaN \n", "16 Sales Associate NaN \n", "17 Professor NaN \n", "22 Missing NaN \n", "32 Product Engineer NaN \n", "35 VP Quality Control NaN \n", "45 Missing NaN \n", "47 Biostatistician II NaN \n", "48 VP Quality Control NaN \n", "56 Chief Design Engineer NaN \n", "58 Physical Therapy Assistant NaN \n", "67 Information Systems Manager NaN \n", "68 Web Developer II NaN \n", "72 Teacher NaN \n", "73 Missing NaN \n", "83 Accountant III NaN \n", "84 Analyst Programmer NaN \n", "107 Missing NaN \n", "108 Marketing Assistant NaN \n", "110 Analyst Programmer NaN \n", "112 Staff Scientist NaN \n", "117 Budget/Accounting Analyst IV NaN \n", "118 Associate Professor NaN \n", "147 Programmer Analyst III NaN \n", "153 Junior Executive NaN \n", "157 Internal Auditor NaN \n", "160 Missing NaN \n", "177 Missing NaN \n", "... ... ... 
\n", "3851 Help Desk Operator NaN \n", "3852 Accounting Assistant IV NaN \n", "3854 General Manager NaN \n", "3859 Mechanical Systems Engineer NaN \n", "3863 Help Desk Technician NaN \n", "3870 Recruiting Manager NaN \n", "3876 Account Coordinator NaN \n", "3877 Programmer III NaN \n", "3878 Payment Adjustment Coordinator NaN \n", "3886 Computer Systems Analyst IV NaN \n", "3891 Staff Scientist NaN \n", "3892 Missing NaN \n", "3895 Tax Accountant NaN \n", "3902 Research Assistant III NaN \n", "3906 Technical Writer NaN \n", "3910 Recruiting Manager NaN \n", "3917 Junior Executive NaN \n", "3924 Statistician I NaN \n", "3928 Missing NaN \n", "3932 Missing NaN \n", "3946 GIS Technical Architect NaN \n", "3950 Quality Control Specialist NaN \n", "3956 Business Systems Development Analyst NaN \n", "3958 Missing NaN \n", "3962 Software Test Engineer II NaN \n", "3965 Geologist I NaN \n", "3973 Technical Writer NaN \n", "3975 Internal Auditor NaN \n", "3982 Graphic Designer NaN \n", "3999 Software Engineer IV NaN \n", "\n", " wealth_segment deceased_indicator owns_car tenure Age \n", "4 Affluent Customer N Yes 8.0 44 \n", "7 Mass Customer N No 7.0 59 \n", "15 Mass Customer N Yes 18.0 44 \n", "16 Affluent Customer N Yes 15.0 59 \n", "17 Affluent Customer N No 11.0 53 \n", "22 High Net Worth N Yes 1.0 26 \n", "32 Affluent Customer N Yes 11.0 63 \n", "35 Affluent Customer N No 22.0 43 \n", "45 Affluent Customer N Yes 15.0 49 \n", "47 Mass Customer N Yes 8.0 45 \n", "48 Affluent Customer N No 1.0 27 \n", "56 Mass Customer N Yes 13.0 32 \n", "58 High Net Worth N No 3.0 21 \n", "67 Affluent Customer N No 9.0 47 \n", "68 Mass Customer N No 6.0 51 \n", "72 Affluent Customer N Yes 5.0 60 \n", "73 Mass Customer N Yes 6.0 51 \n", "83 Mass Customer N Yes 14.0 63 \n", "84 Mass Customer N No 8.0 62 \n", "107 High Net Worth N No 3.0 27 \n", "108 Affluent Customer N Yes 4.0 42 \n", "110 Mass Customer N No 2.0 27 \n", "112 Mass Customer N No 20.0 65 \n", "117 High Net Worth N No 19.0 61 
\n", "118 High Net Worth N Yes 9.0 39 \n", "147 Mass Customer N Yes 5.0 40 \n", "153 Mass Customer N No 11.0 63 \n", "157 Affluent Customer N No 5.0 36 \n", "160 Mass Customer N No 16.0 45 \n", "177 Affluent Customer N No 8.0 54 \n", "... ... ... ... ... ... \n", "3851 Mass Customer N No 4.0 39 \n", "3852 Mass Customer N Yes 19.0 45 \n", "3854 Mass Customer N No 16.0 56 \n", "3859 Affluent Customer N Yes 13.0 35 \n", "3863 Mass Customer N Yes 10.0 57 \n", "3870 Mass Customer N No 4.0 37 \n", "3876 High Net Worth N No 11.0 50 \n", "3877 Mass Customer N No 9.0 43 \n", "3878 High Net Worth N No 19.0 41 \n", "3886 Affluent Customer N No 7.0 56 \n", "3891 Mass Customer N Yes 15.0 43 \n", "3892 High Net Worth N Yes 4.0 24 \n", "3895 Mass Customer N Yes 13.0 41 \n", "3902 Mass Customer N Yes 17.0 40 \n", "3906 High Net Worth N Yes 5.0 25 \n", "3910 Mass Customer N No 5.0 66 \n", "3917 High Net Worth N No 18.0 43 \n", "3924 High Net Worth N No 7.0 42 \n", "3928 Affluent Customer N Yes 10.0 31 \n", "3932 High Net Worth N Yes 10.0 37 \n", "3946 Mass Customer N No 12.0 51 \n", "3950 Affluent Customer N Yes 9.0 46 \n", "3956 High Net Worth N Yes 14.0 42 \n", "3958 Mass Customer N No 3.0 28 \n", "3962 Mass Customer N No 13.0 66 \n", "3965 Mass Customer N Yes 11.0 52 \n", "3973 Affluent Customer N Yes 9.0 60 \n", "3975 Affluent Customer N Yes 13.0 63 \n", "3982 Mass Customer N Yes 9.0 56 \n", "3999 Affluent Customer N No 11.0 29 \n", "\n", "[656 rows x 13 columns]" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo[cust_demo['job_industry_category'].isnull()]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>Since Percentage of missing Job Industry Category is 16. 
We will replace null values with Missing</b>" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "cust_demo['job_industry_category'].fillna('Missing', inplace=True, axis=0)" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['job_industry_category'].isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>Finally there are no Missing Values in the dataset.</b>" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "customer_id 0\n", "first_name 0\n", "last_name 0\n", "gender 0\n", "past_3_years_bike_related_purchases 0\n", "DOB 0\n", "job_title 0\n", "job_industry_category 0\n", "wealth_segment 0\n", "deceased_indicator 0\n", "owns_car 0\n", "tenure 0\n", "Age 0\n", "dtype: int64" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Total records after removing Missing Values: 3912\n" ] } ], "source": [ "print(\"Total records after removing Missing Values: {}\".format(cust_demo.shape[0]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3. 
Inconsistency Check in Data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We will check whether any inconsistent data or typos are present in the categorical columns.<br>\n", "The columns to be checked are <b>'gender', 'wealth_segment', 'deceased_indicator', 'owns_car'</b>" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.1 Gender" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Female 2037\n", "Male 1872\n", "F 1\n", "M 1\n", "Femal 1\n", "Name: gender, dtype: int64" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['gender'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The gender column contains inconsistent data: there are spelling mistakes and typos. The value <b>M will be replaced with Male</b>, <b>F will be replaced by Female</b> and <b>Femal will be replaced by Female</b>." ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [], "source": [ "def replace_gender_names(gender):\n", " \n", " # Making Gender as Male and Female as standards\n", " if gender=='M':\n", " return 'Male'\n", " elif gender=='F':\n", " return 'Female'\n", " elif gender=='Femal':\n", " return 'Female'\n", " else :\n", " return gender\n", "\n", "cust_demo['gender'] = cust_demo['gender'].apply(replace_gender_names)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Female 2039\n", "Male 1873\n", "Name: gender, dtype: int64" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['gender'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The inconsistent data, spelling mistakes and typos in the gender column have been removed. 
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.2 Wealth Segment" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There is <b>no inconsistent data</b> in the <b>wealth_segment</b> column." ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Mass Customer 1954\n", "High Net Worth 996\n", "Affluent Customer 962\n", "Name: wealth_segment, dtype: int64" ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['wealth_segment'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.3 Deceased Indicator" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There is <b>no inconsistent data</b> in the <b>deceased_indicator</b> column." ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "N 3910\n", "Y 2\n", "Name: deceased_indicator, dtype: int64" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['deceased_indicator'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 3.4 Owns a Car" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "There is <b>no inconsistent data</b> in the <b>owns_car</b> column." ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Yes 1974\n", "No 1938\n", "Name: owns_car, dtype: int64" ] }, "execution_count": 51, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cust_demo['owns_car'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 4. Duplication Checks" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "We need to ensure that there is no duplication of records in the dataset. Duplicate records lower the data quality and may lead to errors in the data analysis. 
If there are duplicate rows of data, then we need to drop such records.<br>To check for duplicate records, we first need to remove the primary key column of the dataset and then apply the drop_duplicates() function provided by pandas." ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of records after removing customer_id (pk), duplicates : 3912\n", "Number of records in original dataset : 3912\n" ] } ], "source": [ "cust_demo_dedupped = cust_demo.drop('customer_id', axis=1).drop_duplicates()\n", "\n", "print(\"Number of records after removing customer_id (pk), duplicates : {}\".format(cust_demo_dedupped.shape[0]))\n", "print(\"Number of records in original dataset : {}\".format(cust_demo.shape[0]))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "<b>Since both numbers are the same, there are no duplicate records in the dataset.</b>" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 5. Exporting the Cleaned Customer Demographic Data Set to csv" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The Customer Demographics dataset is now clean. Hence we can export the data to a CSV file to continue our data analysis of Customer Segments by joining it to other tables." ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [], "source": [ "cust_demo.to_csv('CustomerDemographic_Cleaned.csv', index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" } }, "nbformat": 4, "nbformat_minor": 2 }