{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Missing Values in a Pandas DataFrame"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# import\n",
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"# reading the file\n",
"data = pd.read_csv(\"data/train.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" Loan_ID | \n",
" Gender | \n",
" Married | \n",
" Dependents | \n",
" Education | \n",
" Self_Employed | \n",
" ApplicantIncome | \n",
" CoapplicantIncome | \n",
" LoanAmount | \n",
" Loan_Amount_Term | \n",
" Credit_History | \n",
" Property_Area | \n",
" Loan_Status | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" LP001002 | \n",
" Male | \n",
" No | \n",
" 0 | \n",
" Graduate | \n",
" No | \n",
" 5849 | \n",
" 0.0 | \n",
" NaN | \n",
" 360.0 | \n",
" 1.0 | \n",
" Urban | \n",
" Y | \n",
"
\n",
" \n",
" 1 | \n",
" LP001003 | \n",
" Male | \n",
" Yes | \n",
" 1 | \n",
" Graduate | \n",
" No | \n",
" 4583 | \n",
" 1508.0 | \n",
" 128.0 | \n",
" 360.0 | \n",
" 1.0 | \n",
" Rural | \n",
" N | \n",
"
\n",
" \n",
" 2 | \n",
" LP001005 | \n",
" Male | \n",
" Yes | \n",
" 0 | \n",
" Graduate | \n",
" Yes | \n",
" 3000 | \n",
" 0.0 | \n",
" 66.0 | \n",
" 360.0 | \n",
" 1.0 | \n",
" Urban | \n",
" Y | \n",
"
\n",
" \n",
" 3 | \n",
" LP001006 | \n",
" Male | \n",
" Yes | \n",
" 0 | \n",
" Not Graduate | \n",
" No | \n",
" 2583 | \n",
" 2358.0 | \n",
" 120.0 | \n",
" 360.0 | \n",
" 1.0 | \n",
" Urban | \n",
" Y | \n",
"
\n",
" \n",
" 4 | \n",
" LP001008 | \n",
" Male | \n",
" No | \n",
" 0 | \n",
" Graduate | \n",
" No | \n",
" 6000 | \n",
" 0.0 | \n",
" 141.0 | \n",
" 360.0 | \n",
" 1.0 | \n",
" Urban | \n",
" Y | \n",
"
\n",
" \n",
" 5 | \n",
" LP001011 | \n",
" Male | \n",
" Yes | \n",
" 2 | \n",
" Graduate | \n",
" Yes | \n",
" 5417 | \n",
" 4196.0 | \n",
" 267.0 | \n",
" 360.0 | \n",
" 1.0 | \n",
" Urban | \n",
" Y | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Loan_ID Gender Married Dependents Education Self_Employed \\\n",
"0 LP001002 Male No 0 Graduate No \n",
"1 LP001003 Male Yes 1 Graduate No \n",
"2 LP001005 Male Yes 0 Graduate Yes \n",
"3 LP001006 Male Yes 0 Not Graduate No \n",
"4 LP001008 Male No 0 Graduate No \n",
"5 LP001011 Male Yes 2 Graduate Yes \n",
"\n",
" ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n",
"0 5849 0.0 NaN 360.0 \n",
"1 4583 1508.0 128.0 360.0 \n",
"2 3000 0.0 66.0 360.0 \n",
"3 2583 2358.0 120.0 360.0 \n",
"4 6000 0.0 141.0 360.0 \n",
"5 5417 4196.0 267.0 360.0 \n",
"\n",
" Credit_History Property_Area Loan_Status \n",
"0 1.0 Urban Y \n",
"1 1.0 Rural N \n",
"2 1.0 Urban Y \n",
"3 1.0 Urban Y \n",
"4 1.0 Urban Y \n",
"5 1.0 Urban Y "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Look at the first six rows of the data\n",
"data.head(6)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Loan_ID | \n",
" Gender | \n",
" Married | \n",
" Dependents | \n",
" Education | \n",
" Self_Employed | \n",
" ApplicantIncome | \n",
" CoapplicantIncome | \n",
" LoanAmount | \n",
" Loan_Amount_Term | \n",
" Credit_History | \n",
" Property_Area | \n",
" Loan_Status | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" True | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 1 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 2 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 3 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 4 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
" 5 | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
" False | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome \\\n",
"0 False False False False False False False \n",
"1 False False False False False False False \n",
"2 False False False False False False False \n",
"3 False False False False False False False \n",
"4 False False False False False False False \n",
"5 False False False False False False False \n",
"\n",
" CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area \\\n",
"0 False True False False False \n",
"1 False False False False False \n",
"2 False False False False False \n",
"3 False False False False False \n",
"4 False False False False False \n",
"5 False False False False False \n",
"\n",
" Loan_Status \n",
"0 False \n",
"1 False \n",
"2 False \n",
"3 False \n",
"4 False \n",
"5 False "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# isnull() marks each missing cell as True (notnull() is the inverse)\n",
"data.isnull().head(6)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Loan_ID | \n",
" Gender | \n",
" Married | \n",
" Dependents | \n",
" Education | \n",
" Self_Employed | \n",
" ApplicantIncome | \n",
" CoapplicantIncome | \n",
" LoanAmount | \n",
" Loan_Amount_Term | \n",
" Credit_History | \n",
" Property_Area | \n",
" Loan_Status | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" False | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" 1 | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" 2 | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" 3 | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" 4 | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
" 5 | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
" True | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome \\\n",
"0 True True True True True True True \n",
"1 True True True True True True True \n",
"2 True True True True True True True \n",
"3 True True True True True True True \n",
"4 True True True True True True True \n",
"5 True True True True True True True \n",
"\n",
" CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area \\\n",
"0 True False True True True \n",
"1 True True True True True \n",
"2 True True True True True \n",
"3 True True True True True \n",
"4 True True True True True \n",
"5 True True True True True \n",
"\n",
" Loan_Status \n",
"0 True \n",
"1 True \n",
"2 True \n",
"3 True \n",
"4 True \n",
"5 True "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.notnull().head(6)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"True"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# any(): True if at least one value in the dataframe is missing\n",
"data.isnull().values.any()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"False"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# all(): True only if every value in the dataframe is missing\n",
"data.isnull().values.all()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Loan_ID 0\n",
"Gender 13\n",
"Married 3\n",
"Dependents 15\n",
"Education 0\n",
"Self_Employed 32\n",
"ApplicantIncome 0\n",
"CoapplicantIncome 0\n",
"LoanAmount 22\n",
"Loan_Amount_Term 14\n",
"Credit_History 50\n",
"Property_Area 0\n",
"Loan_Status 0\n",
"dtype: int64"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# count of Null/NaN values in each column of the dataframe\n",
"data.isnull().sum()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"149"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# total number of Null/NaN values across the whole dataframe\n",
"data.isnull().sum().sum()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"15"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# number of Null/NaN values in one particular column\n",
"data['Dependents'].isnull().sum()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"**_Atul Singh_ \n",
"Follow me - \n",
"http://www.datagenx.net \n",
"https://twitter.com/datagenx \n",
"https://www.facebook.com/datastage4you**"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python [conda root]",
"language": "python",
"name": "conda-root-py"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
}
},
"nbformat": 4,
"nbformat_minor": 1
}