{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "### Missing Values in a Pandas DataFrame" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# import\n", "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# reading the file\n", "data = pd.read_csv(\"data/train.csv\")" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
0LP001002MaleNo0GraduateNo58490.0NaN360.01.0UrbanY
1LP001003MaleYes1GraduateNo45831508.0128.0360.01.0RuralN
2LP001005MaleYes0GraduateYes30000.066.0360.01.0UrbanY
3LP001006MaleYes0Not GraduateNo25832358.0120.0360.01.0UrbanY
4LP001008MaleNo0GraduateNo60000.0141.0360.01.0UrbanY
5LP001011MaleYes2GraduateYes54174196.0267.0360.01.0UrbanY
\n", "
" ], "text/plain": [ " Loan_ID Gender Married Dependents Education Self_Employed \\\n", "0 LP001002 Male No 0 Graduate No \n", "1 LP001003 Male Yes 1 Graduate No \n", "2 LP001005 Male Yes 0 Graduate Yes \n", "3 LP001006 Male Yes 0 Not Graduate No \n", "4 LP001008 Male No 0 Graduate No \n", "5 LP001011 Male Yes 2 Graduate Yes \n", "\n", " ApplicantIncome CoapplicantIncome LoanAmount Loan_Amount_Term \\\n", "0 5849 0.0 NaN 360.0 \n", "1 4583 1508.0 128.0 360.0 \n", "2 3000 0.0 66.0 360.0 \n", "3 2583 2358.0 120.0 360.0 \n", "4 6000 0.0 141.0 360.0 \n", "5 5417 4196.0 267.0 360.0 \n", "\n", " Credit_History Property_Area Loan_Status \n", "0 1.0 Urban Y \n", "1 1.0 Rural N \n", "2 1.0 Urban Y \n", "3 1.0 Urban Y \n", "4 1.0 Urban Y \n", "5 1.0 Urban Y " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Looking the data\n", "data.head(6)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
0FalseFalseFalseFalseFalseFalseFalseFalseTrueFalseFalseFalseFalse
1FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
2FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
3FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
4FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
5FalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalseFalse
\n", "
" ], "text/plain": [ " Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome \\\n", "0 False False False False False False False \n", "1 False False False False False False False \n", "2 False False False False False False False \n", "3 False False False False False False False \n", "4 False False False False False False False \n", "5 False False False False False False False \n", "\n", " CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area \\\n", "0 False True False False False \n", "1 False False False False False \n", "2 False False False False False \n", "3 False False False False False \n", "4 False False False False False \n", "5 False False False False False \n", "\n", " Loan_Status \n", "0 False \n", "1 False \n", "2 False \n", "3 False \n", "4 False \n", "5 False " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# isnull or notnull\n", "data.isnull().head(6)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Loan_IDGenderMarriedDependentsEducationSelf_EmployedApplicantIncomeCoapplicantIncomeLoanAmountLoan_Amount_TermCredit_HistoryProperty_AreaLoan_Status
0TrueTrueTrueTrueTrueTrueTrueTrueFalseTrueTrueTrueTrue
1TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
2TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
3TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
4TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
5TrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrueTrue
\n", "
" ], "text/plain": [ " Loan_ID Gender Married Dependents Education Self_Employed ApplicantIncome \\\n", "0 True True True True True True True \n", "1 True True True True True True True \n", "2 True True True True True True True \n", "3 True True True True True True True \n", "4 True True True True True True True \n", "5 True True True True True True True \n", "\n", " CoapplicantIncome LoanAmount Loan_Amount_Term Credit_History Property_Area \\\n", "0 True False True True True \n", "1 True True True True True \n", "2 True True True True True \n", "3 True True True True True \n", "4 True True True True True \n", "5 True True True True True \n", "\n", " Loan_Status \n", "0 True \n", "1 True \n", "2 True \n", "3 True \n", "4 True \n", "5 True " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.notnull().head(6)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "True" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# use of any\n", "data.isnull().values.any()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# use of all\n", "data.isnull().values.all()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Loan_ID 0\n", "Gender 13\n", "Married 3\n", "Dependents 15\n", "Education 0\n", "Self_Employed 32\n", "ApplicantIncome 0\n", "CoapplicantIncome 0\n", "LoanAmount 22\n", "Loan_Amount_Term 14\n", "Credit_History 50\n", "Property_Area 0\n", "Loan_Status 0\n", "dtype: int64" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# taking the count of Null/NaN in each column of dataframe\n", "data.isnull().sum()" 
] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "149" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# total number of missing values across the whole DataFrame\n", "data.isnull().sum().sum()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "15" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# number of missing values in one particular column\n", "data['Dependents'].isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**_Atul Singh_ \n", "Follow me - \n", "http://www.datagenx.net \n", "https://twitter.com/datagenx \n", "https://www.facebook.com/datastage4you**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [conda root]", "language": "python", "name": "conda-root-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" } }, "nbformat": 4, "nbformat_minor": 1 }