{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "credit data analysis.ipynb", "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": 35, "metadata": { "id": "uornDyP-_jp8" }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "markdown", "source": [ "# **Read Data**" ], "metadata": { "id": "Ia1hQADHFuuA" } }, { "cell_type": "code", "source": [ "cs = pd.read_csv('/content/Credit.csv')\n", "cs" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 505 }, "id": "M_MLTl4_AxPX", "outputId": "4bc1f033-d21d-4a50-a54f-3cd41105f813" }, "execution_count": 36, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age Gender Region \\\n", "0 1 0.766127 45.0 Male South \n", "1 0 0.957151 40.0 Female South \n", "2 0 0.658180 38.0 Female South \n", "3 0 0.233810 30.0 Female South \n", "4 0 0.907239 49.0 Male South \n", "... ... ... ... ... ... \n", "149997 0 0.246044 58.0 Male North \n", "149998 0 0.000000 30.0 Male North \n", "149999 0 0.850283 64.0 Male North \n", "150000 0 NaN NaN NaN NaN \n", "150001 1 NaN NaN NaN NaN \n", "\n", " MonthlyIncome Rented_OwnHouse Occupation Education \\\n", "0 9120.0 Ownhouse Self_Emp Matric \n", "1 2600.0 Ownhouse Self_Emp Graduate \n", "2 3042.0 Ownhouse Self_Emp PhD \n", "3 3300.0 Ownhouse Self_Emp Professional \n", "4 63588.0 Ownhouse Self_Emp Post-Grad \n", "... ... ... ... ... 
\n", "149997 NaN Rented Officer2 Professional \n", "149998 5716.0 Rented Non-officer Professional \n", "149999 8158.0 Ownhouse Self_Emp Professional \n", "150000 NaN NaN NaN NaN \n", "150001 NaN NaN NaN NaN \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio MonthlyIncome.1 \\\n", "0 2.0 0.802982 9120.0 \n", "1 0.0 0.121876 2600.0 \n", "2 1.0 0.085113 3042.0 \n", "3 0.0 0.036050 3300.0 \n", "4 1.0 0.024926 63588.0 \n", "... ... ... ... \n", "149997 0.0 3870.000000 NaN \n", "149998 0.0 0.000000 5716.0 \n", "149999 0.0 0.249908 8158.0 \n", "150000 NaN NaN NaN \n", "150001 NaN NaN NaN \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "... ... ... \n", "149997 18.0 0.0 \n", "149998 4.0 0.0 \n", "149999 8.0 0.0 \n", "150000 NaN NaN \n", "150001 NaN NaN \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "0 6.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 1.0 0.0 \n", "... ... ... \n", "149997 1.0 0.0 \n", "149998 0.0 0.0 \n", "149999 2.0 0.0 \n", "150000 NaN NaN \n", "150001 NaN NaN \n", "\n", " NumberOfDependents Good_Bad \n", "0 2.0 Bad \n", "1 1.0 Good \n", "2 0.0 Good \n", "3 0.0 Good \n", "4 0.0 Good \n", "... ... ... \n", "149997 0.0 Good \n", "149998 0.0 Good \n", "149999 0.0 Good \n", "150000 NaN NaN \n", "150001 NaN NaN \n", "\n", "[150002 rows x 18 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageGenderRegionMonthlyIncomeRented_OwnHouseOccupationEducationNumberOfTime30-59DaysPastDueNotWorseDebtRatioMonthlyIncome.1NumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependentsGood_Bad
010.76612745.0MaleSouth9120.0OwnhouseSelf_EmpMatric2.00.8029829120.013.00.06.00.02.0Bad
100.95715140.0FemaleSouth2600.0OwnhouseSelf_EmpGraduate0.00.1218762600.04.00.00.00.01.0Good
200.65818038.0FemaleSouth3042.0OwnhouseSelf_EmpPhD1.00.0851133042.02.01.00.00.00.0Good
300.23381030.0FemaleSouth3300.0OwnhouseSelf_EmpProfessional0.00.0360503300.05.00.00.00.00.0Good
400.90723949.0MaleSouth63588.0OwnhouseSelf_EmpPost-Grad1.00.02492663588.07.00.01.00.00.0Good
.........................................................
14999700.24604458.0MaleNorthNaNRentedOfficer2Professional0.03870.000000NaN18.00.01.00.00.0Good
14999800.00000030.0MaleNorth5716.0RentedNon-officerProfessional0.00.0000005716.04.00.00.00.00.0Good
14999900.85028364.0MaleNorth8158.0OwnhouseSelf_EmpProfessional0.00.2499088158.08.00.02.00.00.0Good
1500000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1500011NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

150002 rows × 18 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 36 } ] }, { "cell_type": "code", "source": [ "cs.info()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YxnPG1YPBuRM", "outputId": "81607f83-4a9a-479c-a6d0-98c7ab6899d0" }, "execution_count": 37, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "\n", "RangeIndex: 150002 entries, 0 to 150001\n", "Data columns (total 18 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 NPA Status 150002 non-null int64 \n", " 1 RevolvingUtilizationOfUnsecuredLines 150000 non-null float64\n", " 2 age 150000 non-null float64\n", " 3 Gender 150000 non-null object \n", " 4 Region 150000 non-null object \n", " 5 MonthlyIncome 120269 non-null float64\n", " 6 Rented_OwnHouse 150000 non-null object \n", " 7 Occupation 150000 non-null object \n", " 8 Education 150000 non-null object \n", " 9 NumberOfTime30-59DaysPastDueNotWorse 150000 non-null float64\n", " 10 DebtRatio 150000 non-null float64\n", " 11 MonthlyIncome.1 120269 non-null float64\n", " 12 NumberOfOpenCreditLinesAndLoans 150000 non-null float64\n", " 13 NumberOfTimes90DaysLate 150000 non-null float64\n", " 14 NumberRealEstateLoansOrLines 150000 non-null float64\n", " 15 NumberOfTime60-89DaysPastDueNotWorse 150000 non-null float64\n", " 16 NumberOfDependents 146076 non-null float64\n", " 17 Good_Bad 150000 non-null object \n", "dtypes: float64(11), int64(1), object(6)\n", "memory usage: 20.6+ MB\n" ] } ] }, { "cell_type": "code", "source": [ "cs.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "CtY1v7AvB2Qm", "outputId": "04069c89-8a1a-4736-9a95-e904f9e74af6" }, "execution_count": 38, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(150002, 18)" ] }, "metadata": {}, "execution_count": 38 } ] }, { "cell_type": "markdown", "source": [ "## **To Check And Remove Duplicates**" ], "metadata": { "id": "c8aaJ1icGDKQ" } }, { "cell_type": "code", 
"source": [ "cs[cs.duplicated()]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 758 }, "id": "6sqvWMqdCKoN", "outputId": "dfbc76a0-5955-4411-c051-e24c63acacf8" }, "execution_count": 12, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age Gender \\\n", "7920 0 1.0 22.0 Male \n", "19814 0 1.0 68.0 Female \n", "19987 0 1.0 37.0 Female \n", "37075 0 1.0 23.0 Male \n", "40396 0 0.0 46.0 Female \n", "43095 0 1.0 22.0 Male \n", "44485 0 1.0 22.0 Female \n", "66276 0 0.0 82.0 Male \n", "67173 0 0.0 87.0 Male \n", "82357 0 0.0 24.0 Male \n", "99854 0 1.0 58.0 Male \n", "105018 0 0.0 87.0 Female \n", "106795 0 0.0 22.0 Male \n", "119531 0 0.0 22.0 Male \n", "121833 0 1.0 63.0 Female \n", "123531 0 1.0 22.0 Female \n", "124731 0 1.0 34.0 Female \n", "126176 0 1.0 55.0 Female \n", "127101 0 1.0 63.0 Female \n", "127270 0 1.0 32.0 Female \n", "\n", " Region MonthlyIncome Rented_OwnHouse Occupation Education \\\n", "7920 South 820.0 Ownhouse Self_Emp Matric \n", "19814 North NaN Ownhouse Officer3 Post-Grad \n", "19987 North NaN Ownhouse Self_Emp Post-Grad \n", "37075 North 0.0 Ownhouse Self_Emp Matric \n", "40396 North NaN Ownhouse Self_Emp Post-Grad \n", "43095 North 820.0 Ownhouse Officer1 Post-Grad \n", "44485 North NaN Ownhouse Officer3 Post-Grad \n", "66276 Central NaN Ownhouse Self_Emp Professional \n", "67173 Central NaN Rented Non-officer Professional \n", "82357 Central NaN Ownhouse Self_Emp Professional \n", "99854 Central NaN Ownhouse Non-officer Graduate \n", "105018 West NaN Rented Self_Emp Graduate \n", "106795 West NaN Rented Non-officer Graduate \n", "119531 West NaN Rented Non-officer Graduate \n", "121833 East NaN Rented Officer3 Post-Grad \n", "123531 East 1000.0 Rented Officer3 Post-Grad \n", "124731 East NaN Rented Officer3 Post-Grad \n", "126176 East NaN Rented Self_Emp Post-Grad \n", "127101 East NaN Rented Officer3 Post-Grad \n", "127270 East NaN 
Rented Officer1 Post-Grad \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio MonthlyIncome.1 \\\n", "7920 0.0 0.0 820.0 \n", "19814 0.0 0.0 NaN \n", "19987 0.0 0.0 NaN \n", "37075 0.0 0.0 0.0 \n", "40396 0.0 0.0 NaN \n", "43095 0.0 0.0 820.0 \n", "44485 98.0 0.0 NaN \n", "66276 0.0 0.0 NaN \n", "67173 0.0 0.0 NaN \n", "82357 0.0 0.0 NaN \n", "99854 0.0 0.0 NaN \n", "105018 0.0 0.0 NaN \n", "106795 0.0 0.0 NaN \n", "119531 0.0 0.0 NaN \n", "121833 0.0 0.0 NaN \n", "123531 0.0 0.0 1000.0 \n", "124731 98.0 0.0 NaN \n", "126176 0.0 0.0 NaN \n", "127101 0.0 0.0 NaN \n", "127270 0.0 0.0 NaN \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "7920 1.0 0.0 \n", "19814 2.0 0.0 \n", "19987 0.0 0.0 \n", "37075 1.0 0.0 \n", "40396 5.0 0.0 \n", "43095 1.0 0.0 \n", "44485 0.0 98.0 \n", "66276 3.0 0.0 \n", "67173 4.0 0.0 \n", "82357 1.0 0.0 \n", "99854 0.0 0.0 \n", "105018 4.0 0.0 \n", "106795 1.0 0.0 \n", "119531 1.0 0.0 \n", "121833 1.0 0.0 \n", "123531 1.0 0.0 \n", "124731 0.0 98.0 \n", "126176 0.0 0.0 \n", "127101 1.0 0.0 \n", "127270 0.0 0.0 \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "7920 0.0 0.0 \n", "19814 0.0 0.0 \n", "19987 0.0 0.0 \n", "37075 0.0 0.0 \n", "40396 0.0 0.0 \n", "43095 0.0 0.0 \n", "44485 0.0 98.0 \n", "66276 0.0 0.0 \n", "67173 0.0 0.0 \n", "82357 0.0 0.0 \n", "99854 0.0 0.0 \n", "105018 0.0 0.0 \n", "106795 0.0 0.0 \n", "119531 0.0 0.0 \n", "121833 0.0 0.0 \n", "123531 0.0 0.0 \n", "124731 0.0 98.0 \n", "126176 0.0 0.0 \n", "127101 0.0 0.0 \n", "127270 0.0 0.0 \n", "\n", " NumberOfDependents Good_Bad \n", "7920 0.0 Good \n", "19814 0.0 Good \n", "19987 0.0 Good \n", "37075 0.0 Good \n", "40396 0.0 Good \n", "43095 0.0 Good \n", "44485 0.0 Good \n", "66276 0.0 Good \n", "67173 0.0 Good \n", "82357 0.0 Good \n", "99854 0.0 Good \n", "105018 NaN Good \n", "106795 0.0 Good \n", "119531 0.0 Good \n", "121833 0.0 Good \n", "123531 0.0 Good \n", "124731 0.0 Good \n", "126176 0.0 Good 
\n", "127101 0.0 Good \n", "127270 0.0 Good " ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageGenderRegionMonthlyIncomeRented_OwnHouseOccupationEducationNumberOfTime30-59DaysPastDueNotWorseDebtRatioMonthlyIncome.1NumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependentsGood_Bad
792001.022.0MaleSouth820.0OwnhouseSelf_EmpMatric0.00.0820.01.00.00.00.00.0Good
1981401.068.0FemaleNorthNaNOwnhouseOfficer3Post-Grad0.00.0NaN2.00.00.00.00.0Good
1998701.037.0FemaleNorthNaNOwnhouseSelf_EmpPost-Grad0.00.0NaN0.00.00.00.00.0Good
3707501.023.0MaleNorth0.0OwnhouseSelf_EmpMatric0.00.00.01.00.00.00.00.0Good
4039600.046.0FemaleNorthNaNOwnhouseSelf_EmpPost-Grad0.00.0NaN5.00.00.00.00.0Good
4309501.022.0MaleNorth820.0OwnhouseOfficer1Post-Grad0.00.0820.01.00.00.00.00.0Good
4448501.022.0FemaleNorthNaNOwnhouseOfficer3Post-Grad98.00.0NaN0.098.00.098.00.0Good
6627600.082.0MaleCentralNaNOwnhouseSelf_EmpProfessional0.00.0NaN3.00.00.00.00.0Good
6717300.087.0MaleCentralNaNRentedNon-officerProfessional0.00.0NaN4.00.00.00.00.0Good
8235700.024.0MaleCentralNaNOwnhouseSelf_EmpProfessional0.00.0NaN1.00.00.00.00.0Good
9985401.058.0MaleCentralNaNOwnhouseNon-officerGraduate0.00.0NaN0.00.00.00.00.0Good
10501800.087.0FemaleWestNaNRentedSelf_EmpGraduate0.00.0NaN4.00.00.00.0NaNGood
10679500.022.0MaleWestNaNRentedNon-officerGraduate0.00.0NaN1.00.00.00.00.0Good
11953100.022.0MaleWestNaNRentedNon-officerGraduate0.00.0NaN1.00.00.00.00.0Good
12183301.063.0FemaleEastNaNRentedOfficer3Post-Grad0.00.0NaN1.00.00.00.00.0Good
12353101.022.0FemaleEast1000.0RentedOfficer3Post-Grad0.00.01000.01.00.00.00.00.0Good
12473101.034.0FemaleEastNaNRentedOfficer3Post-Grad98.00.0NaN0.098.00.098.00.0Good
12617601.055.0FemaleEastNaNRentedSelf_EmpPost-Grad0.00.0NaN0.00.00.00.00.0Good
12710101.063.0FemaleEastNaNRentedOfficer3Post-Grad0.00.0NaN1.00.00.00.00.0Good
12727001.032.0FemaleEastNaNRentedOfficer1Post-Grad0.00.0NaN0.00.00.00.00.0Good
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 12 } ] }, { "cell_type": "code", "source": [ "cs.drop_duplicates(inplace = True)" ], "metadata": { "id": "PAFIVsdECbG0" }, "execution_count": 39, "outputs": [] }, { "cell_type": "code", "source": [ "cs[cs.duplicated()]" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 131 }, "id": "AyInKX6MFEdT", "outputId": "af5b0727-87ca-4c5b-aac3-852ceeb1f46b" }, "execution_count": 40, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "Empty DataFrame\n", "Columns: [NPA Status, RevolvingUtilizationOfUnsecuredLines, age, Gender, Region, MonthlyIncome, Rented_OwnHouse, Occupation, Education, NumberOfTime30-59DaysPastDueNotWorse, DebtRatio, MonthlyIncome.1, NumberOfOpenCreditLinesAndLoans, NumberOfTimes90DaysLate, NumberRealEstateLoansOrLines, NumberOfTime60-89DaysPastDueNotWorse, NumberOfDependents, Good_Bad]\n", "Index: []" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageGenderRegionMonthlyIncomeRented_OwnHouseOccupationEducationNumberOfTime30-59DaysPastDueNotWorseDebtRatioMonthlyIncome.1NumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependentsGood_Bad
\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 40 } ] }, { "cell_type": "code", "source": [ "cs.drop(['MonthlyIncome.1'],axis = 1, inplace = True)\n", "cs" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 505 }, "id": "iaI_zbkAHPK_", "outputId": "6049399a-bcac-4935-bc1c-237060871ae8" }, "execution_count": 41, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age Gender Region \\\n", "0 1 0.766127 45.0 Male South \n", "1 0 0.957151 40.0 Female South \n", "2 0 0.658180 38.0 Female South \n", "3 0 0.233810 30.0 Female South \n", "4 0 0.907239 49.0 Male South \n", "... ... ... ... ... ... \n", "149997 0 0.246044 58.0 Male North \n", "149998 0 0.000000 30.0 Male North \n", "149999 0 0.850283 64.0 Male North \n", "150000 0 NaN NaN NaN NaN \n", "150001 1 NaN NaN NaN NaN \n", "\n", " MonthlyIncome Rented_OwnHouse Occupation Education \\\n", "0 9120.0 Ownhouse Self_Emp Matric \n", "1 2600.0 Ownhouse Self_Emp Graduate \n", "2 3042.0 Ownhouse Self_Emp PhD \n", "3 3300.0 Ownhouse Self_Emp Professional \n", "4 63588.0 Ownhouse Self_Emp Post-Grad \n", "... ... ... ... ... \n", "149997 NaN Rented Officer2 Professional \n", "149998 5716.0 Rented Non-officer Professional \n", "149999 8158.0 Ownhouse Self_Emp Professional \n", "150000 NaN NaN NaN NaN \n", "150001 NaN NaN NaN NaN \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio \\\n", "0 2.0 0.802982 \n", "1 0.0 0.121876 \n", "2 1.0 0.085113 \n", "3 0.0 0.036050 \n", "4 1.0 0.024926 \n", "... ... ... \n", "149997 0.0 3870.000000 \n", "149998 0.0 0.000000 \n", "149999 0.0 0.249908 \n", "150000 NaN NaN \n", "150001 NaN NaN \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "... ... ... 
\n", "149997 18.0 0.0 \n", "149998 4.0 0.0 \n", "149999 8.0 0.0 \n", "150000 NaN NaN \n", "150001 NaN NaN \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "0 6.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 1.0 0.0 \n", "... ... ... \n", "149997 1.0 0.0 \n", "149998 0.0 0.0 \n", "149999 2.0 0.0 \n", "150000 NaN NaN \n", "150001 NaN NaN \n", "\n", " NumberOfDependents Good_Bad \n", "0 2.0 Bad \n", "1 1.0 Good \n", "2 0.0 Good \n", "3 0.0 Good \n", "4 0.0 Good \n", "... ... ... \n", "149997 0.0 Good \n", "149998 0.0 Good \n", "149999 0.0 Good \n", "150000 NaN NaN \n", "150001 NaN NaN \n", "\n", "[149982 rows x 17 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageGenderRegionMonthlyIncomeRented_OwnHouseOccupationEducationNumberOfTime30-59DaysPastDueNotWorseDebtRatioNumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependentsGood_Bad
010.76612745.0MaleSouth9120.0OwnhouseSelf_EmpMatric2.00.80298213.00.06.00.02.0Bad
100.95715140.0FemaleSouth2600.0OwnhouseSelf_EmpGraduate0.00.1218764.00.00.00.01.0Good
200.65818038.0FemaleSouth3042.0OwnhouseSelf_EmpPhD1.00.0851132.01.00.00.00.0Good
300.23381030.0FemaleSouth3300.0OwnhouseSelf_EmpProfessional0.00.0360505.00.00.00.00.0Good
400.90723949.0MaleSouth63588.0OwnhouseSelf_EmpPost-Grad1.00.0249267.00.01.00.00.0Good
......................................................
14999700.24604458.0MaleNorthNaNRentedOfficer2Professional0.03870.00000018.00.01.00.00.0Good
14999800.00000030.0MaleNorth5716.0RentedNon-officerProfessional0.00.0000004.00.00.00.00.0Good
14999900.85028364.0MaleNorth8158.0OwnhouseSelf_EmpProfessional0.00.2499088.00.02.00.00.0Good
1500000NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
1500011NaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaNNaN
\n", "

149982 rows × 17 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 41 } ] }, { "cell_type": "code", "source": [ "cs.dtypes" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "d9t-PEdZMLS2", "outputId": "ecc72043-6434-4c2a-8693-767e257f81ab" }, "execution_count": 42, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "NPA Status int64\n", "RevolvingUtilizationOfUnsecuredLines float64\n", "age float64\n", "Gender object\n", "Region object\n", "MonthlyIncome float64\n", "Rented_OwnHouse object\n", "Occupation object\n", "Education object\n", "NumberOfTime30-59DaysPastDueNotWorse float64\n", "DebtRatio float64\n", "NumberOfOpenCreditLinesAndLoans float64\n", "NumberOfTimes90DaysLate float64\n", "NumberRealEstateLoansOrLines float64\n", "NumberOfTime60-89DaysPastDueNotWorse float64\n", "NumberOfDependents float64\n", "Good_Bad object\n", "dtype: object" ] }, "metadata": {}, "execution_count": 42 } ] }, { "cell_type": "markdown", "source": [ "# **To Check for NULL Values And Remove Them**" ], "metadata": { "id": "tFLah5C_GOpY" } }, { "cell_type": "code", "source": [ "cs.isnull().sum()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "K5kqoDj1FUxx", "outputId": "b14a68bd-98d4-48f1-a0b8-d48ce040ecbe" }, "execution_count": 43, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "NPA Status 0\n", "RevolvingUtilizationOfUnsecuredLines 2\n", "age 2\n", "Gender 2\n", "Region 2\n", "MonthlyIncome 29717\n", "Rented_OwnHouse 2\n", "Occupation 2\n", "Education 2\n", "NumberOfTime30-59DaysPastDueNotWorse 2\n", "DebtRatio 2\n", "NumberOfOpenCreditLinesAndLoans 2\n", "NumberOfTimes90DaysLate 2\n", "NumberRealEstateLoansOrLines 2\n", "NumberOfTime60-89DaysPastDueNotWorse 2\n", "NumberOfDependents 3925\n", "Good_Bad 2\n", "dtype: int64" ] }, "metadata": {}, "execution_count": 43 } ] }, { "cell_type": "markdown", "source": [ "**Use Seaborn Heatmap**" ], "metadata": { "id": "K7CH5mijILgS" } }, { "cell_type":
"code", "source": [ "sns.heatmap(cs.isnull())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 496 }, "id": "H74_8-sgG4kp", "outputId": "6b34c8d5-7972-4904-9b37-9d86dc79d9eb" }, "execution_count": 44, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": {}, "execution_count": 44 }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": { "needs_background": "light" } } ] }, { "cell_type": "code", "source": [ "cs1 = cs.fillna(cs.mode().iloc[0])" ], "metadata": { "id": "Q0eSaotfvIzU" }, "execution_count": 45, "outputs": [] }, { "cell_type": "code", "source": [ "\n", "cs1" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 505 }, "id": "mJSL8XOevcCG", "outputId": "4b503854-d885-4497-abcd-7f58c87a1232" }, "execution_count": 48, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age Gender \\\n", "0 1 0.766127 45.0 Male \n", "1 0 0.957151 40.0 Female \n", "2 0 0.658180 38.0 Female \n", "3 0 0.233810 30.0 Female \n", "4 0 0.907239 49.0 Male \n", "... ... ... ... ... \n", "149997 0 0.246044 58.0 Male \n", "149998 0 0.000000 30.0 Male \n", "149999 0 0.850283 64.0 Male \n", "150000 0 0.000000 49.0 Male \n", "150001 1 0.000000 49.0 Male \n", "\n", " Region MonthlyIncome Rented_OwnHouse Occupation Education \\\n", "0 South 9120.0 Ownhouse Self_Emp Matric \n", "1 South 2600.0 Ownhouse Self_Emp Graduate \n", "2 South 3042.0 Ownhouse Self_Emp PhD \n", "3 South 3300.0 Ownhouse Self_Emp Professional \n", "4 South 63588.0 Ownhouse Self_Emp Post-Grad \n", "... ... ... ... ... ... \n", "149997 North 5000.0 Rented Officer2 Professional \n", "149998 North 5716.0 Rented Non-officer Professional \n", "149999 North 8158.0 Ownhouse Self_Emp Professional \n", "150000 Central 5000.0 Ownhouse Self_Emp Professional \n", "150001 Central 5000.0 Ownhouse Self_Emp Professional \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio \\\n", "0 2.0 0.802982 \n", "1 0.0 0.121876 \n", "2 1.0 0.085113 \n", "3 0.0 0.036050 \n", "4 1.0 0.024926 \n", "... ... ... 
\n", "149997 0.0 3870.000000 \n", "149998 0.0 0.000000 \n", "149999 0.0 0.249908 \n", "150000 0.0 0.000000 \n", "150001 0.0 0.000000 \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "... ... ... \n", "149997 18.0 0.0 \n", "149998 4.0 0.0 \n", "149999 8.0 0.0 \n", "150000 6.0 0.0 \n", "150001 6.0 0.0 \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "0 6.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 1.0 0.0 \n", "... ... ... \n", "149997 1.0 0.0 \n", "149998 0.0 0.0 \n", "149999 2.0 0.0 \n", "150000 0.0 0.0 \n", "150001 0.0 0.0 \n", "\n", " NumberOfDependents Good_Bad \n", "0 2.0 Bad \n", "1 1.0 Good \n", "2 0.0 Good \n", "3 0.0 Good \n", "4 0.0 Good \n", "... ... ... \n", "149997 0.0 Good \n", "149998 0.0 Good \n", "149999 0.0 Good \n", "150000 0.0 Good \n", "150001 0.0 Good \n", "\n", "[149982 rows x 17 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageGenderRegionMonthlyIncomeRented_OwnHouseOccupationEducationNumberOfTime30-59DaysPastDueNotWorseDebtRatioNumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependentsGood_Bad
010.76612745.0MaleSouth9120.0OwnhouseSelf_EmpMatric2.00.80298213.00.06.00.02.0Bad
100.95715140.0FemaleSouth2600.0OwnhouseSelf_EmpGraduate0.00.1218764.00.00.00.01.0Good
200.65818038.0FemaleSouth3042.0OwnhouseSelf_EmpPhD1.00.0851132.01.00.00.00.0Good
300.23381030.0FemaleSouth3300.0OwnhouseSelf_EmpProfessional0.00.0360505.00.00.00.00.0Good
400.90723949.0MaleSouth63588.0OwnhouseSelf_EmpPost-Grad1.00.0249267.00.01.00.00.0Good
......................................................
14999700.24604458.0MaleNorth5000.0RentedOfficer2Professional0.03870.00000018.00.01.00.00.0Good
14999800.00000030.0MaleNorth5716.0RentedNon-officerProfessional0.00.0000004.00.00.00.00.0Good
14999900.85028364.0MaleNorth8158.0OwnhouseSelf_EmpProfessional0.00.2499088.00.02.00.00.0Good
15000000.00000049.0MaleCentral5000.0OwnhouseSelf_EmpProfessional0.00.0000006.00.00.00.00.0Good
15000110.00000049.0MaleCentral5000.0OwnhouseSelf_EmpProfessional0.00.0000006.00.00.00.00.0Good
\n", "

149982 rows × 17 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 48 } ] }, { "cell_type": "code", "source": [ "# Count of missing values in each column of cs1\n", "cs1.isnull().sum(axis=0)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "R0mQzM_HyXDK", "outputId": "4df06f80-9efd-408c-a369-e2c279a03d68" }, "execution_count": 49, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "NPA Status 0\n", "RevolvingUtilizationOfUnsecuredLines 0\n", "age 0\n", "Gender 0\n", "Region 0\n", "MonthlyIncome 0\n", "Rented_OwnHouse 0\n", "Occupation 0\n", "Education 0\n", "NumberOfTime30-59DaysPastDueNotWorse 0\n", "DebtRatio 0\n", "NumberOfOpenCreditLinesAndLoans 0\n", "NumberOfTimes90DaysLate 0\n", "NumberRealEstateLoansOrLines 0\n", "NumberOfTime60-89DaysPastDueNotWorse 0\n", "NumberOfDependents 0\n", "Good_Bad 0\n", "dtype: int64" ] }, "metadata": {}, "execution_count": 49 } ] }, { "cell_type": "code", "source": [ "# How often each 30-59 days past-due count occurs, listed in ascending order of the count\n", "cs1['NumberOfTime30-59DaysPastDueNotWorse'].value_counts(sort=False).sort_index()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "FxRspXlAylWB", "outputId": "e0975616-fde0-4c29-a858-fec44d0a9ca5" }, "execution_count": 52, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0 126002\n", "1.0 16033\n", "2.0 4598\n", "3.0 1754\n", "4.0 747\n", "5.0 342\n", "6.0 140\n", "7.0 54\n", "8.0 25\n", "9.0 12\n", "10.0 4\n", "11.0 1\n", "12.0 2\n", "13.0 1\n", "96.0 5\n", "98.0 262\n", "Name: NumberOfTime30-59DaysPastDueNotWorse, dtype: int64" ] }, "metadata": {}, "execution_count": 52 } ] }, { "cell_type": "code", "source": [ "# Same frequency table for the 90-days-late counter\n", "cs1['NumberOfTimes90DaysLate'].value_counts(sort=False).sort_index()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "gJ5Kkfht0k0y", "outputId": "c98fce04-cf3c-47c0-a744-82c1b5a1db9c" }, "execution_count": 54, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0 141646\n", "1.0 5243\n", "2.0 1555\n", "3.0 667\n", "4.0 291\n", "5.0 131\n", "6.0 80\n", "7.0 38\n", "8.0 21\n", "9.0 19\n", "10.0 
8\n", "11.0 5\n", "12.0 2\n", "13.0 4\n", "14.0 2\n", "15.0 2\n", "17.0 1\n", "96.0 5\n", "98.0 262\n", "Name: NumberOfTimes90DaysLate, dtype: int64" ] }, "metadata": {}, "execution_count": 54 } ] }, { "cell_type": "code", "source": [ "cs1['NumberOfTime60-89DaysPastDueNotWorse'].value_counts().sort_index()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "ByKC7gCf1LAs", "outputId": "2a19732e-0519-4e3b-aa0c-af36020a2bf6" }, "execution_count": 55, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0 142380\n", "1.0 5731\n", "2.0 1118\n", "3.0 318\n", "4.0 105\n", "5.0 34\n", "6.0 16\n", "7.0 9\n", "8.0 2\n", "9.0 1\n", "11.0 1\n", "96.0 5\n", "98.0 262\n", "Name: NumberOfTime60-89DaysPastDueNotWorse, dtype: int64" ] }, "metadata": {}, "execution_count": 55 } ] }, { "cell_type": "code", "source": [ "# 96 and 98 look like placeholder codes rather than real delinquency counts: they show up\n", "# with identical frequencies (5 and 262) in all three past-due columns, far above the next\n", "# largest values. Name them explicitly instead of relying on a bare '> 17' threshold, and\n", "# check every past-due column rather than only NumberOfTimes90DaysLate.\n", "SENTINEL_CODES = [96, 98]\n", "PAST_DUE_COLS = [\n", "    'NumberOfTime30-59DaysPastDueNotWorse',\n", "    'NumberOfTime60-89DaysPastDueNotWorse',\n", "    'NumberOfTimes90DaysLate',\n", "]\n", "# Boolean mask keeps the filter positional, so duplicate index labels cannot drop extra rows\n", "has_sentinel = cs1[PAST_DUE_COLS].isin(SENTINEL_CODES).any(axis=1)\n", "cs2 = cs1.loc[~has_sentinel].copy()" ], "metadata": { "id": "qum-odUuzLuZ" }, "execution_count": 59, "outputs": [] }, { "cell_type": "code", "source": [ "cs2['NumberOfTime60-89DaysPastDueNotWorse'].value_counts().sort_index()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "MLkE1Ab32mYL", "outputId": "f07646dd-b677-4227-9f00-84c1bf1e790c" }, "execution_count": 61, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.0 142380\n", "1.0 5731\n", "2.0 1118\n", "3.0 318\n", "4.0 105\n", "5.0 34\n", "6.0 16\n", "7.0 9\n", "8.0 2\n", "9.0 1\n", "11.0 1\n", "Name: NumberOfTime60-89DaysPastDueNotWorse, dtype: int64" ] }, "metadata": {}, "execution_count": 61 } ] }, { "cell_type": "code", "source": [ "cs2" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 505 }, "id": "BzRE_s9h281a", "outputId": "287cd705-c9d7-4d8b-e480-54696d649690" }, "execution_count": 62, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age Gender \\\n", "0 1 0.766127 45.0 Male 
\n", "1 0 0.957151 40.0 Female \n", "2 0 0.658180 38.0 Female \n", "3 0 0.233810 30.0 Female \n", "4 0 0.907239 49.0 Male \n", "... ... ... ... ... \n", "149997 0 0.246044 58.0 Male \n", "149998 0 0.000000 30.0 Male \n", "149999 0 0.850283 64.0 Male \n", "150000 0 0.000000 49.0 Male \n", "150001 1 0.000000 49.0 Male \n", "\n", " Region MonthlyIncome Rented_OwnHouse Occupation Education \\\n", "0 South 9120.0 Ownhouse Self_Emp Matric \n", "1 South 2600.0 Ownhouse Self_Emp Graduate \n", "2 South 3042.0 Ownhouse Self_Emp PhD \n", "3 South 3300.0 Ownhouse Self_Emp Professional \n", "4 South 63588.0 Ownhouse Self_Emp Post-Grad \n", "... ... ... ... ... ... \n", "149997 North 5000.0 Rented Officer2 Professional \n", "149998 North 5716.0 Rented Non-officer Professional \n", "149999 North 8158.0 Ownhouse Self_Emp Professional \n", "150000 Central 5000.0 Ownhouse Self_Emp Professional \n", "150001 Central 5000.0 Ownhouse Self_Emp Professional \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio \\\n", "0 2.0 0.802982 \n", "1 0.0 0.121876 \n", "2 1.0 0.085113 \n", "3 0.0 0.036050 \n", "4 1.0 0.024926 \n", "... ... ... \n", "149997 0.0 3870.000000 \n", "149998 0.0 0.000000 \n", "149999 0.0 0.249908 \n", "150000 0.0 0.000000 \n", "150001 0.0 0.000000 \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "... ... ... \n", "149997 18.0 0.0 \n", "149998 4.0 0.0 \n", "149999 8.0 0.0 \n", "150000 6.0 0.0 \n", "150001 6.0 0.0 \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "0 6.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 1.0 0.0 \n", "... ... ... \n", "149997 1.0 0.0 \n", "149998 0.0 0.0 \n", "149999 2.0 0.0 \n", "150000 0.0 0.0 \n", "150001 0.0 0.0 \n", "\n", " NumberOfDependents Good_Bad \n", "0 2.0 Bad \n", "1 1.0 Good \n", "2 0.0 Good \n", "3 0.0 Good \n", "4 0.0 Good \n", "... ... ... 
\n", "149997 0.0 Good \n", "149998 0.0 Good \n", "149999 0.0 Good \n", "150000 0.0 Good \n", "150001 0.0 Good \n", "\n", "[149715 rows x 17 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageGenderRegionMonthlyIncomeRented_OwnHouseOccupationEducationNumberOfTime30-59DaysPastDueNotWorseDebtRatioNumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependentsGood_Bad
010.76612745.0MaleSouth9120.0OwnhouseSelf_EmpMatric2.00.80298213.00.06.00.02.0Bad
100.95715140.0FemaleSouth2600.0OwnhouseSelf_EmpGraduate0.00.1218764.00.00.00.01.0Good
200.65818038.0FemaleSouth3042.0OwnhouseSelf_EmpPhD1.00.0851132.01.00.00.00.0Good
300.23381030.0FemaleSouth3300.0OwnhouseSelf_EmpProfessional0.00.0360505.00.00.00.00.0Good
400.90723949.0MaleSouth63588.0OwnhouseSelf_EmpPost-Grad1.00.0249267.00.01.00.00.0Good
......................................................
14999700.24604458.0MaleNorth5000.0RentedOfficer2Professional0.03870.00000018.00.01.00.00.0Good
14999800.00000030.0MaleNorth5716.0RentedNon-officerProfessional0.00.0000004.00.00.00.00.0Good
14999900.85028364.0MaleNorth8158.0OwnhouseSelf_EmpProfessional0.00.2499088.00.02.00.00.0Good
15000000.00000049.0MaleCentral5000.0OwnhouseSelf_EmpProfessional0.00.0000006.00.00.00.00.0Good
15000110.00000049.0MaleCentral5000.0OwnhouseSelf_EmpProfessional0.00.0000006.00.00.00.00.0Good
\n", "

149715 rows × 17 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 62 } ] }, { "cell_type": "code", "source": [ "# Heatmap of the null mask of cs2: every cell of the frame is drawn as missing / not missing\n", "sns.heatmap(data=cs2.isnull())" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 496 }, "id": "1BNGVJbW3B6x", "outputId": "659303bd-c8b6-4900-eab1-5c804a84164b" }, "execution_count": 64, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "" ] }, "metadata": {}, "execution_count": 64 }, { "output_type": "display_data", "data": { "text/plain": [ "
" ], "image/png": "\n" }, "metadata": { "needs_background": "light" } } ] }, { "cell_type": "code", "source": [ "cs2" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 505 }, "id": "wzQ-pyD-6twG", "outputId": "98ef8fbe-31f0-4255-93b5-8cd34f5734f9" }, "execution_count": 71, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age Gender \\\n", "0 1 0.766127 45.0 Male \n", "1 0 0.957151 40.0 Female \n", "2 0 0.658180 38.0 Female \n", "3 0 0.233810 30.0 Female \n", "4 0 0.907239 49.0 Male \n", "... ... ... ... ... \n", "149997 0 0.246044 58.0 Male \n", "149998 0 0.000000 30.0 Male \n", "149999 0 0.850283 64.0 Male \n", "150000 0 0.000000 49.0 Male \n", "150001 1 0.000000 49.0 Male \n", "\n", " Region MonthlyIncome Rented_OwnHouse Occupation Education \\\n", "0 South 9120.0 Ownhouse Self_Emp Matric \n", "1 South 2600.0 Ownhouse Self_Emp Graduate \n", "2 South 3042.0 Ownhouse Self_Emp PhD \n", "3 South 3300.0 Ownhouse Self_Emp Professional \n", "4 South 63588.0 Ownhouse Self_Emp Post-Grad \n", "... ... ... ... ... ... \n", "149997 North 5000.0 Rented Officer2 Professional \n", "149998 North 5716.0 Rented Non-officer Professional \n", "149999 North 8158.0 Ownhouse Self_Emp Professional \n", "150000 Central 5000.0 Ownhouse Self_Emp Professional \n", "150001 Central 5000.0 Ownhouse Self_Emp Professional \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio \\\n", "0 2.0 0.802982 \n", "1 0.0 0.121876 \n", "2 1.0 0.085113 \n", "3 0.0 0.036050 \n", "4 1.0 0.024926 \n", "... ... ... \n", "149997 0.0 3870.000000 \n", "149998 0.0 0.000000 \n", "149999 0.0 0.249908 \n", "150000 0.0 0.000000 \n", "150001 0.0 0.000000 \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "... ... ... 
\n", "149997 18.0 0.0 \n", "149998 4.0 0.0 \n", "149999 8.0 0.0 \n", "150000 6.0 0.0 \n", "150001 6.0 0.0 \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "0 6.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 1.0 0.0 \n", "... ... ... \n", "149997 1.0 0.0 \n", "149998 0.0 0.0 \n", "149999 2.0 0.0 \n", "150000 0.0 0.0 \n", "150001 0.0 0.0 \n", "\n", " NumberOfDependents Good_Bad \n", "0 2.0 Bad \n", "1 1.0 Good \n", "2 0.0 Good \n", "3 0.0 Good \n", "4 0.0 Good \n", "... ... ... \n", "149997 0.0 Good \n", "149998 0.0 Good \n", "149999 0.0 Good \n", "150000 0.0 Good \n", "150001 0.0 Good \n", "\n", "[149715 rows x 17 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageGenderRegionMonthlyIncomeRented_OwnHouseOccupationEducationNumberOfTime30-59DaysPastDueNotWorseDebtRatioNumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependentsGood_Bad
010.76612745.0MaleSouth9120.0OwnhouseSelf_EmpMatric2.00.80298213.00.06.00.02.0Bad
100.95715140.0FemaleSouth2600.0OwnhouseSelf_EmpGraduate0.00.1218764.00.00.00.01.0Good
200.65818038.0FemaleSouth3042.0OwnhouseSelf_EmpPhD1.00.0851132.01.00.00.00.0Good
300.23381030.0FemaleSouth3300.0OwnhouseSelf_EmpProfessional0.00.0360505.00.00.00.00.0Good
400.90723949.0MaleSouth63588.0OwnhouseSelf_EmpPost-Grad1.00.0249267.00.01.00.00.0Good
......................................................
14999700.24604458.0MaleNorth5000.0RentedOfficer2Professional0.03870.00000018.00.01.00.00.0Good
14999800.00000030.0MaleNorth5716.0RentedNon-officerProfessional0.00.0000004.00.00.00.00.0Good
14999900.85028364.0MaleNorth8158.0OwnhouseSelf_EmpProfessional0.00.2499088.00.02.00.00.0Good
15000000.00000049.0MaleCentral5000.0OwnhouseSelf_EmpProfessional0.00.0000006.00.00.00.00.0Good
15000110.00000049.0MaleCentral5000.0OwnhouseSelf_EmpProfessional0.00.0000006.00.00.00.00.0Good
\n", "

149715 rows × 17 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 71 } ] }, { "cell_type": "code", "source": [ "# One-hot encode Gender; drop_first removes the first level, leaving a single 'Male' indicator\n", "gender = pd.get_dummies(data=cs2['Gender'], drop_first=True)\n", "gender" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "5NtlKb5N38KG", "outputId": "ec9753e1-354f-43e1-ccdd-6530a62acd4b" }, "execution_count": 72, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Male\n", "0 1\n", "1 0\n", "2 0\n", "3 0\n", "4 1\n", "... ...\n", "149997 1\n", "149998 1\n", "149999 1\n", "150000 1\n", "150001 1\n", "\n", "[149715 rows x 1 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Male
01
10
20
30
41
......
1499971
1499981
1499991
1500001
1500011
\n", "

149715 rows × 1 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 72 } ] }, { "cell_type": "code", "source": [ "# One-hot encode Region; with drop_first the first level has no column of its own,\n", "# so a row of all zeros (e.g. the 'Central' rows) stands for that dropped level\n", "region = pd.get_dummies(data=cs2['Region'], drop_first=True)\n", "region" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "p5WARv_T7Unq", "outputId": "9c0b48e1-2f43-47fa-b8d0-575d364d7d5c" }, "execution_count": 73, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " East North South West\n", "0 0 0 1 0\n", "1 0 0 1 0\n", "2 0 0 1 0\n", "3 0 0 1 0\n", "4 0 0 1 0\n", "... ... ... ... ...\n", "149997 0 1 0 0\n", "149998 0 1 0 0\n", "149999 0 1 0 0\n", "150000 0 0 0 0\n", "150001 0 0 0 0\n", "\n", "[149715 rows x 4 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
EastNorthSouthWest
00010
10010
20010
30010
40010
...............
1499970100
1499980100
1499990100
1500000000
1500010000
\n", "

149715 rows × 4 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 73 } ] }, { "cell_type": "code", "source": [ "# One-hot encode Rented_OwnHouse; only the 'Rented' indicator survives drop_first\n", "rent = pd.get_dummies(data=cs2['Rented_OwnHouse'], drop_first=True)\n", "rent" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "uZKLrFxi7mZt", "outputId": "4d5d108d-63b7-42ca-a75b-5d5ca1848be2" }, "execution_count": 74, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Rented\n", "0 0\n", "1 0\n", "2 0\n", "3 0\n", "4 0\n", "... ...\n", "149997 1\n", "149998 1\n", "149999 0\n", "150000 0\n", "150001 0\n", "\n", "[149715 rows x 1 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Rented
00
10
20
30
40
......
1499971
1499981
1499990
1500000
1500010
\n", "

149715 rows × 1 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 74 } ] }, { "cell_type": "code", "source": [ "# One-hot encode Occupation; the first level is dropped, so an all-zero row\n", "# (e.g. a 'Non-officer' row) represents that baseline\n", "occupation = pd.get_dummies(data=cs2['Occupation'], drop_first=True)\n", "occupation" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "5HAxYYmB79Ke", "outputId": "91ef3b85-2c30-4587-b179-53b7ba3920c8" }, "execution_count": 76, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Officer1 Officer2 Officer3 Self_Emp\n", "0 0 0 0 1\n", "1 0 0 0 1\n", "2 0 0 0 1\n", "3 0 0 0 1\n", "4 0 0 0 1\n", "... ... ... ... ...\n", "149997 0 1 0 0\n", "149998 0 0 0 0\n", "149999 0 0 0 1\n", "150000 0 0 0 1\n", "150001 0 0 0 1\n", "\n", "[149715 rows x 4 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Officer1Officer2Officer3Self_Emp
00001
10001
20001
30001
40001
...............
1499970100
1499980000
1499990001
1500000001
1500010001
\n", "

149715 rows × 4 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 76 } ] }, { "cell_type": "code", "source": [ "# One-hot encode Education; the first level is dropped, so an all-zero row\n", "# (e.g. a 'Graduate' row) represents that baseline\n", "edu = pd.get_dummies(data=cs2['Education'], drop_first=True)\n", "edu" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "pZ0QwMtP8OUj", "outputId": "63684aa1-6eb4-4a58-d731-a63d6a6975fd" }, "execution_count": 77, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Matric PhD Post-Grad Professional\n", "0 1 0 0 0\n", "1 0 0 0 0\n", "2 0 1 0 0\n", "3 0 0 0 1\n", "4 0 0 1 0\n", "... ... ... ... ...\n", "149997 0 0 0 1\n", "149998 0 0 0 1\n", "149999 0 0 0 1\n", "150000 0 0 0 1\n", "150001 0 0 0 1\n", "\n", "[149715 rows x 4 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
MatricPhDPost-GradProfessional
01000
10000
20100
30001
40010
...............
1499970001
1499980001
1499990001
1500000001
1500010001
\n", "

149715 rows × 4 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 77 } ] }, { "cell_type": "code", "source": [ "# One-hot encode the Good_Bad label; drop_first leaves one 'Good' column (1 = Good, 0 = Bad)\n", "goodbad = pd.get_dummies(data=cs2['Good_Bad'], drop_first=True)\n", "goodbad" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 424 }, "id": "J6Zh0Ynx8hPk", "outputId": "892befd3-37ea-4a9f-db18-f86a616b4c90" }, "execution_count": 78, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " Good\n", "0 0\n", "1 1\n", "2 1\n", "3 1\n", "4 1\n", "... ...\n", "149997 1\n", "149998 1\n", "149999 1\n", "150000 1\n", "150001 1\n", "\n", "[149715 rows x 1 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Good
00
11
21
31
41
......
1499971
1499981
1499991
1500001
1500011
\n", "

149715 rows × 1 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 78 } ] }, { "cell_type": "code", "source": [ "# Keep only the numeric columns of cs2: all six text columns are removed in a single\n", "# drop that returns a new frame, so cs2 itself is left untouched\n", "cs3 = cs2.drop(\n", "    columns=['Gender', 'Education', 'Occupation', 'Region', 'Rented_OwnHouse', 'Good_Bad']\n", ")\n", "cs3" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 505 }, "id": "QCeeKse2825j", "outputId": "9b006f77-63da-46ba-9135-9f7edad089f5" }, "execution_count": 81, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age MonthlyIncome \\\n", "0 1 0.766127 45.0 9120.0 \n", "1 0 0.957151 40.0 2600.0 \n", "2 0 0.658180 38.0 3042.0 \n", "3 0 0.233810 30.0 3300.0 \n", "4 0 0.907239 49.0 63588.0 \n", "... ... ... ... ... \n", "149997 0 0.246044 58.0 5000.0 \n", "149998 0 0.000000 30.0 5716.0 \n", "149999 0 0.850283 64.0 8158.0 \n", "150000 0 0.000000 49.0 5000.0 \n", "150001 1 0.000000 49.0 5000.0 \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio \\\n", "0 2.0 0.802982 \n", "1 0.0 0.121876 \n", "2 1.0 0.085113 \n", "3 0.0 0.036050 \n", "4 1.0 0.024926 \n", "... ... ... \n", "149997 0.0 3870.000000 \n", "149998 0.0 0.000000 \n", "149999 0.0 0.249908 \n", "150000 0.0 0.000000 \n", "150001 0.0 0.000000 \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "... ... ... \n", "149997 18.0 0.0 \n", "149998 4.0 0.0 \n", "149999 8.0 0.0 \n", "150000 6.0 0.0 \n", "150001 6.0 0.0 \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "0 6.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 1.0 0.0 \n", "... ... ... \n", "149997 1.0 0.0 \n", "149998 0.0 0.0 \n", "149999 2.0 0.0 \n", "150000 0.0 0.0 \n", "150001 0.0 0.0 \n", "\n", " NumberOfDependents \n", "0 2.0 \n", "1 1.0 \n", "2 0.0 \n", "3 0.0 \n", "4 0.0 \n", "... ... 
\n", "149997 0.0 \n", "149998 0.0 \n", "149999 0.0 \n", "150000 0.0 \n", "150001 0.0 \n", "\n", "[149715 rows x 11 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageMonthlyIncomeNumberOfTime30-59DaysPastDueNotWorseDebtRatioNumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorseNumberOfDependents
010.76612745.09120.02.00.80298213.00.06.00.02.0
100.95715140.02600.00.00.1218764.00.00.00.01.0
200.65818038.03042.01.00.0851132.01.00.00.00.0
300.23381030.03300.00.00.0360505.00.00.00.00.0
400.90723949.063588.01.00.0249267.00.01.00.00.0
....................................
14999700.24604458.05000.00.03870.00000018.00.01.00.00.0
14999800.00000030.05716.00.00.0000004.00.00.00.00.0
14999900.85028364.08158.00.00.2499088.00.02.00.00.0
15000000.00000049.05000.00.00.0000006.00.00.00.00.0
15000110.00000049.05000.00.00.0000006.00.00.00.00.0
\n", "

149715 rows × 11 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 81 } ] }, { "cell_type": "code", "source": [ "# Assemble the model table: numeric columns of cs2 followed by the one-hot frames.\n", "# The numeric part is taken from cs2 rather than from the previous cs3 so that the cell is\n", "# idempotent: concatenating onto cs3 itself would append every dummy column again on each\n", "# re-run, and cs3['Good'] would then return a DataFrame instead of a Series.\n", "cs3 = pd.concat(\n", "    [\n", "        cs2.drop(columns=['Gender', 'Education', 'Occupation', 'Region', 'Rented_OwnHouse', 'Good_Bad']),\n", "        gender, edu, region, goodbad, occupation, rent,\n", "    ],\n", "    axis=1,\n", ")\n", "cs3" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 505 }, "id": "o2vctHxR9YNw", "outputId": "8ebca3ae-c77a-4032-ee1e-02c470730744" }, "execution_count": 82, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age MonthlyIncome \\\n", "0 1 0.766127 45.0 9120.0 \n", "1 0 0.957151 40.0 2600.0 \n", "2 0 0.658180 38.0 3042.0 \n", "3 0 0.233810 30.0 3300.0 \n", "4 0 0.907239 49.0 63588.0 \n", "... ... ... ... ... \n", "149997 0 0.246044 58.0 5000.0 \n", "149998 0 0.000000 30.0 5716.0 \n", "149999 0 0.850283 64.0 8158.0 \n", "150000 0 0.000000 49.0 5000.0 \n", "150001 1 0.000000 49.0 5000.0 \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio \\\n", "0 2.0 0.802982 \n", "1 0.0 0.121876 \n", "2 1.0 0.085113 \n", "3 0.0 0.036050 \n", "4 1.0 0.024926 \n", "... ... ... \n", "149997 0.0 3870.000000 \n", "149998 0.0 0.000000 \n", "149999 0.0 0.249908 \n", "150000 0.0 0.000000 \n", "150001 0.0 0.000000 \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "... ... ... \n", "149997 18.0 0.0 \n", "149998 4.0 0.0 \n", "149999 8.0 0.0 \n", "150000 6.0 0.0 \n", "150001 6.0 0.0 \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse \\\n", "0 6.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 0.0 0.0 \n", "4 1.0 0.0 \n", "... ... ... \n", "149997 1.0 0.0 \n", "149998 0.0 0.0 \n", "149999 2.0 0.0 \n", "150000 0.0 0.0 \n", "150001 0.0 0.0 \n", "\n", " ... East North South West Good Officer1 Officer2 Officer3 \\\n", "0 ... 0 0 1 0 0 0 0 0 \n", "1 ... 0 0 1 0 1 0 0 0 \n", "2 ... 0 0 1 0 1 0 0 0 \n", "3 ... 0 0 1 0 1 0 0 0 \n", "4 ... 0 0 1 0 1 0 0 0 \n", "... ... ... ... ... ... ... ... 
... ... \n", "149997 ... 0 1 0 0 1 0 1 0 \n", "149998 ... 0 1 0 0 1 0 0 0 \n", "149999 ... 0 1 0 0 1 0 0 0 \n", "150000 ... 0 0 0 0 1 0 0 0 \n", "150001 ... 0 0 0 0 1 0 0 0 \n", "\n", " Self_Emp Rented \n", "0 1 0 \n", "1 1 0 \n", "2 1 0 \n", "3 1 0 \n", "4 1 0 \n", "... ... ... \n", "149997 0 1 \n", "149998 0 1 \n", "149999 1 0 \n", "150000 1 0 \n", "150001 1 0 \n", "\n", "[149715 rows x 26 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageMonthlyIncomeNumberOfTime30-59DaysPastDueNotWorseDebtRatioNumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorse...EastNorthSouthWestGoodOfficer1Officer2Officer3Self_EmpRented
010.76612745.09120.02.00.80298213.00.06.00.0...0010000010
100.95715140.02600.00.00.1218764.00.00.00.0...0010100010
200.65818038.03042.01.00.0851132.01.00.00.0...0010100010
300.23381030.03300.00.00.0360505.00.00.00.0...0010100010
400.90723949.063588.01.00.0249267.00.01.00.0...0010100010
..................................................................
14999700.24604458.05000.00.03870.00000018.00.01.00.0...0100101001
14999800.00000030.05716.00.00.0000004.00.00.00.0...0100100001
14999900.85028364.08158.00.00.2499088.00.02.00.0...0100100010
15000000.00000049.05000.00.00.0000006.00.00.00.0...0000100010
15000110.00000049.05000.00.00.0000006.00.00.00.0...0000100010
\n", "

149715 rows × 26 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 82 } ] }, { "cell_type": "code", "source": [ "df = cs3['Good']\n", "idf = cs3.drop(['Good'],axis =1)" ], "metadata": { "id": "byajtJH_-Phk" }, "execution_count": 88, "outputs": [] }, { "cell_type": "code", "source": [ "df.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "coFBZi7B-rUT", "outputId": "36aee4c1-feec-40f2-e335-37bb0fe9b415" }, "execution_count": 89, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0 0\n", "1 1\n", "2 1\n", "3 1\n", "4 1\n", "Name: Good, dtype: uint8" ] }, "metadata": {}, "execution_count": 89 } ] }, { "cell_type": "code", "source": [ "idf.head()" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 317 }, "id": "eBf8SnwmBMfJ", "outputId": "f767a7af-4906-4c0e-ed3c-aa3fce6a4b3a" }, "execution_count": 90, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ " NPA Status RevolvingUtilizationOfUnsecuredLines age MonthlyIncome \\\n", "0 1 0.766127 45.0 9120.0 \n", "1 0 0.957151 40.0 2600.0 \n", "2 0 0.658180 38.0 3042.0 \n", "3 0 0.233810 30.0 3300.0 \n", "4 0 0.907239 49.0 63588.0 \n", "\n", " NumberOfTime30-59DaysPastDueNotWorse DebtRatio \\\n", "0 2.0 0.802982 \n", "1 0.0 0.121876 \n", "2 1.0 0.085113 \n", "3 0.0 0.036050 \n", "4 1.0 0.024926 \n", "\n", " NumberOfOpenCreditLinesAndLoans NumberOfTimes90DaysLate \\\n", "0 13.0 0.0 \n", "1 4.0 0.0 \n", "2 2.0 1.0 \n", "3 5.0 0.0 \n", "4 7.0 0.0 \n", "\n", " NumberRealEstateLoansOrLines NumberOfTime60-89DaysPastDueNotWorse ... \\\n", "0 6.0 0.0 ... \n", "1 0.0 0.0 ... \n", "2 0.0 0.0 ... \n", "3 0.0 0.0 ... \n", "4 1.0 0.0 ... 
\n", "\n", " Professional East North South West Officer1 Officer2 Officer3 \\\n", "0 0 0 0 1 0 0 0 0 \n", "1 0 0 0 1 0 0 0 0 \n", "2 0 0 0 1 0 0 0 0 \n", "3 1 0 0 1 0 0 0 0 \n", "4 0 0 0 1 0 0 0 0 \n", "\n", " Self_Emp Rented \n", "0 1 0 \n", "1 1 0 \n", "2 1 0 \n", "3 1 0 \n", "4 1 0 \n", "\n", "[5 rows x 25 columns]" ], "text/html": [ "\n", "
\n", "
\n", "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NPA StatusRevolvingUtilizationOfUnsecuredLinesageMonthlyIncomeNumberOfTime30-59DaysPastDueNotWorseDebtRatioNumberOfOpenCreditLinesAndLoansNumberOfTimes90DaysLateNumberRealEstateLoansOrLinesNumberOfTime60-89DaysPastDueNotWorse...ProfessionalEastNorthSouthWestOfficer1Officer2Officer3Self_EmpRented
010.76612745.09120.02.00.80298213.00.06.00.0...0001000010
100.95715140.02600.00.00.1218764.00.00.00.0...0001000010
200.65818038.03042.01.00.0851132.01.00.00.0...0001000010
300.23381030.03300.00.00.0360505.00.00.00.0...1001000010
400.90723949.063588.01.00.0249267.00.01.00.0...0001000010
\n", "

5 rows × 25 columns

\n", "
\n", " \n", " \n", " \n", "\n", " \n", "
\n", "
\n", " " ] }, "metadata": {}, "execution_count": 90 } ] },
{ "cell_type": "code", "source": [ "from sklearn.model_selection import train_test_split" ], "metadata": { "id": "qclh_1JoBPYs" }, "execution_count": 92, "outputs": [] },
{ "cell_type": "code", "source": [ "# Seed the split with random_state so the train/test partition is reproducible\n", "# (shuffle is only an on/off flag and does not accept a seed).\n", "x_train, x_test, y_train, y_test = train_test_split(idf, df, test_size=0.25, random_state=101)" ], "metadata": { "id": "MbGopmYuBbcy" }, "execution_count": 104, "outputs": [] },
{ "cell_type": "code", "source": [ "x_train.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "1tNTaxczCC1h", "outputId": "738d7569-6254-42c1-e20e-09668dabe595" }, "execution_count": 105, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(112286, 25)" ] }, "metadata": {}, "execution_count": 105 } ] },
{ "cell_type": "code", "source": [ "x_test.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "zhBT_3pSCGJh", "outputId": "d276f962-182d-421d-ca95-d3442ef368d7" }, "execution_count": 106, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(37429, 25)" ] }, "metadata": {}, "execution_count": 106 } ] },
{ "cell_type": "code", "source": [ "y_train.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "Wu0mlP5ICQBP", "outputId": "5d785245-c2fc-4567-8b60-e41cc58b946a" }, "execution_count": 107, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(112286,)" ] }, "metadata": {}, "execution_count": 107 } ] },
{ "cell_type": "code", "source": [ "y_test.shape" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "llsZXXg1CShI", "outputId": "4f10e511-c074-4d88-dbf4-67a0dfd2f1cf" }, "execution_count": 108, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "(37429,)" ] }, "metadata": {}, "execution_count": 108 } ] },
{ "cell_type": "code", "source": [ "from sklearn.linear_model import LogisticRegression\n", "from sklearn.pipeline import make_pipeline\n", "from sklearn.preprocessing import StandardScaler" ], "metadata": { "id": "ZcDo3gvxCV-w" }, "execution_count": 109, "outputs": [] },
{ "cell_type": "code", "source": [ "# Standardize the features before the logistic regression: the columns are on very\n", "# different scales (ratios near 0-1 next to incomes in the thousands), which keeps\n", "# lbfgs from converging within its default iteration budget.\n", "cs3_model = make_pipeline(StandardScaler(), LogisticRegression(max_iter=1000))" ], "metadata": { "id": "H3VlB1sgCj-x" }, "execution_count": 110, "outputs": [] },
{ "cell_type": "code", "source": [ "# The scaler is fitted on the training rows only, as part of the pipeline.\n", "cs3_model.fit(x_train, y_train)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "9BdKvNEXCtiB", "outputId": "28dc24df-6215-4f38-d9c9-82bec2679a8f" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "# Score on the held-out rows only; scoring on idf/df would mix in the training rows.\n", "# NOTE(review): idf still contains 'NPA Status', which in the first rows shown is 1\n", "# exactly where 'Good' is 0 -- confirm it is not leaking the target before trusting this.\n", "cs3_model.score(x_test, y_test)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "OOSaz-0sC12H", "outputId": "0dcf6476-5362-4fd7-8af3-3a0832dc833e" }, "execution_count": null, "outputs": [] },
{ "cell_type": "code", "source": [ "" ], "metadata": { "id": "qxRB984mDXye" }, "execution_count": null, "outputs": [] } ] }