{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 导入数据,观察数据" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
KBM_INDV_IDresp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
02814780M40511NNNNNNNNNNNNNNNNNNNSANNYNNYYYYNNNNNN1.0538YYCCA67.09911.010174.07190.0738.011164.00514526571.02279.015.064.04288
12904850M00000NNNNNNNNNNNNNNNNNNNPANNNNNNYNNNNNNNN4.0011UYUCA76.0986.015269.06984.0494.09756.00415448199.03765.017.061.04663
22999490F00000NNNNNNNNNNNNNNNNNNNMANNNNNNYYNNNNNNN3.0011UYUCA67.088NaN261232.04450.0516.08350.00417384462.04447.020.061.04673
33146350F04000NNNNNNNNNNNNYNNNNNNSBNNNYYNYYYNNNNNN1.0145YYCCA71.096NaN15482.082103.0473.010552.00414457199.03971.04.062.03789
43637020F00000NNNNNNNNNNNNNYNNNNYMBNNNNNNYYNYYNYYN3.0011UUACA75.088NaN91238.04755.0523.08950.010429321336.01565.09.0NaN3743
\n", "
" ], "text/plain": [ " KBM_INDV_ID resp_flag GEND CA00 CA03 CA06 CA11 CA16 AART ADBT ADEP \\\n", "0 281478 0 M 4 0 5 1 1 N N N \n", "1 290485 0 M 0 0 0 0 0 N N N \n", "2 299949 0 F 0 0 0 0 0 N N N \n", "3 314635 0 F 0 4 0 0 0 N N N \n", "4 363702 0 F 0 0 0 0 0 N N N \n", "\n", " AHBP AHCH ARES AHRT AASN ADGS AHRL ASKN AVIS BANK COLLEGE FINI INLI INMEDI \\\n", "0 N N N N N N N N N N N N N N \n", "1 N N N N N N N N N N N N N N \n", "2 N N N N N N N N N N N N N N \n", "3 N N N N N N N N N Y N N N N \n", "4 N N N N N N N N N N Y N N N \n", "\n", " INVE IOLP MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 N5N59 N6N64 N65P ONLA POEP \\\n", "0 N N S A N N Y N N Y Y Y Y \n", "1 N N P A N N N N N N Y N N \n", "2 N N M A N N N N N N Y Y N \n", "3 N N S B N N N Y Y N Y Y Y \n", "4 N Y M B N N N N N N Y Y N \n", "\n", " SGFA SGLL SGOE SGSE SGTC U18 LIVEWELL NOC19 NAH19 NPH19 POC19 HOMSTAT \\\n", "0 N N N N N N 1.0 5 3 8 Y Y \n", "1 N N N N N N 4.0 0 1 1 U Y \n", "2 N N N N N N 3.0 0 1 1 U Y \n", "3 N N N N N N 1.0 1 4 5 Y Y \n", "4 Y Y N Y Y N 3.0 0 1 1 U U \n", "\n", " HINSUB STATE_NAME age c210apvt c210b200 c210blu c210bpvt c210cip \\\n", "0 C CA 67.0 99 11.0 10 1 74.0 \n", "1 U CA 76.0 98 6.0 15 2 69.0 \n", "2 U CA 67.0 88 NaN 26 12 32.0 \n", "3 C CA 71.0 96 NaN 15 4 82.0 \n", "4 A CA 75.0 88 NaN 9 12 38.0 \n", "\n", " c210ebi c210hmi c210hva c210kses c210mah c210mob c210mys c210pdv \\\n", "0 71 90.0 738.0 111 64.0 0 5 14 \n", "1 69 84.0 494.0 97 56.0 0 4 15 \n", "2 44 50.0 516.0 83 50.0 0 4 17 \n", "3 82 103.0 473.0 105 52.0 0 4 14 \n", "4 47 55.0 523.0 89 50.0 10 4 29 \n", "\n", " c210pmr c210poo c210psu c210pwc c210wht ilor meda pdpe tins zhip19 \n", "0 52 65 71.0 22 79.0 15.0 64.0 42 8 8 \n", "1 44 81 99.0 37 65.0 17.0 61.0 46 6 3 \n", "2 38 44 62.0 44 47.0 20.0 61.0 46 7 3 \n", "3 45 71 99.0 39 71.0 4.0 62.0 37 8 9 \n", "4 32 13 36.0 15 65.0 9.0 NaN 37 4 3 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = 
pd.read_csv('./data/ma_resp_data_temp.csv',header=0)\n", "# Use the fully qualified option key: the bare 'max_columns' pattern can match more than one option on newer pandas and raise OptionError\n", "pd.set_option('display.max_columns',100) # show up to 100 columns\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(43666, 76)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 43666 entries, 0 to 43665\n", "Data columns (total 76 columns):\n", "KBM_INDV_ID 43666 non-null int64\n", "resp_flag 43666 non-null int64\n", "GEND 43666 non-null object\n", "CA00 43666 non-null int64\n", "CA03 43666 non-null int64\n", "CA06 43666 non-null int64\n", "CA11 43666 non-null int64\n", "CA16 43666 non-null int64\n", "AART 43666 non-null object\n", "ADBT 43666 non-null object\n", "ADEP 43666 non-null object\n", "AHBP 43666 non-null object\n", "AHCH 43666 non-null object\n", "ARES 43666 non-null object\n", "AHRT 43666 non-null object\n", "AASN 43656 non-null object\n", "ADGS 43666 non-null object\n", "AHRL 43666 non-null object\n", "ASKN 43658 non-null object\n", "AVIS 43666 non-null object\n", "BANK 43666 non-null object\n", "COLLEGE 43658 non-null object\n", "FINI 43666 non-null object\n", "INLI 43666 non-null object\n", "INMEDI 43666 non-null object\n", "INVE 43666 non-null object\n", "IOLP 43666 non-null object\n", "MOBPLUS 43659 non-null object\n", "N2NCY 43656 non-null object\n", "NY8Y9 43657 non-null object\n", "N2N29 43666 non-null object\n", "N3N39 43666 non-null object\n", "N4N49 43666 non-null object\n", "N5N59 43666 non-null object\n", "N6N64 43666 non-null object\n", "N65P 43666 non-null object\n", "ONLA 43666 non-null object\n", "POEP 43658 non-null object\n", "SGFA 43666 non-null object\n", "SGLL 43666 non-null object\n", "SGOE 43666 non-null object\n", "SGSE 43666 non-null object\n", "SGTC 43666 non-null object\n", "U18 43666 non-null 
object\n", "LIVEWELL 43661 non-null float64\n", "NOC19 43666 non-null int64\n", "NAH19 43666 non-null int64\n", "NPH19 43666 non-null int64\n", "POC19 43666 non-null object\n", "HOMSTAT 43656 non-null object\n", "HINSUB 43655 non-null object\n", "STATE_NAME 43666 non-null object\n", "age 43662 non-null float64\n", "c210apvt 43666 non-null int64\n", "c210b200 43661 non-null float64\n", "c210blu 43666 non-null int64\n", "c210bpvt 43666 non-null int64\n", "c210cip 43664 non-null float64\n", "c210ebi 43666 non-null int64\n", "c210hmi 43659 non-null float64\n", "c210hva 43651 non-null float64\n", "c210kses 43666 non-null int64\n", "c210mah 43651 non-null float64\n", "c210mob 43666 non-null int64\n", "c210mys 43666 non-null int64\n", "c210pdv 43666 non-null int64\n", "c210pmr 43666 non-null int64\n", "c210poo 43666 non-null int64\n", "c210psu 43643 non-null float64\n", "c210pwc 43666 non-null int64\n", "c210wht 43652 non-null float64\n", "ilor 43660 non-null float64\n", "meda 43651 non-null float64\n", "pdpe 43666 non-null int64\n", "tins 43666 non-null int64\n", "zhip19 43666 non-null int64\n", "dtypes: float64(11), int64(24), object(41)\n", "memory usage: 25.3+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#将ID转化为object\n", "df['KBM_INDV_ID'] = df['KBM_INDV_ID'].astype('object')" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
resp_flag43666.00.4005180.4900090.00.00.01.01.0
CA0043666.00.2678060.9964590.00.00.00.06.0
CA0343666.00.2140340.8869140.00.00.00.07.0
CA0643666.00.3819681.1782810.00.00.00.07.0
CA1143666.00.3131961.0472930.00.00.00.07.0
CA1643666.00.2243160.8582030.00.00.00.07.0
LIVEWELL43661.02.8421251.1722551.02.03.04.06.0
NOC1943666.00.4657171.0450870.00.00.00.09.0
NAH1943666.02.0808181.2158520.01.02.03.09.0
NPH1943666.02.5465351.9036320.01.02.03.016.0
age43662.071.2255514.31824465.067.071.075.0101.0
c210apvt43666.088.78152311.1788660.085.093.097.099.0
c210b20043661.014.78484215.7493440.04.010.019.099.0
c210blu43666.019.4025799.9707880.012.019.026.067.0
c210bpvt43666.011.05917610.7053400.03.07.015.099.0
c210cip43664.054.15880428.3928861.030.057.079.099.0
c210ebi43666.055.59233723.1206280.040.052.068.0303.0
c210hmi43659.068.35454332.4321290.046.063.085.0255.0
c210hva43651.0317.276007239.5434740.0140.0233.0429.0999.0
c210kses43666.089.26989019.45274260.075.085.099.0170.0
c210mah43651.052.9523956.4920130.049.053.057.085.0
c210mob43666.02.7133938.8085660.00.00.00.099.0
c210mys43666.04.1428571.0713130.03.04.05.08.0
c210pdv43666.015.8034865.4604220.012.015.019.055.0
c210pmr43666.044.28434013.7637550.035.045.055.082.0
c210poo43666.060.23608824.9374120.042.066.081.099.0
c210psu43643.068.04559730.4998820.049.078.095.099.0
c210pwc43666.033.28358411.9621880.026.033.040.099.0
c210wht43652.061.52845215.3201290.051.061.073.099.0
ilor43660.018.01495619.1747410.07.015.019.099.0
meda43651.050.83487215.6759646.039.051.061.093.0
pdpe43666.055.57493213.20480626.046.054.065.099.0
tins43666.07.8651353.5625921.05.08.011.019.0
zhip1943666.04.4067923.0898490.01.04.07.09.0
\n", "
" ], "text/plain": [ " count mean std min 25% 50% 75% max\n", "resp_flag 43666.0 0.400518 0.490009 0.0 0.0 0.0 1.0 1.0\n", "CA00 43666.0 0.267806 0.996459 0.0 0.0 0.0 0.0 6.0\n", "CA03 43666.0 0.214034 0.886914 0.0 0.0 0.0 0.0 7.0\n", "CA06 43666.0 0.381968 1.178281 0.0 0.0 0.0 0.0 7.0\n", "CA11 43666.0 0.313196 1.047293 0.0 0.0 0.0 0.0 7.0\n", "CA16 43666.0 0.224316 0.858203 0.0 0.0 0.0 0.0 7.0\n", "LIVEWELL 43661.0 2.842125 1.172255 1.0 2.0 3.0 4.0 6.0\n", "NOC19 43666.0 0.465717 1.045087 0.0 0.0 0.0 0.0 9.0\n", "NAH19 43666.0 2.080818 1.215852 0.0 1.0 2.0 3.0 9.0\n", "NPH19 43666.0 2.546535 1.903632 0.0 1.0 2.0 3.0 16.0\n", "age 43662.0 71.225551 4.318244 65.0 67.0 71.0 75.0 101.0\n", "c210apvt 43666.0 88.781523 11.178866 0.0 85.0 93.0 97.0 99.0\n", "c210b200 43661.0 14.784842 15.749344 0.0 4.0 10.0 19.0 99.0\n", "c210blu 43666.0 19.402579 9.970788 0.0 12.0 19.0 26.0 67.0\n", "c210bpvt 43666.0 11.059176 10.705340 0.0 3.0 7.0 15.0 99.0\n", "c210cip 43664.0 54.158804 28.392886 1.0 30.0 57.0 79.0 99.0\n", "c210ebi 43666.0 55.592337 23.120628 0.0 40.0 52.0 68.0 303.0\n", "c210hmi 43659.0 68.354543 32.432129 0.0 46.0 63.0 85.0 255.0\n", "c210hva 43651.0 317.276007 239.543474 0.0 140.0 233.0 429.0 999.0\n", "c210kses 43666.0 89.269890 19.452742 60.0 75.0 85.0 99.0 170.0\n", "c210mah 43651.0 52.952395 6.492013 0.0 49.0 53.0 57.0 85.0\n", "c210mob 43666.0 2.713393 8.808566 0.0 0.0 0.0 0.0 99.0\n", "c210mys 43666.0 4.142857 1.071313 0.0 3.0 4.0 5.0 8.0\n", "c210pdv 43666.0 15.803486 5.460422 0.0 12.0 15.0 19.0 55.0\n", "c210pmr 43666.0 44.284340 13.763755 0.0 35.0 45.0 55.0 82.0\n", "c210poo 43666.0 60.236088 24.937412 0.0 42.0 66.0 81.0 99.0\n", "c210psu 43643.0 68.045597 30.499882 0.0 49.0 78.0 95.0 99.0\n", "c210pwc 43666.0 33.283584 11.962188 0.0 26.0 33.0 40.0 99.0\n", "c210wht 43652.0 61.528452 15.320129 0.0 51.0 61.0 73.0 99.0\n", "ilor 43660.0 18.014956 19.174741 0.0 7.0 15.0 19.0 99.0\n", "meda 43651.0 50.834872 15.675964 6.0 39.0 51.0 61.0 93.0\n", "pdpe 
43666.0 55.574932 13.204806 26.0 46.0 54.0 65.0 99.0\n", "tins 43666.0 7.865135 3.562592 1.0 5.0 8.0 11.0 19.0\n", "zhip19 43666.0 4.406792 3.089849 0.0 1.0 4.0 7.0 9.0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe().T" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "20" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#检查缺失列的数量\n", "len(df.columns)-df.dropna(axis=1).shape[1]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "#统计各列中空值的数量\n", "NA = df.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0
0KBM_INDV_ID0
1resp_flag0
2GEND0
3CA000
4CA030
\n", "
" ], "text/plain": [ " index 0\n", "0 KBM_INDV_ID 0\n", "1 resp_flag 0\n", "2 GEND 0\n", "3 CA00 0\n", "4 CA03 0" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#重置索引\n", "NA = NA.reset_index()\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
VarNA_count
0KBM_INDV_ID0
1resp_flag0
2GEND0
3CA000
4CA030
\n", "
" ], "text/plain": [ " Var NA_count\n", "0 KBM_INDV_ID 0\n", "1 resp_flag 0\n", "2 GEND 0\n", "3 CA00 0\n", "4 CA03 0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#重置列名\n", "NA.columns = ['Var','NA_count']\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
VarNA_count
0AASN10
1ASKN8
2COLLEGE8
3MOBPLUS7
4N2NCY10
\n", "
" ], "text/plain": [ " Var NA_count\n", "0 AASN 10\n", "1 ASKN 8\n", "2 COLLEGE 8\n", "3 MOBPLUS 7\n", "4 N2NCY 10" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#过滤出大于0的数据\n", "NA = NA[NA.NA_count>0].reset_index(drop=True)#重置索引,并把之前的索引删除\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0.000229\n", "1 0.000183\n", "2 0.000183\n", "3 0.000160\n", "4 0.000229\n", "5 0.000206\n", "6 0.000183\n", "7 0.000115\n", "8 0.000229\n", "9 0.000252\n", "10 0.000092\n", "11 0.000115\n", "12 0.000046\n", "13 0.000160\n", "14 0.000344\n", "15 0.000344\n", "16 0.000527\n", "17 0.000321\n", "18 0.000137\n", "19 0.000344\n", "Name: NA_count, dtype: float64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#把空值个数换化为比例\n", "NA.NA_count/df.shape[0]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
KBM_INDV_IDresp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [KBM_INDV_ID, resp_flag, GEND, CA00, CA03, CA06, CA11, CA16, AART, ADBT, ADEP, AHBP, AHCH, ARES, AHRT, AASN, ADGS, AHRL, ASKN, AVIS, BANK, COLLEGE, FINI, INLI, INMEDI, INVE, IOLP, MOBPLUS, N2NCY, NY8Y9, N2N29, N3N39, N4N49, N5N59, N6N64, N65P, ONLA, POEP, SGFA, SGLL, SGOE, SGSE, SGTC, U18, LIVEWELL, NOC19, NAH19, NPH19, POC19, HOMSTAT, HINSUB, STATE_NAME, age, c210apvt, c210b200, c210blu, c210bpvt, c210cip, c210ebi, c210hmi, c210hva, c210kses, c210mah, c210mob, c210mys, c210pdv, c210pmr, c210poo, c210psu, c210pwc, c210wht, ilor, meda, pdpe, tins, zhip19]\n", "Index: []" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# List rows that are exact duplicates of an earlier row\n", "df[df.duplicated()]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 探索数据及数据可视化分析" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# The 'seaborn' style sheet is named 'seaborn-v0_8' in newer matplotlib; use whichever name this install provides\n", "plt.style.use('seaborn-v0_8' if 'seaborn-v0_8' in plt.style.available else 'seaborn')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "# Use the SimHei font so Chinese labels render, and draw the minus sign as a plain hyphen\n", "plt.rcParams['font.sans-serif']=['SimHei']\n", "plt.rcParams['axes.unicode_minus']=False" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索样本分类是否均衡" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlUAAADMCAYAAABa8FYQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAADJNJREFUeJzt3X2sZHdZB/DvZRcKLIuWcG3EotgEHkGhGF5seZGFIgKCyIuiYJQQ/gEhGBMFJfgSC/ElUVEChLcGEa0ohJiglEpKaGgQKETln0caLBheZBOaLmikab3+MUO73t6lM7u/szNz7+eTbO7Mb+6e89x5MnO+8ztnztna2dkJAABn5k6rLgAAYD8QqgAABhCqAAAGEKoAAAYQqgAABhCqAAAGOLzqAm6++ZadG27471WXwS7nnnv36Mt60ZP1pC/rR0/W037py/b20a1TPbbymarDhw+tugT2oC/rR0/Wk76sHz1ZTwehLysPVQAA+4FQBQAwgFAFADCAUAUAMIBQBQAwgFAFADDAys9T9bxfe9eqSwAANtzrfvUnV12CmSoAgBGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAQ5PsdCqeluSByV5f3dfOsU6AADWyfCZqqp6VpJD3X1xkguq6v6j1wEAsG6m2P13LMm757c/mOQxE6wDAGCtTBGqjiT54vz215KcN8E6AADWyhSh6htJ7ja/fY+J1gEAsFamCDzX5rZdfhcmuX6CdQAArJUpvv33viRXV9V9kjwlyUUTrAMAYK0Mn6nq7hOZHaz+sSSP7+4bR68DAGDdTHKequ6+Ibd9AxAAYN9zEDkAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAC4eqqjpnykIAADbZMjNVl1fVQ6pqK0mq6her6rUT1QUAsFGWCVXnJPnjJH86v//sJA8cXhEAwAZaJlR9Z3dfkuTh8/tHkxwZXxIAwOZZJlQdr6orkxyqqpcm+YEkO9OUBQCwWQ4v8bs/k1mQ+nySJyc5luRRE9QEALBxFp6p6u5vJvliknsmuSbJvbv7sqkKAwDYJAvPVFXVG5L8+PzuVpIbk/zwFEUBAGyaZY6p+sEkD0vyiSQXJvnPSSoCANhAy4SquyS5d5J7JLk5yfYkFQEAbKBlQtVvJHlEksuTfCnJRyepCABgAy18TFV3X3XS3b+YoBYAgI3lgsoAAAPc4UxVVX3vqR7r7i+MLQcAYDNt7ex8+5OiV9VVmZ05fWvXQzvd/YQBNewcP/71AYthpO3to9GX9aIn60lf1o+erKf90pft7aO789Ct7nCmqrsfP7YcAID9xzFVAAAD3GGoqqpL5j9POd0FAHDQLTJT9ar5zw9NWQgAwCZb5DxVR+azVedW1Y+e/EB3f2SasgAANssioeq3kjwvyXlJXpDbvgW4k0SoAgDIYt/++0CSD1TVVd39wrNQEwDAxlnm23/P2GuwqvYcBwA4SBYOVd194hQPvXxQLQAAG2vEeaqcagEAOPBGhKpvf50bAIADwBnVAQAGsPsPAGCARc5TdauqekCSC5Jc193XJS64DACQLDFTVVWvTvKeJL+Q5H1V9crJqgIA2DDLzFQ9PclDu/uWqjqc5Jokv3emBbzgMmdkAFi1P3zapasuATbeMsdUfSHJ+fPb5yf53PhyAAA20zKh6nFJPltVX0ry2STHqkqwAgDIErv/unt7ykIAADbZwqGqqh6c5O5JTiR5RZJ3dPdVUxUGALBJltn998YkNyS5NMmVSX5/kooAADbQMqHq5u7+tyR
Hu/tdSf5nopoAADbOMqHqa1X1ySTXVNVzM5u1AgAgy52n6ueSPKi7P11VD03y/IlqAgDYOAvPVHX3N5N8taouTnJj7P4DALjVMpepeUWSK5K8JcklSd46VVEAAJtmmWOqnpHkwUmOd/dbkzxgmpIAADbPUt/+y+zyNDtVdSTJTdOUBACweZY5UP3VmV1E+V5JPpHkJZNUBACwgZYJVXft7vtW1XZ3H5+sIgCADbTM7r9Lq2pLoAIAuL1lQtXvJvmT+fFUAACcZJndf6+b/3x6VW0l2enuCyaoCQBg4ywcqrr7+6csBABgky2z+w8AgFMQqgAABhCqAAAGEKoAAAYQqgAABhCqAAAGEKoAAAYQqgAABhCqAAAGEKoAAAYQqgAABpgsVFXVeVV19VTLBwBYJ5OEqqo6N8k7khyZYvkAAOtmqpmqW5I8N8mJiZYPALBWDk+x0O4+kSRVNcXiAQDWjgPVAQAGEKoAAAYQqgAABpg0VHX3sSmXDwCwLsxUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMsLWzs7PqGnaOH//6qmtgl+3to9GX9aIn60lf1o+erKf90pft7aNbp3rMTBUAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAQhUAwADrcJ4qAICNZ6YKAGAAoQoAYAChCgBgAKEKAGAAoQoAYAChCgBgAKEKAGCAw6tceVW9LcmDkry/uy9dZS0HRVUdTvK5+b8keVmS5yR5apKPd/cvzX/vdxYZ48xU1XlJ/ra7H1tVd07y3iT3SvK27n77mYyt5A/aJ3b15XuS/FOS6+YP/3R3H9/r/WvRMZZTVd+R5PIkh5L8V5LnJnljTvP515Mzd4qeXJeTti3d/a+Lbkv2y/ZlZTNVVfWsJIe6++IkF1TV/VdVywHzkCR/1d3HuvtYkrskeUySRyb5alU9saoetsjYasrfP6rq3CTvSHJkPvSyJNd296OTPKeqjp7hGKdhj778SJLXfOs1Mw9Ut3v/WnRsFX/TPvD8JH/U3U9K8pUkP5vTfP71ZJjdPXllTtq2zAPVQtuS/bR9WeXuv2NJ3j2//cHMnlCmd1GSp1XVx+ef1i5J8p7u3klyRZLHJnncgmOcmVsy+3R3Yn7/WG57TXwkycPPcIzTs7svFyV5UVV9qqpeOx87ltu/fy06xpK6+w3dfeX87naSn8/pP/97jbGkPXpyc07atsz3iiy6Ldk325dVhqojSb44v/21JOetsJaD5BNJntjdj0xy5yR3y+37sFdv9Guw7j7R3TeeNLTo864/E9qjL/+Q2Yb4EUkurqqHRF9WoqouTnJukv+I18paOKknV+b/b1uemgPYk1WGqm9ktkFPknusuJaD5F+6+8vz25/M3n1YdIyxzqQX+jOda7r76919S5JPJ7l/9OWsq6p7JfmzJC+M18pa2NWT3duWA/k6WWXh1+a2adcLk1y/ulIOlHdW1YVVdSjJT2X2CWF3H/bqjX5Nb9HnXX/Oriuq6rur6u5JnpTkM9GXs6qq7pLkb5L8end/Pl4rK7dHT3ZvW/45B7AnWzs7OytZcVXdM8nVST6U5ClJLto15c4EquqHkvxlkq0kf5fk1Zn14ZNJnjz/9/lFxrr73892/ftRVX24u49V1fcl+fsk/5jkUZkdy3P+6Y7NZ1Y4TSf15fGZfdPspiRv7u7X7/X+lWRnkTHvc8urqhcneW1mG+okuSzJr+Q0nv+9xvRkeXv05Kokz85829Ldr6qqO+WAbV9WFqqSW79l82NJPtLdX1lZIQdcVd0tyU8k+VR3f26ZMcaqqvtk9ontim+90Z/JGNPZ6/1r0THO3Jk8/3py9hy07ct
KQxUAwH6xsQeDAQCsE6EKAGAAoQo40Krql1ddA7A/CFXAQSdUAUM4UB1Ye/NLXrwpyQMzuxD8S5LcNckfZPbh8Mru/s2qekGS+3X3b8//3/Xdfb+q+nCSD2T2de3zkjwzs/PhvDizr9l/bL6M15zFPwvYZ8xUAZvgRUluml8w+iVJnpDknZld1PVRSR5eVU+6g2V81/wi4pcleWZ3//X8/lfmF4AVqIAzIlQBm+DBSa5Jku6+NsmfJ/nf7r5+fhHWqzObebrV/AzoJ3v7/OdXk5wzbbnAQSRUAZvgM5ntpktVPTTJe5NsVdV9q2oryaMzO7PzTUm25//nGbuW8Y1TLPtO8+VujS4aOFgOr7oAgAW8Ncmbq+rq+f2XZ3ZM1eW57ZiqD1bVdpKXVtXrk3x570Xdzluq6qNJDmUe3ABOhwPVAQAGsPsPAGAAoQoAYAChCgBgAKEKAGAAoQoAYAChCgBgAKEKAGCA/wOy4AvcEyp18gAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10,3))\n", "sns.countplot(y='resp_flag',data=df)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4005175651536665" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#计算购买用户和未购买的比例\n", "df.resp_flag.sum()/df.resp_flag.shape[0]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5994824348463335" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "1-df.resp_flag.sum()/df.resp_flag.shape[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 用户年龄分布情况" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.distplot(df['age'],bins=20)#分成20个区间段" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索用户年龄和购买商业医疗保险之间的关系" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, 'Density')" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#两类样本的年龄分布 核密度估计kdeplot 直观的看到数据样本本身的分布特征\n", "sns.kdeplot(df.age[df.resp_flag==1],label='1',shade=True)\n", "sns.kdeplot(df.age[df.resp_flag==0],label='0',shade=True)\n", "plt.xlim([60,90])\n", "plt.xlabel('age')\n", "plt.ylabel('Density')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索用户性别以及性别和购买保险之间的关系" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAADMCAYAAAC1Mj0uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAACzZJREFUeJzt3X+MbHdZx/HPci8I1KtpzVpFDERpnviLmkKxBdGrYEUEtYhpo/7RGGICSjBGDKQBS0JJtIk/YlMNCAUBrRUbY6LSFqW2EbTSGrCJ+ZqGtjGmhaqNLZJYWtc/5pRut9vOep+dnZm7r1dys7Nnb898b5/snPeeMzuzsbW1FQAATtxTlr0AAIB1J6gAAJoEFQBAk6ACAGgSVAAATYIKAKDp6DLv/KGHHt66774vLnMJzHHqqc+MGa0u81l9ZrT6zGi1rdJ8NjePbTzR15Z6huro0SPLvHv2wIxWm/msPjNafWa02tZlPi75AQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGha6utQ/eQvf3iZdw/AAfutN//IspcAC+EMFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE2CCgCgSVABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBp34Oqqt5fVVdPt6+qqvfv930AAKySRZ2hOnPHRwCAk9aigurBqvqaJF9a0P4BAFbGooLq00kumD4CAJzUFhVUtya5aPoIAHBSW2RQnR1BBQAcAosKqjuT/EuSuxa0fwCAlXF0v3c4xrhoulnTx4t2/5sAACcHL+wJANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE2CCgCgSVABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaNrY2tp60r9QVWckeVmS05L8R5Lrxhh37NP9b9177wP7tCsWYXPzWMxodZnP6jOj1WdGq22V5rO5eWzjib72pGeoqur8JDcmeUGSU5KcleRvquoH93WFAABr7Oicr785yVljjLsf2VBVpye5Osm1i1wYAMC6mPccqv/ZHlNJMsb4XJKHF7ckAID1Mu8M1fdU1f1Jdl4zfPqC1gMAsHaeNKjGGEcOaiEAAOvqSYOqqu5IstuvAW6NMb55MUsCAFgv8y75Hd92+zlJ3pLk25L86qIWBACwbp70SeljjLsyi663JXlPkj9N8rwxxhUHsDYAgLUw73WoPpTkn5M8K8nbk/x7kldX1WsOYG0AAGth3iW/LyX58HT7Fdu2byW5ZiErAgBYM/
OC6jfGGJ/ZubGqLlzQegAA1s68F/b8zUduVNVfb9v+s4tZDgDA+pkXVBt7uA0AcKjNu+R3tKpOzSy8HnN74SsDAFgT88Lo2UlumW5vbLsNAMBkXlCdn+SyJL+U5PLMAitJvrgfd37RlW/aj90AAIfYZa9657KXMPc5VJcnuTjJbUkeSvK8JBcmuX3B6wIAWBvzgmprjHHzGOPhJL8yfbw5yemLXxoAwHqYd8nvY1V1fZKPJrmvqs5K8uokH1/4ygAA1sS89/J7R2ZvOXMsydlJvi7JZWOMtx7A2gAA1sLclz8YY3wyyScPYC0AAGtp3nOoAACYQ1ABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE1H93uHVXVJkguSfG7a9JExxuX7fT8AAKti34NqcukY40ML2jcAwEpxyQ8AoGlRQXVxVd1QVVcsaP8AACvDJT8AgCaX/AAAmgQVAEDTvl/yG2Ncst/7BABYZc5QAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE2CCgCgSVABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJo2tra2lnn/W/fe+8Ay7585NjePxYxWl/msPjNafWa02lZpPpubxzae6GvOUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE3Lfh0qAIC15wwVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQdHRZd1xV703yrUn+fIzxzmWt47CqqqNJPjv9SZI3JnltklcmuXmM8XPT33vHXraxv6rq9CQfGWO8tKqemuSaJKclee8Y432dbUv5B51kdsznG5L8fZLbpy//xBjj3t0e4/a6jRNXVV+d5KokR5L8d5ILkvxOTnAW5rP/nmBGt2fb8WiM8U97Pf6syjFpKWeoquo1SY6MMc5N8k1VdcYy1nHIPT/JH44xjo8xjid5WpLvTvKiJJ+vqpdX1Qv2sm05yz95VdWpST6Q5JRp0xuT3DLGeEmS11bVseY2GnaZz3clufSR76Upph73GLfXbcv4N51kfirJr48xzktyT5ILc4KzMJ+F2Tmjt2Tb8WiKqT0df1bpmLSsS37Hk1w93b4us/8ZHKxzkryqqm6efgJ7WZI/GWNsJbk2yUuTfO8et7G/Hs7sJ7b7p8+P59HvlxuTvLC5jZ6d8zknyeuq6taqete07Xge/xi31200jDGuGGNcP326meSnc+Kz2G0bTbvM6KFsOx5NV1D2evxZmWPSsoLqlCT/Nt3+zySnL2kdh9k/JHn5GONFSZ6a5Bl5/Ex2m5PZLdgY4/4xxn9t27TXOZjXAdhlPn+Z2YH37CTnVtXzYz5LV1XnJjk1yb/G989K2jaj6/PY49Ers4YzWlZQfSGzA3iSfOUS13GYfWaMcfd0+1PZfSZ73cZidWZjXov3iTHGA2OMh5P8Y5IzYj5LVVWnJfntJD8T3z8raceMdh6P1vJ7aFl3fEsePXV6ZpI7l7SOw+yDVXVmVR1J8mOZVf7Omew2J7M7eHudg3ktx7VV9fVV9cwk5yW5LeazNFX1tCR/nOStY4y74vtn5ewyo53Ho09nDWe0sbW1deB3WlVfleSmJH+V5IeSnLPjFDoLVlXfnuQPkmwk+bMkb8tsJp9K8orpz1172TbGuOOg138YVNUNY4zjVfWcJH+R5GNJXpzZc3aefaLbpjMpNG2bz/dl9ltkDyZ59xjj8t0e45Js7WWbx8Keqnp9kndldlBOkiuT/GJOYBa7bTOfvl1m9PEkP57peDTGuLiqnpI1OyYtJaiSL/+mzA8kuXGMcc
9SFsFjVNUzkvxwklvHGJ/9/2xjsarqWZn9FHbtIw/onW0s3m6PcXvdxv7qzMJ8lmfdjklLCyoAgJOFJ9gBADQJKgCAJkEFHFpV9QvLXgNwchBUwGEmqIB94UnpwEqb3obid5N8S2Zv6P6GJE9P8muZ/VB4/Rjj7VV1UZLnjjEumf67O8cYz62qG5J8NLNfqT49yfmZvV7N6zP71fi/m/Zx6QH+s4CTjDNUwKp7XZIHpzd4fkOS70/ywczeYPXFSV5YVefN2cfXTm8CfmWS88cYfzR9fs/0ZqxiCmgRVMCq+44kn0iSMcYtSX4/yf+OMe6c3hD1pszOOH3Z9Krl271v+vj5JF+x2OUCh5GgAlbdbZldmktVfWeSa5JsVNU3VtVGkpdk9orLD2b2zvVJ8qM79vGFJ9j3U6b9buz3ooHD5eiyFwAwx+8leXdV3TR9/qbMnkN1VR59DtV1VbWZ5Oer6vIkd+++q8d5T1X9bZIjmaIN4ER4UjoAQJNLfgAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQNP/AePrwPsJ3AvHAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# 查看性别比例\n", "plt.figure(figsize=(10,3))\n", "sns.countplot(y = 'GEND',data=df)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAECCAYAAAALqiumAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAErZJREFUeJzt3XuQnXV9x/F32EQn2QS74BoMIBAHvxphIopKTCIpQpWKl6It1htUVKxiRUcsDlopjYOCWpEURSdaWq1KvXG/eCFctKJGLo4434I1yCREN2YNiQGFZPvHeWLWkN/ybPac55wk79fMzj7n+5w9z/dkds8nv99zmzQyMoIkSduzR7cbkCT1LkNCklRkSEiSigwJSVKRISFJKprc7QbaaWhovYdqSdI4DQ7OmFRa50hCklRkSEiSigwJSVKRISFJKjIkJElFhoQkqciQkCQVGRKSpCJDQpJUZEhIUo8655yzWbz4A1x99RV/rJ188usa7WGXuiyHtCt7x3mXdbuFnnH+6S/tdguNuPvuu1i69D+72oMhIUk76MQT/5ZnPetw+vunc/LJp3D77bdy5ZWX8fvfP8jhhz+Xl7zk5Vx00b+xbt1vWbNmDe9+9xlcfvk3uPPOn7LffvsxZcpjOPXU0x7xuvfdt4qvfvUShoZ+zZIlH+e4417GgQce9Ijn3XzzDVx22TeYMmUyxx33cubNm8/553+UtWvXsGnTJhYsOJIXvejFE3qPTjdJ0g5aufJeXvGKEzj55FMA+NSnlrDnno9j5sx9uO22HwPw85/fRcTTOOmkk9lzz8cB8IIXHMM73/ke7r33HtasWfOI133iE2dx6qmnMTj4BE499bTtBgTAvvvuz7HHvpjBwZksW/ZtAG69dTnvf/+/sGrVqgkHBBgSkrTDDjjgIPbdd79RlRFOOumNvPnNb+PQQ+cCcOKJb+Sgg2bzpS99gR/96AcAPPTQQwA8/PAm9tijeAHWR7VkycfZvHkzCxceyebNm9m8eTOzZu3LOeec/cfgmiinmySpTd70preyePE/MWnSHixadBQA11//LTZu/B0bN/6OffZ5Ipk/4+abb+AnP7md2bOfzF577b3D29tvv/24447b2LBhAxs2rOfhhx9m9er7iHgat99+KwcdNJtZs/ad0HuaNDKy69yCwftJaFfmjuutduYd10uXXsRTnzqH+fMXArBmzRA333zDnzzn2c8+YpsRSj2//OUKvvzl/2Lfffdj9er7mD//+Tz3ufMe9efGup+EISHtJAyJrXbmkOhF3nRIkrRDDAlJUpEhIUkq8ugmSWqDdu8z6pX9Lo4kJElFjiQkaSd2zjlns2LFL5g3bz4nnfTGtr9+R0YSETEzIm7apnZIRHyzWp4SEZdHxHcj4g3jqUmSWm644Tts3ryZiy76HKtWreTee3/Z9m20PSQiYgC4GOgfVZsEfAyYUpXeDizPzPnAKyNixjhqkiRa12k66qijAXjOc47gjjtua/s2OjHdtAk4Abh0VO3vgOuBF1aPFwFnVMs3AoePo3Z9acMDA9OYPLlvgu1L6nWDg7v+/xf
rvMeRkYd5ylMOYnBwBvvvvw933nln2/9t2h4SmXk/QERQfd8beC2tgNgSEv3Aymp5LTBzHLWi4eGN7XgLknrc0ND6brfQcXXe46RJU7jvvrXsvfd6Vq/+DevXP7BD/zZjBUsTO64/BLw3Mx/aEhzABmAqsA6YXj2uW5OkntONQ1Yjnsodd9zGIYccyt1338X++x/Q9m00cQjskcCHI2IZ8IyIWAwsBxZU6+cCK8ZRkyQBz3/+Iq699iouuOBjfOc73+R5z1vw6D80Th0fSWTmU7YsR8SyzHxfRBwAXBURC4E5wC20ppXq1CRJQH//dC644CJ++MNbePWrX8/06dPbvo2uXQU2ImbRGiVcm5nrxlMr8Sqw2pV5FditeuVs5F3FWFeB7drJdJm5CrhkR2qSpGZ4WQ5JUpGX5ZCkNjj9ive19fXOO25xW19vRzmSkKSd2Nq1v+Gtb23/NZu2MCQkaSd1//33s3jxWTz44AMd24YhIUk7qb6+PTj77HOYNq3/0Z+8g9wnIUk7qf7+9p8XsS1HEpKkIkNCklTkdJMktUGvHLLabo4kJGknt2TJpzv22oaEJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUUdOeM6ImYCX8nMhRHxJOA/gM3A3cAp1Xa/BuwFLM3Mz0bElDq1TvQrSdq+to8kImIAuBjYcu3aU4C/z8yjgP2BQ4G3A8szcz7wyoiYMY6aJKkhnRhJbAJOAC4FyMwzR63bG1gDLALOqGo3AoePo3Z9acMDA9OYPLlv4u9AUk8bHPT/i01pe0hk5v0AEfEn9Yg4AfhpZq6KiH5gZbVqLTCT1sijTq1oeHhjG96BpF43NLS+2y3sUsYK3UZ2XEfEbODdwGlVaQMwtVqeXvVRtyZJakjHP3SrfRRfBN6Qmeuq8nJgQbU8F1gxjpokqSFN3E/iDOBJwAXVFNQHaO3YvioiFgJzgFtoTSvVqUmSGjJpZGSkKxuOiFm0RgnXbhlh1K2VDA2t786bkRrwjvMu63YLPeP801/a7RZ2KYODMyaV1nXtznSZuQq4ZEdqkqRmuCNYklRkSEiSigwJSVKRISFJKjIkJElFhoQkqahrh8D2Ko9F38pj0SU5kpAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUWGhCSpyJCQJBUZEpKkIi/LIWmnc/oV7+t2Cz3jvOMWd/T1HUlIkoo6MpKIiJnAVzJzYURMAb4G7AUszczPTqTWiX4lSdvX9pFERAwAFwP9VentwPLMnA+8MiJmTLAmSWpIJ0YSm4ATgEurx4uAM6rlG4HDJ1i7vrThgYFpTJ7cN+E3oJbBQTNZ6nWd/jtte0hk5v0AEbGl1A+srJbXAjMnWCsaHt444f611dDQ+m63IOlRtOPvdKygaWLH9QZgarU8vdrmRGqSpIY08aG7HFhQLc8FVkywJklqSBPnSVwMXBURC4E5wC20ppB2tCZJakjHRhKZuaj6fg9wDPBd4OjM3DSRWqf6lSQ9UiNnXGfmKuCSdtUkSc1wR7AkqciQkCQVGRKSpCJDQpJUZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqSiWiEREe/d5vH3OtOOJKmX1B1JHLPN483tbkSS1HvGvApsRLwDOA2YGRH/B0yqvj7TQG+SpC4bMyQy83zg/Ii4PjP/vKGeJEk9ou500yc72oUkqSfVvenQbRHxj8BjtxQy8+zOtCRJ6hV1RxJfp7Wz+p5RX5KkXVzdkcSvM/O8HdlARAwAXwCeACzPzFMiYikwB7gyMxdXz6tVkyQ1p+5I4pqI+EhEzImIJ0XEk8axjdcBX8jMw4EZEfEeoC8z5wGzI+LgiDi+Tm08b0ySNHF1RxIvqr4/q/o+AhxV82d/AxwSEX8G7A+sAy6p1l0HLAAOq1m7a6wNDQxMY/Lkvppt6dEMDs7odguSHkWn/05rhcQED3+9GXgx8A/Az4DHACurdWu
BZwL9NWtjGh7eOIE2ta2hofXdbkHSo2jH3+lYQVMrJCLiF7RGD1uMZOaTa27/A8BbMvP+iHgX8EG2now3ndaU1wZgao2aJKlBtT54M/OgzJwNPB04E1g6jm0MAIdGRB/wXOBDtKaOAOYCK4DlNWuSpAbV3ScBQGY+AHwxIj4xjh87B/gccADwP8C/AjdFxCzgWOAIWqOUOjVJUoPqTjd9gK3TTU8AnlF3A5n5A1ojkNGvt4jWRQPPzcx146lJkppTdySxYtTyXcBZE9loZg6z9cilcdUkSc2pu0/iYlpHGj0eGMrMNR3tSpLUE+redOgs4J3AFOAd1WNJ0i6u7nTTMZk5HyAiJtE69+GsTjUlSeoNdc89GImI/avlWfzpOROSpF1U3ZHEe4BlEbGJ1p3pTuxcS+oVp1/xvm630DPOO87rS2r3VHck8QCtGw8tAH4FPNixjiRJPaNuSFwIfDszfw2cSuuEOEnSLq5uSGzKzFsBMvM2WlNOkqRdXN19Ej+LiCXA92hdHuPuzrUkSeoVdUcSbwF+AjwPuBN4c8c6kiT1jLr3k9gEXNThXiRJPcZ7NEiSigwJSVKRISFJKjIkJElFhoQkqciQkCQVGRKSpCJDQpJUVPeyHBMWERcCV2fm5RGxFJgDXJmZi6v1tWqSpOY0MpKIiIXAPlVAHA/0ZeY8YHZEHFy31kSvkqStOj6SiIgpwGeAqyLiZcAi4JJq9XW07lFxWM3aXWNta2BgGpMn97WzfQmAwcEZ3W5B2q5O/242Md30eloXBTwXeDvwNmBptW4t8EygH1hZozam4eGNbWtaGm1oaH23W5C2qx2/m2MFTRPTTYcBn87M1cDngRuBqdW66VUPG2rWJEkNauKD925gdrV8OHAgrakjgLnACmB5zZokqUFNTDctBT4bEa8CptDaJ3FZRMwCjqV1E6MR4KYaNUlSgzoeEpm5Hvjr0bWIWAQcA5ybmevGU5MkNaex8yRGy8xhth65NK6aJKk57gyWJBUZEpKkIkNCklRkSEiSigwJSVKRISFJKjIkJElFhoQkqciQkCQVGRKSpCJDQpJUZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqSixm5fGhEzgWsy87CIWArMAa7MzMXV+lo1SVJzmhxJfASYGhHHA32ZOQ+YHREH16012KskiYZGEhFxFPA7YDWwCLikWnUdsAA4rGbtrrG2MzAwjcmT+9rZugTA4OCMbrcgbVenfzc7HhIR8Rjg/cBfAd8A+oGV1eq1wDPHURvT8PDGtvUtjTY0tL7bLUjb1Y7fzbGCponppjOACzPzt9XjDcDUanl61UPdmiSpQU188B4NvC0ilgHPAF5Ca+oIYC6wAlhesyZJalDHp5sy8/lblqugeClwU0TMAo4FjgBGatYkSQ1qdAonMxdl5v20dl5/H/jzzFxXt9Zkr5KkBs+TGC0zh9l65NK4apKk5rgzWJJUZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUWGhCSpyJCQJBUZEpKkIkNCklRkSEiSijp++9KIeBzwJaAP+B1wAvBJYA5wZWYurp63tE5NktScJkYSrwE+lpl/AawGXgX0ZeY8YHZEHBwRx9epNdCrJGmUjo8kMvPCUQ8HgdcCH68eXwcsAA4DLqlRu2usbQ0MTGPy5L72NC6NMjg4o9stSNvV6d/NjofEFhExDxgAVgArq/Ja4JlAf83amIaHN7avYWmUoaH13W5B2q52/G6OFTSN7LiOiL2AC4A3ABuAqdWq6VUPdWuSpAZ1/IM3Ih4D/Dfw3sy8B1hOa+oIYC6tkUXdmiSpQU1MN51Ma6rozIg4E/gc8LqImAUcCxwBjAA31ahJkhrUxI7rT9I65PWPIuIy4Bjg3MxcV9UW1alJkprT2I7r0TJzmK1HLo2rJklqjjuDJUlFhoQkqciQkCQVGRKSpCJDQpJ
UZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUWGhCSpyJCQJBV15R7X4xERS4E5wJWZubjb/UjS7qSnRxIRcTzQl5nzgNkRcXC3e5Kk3cmkkZGRbvdQFBGfAK7JzKsi4lXA1Mz8XLf7kqTdRU+PJIB+YGW1vBaY2cVeJGm30+shsQGYWi1Pp/f7laRdSq9/6C4HFlTLc4EV3WtFknY/vX500zeAmyJiFnAscESX+5Gk3UpP77gGiIgB4Bjgxsxc3e1+JGl30vMhIUnqnl7fJyFJ6qJe3yehLoiIfwemZebfRMSXgAcz86TudiVBRJwFnAD8qip9JTOXdK+jXZ8hoZK5o77f0s1GpG18MDM/3+0mdhdON6nkDxGxN/BQtxuR1D2GhEpupzWsv73bjUjbODMilkXEhd1uZHfgdJNKfgycBHyRrVNPUi9wuqlBjiRU8mPg2dV3SbspQ0IlK4D/Be7pch+SusiT6SRJRY4kJElFhoQkqciQkCQVGRKSpCLPk5AmqLqe0NG0/tP1NuDrtG67u+Vs9dOqrymZ+ZqIOBBYlpkHRsQm4GbgscC5mfm1htuXxuRIQpqAiHgesDAzFwDvAs6rVr0sMxdVX7dVtRMi4uBtXuKBzDwSOB64MCKimc6legwJaWJeCFxTLf8IePcYz/0W8N7trcjMVcAVwAva2p00QU43SRMzE7g3Io4E/pmtf1OXRsRDAJm5qKpdDJwNHFB4rV8De3WuVWn8DAlpYtYBe2bmDRFxNHB3VX9ZZq7Z5rkPAR8Fzii81uOBWzvTprRjnG6SJmYZ8JKI2AN4Vo3nfxZ4+rbFiJgJ/CVwXVu7kybIkYQ0AZl5dUQsAr5P625pDwBTGTXdBHx61PP/EBEfBk6vSlMjYhnQB7w5M3/eVO9SHV67SZJU5HSTJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUtH/Ax2+5WB2jOGDAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.countplot('GEND',hue='resp_flag',data=df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索用户的学历情况,以及学历与购买保险之间的关系" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4 18597\n", "3 12437\n", "6 7493\n", "5 4474\n", "2 462\n", "7 130\n", "0 60\n", "1 9\n", "8 4\n", "Name: c210mys, dtype: int64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.c210mys.value_counts()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAADMCAYAAAC1Mj0uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEPFJREFUeJzt3X+wXGV9x/F3CEEQMyZ0rrHYqMMM/Y4gIIpohEiS8hsKgth0BnH8QYuBom2nM/6IPwoDOsOotcUhggQElIBQpPijJDqDkurwK6hgtd+OUhhBUy+CJOCMKNz+cU7kcrN77zbPHvbc3fdr5g67Z3fP89zvHM795DnPPmfOxMQEkiRJ2nE7DboDkiRJs52BSpIkqZCBSpIkqZCBSpIkqZCBSpIkqZCBSpIkqdDOg2z8979/auLRR38zyC602sKFz8f6dGZtpmd9urM207M+3Vmb7kalNmNj8+d0e22gI1Q77zx3kM23nvXpztpMz/p0Z22mZ326szbdWZsBB6rxNV+A6/9tkF2QJEkq5hwqSZKkQgYqSZKkQgYqSZKkQo18yy8i1gL7AF/LzPOaaEOSJKkt+j5CFREnA3MzcwmwV0Ts3e82JEmS2qSJS37LgC/VjzcAhzbQhiRJUms0Eah2Bx6qHz8CLGqgDUmSpNZoIlA9DuxWP35BQ21IkiS1RhNhZxPPXOY7ALi/gTYkSZJao4lv+d0IbIyIPYFjgNc30IYkSVJr9H2EKjO3UE1Mvw1YnpmP9bsNSZKkNmlkHarMfJRnvuknSZI01JwwLkmSVKiREapeja16K+PjWwfZBUmSpGKOUEmSJBUyUEmSJBUyUEmSJBUaaKC6Z80Jg2xekiSpLxyhkiRJKmSgkiRJKmSgkiRJKtRIoIqIRRGxsYl9S5IktU3fA1VELASuAHbv974lSZLaqIkRqqeAlcCWBvYtSZLUOn2/9UxmbgGIiH7vWpIkqZWclC5JklTIQCVJklTIQCVJklSosUCVmcua2rckSVKbOEIlSZJUyEAlSZJUaKCBav9VNw2yeUmSpL5whEqSJKmQgUqSJKmQgUqSJKlQ32898/9x1eeP2qHPHX3c9X3uiSRJ0o5zhEqSJKmQgUqSJKmQgUqSJKlQ3+dQRcQLgWuAucATwMrMfLLf7UiSJLVFEyNUpwKfyswjgc3A0Q20IUmS1Bp9H6HKzIsmPR0DftnvNiRJktqksTlUEbEEWJiZtzXVhiRJUhs0sg5VROwBXAi8uYn9S5IktUnfR6giYhfgOuADmflAv
/cvSZLUNk1c8nsX8GpgdUR8KyJWNtCGJElSazQxKX0NsKbf+5UkSWorF/aUJEkqZKCSJEkq1Mi3/Hp12tvXMz6+dZBdkCRJKuYIlSRJUiEDlSRJUiEDlSRJUqGBzqF6x5e9b7IktcEFh1436C5Is5ojVJIkSYUMVJIkSYWavDnya4DvZebDTbQhSZLUFk3cHHkh8FXgYOCWiBjrdxuSJElt0sQlv/2Bv8/M84H1VDdKliRJGlpN3Bz52wAR8UaqUapz+92GJElSmzQyKT0i5gArgUeB3zXRhiRJUls0EqgycyIzzwLuAU5oog1JkqS26OmSX0SsAOYCTwAfAy7OzHVd3vs+4BeZeSWwAPh1n/oqSZLUSr2OUJ0P3At8EFgN/N00770EOC0ibqUKYRuKeihJktRyvU5K/x3wMDAvM78TEU92e2NmPgoc0Y/OSZIkzQa9jlD9N/Ag8JWIOBP4aXNdkiRJml16ClSZeTrwisz8DHAT8M5GeyVJkjSL9Dop/a+ALwJk5oP9avzyk25mfHxrv3Y3dMbG5lufLqzN9KxPd9ZGUhN6veT3MuDOiFgTEQc02SFJkqTZptdLfh8CXgncCNwQEbdFxJGN9kySJGmW6PWS32KqeVMrgW8DVwKfxCURJEmSel424atU60styczHACJiTWnjx91wUekuJEnSiPv80tMG3YWeA9U7gH2BEyMCgMy8tKlOSZIkzSa9BqprgauBp+vnE810R5IkafbpNVBtAP4HuK/BvkiSJM1KvQaqo4AXAIfVzyeAW7u9OSIWATdn5oFl3ZMkSWq/XgPV7cDl9D5C9Qlgtx3qkSRJ0izTa6DaE/jIlG0rOr0xIlYATwCbC/olSZI0a8wYqCLiT4DLqELVHKpRqq92ee8uwIeBk6gWAZUkSRp6066UHhGrqC71LQIeAZ4ETgDujogXdvjI+4GLMvPX/e6oJElSW800QvXWzDxk6saI+DRwLLBuykuHAysi4izgVRFxaWae3p+uSpIktdNM9/KbExFHTd4QES8B3gDcPfXNmfnGzFyWmcuA7xumJEnSKJhphOovgEsj4vPAQ8CuwK+A8zIzp/tgHaokSZKG3rSBKjMfBI6OiJ2A5wO/ycynp/uMJEnSqOlp2YQ6RD3ecF8kSZJmpWkDVUS8sdtrmdl1pXRJkqRRMtMI1fuAxcBdVGtQbTPtrWd69bWTz2R8fGvpbobW2Nh869OFtZme9enO2kzP+nRnbbqzNjMHqjdRLdD5ocz8+XPQH0mSpFln2mUTMvN3VAt5ehsZSZKkLnqZlL4v8L/1DwARsTwzbylt/ITrO97BRiNi7WGHDboLkiT1xUy3nvk01X38NkbEJyNiXv3SOY33TJIkaZaYaaX0AzPzIGAf4GFgQ0Qs5NkT1CVJkkbaTJf8nhcR8+q5VB+PiLuB9cD85rsmSZI0O8wUqP4JuBY4GSAz10fEFuC6Tm+OiJ2B++ofgLMz894+9VWSJKmVZrr1zLURcWNEHAgsBz4NXED1zb9O9gfWZeb7+ttNSZKk9pppDhWZ+VvgIuCW+hY0Z1ONXHXyeuD4iLgjItbWI1aSJElDbcZAVXsqM78HkJnfp/uk9DuBwzPzYGAecGx5FyVJktqt1xGkH0fEZ4DvUo1C/aTL++6pR7Sgul3N3oX9kyRJar1eR6jeDdwLvAH4EfDXXd53VUQcEBFzqW5b84PyLkqSJLVbTyNUmfkUcHEPbz0XuJrqkuBNmfnNgr5JkiTNCn2dNJ6ZP6T6pp8kSdLI6PWSnyRJkrowUEmSJBUyUEmSJBUa6MKbN51yPOPjWwfZhVYbG5tvfSRJmgUcoZIkSSpkoJIkSSo00Et+7/nyzwbZ/Czw60F3oMWsTTcfPnTBoLsgSSPHESpJkqRCBipJkqRCBipJkqRCjQWqiLgoIv68qf1LkiS1RSOBKiKWAi/OzK80sX9JkqQ26Xugioh5wOeA+yPixH7vX5IkqW2aGKF6G/Aj4ALg4Ig4u
4E2JEmSWqOJQHUgcElmbga+ACxvoA1JkqTWaCJQ/QTYq358EPBAA21IkiS1RhMrpa8FLouIvwTmAac00IYkSVJr9D1QZeZW4C393q8kSVJbubCnJElSIQOVJElSIQOVJElSoSYmpffsX05azPj41kF2odXGxuZbny6sjSSpTRyhkiRJKmSgkiRJKjTQS37fuXL8Wc//9JhdB9QTSZKkHecIlSRJUiEDlSRJUiEDlSRJUiEDlSRJUqG+T0qPiFXAyvrpAuD2zDyj3+1IkiS1RRM3R14DrAGIiAuBK/rdhiRJUps0dskvIl4CLMrMu5pqQ5IkqQ2anEN1FvVIlSRJ0jBrJFBFxE7AcuBbTexfkiSpTZoaoVpKNRl9oqH9S5IktUZTgeoo4NaG9i1JktQqjdzLLzM/2MR+JUmS2siFPSVJkgoZqCRJkgo1csmvV4e8bYzx8a2D7IIkSVIxR6gkSZIKGagkSZIKGagkSZIKGagkSZIKGagkSZIKGagkSZIKGagkSZIK9X0dqohYCHwReBGwKTPP6HcbkiRJbdLECNVpwBcz8yBgfkQc1EAbkiRJrdFEoPoV8MqIWAAsBn7WQBuSJEmt0USg+g/gZcB7gB8DjzTQhiRJUms0Eag+Crw7M88F/gt4RwNtSJIktUYTgWohsF9EzAVeB0w00IYkSVJrNBGoPg5cAjwG7AGsa6ANSZKk1uj7sgmZeQewb7/3K0mS1FYu7ClJklTIQCVJklTIQCVJklTIQCVJklRozsSEqxpIkiSVcIRKkiSpkIFKkiSpkIFKkiSpkIFKkiSpkIFKkiSpkIFKkiSpkIFKkiSpUN9vjtyriFgL7AN8LTPPG1Q/BiEiXghcA8wFngBWAj8B7qvfcnZm3hsR5wDHAndk5ln1Z7fbNkwiYmeqOvyhFsAp9FCHYa8NQESsojpeABYAm4Aj8NghIhYB12fm0oiYB9wA7AGszczLSrYN5Bfqoym1eSlwJfA01XnnDGBP4Pb6OcBbMnO803l62M7dU2rzEgrqMGy1ge3qcw5wWP3Si4ErqI6lkTx2phrICFVEnAzMzcwlwF4Rsfcg+jFApwKfyswjgc3A+4F1mbms/rk3Il4DHAocDPwyIg7vtG1Qv0CD9mdSLYBd6KEOI1IbMnPNpNpsBC7GY4eIWEh1ct+93nQ2sCkzDwFOiYj5hdtmrQ61OQNYlZkrgMXAfsDrgPMnHUfjnc7Tw3bu7lCbHa7DsNUGtq9PZn500vnnh1RhaiSPnU4GdclvGfCl+vEGqhP9yMjMizLzG/XTMeD3wPERcUdErK1HaQ4D/jUzJ4D1wNIu24bN65lUC+DP6K0Oo1CbP6j/Jb0IOAiPHYCnqEbuttTPl/HMOeZWqjqVbJvNnlWbzFydmT+uX/sj4GGq/+9Oj4i7I+Jj9WvL2P483WnbbDb1uCmpQ6dts93U+gAQEa8FHszMhxjdY2c7gwpUuwMP1Y8fofrDMHIiYgmwEPgGcHhmHgzMo7os06lGo1C3O3l2LXajtzqMQm0mOwtYw/b1GsljJzO3ZOZjkzb1WoOhr1WH2gAQESuB/8zMnwP/TvUH77XAkojYn9GsTUkdhqo20P3YAd4LXFg/Hsljp5NBzaF6nOoPJcALGMHJ8RGxB9UB+WZgc2b+tn7pLmBvOtdoFOp2z5RabAtVMH0dRqE2AETETsByYDWwi8dOR9t+38eoft/HC7cNlYjYC/gHYNul3+9uO44i4nuM7nFUUodhrw0AEbEAeFFm/rTe5LFTG9QvtIlnhvsOAO4fUD8GIiJ2Aa4DPpCZDwBXRcQBETEXeBPwAzrXaBTqNrUWu9NbHUahNtssBW6vL9957HTWaw1Grlb1vJh1wDsnjT6sj4g/jojnA0dSzY8ZudpQVodhr802JwJfn/TcY6c2qBGqG4GNEbEncAzVNdhR8i7g1cDqiFgN3AJcBcwBbsrMb9ajEB+PiH8Gjq5/HuiwbdicC1xNXQvgPKpjZaY6jEJtt
jmKam4PTKnXiB87k10BfD0illJ9q+h2qssNO7ptmLwfeClwYUQAfBQ4h+o89CTw2czMiPgF25+nJzpsGyYldRj22mxzFPCJSc89dmpzJiYmBtJw/a+kI4BbM3PzQDrRchGxG3AccHdm3tdt27DrtQ6jWJturA/UJ+5DgfXbRmJKto2iTufpUTx391qHUaxNN6NYn4EFKkmSpGExdJPCJEmSnmsGKkmSpEIGKkkjKyL+dtB9kDQcDFSSRpmBSlJfOCldUqvVt9P5LPAKqqVezgR2BS6g+kfhNzLzIxHxduDlmfmP9efuz8yXR8S3gJuplopYBJxEtQ7OKqqvbt9W7+P85/DXkjRkHKGS1HanA0/WNys+E1hBtW7bqcAbgIMi4sgZ9vGi+oaulwMnZea19fPN9Q1dDVOSihioJLXdfsB3ATJzE9Ud7p/OzPvr1eI3Uo04/UG9avNkl9X//SXwvGa7K2kUGagktd0PqVdVjohXATcAcyJicUTMAQ6huuXOk8BY/ZkTp+yj2/34dqr3O6ffnZY0WgZ16xlJ6tWlwCURsbF+/l6qOVTX8Mwcqg0RMQb8TUR8BvhFj/v+XER8B5jLEN4KQ9Jzx0npkiRJhbzkJ0mSVMhAJUmSVMhAJUmSVMhAJUmSVMhAJUmSVMhAJUmSVMhAJUmSVOj/ANTw5ICdallaAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10,3))\n", "sns.countplot(y = 'c210mys',data=df)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, '购买数量')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEFCAYAAAASWssjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFkJJREFUeJzt3X+cXXV95/FXMgkYEoKhDMEAC6TL4yMsGPkhkEJo+NWFGtCiPqCyCGuguPwoSo3FFUqbpo/sg3TVFAqFGpAqilRbDD9luyuYuCgQ+bVVPxvYBmkgdULCjxCIQGb/OCdhDGTOzcy999yZeT0fj3nk3O+Ze8/nMsN9z/d8z/d7RvX29iJJUn9G112AJKnzGRaSpEqGhSSpkmEhSapkWEiSKo2pu4BW6el52cu8JGkbdHfvOGpr++xZSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkioZFpLU4ebPn8u8eVdw9913bG6bPfvMttYwbJf7kFrp4gWLB/zchXNOaWIlGgmefHI5ixZ9rdYaDAtJGqSzzvp9DjnkUMaPn8Ds2efx2GOPcOedi9mw4TUOPfRwTj75w1x33V/z4osvsHr1aj772Uu5/fbb+OlP/5k99tiDsWO348ILP/22133uuWf5zndupafnl1x99ZeZNetD7L33Pm/7vqVL72fx4tsYO3YMs2Z9mOnTj2Thwv/OmjWrefPNNznqqN/mxBM/OKj36GkoSRqklSuf4SMfOY3Zs88D4G/+5momTtyJyZN349FHfwLAU08tJ2I/zj57NhMn7gTAccedwGc+8zmeeeZpVq9e/bbXfc97pnDhhZ+mu3tXLrzw0+8YFAC7774nJ530Qbq7J3Pfff8TgEceWcbll/85zz777KCDAgwLSRq0vfbah91336NPSy9nn30Of/AHF3DggdMAOOusc9hnn6nccsvNPPzwgwC8/vrrALzxxpuMHr3VBV8rXX31l9m4cSMzZvw2GzduZOPGjUyZsjvz58/dHGCD5WkoSWqyc889n3nz/oRRo0Yzc+axAHz/+//E+vWvsH79K+y223vI/BlLl97PE088xtSpv8nOO//GgI+3xx578Pjjj7Ju3TrWrXuZN954g1WrniNiPx577BH22WcqU6bsPqj3NKq3d3je9sH7WaiVHODWYC1adB3vfe/+HHnkDABWr+5h6dL7f+17PvCBI7bosTTmF79Ywbe+9Q12330PVq16jiOPPJrDD59e+bz+7mdhWEgDYFhoOPLmR5KkQTEsJEmVDAtJUiWvhpKkJhvMmNY76YRxLnsWkqRK9iwkaRiYP38uK1b8C9OnH8nZZ5/T9Ne3ZyFJQ9z99/8vNm7cyHXX3cizz67kmWd+0fRjGBaSNMQ98sgyjj32eAAOO+wIHn/80aYfw7CQpCHu1VdfZZdddgVg4sSdWLNmTdOPYVhI0hA3btwObNiwAYBXX11Pb+/Gph/DAW5JarJ2X+oa8V4ef/xRDjjgQJ58cjl77rlX049hWEjSEHf00TM5//xzef75Hn70o//Ndd
d9tenHMCwkaYgbP34CV111HQ899GM+/vFPMGHChKYfo+VhERGTgW9n5oyIGAv8A7AzsCgzbxhMW6trl6ShYuLEiRx33Akte/2WDnBHxCTgJmB82XQRsCwzjwQ+GhE7DrJNktQGre5ZvAmcBny3fDwTuLTc/gFw6CDbvr+1A0+atANjxnQN+g1Izdbd7d85GnpaGhaZ+RJARGxqGg+sLLfXAJMH2bZVa9euH3T9Uiv09LxcdwlqsTl3XNbU11swa15TX29r+vtDpt3zLNYB48rtCeXxB9MmSSqtWfM855/f/HWhoP0fuMuAo8rtacCKQbZJkoCXXnqJefP+lNdee7Ulr9/uS2dvAu6KiBnA/sCPKU4tDbRNkgR0dY1m7tz5XHrpJS15/bb0LDJzZvnv08AJwA+B4zPzzcG0taN2SRoKxo+f0JL5FZu0fVJeZj4L3NqsNklS6zlILEmq5HIfktRk7brUtZ3sWUjSMHL11de35HUNC0lSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSpTHtPFhETAJuBnYFlmXmeRGxCNgfuDMz55Xf11CbJKk92t2zOBO4OTMPBXaMiM8BXZk5HZgaEftGxKmNtLW5bkka0draswCeBw6IiHcDewIvAreW++4FjgIOarBteX8HmjRpB8aM6Wpq8VIzdHfvWHcJ0jZrd1gsBT4I/CHwM2A7YGW5bw1wMDC+wbZ+rV27vmlFS83U0/Ny3SVI76i/P2TafRrqCuBTmTkX+DnwcWBcuW9CWc+6BtskSW3S7g/dScCBEdEFHA78N4pTSgDTgBXAsgbbJElt0u7TUPOBG4G9gAeALwFLImIKcBJwBNDbYJskqU3aGhaZ+SDwH/q2RcRM4ATgysx8cVvaJEnt0e6exdtk5lreutJpm9okSe3hQLEkqZJhIUmqZFhIkioZFpKkSoaFJKlS7VdDSSPNnDsuG/BzF8xywWXVw56FJKmSYSFJquRpKHW8ixcsHvBzF845pYmVSCOXPQtJUiXDQpJUybCQJFUyLCRJlRoOi4iYtpX2XZtXjiSpE21Lz+LKLRsi4t8D32xeOZKkTlR56WxEPA38FHghIs4F5gA7AF8Hfgt4oaUVSpJq10jPYk/gDmAjMJYiLJYDzwFvtq40SVKnaPQ0VO8W271btEmShrFtmcHdC4wC5gNTgH8H7AK83IK6JEkdZFsGuEdRBMZ/BR4FrgIeKb8kScPYQOZZeBpKkkaYRsPiDKCLondxJXAIxcD32BbVJUnqIJVjFpk5GiAivgdcD3y13DUKOAa4uFXFSZI6w7YMcN+Sma8Dr/dpuz0iVjS1IklSx2noNFRE7JOZN25l97NNrEeS1IEa7Vl8PSK+RhEMy4GfZ2ZvRHQBiyPiosz8ScuqlCTVqtGw2Ai8CuwHnAgcUC4DMh6416CQpOGt37CIiPkU60KNz8ybImIUMA34XeB0ihC5rOVVSpJqVTVm8T2gG/hFRPwceAX4LMWkvIMpQmNRRGy/LQeNiGsi4uRye1FEPBARl/XZ31CbJKk9qsJiX2AJxQS8DwDfBp6nWOpjR+AoYCnFJbQNiYgZwG6ZeXtEnAp0ZeZ0YGpE7Nto27a9TUnSYFSNWXwfOBbYGfgz4IHMvDYingJOAyYD78vMNxo5WESMBf4WuCsiPgTMBG4td99LET4HNdi2vL9jTZq0A2PGdDVSloax7u4d6y6hqYbb+9HQURUW36U49RTA9sAxEdEDPAWcA/wQ+H3gaw0e7xMUYyBXAhcBFwCLyn1rKE5tjQdWNtDWr7Vr1zdYkoaznp7htc7lcHs/6iz9/TFSdRrqEODzFCvLbgBOpjgFdQRwA8WH/ekR8a4GazkIuD4zV1HcPOkHwLhy34Synn
UNtkmS2qTqQ/dz5b8rKYLhu8DfAcuAuRSnqW6juJy2EU8CU8vtQ4G9KU4pQXGV1YrytRtpkyS1Sb+noTJzLkBEfCQz/zUiPkVxG9U/zswHy33fyMxXGjzeIuCGiDidYhHCmRST+qYAJ1H0WHqBJQ20SZLapJF7cE/IzH8FyMyHyrkWO/f5ltcaPVhmvgx8bIvXnwmcAFyZmS9uS5skqT0amcH9y4i4DfizzEyKU1efA+6JiMnA3RHxgcwc0P24M3Mtb13ptE1tkqT2aGSg+GHgFmA7gDIUNl0quwBYONCgkCQNDY30LN4AlpR/2W8WEdOAKZl5U0sqkyR1jEbCYhTFkh7vA/6NYjLcZIplQC5qYW2SpA7RyGmo3sw8lWIi3DHAlyjC4iqKlWglScNcozc/2gW4i+IS1rXAExT35b61vKeFJGkYayQsRgF/BMwpvz5K0dv4CfAtisl6kqRhrJGwGA18AXiEYhLdl/vsuwY4q/llSZI6Sb9hUU7A+4fM3JiZr1HMnh4FvAsgM1+lWEF2fMsrlSTVpmq5j15gYUS8H/gVxWW0rwNnRMSuwKuZeXnry5QGZs4dA79X1oJZ85pYiTS0NXoP7nuB/0ExMa+rfN4YYHJE3J+Zl7SoPklSB2h0qe+nMvOMzPwYsBB4DngIOAz4vVYVJ0nqDFVjFpvuhNFbPt6bYlnxycCXymU+jm9hfZKkDlDVs5gTEUuBiRExFbie4r4Sz1PelyIzn2ptiZKkuvUbFpn5J8ApwFcobp16DsW9uCdQ3OpUkjQC9DvAHRFjgL+iuApqFcXd8UYBL2Xmkoi4BBifmX/e8kolSbWpuhqqi6IXcQWwC8W9sNcCL0XEmcBvAWe2tEJJUu0auXR2PbAb8EmKORa7UNwpbxUwv5yYJ0kaxhq5dHYGcB7FHIvxFCvNrgRuAi6PiEtbV54kqRNUzeDeEBGHU/Qoesuv0eXzeoB/pFhYUJI0jFWehsrMZyu+xftiS3pHFy9YPODnLpxzShMr0WA1OoNbkjSCGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmq1OhtVZsqIiYD92TmQRGxCNgfuDMz55X7G2qTJLVHXT2LvwTGRcSpQFdmTgemRsS+jbbVVLckjUht71lExLHAKxSr1s7kreVC7qW4C99BDbYt7+84kybtwJgxXc0sXSNMd/eO1d/UZp1YU6uMpPc6FLQ1LCJiO+By4PeA2yhWsV1Z7l4DHLwNbf1au3Z90+rWyNTT83LdJbxNJ9bUKiPpvXaK/gK63aehLgWuycwXysfrgHHl9oSynkbbJElt0u4P3eOBCyLiPuD9wMkUp5QApgErgGUNtkmS2qStp6Ey8+hN22VgnAIsiYgpwEnAERT3zGikTZLUJrWdzsnMmZn5EsUg94+AYzLzxUbb6qlakkamWuZZ9JWZa9niBkqNtkmS2sOBYklSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSpdpXnZWkdzLnjssG/NwFs+Y1sRKBPQtJUgMMC0lSJcNCklTJMQtpmLh4weIBP3fhnFOaWImGI3sWkqRKhoUkqZJhIUmqZFhIkioZFpKkSoaFJKmSYSFJqmRYSJIqGRaSpEqGhSSpUluX+4iInYBbgC7gFeA04Fpgf+DOzJxXft+iRtokSe3R7p7FGcAXM/N3gFXA6UBXZk4HpkbEvhFxaiNtba5bkka0tvYsMvOaPg+7gf8EfLl8fC9wFHAQcGsDbcv7O9akSTswZkxXcwrXiNTdvWPdJbxNq2rqxPc6GMPt/XSCWladjYjpwCRgBbCybF4DHAyMb7CtX2vXrm9ewRqRenperruEt2lVTZ34XgdjuL2fdukvZNs+wB0ROwNXAZ8E1gHjyl0TynoabZMktUlbP3QjYjvg74HPZ+bTwDKKU0oA0yh6Go22SZLapN2noWZTnEL6QkR8Ab
gRODMipgAnAUcAvcCSBtokNcmcOy4b8HMXzPLixJGg3QPc11JcKrtZRCwGTgCuzMwXy7aZjbRJktqj9tuqZuZa3rrSaZvaJEnt4UCxJKmSYSFJqmRYSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkioZFpKkSoaFJKmSYSFJqmRYSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkiqNqbsAdZaLFywe8HMXzjmliZVI6iT2LCRJlQwLSVIlw0KSVMmwkCRVGlID3BGxCNgfuDMz59Vdj37dnDsuG9DzFszyRyl1uiETFhFxKtCVmdMj4oaI2Dczlzfy3MFc4bPdfg8O+Ll+CEqdqY7PhKH+eTCqt7e37hoaEhF/BdyTmXdFxOnAuMy8se66JGkkGEpjFuOBleX2GmByjbVI0ogylMJiHTCu3J7A0Kpdkoa0ofSBuww4qtyeBqyorxRJGlmGzAA3cBuwJCKmACcBR9RcjySNGENmgBsgIiYBJwA/yMxVddcjSSPFkAoLSVI9htJpKGmbRMTOwCHAI5m5uu56pKHMnkU/OnXGeERMBr6dmTM6oJadgFuALuAV4LTM/FW9VW0+ZXln+XU6cGxm9tRbVaH8+d2TmQd1QC1jgP9XfgFclJlP1FjSZhFxDXB3Zt5edy0AEfFfgNPKh+8GfpyZ59VY0qbf85uBXYFlraxnKF0N1VZ9Z4wDUyNi37prgs2/HDdRzDvpBGcAX8zM3wFWASfWXM8m7wMuycy/AL4HHFxzPX39JW9dBl639wHfzMyZ5VenBMUMYLdOCQqAzLx2038nYAnwtzWXBHAmcHNmHgrsGBGHtupAnobaupnAreX2vRSX7Ta0vEiLvUnx18136y4EIDOv6fOwG/hlXbX0lZn3A0TE0cBhwNx6KypExLEUPbBOuUDjCGBWRBwDPAGcl5lv1FlQRIyl+CC+KyI+lJkd8bu+SUTsDkzOzIfrrgV4HjggIt4N7Ak806oD2bPYuo6cMZ6ZL2Xmi3XXsaWImA5Myswf1V3LJhExiiJY1wKv11wOEbEdcDlwad219PEQcHxmHgaMBX635noAPgH8FLgSOCwiLqq5ni1dAFxbdxGlpcBewB8CP6P4rGoJw2LrnDHeoHIg+Srgk3XX0ldm9mbmBcDjQCfc8/VS4JrMfKHuQvp4PDOfK7cfBjrhdOtBwPXl5fFfB46puZ7NImI0RT331VzKJlcAn8rMucDPgf/cqgP5Abh1zhhvQPnX8t8Dn8/Mp+uuZ5OI+OOI+ET58N1AJ3xAHw9cEBH3Ae+PiK/UXA/A1yJiWkR0AR8GHqu7IOBJYGq5fSjQMb9XwAyKge1OuTJoEnBg+fM7HGhZXY5ZbJ0zxhszm2Lw+AsR8QXg2sz8Vs01AVwP3BoR5wD/h2LcqVaZefSm7Yi4LzPPqbOe0lzgG8AoYHFm/lPN9QAsAm4oV5ceC3y05nr6+o/AD+ouoo/5wI0Up6IeAL7ZqgN56Ww/nDEuSQXDQpJUyTELSVIlw0KSVMmwkCRVMiykFomI7SPiXX0e/2ad9UiDYVhITRQRx0XEF8uHFwHXbGoH7ikX7mvkdca2qERpQJxnITXXEuCSiDgcWAh8qZy4eAUwe9O6SxFxCvAV3lrtdUs/o4WzcaVt5aWzUpOUC8x95x12bQcExUJ9UMwCPgr4eGaeWz73NIpl1Gtd8lraGnsWUvOMBV4rl7B+RxHxJMWSDL1Ab0SMzczXKf5fbNkicNJg2bOQmiQitgf2pug1fIpiKXIobgw1DvhTitVU/4ViMbqPAb+iWEpmMkWA/BswEfi/mfnh9lUv9c+ehdQkmbkByPJrEUBEHEkxNvEAxZ3Mnivbd6bohXymfLwA+GFm3hYRJ9MZS4VLm3k1lNQkETGqXMK6r/kUK/J+MjOf67O/m18/7XQM8ONy+zfwlJQ6jD0LqXneT7Fa6oY+be8FroiITTc8Gl
v2HA4EHgSIiBOBnj73lXgPnbGkurSZYxZSC0XEPcCFmflkn7bRwFO8db/yu4FTM/PRiNgV+Gvgzsz8arvrlbbGnoXUAuWH/qnAfsCGLXafSjGQ/RLFqac/ysxHy33fobjUtvb7b0h92bOQWqCcqT0H+OfMXPwO+3fOzDURsX05MC51NMNCklTJq6EkSZUMC0lSJcNCklTJsJAkVfr/Pem//4fniZQAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#不同学历情况下,购买保险的情况\n", "sns.countplot(x='c210mys',hue='resp_flag',data=df)\n", "plt.xlabel('学历')\n", "plt.ylabel('购买数量')" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, '购买数量')" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#不同的县级别,购买保险的情况\n", "sns.countplot(x='N2NCY',hue='resp_flag',data=df)\n", "plt.xlabel('县级')\n", "plt.ylabel('购买数量')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 数据预处理" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 空值填充" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "#统计每一列数据类型\n", "temp = []\n", "for i in NA.Var:\n", " temp.append(df[i].dtypes) " ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
VarNA_count数据类型
0AASN10object
1ASKN8object
2COLLEGE8object
3MOBPLUS7object
4N2NCY10object
\n", "
" ], "text/plain": [ " Var NA_count 数据类型\n", "0 AASN 10 object\n", "1 ASKN 8 object\n", "2 COLLEGE 8 object\n", "3 MOBPLUS 7 object\n", "4 N2NCY 10 object" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "NA['数据类型']=temp\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "#填充策略\n", "\n", "#分类变量:通常分类水平出现的次数多,出现的概率就是最高的,用众数填充\n", "\n", "#数值变量:幸福指数 收入所处排名 都是分类变量" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 AASN\n", "1 ASKN\n", "2 COLLEGE\n", "3 MOBPLUS\n", "4 N2NCY\n", "5 NY8Y9\n", "6 POEP\n", "7 LIVEWELL\n", "8 HOMSTAT\n", "9 HINSUB\n", "11 c210b200\n", "12 c210cip\n", "13 c210hmi\n", "14 c210hva\n", "15 c210mah\n", "16 c210psu\n", "17 c210wht\n", "18 ilor\n", "19 meda\n", "Name: Var, dtype: object" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#去掉年龄变量\n", "NA[NA.Var != 'age'].Var" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "#对列名进行遍历,依次进行众数填充\n", "for i in NA[NA.Var != 'age'].Var:\n", " df[i].fillna(df[i].mode()[0],inplace=True)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "#年龄采取均值填补\n", "df.age.fillna(df.age.mean(),inplace=True)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "KBM_INDV_ID 0\n", "resp_flag 0\n", "GEND 0\n", "CA00 0\n", "CA03 0\n", "CA06 0\n", "CA11 0\n", "CA16 0\n", "AART 0\n", "ADBT 0\n", "ADEP 0\n", "AHBP 0\n", "AHCH 0\n", "ARES 0\n", "AHRT 0\n", "AASN 0\n", "ADGS 0\n", "AHRL 0\n", "ASKN 0\n", "AVIS 0\n", "BANK 0\n", "COLLEGE 0\n", "FINI 0\n", "INLI 0\n", "INMEDI 0\n", "INVE 0\n", "IOLP 0\n", "MOBPLUS 0\n", "N2NCY 0\n", "NY8Y9 0\n", " ..\n", "NAH19 0\n", "NPH19 0\n", "POC19 0\n", "HOMSTAT 0\n", "HINSUB 0\n", "STATE_NAME 0\n", "age 0\n", "c210apvt 0\n", 
"c210b200 0\n", "c210blu 0\n", "c210bpvt 0\n", "c210cip 0\n", "c210ebi 0\n", "c210hmi 0\n", "c210hva 0\n", "c210kses 0\n", "c210mah 0\n", "c210mob 0\n", "c210mys 0\n", "c210pdv 0\n", "c210pmr 0\n", "c210poo 0\n", "c210psu 0\n", "c210pwc 0\n", "c210wht 0\n", "ilor 0\n", "meda 0\n", "pdpe 0\n", "tins 0\n", "zhip19 0\n", "Length: 76, dtype: int64" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 变量编码" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "#把无效特征用户ID删掉\n", "del df['KBM_INDV_ID']" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
resp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
00M40511NNNNNNNNNNNNNNNNNNNSANNYNNYYYYNNNNNN1.0538YYCCA67.09911.010174.07190.0738.011164.00514526571.02279.015.064.04288
10M00000NNNNNNNNNNNNNNNNNNNPANNNNNNYNNNNNNNN4.0011UYUCA76.0986.015269.06984.0494.09756.00415448199.03765.017.061.04663
20F00000NNNNNNNNNNNNNNNNNNNMANNNNNNYYNNNNNNN3.0011UYUCA67.0884.0261232.04450.0516.08350.00417384462.04447.020.061.04673
30F04000NNNNNNNNNNNNYNNNNNNSBNNNYYNYYYNNNNNN1.0145YYCCA71.0964.015482.082103.0473.010552.00414457199.03971.04.062.03789
40F00000NNNNNNNNNNNNNYNNNNYMBNNNNNNYYNYYNYYN3.0011UUACA75.0884.091238.04755.0523.08950.010429321336.01565.09.061.03743
\n", "
" ], "text/plain": [ " resp_flag GEND CA00 CA03 CA06 CA11 CA16 AART ADBT ADEP AHBP AHCH ARES \\\n", "0 0 M 4 0 5 1 1 N N N N N N \n", "1 0 M 0 0 0 0 0 N N N N N N \n", "2 0 F 0 0 0 0 0 N N N N N N \n", "3 0 F 0 4 0 0 0 N N N N N N \n", "4 0 F 0 0 0 0 0 N N N N N N \n", "\n", " AHRT AASN ADGS AHRL ASKN AVIS BANK COLLEGE FINI INLI INMEDI INVE IOLP \\\n", "0 N N N N N N N N N N N N N \n", "1 N N N N N N N N N N N N N \n", "2 N N N N N N N N N N N N N \n", "3 N N N N N N Y N N N N N N \n", "4 N N N N N N N Y N N N N Y \n", "\n", " MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 N5N59 N6N64 N65P ONLA POEP SGFA SGLL \\\n", "0 S A N N Y N N Y Y Y Y N N \n", "1 P A N N N N N N Y N N N N \n", "2 M A N N N N N N Y Y N N N \n", "3 S B N N N Y Y N Y Y Y N N \n", "4 M B N N N N N N Y Y N Y Y \n", "\n", " SGOE SGSE SGTC U18 LIVEWELL NOC19 NAH19 NPH19 POC19 HOMSTAT HINSUB \\\n", "0 N N N N 1.0 5 3 8 Y Y C \n", "1 N N N N 4.0 0 1 1 U Y U \n", "2 N N N N 3.0 0 1 1 U Y U \n", "3 N N N N 1.0 1 4 5 Y Y C \n", "4 N Y Y N 3.0 0 1 1 U U A \n", "\n", " STATE_NAME age c210apvt c210b200 c210blu c210bpvt c210cip c210ebi \\\n", "0 CA 67.0 99 11.0 10 1 74.0 71 \n", "1 CA 76.0 98 6.0 15 2 69.0 69 \n", "2 CA 67.0 88 4.0 26 12 32.0 44 \n", "3 CA 71.0 96 4.0 15 4 82.0 82 \n", "4 CA 75.0 88 4.0 9 12 38.0 47 \n", "\n", " c210hmi c210hva c210kses c210mah c210mob c210mys c210pdv c210pmr \\\n", "0 90.0 738.0 111 64.0 0 5 14 52 \n", "1 84.0 494.0 97 56.0 0 4 15 44 \n", "2 50.0 516.0 83 50.0 0 4 17 38 \n", "3 103.0 473.0 105 52.0 0 4 14 45 \n", "4 55.0 523.0 89 50.0 10 4 29 32 \n", "\n", " c210poo c210psu c210pwc c210wht ilor meda pdpe tins zhip19 \n", "0 65 71.0 22 79.0 15.0 64.0 42 8 8 \n", "1 81 99.0 37 65.0 17.0 61.0 46 6 3 \n", "2 44 62.0 44 47.0 20.0 61.0 46 7 3 \n", "3 71 99.0 39 71.0 4.0 62.0 37 8 9 \n", "4 13 36.0 15 65.0 9.0 61.0 37 4 3 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": 
{}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDAARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18POC19HOMSTATHINSUBSTATE_NAME
0MNNNNNNNNNNNNNNNNNNNSANNYNNYYYYNNNNNNYYCCA
1MNNNNNNNNNNNNNNNNNNNPANNNNNNYNNNNNNNNUYUCA
2FNNNNNNNNNNNNNNNNNNNMANNNNNNYYNNNNNNNUYUCA
3FNNNNNNNNNNNNYNNNNNNSBNNNYYNYYYNNNNNNYYCCA
4FNNNNNNNNNNNNNYNNNNYMBNNNNNNYYNYYNYYNUUACA
\n", "
" ], "text/plain": [ " GEND AART ADBT ADEP AHBP AHCH ARES AHRT AASN ADGS AHRL ASKN AVIS BANK \\\n", "0 M N N N N N N N N N N N N N \n", "1 M N N N N N N N N N N N N N \n", "2 F N N N N N N N N N N N N N \n", "3 F N N N N N N N N N N N N Y \n", "4 F N N N N N N N N N N N N N \n", "\n", " COLLEGE FINI INLI INMEDI INVE IOLP MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 \\\n", "0 N N N N N N S A N N Y N \n", "1 N N N N N N P A N N N N \n", "2 N N N N N N M A N N N N \n", "3 N N N N N N S B N N N Y \n", "4 Y N N N N Y M B N N N N \n", "\n", " N5N59 N6N64 N65P ONLA POEP SGFA SGLL SGOE SGSE SGTC U18 POC19 HOMSTAT \\\n", "0 N Y Y Y Y N N N N N N Y Y \n", "1 N N Y N N N N N N N N U Y \n", "2 N N Y Y N N N N N N N U Y \n", "3 Y N Y Y Y N N N N N N Y Y \n", "4 N N Y Y N Y Y N Y Y N U U \n", "\n", " HINSUB STATE_NAME \n", "0 C CA \n", "1 U CA \n", "2 U CA \n", "3 C CA \n", "4 A CA " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#把类型object的提取出来,数值化\n", "df_object = df.select_dtypes('object')\n", "df_object.head()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1., 0., 0., ..., 4., 2., 0.],\n", " [1., 0., 0., ..., 4., 3., 0.],\n", " [0., 0., 0., ..., 4., 3., 0.],\n", " ...,\n", " [1., 0., 0., ..., 4., 3., 0.],\n", " [1., 0., 0., ..., 4., 2., 4.],\n", " [1., 0., 0., ..., 0., 1., 5.]])" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import OrdinalEncoder #对二维特征数值型编码\n", "df_object = OrdinalEncoder().fit_transform(df_object)\n", "df_object" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
resp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
001.0405110.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.02.00.00.00.01.00.00.02.01.01.01.00.00.00.00.00.00.01.05382.04.02.00.067.09911.010174.07190.0738.011164.00514526571.02279.015.064.04288
101.0000000.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.01.01.00.00.00.00.00.00.00.00.04.00111.04.03.00.076.0986.015269.06984.0494.09756.00415448199.03765.017.061.04663
200.0000000.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.01.01.00.00.00.00.00.00.00.03.00111.04.03.00.067.0884.0261232.04450.0516.08350.00417384462.04447.020.061.04673
300.0040000.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.02.01.00.00.00.01.01.01.01.01.01.00.00.00.00.00.00.01.01452.04.02.00.071.0964.015482.082103.0473.010552.00414457199.03971.04.062.03789
400.0000000.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.01.00.01.00.00.00.00.00.01.01.01.00.01.01.00.01.01.00.03.00111.03.00.00.075.0884.091238.04755.0523.08950.010429321336.01565.09.061.03743
\n", "
" ], "text/plain": [ " resp_flag GEND CA00 CA03 CA06 CA11 CA16 AART ADBT ADEP AHBP \\\n", "0 0 1.0 4 0 5 1 1 0.0 0.0 0.0 0.0 \n", "1 0 1.0 0 0 0 0 0 0.0 0.0 0.0 0.0 \n", "2 0 0.0 0 0 0 0 0 0.0 0.0 0.0 0.0 \n", "3 0 0.0 0 4 0 0 0 0.0 0.0 0.0 0.0 \n", "4 0 0.0 0 0 0 0 0 0.0 0.0 0.0 0.0 \n", "\n", " AHCH ARES AHRT AASN ADGS AHRL ASKN AVIS BANK COLLEGE FINI INLI \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "\n", " INMEDI INVE IOLP MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 N5N59 \\\n", "0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "1 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 2.0 1.0 0.0 0.0 0.0 1.0 1.0 \n", "4 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " N6N64 N65P ONLA POEP SGFA SGLL SGOE SGSE SGTC U18 LIVEWELL \\\n", "0 2.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "1 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 \n", "2 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 \n", "3 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "4 1.0 1.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 0.0 3.0 \n", "\n", " NOC19 NAH19 NPH19 POC19 HOMSTAT HINSUB STATE_NAME age c210apvt \\\n", "0 5 3 8 2.0 4.0 2.0 0.0 67.0 99 \n", "1 0 1 1 1.0 4.0 3.0 0.0 76.0 98 \n", "2 0 1 1 1.0 4.0 3.0 0.0 67.0 88 \n", "3 1 4 5 2.0 4.0 2.0 0.0 71.0 96 \n", "4 0 1 1 1.0 3.0 0.0 0.0 75.0 88 \n", "\n", " c210b200 c210blu c210bpvt c210cip c210ebi c210hmi c210hva c210kses \\\n", "0 11.0 10 1 74.0 71 90.0 738.0 111 \n", "1 6.0 15 2 69.0 69 84.0 494.0 97 \n", "2 4.0 26 12 32.0 44 50.0 516.0 83 \n", "3 4.0 15 4 82.0 82 103.0 473.0 105 \n", "4 4.0 9 12 38.0 47 55.0 523.0 89 \n", "\n", " c210mah c210mob c210mys c210pdv c210pmr c210poo c210psu c210pwc \\\n", "0 64.0 0 5 14 52 65 71.0 22 \n", "1 56.0 0 4 15 44 81 99.0 37 \n", "2 
50.0 0 4 17 38 44 62.0 44 \n", "3 52.0 0 4 14 45 71 99.0 39 \n", "4 50.0 10 4 29 32 13 36.0 15 \n", "\n", " c210wht ilor meda pdpe tins zhip19 \n", "0 79.0 15.0 64.0 42 8 8 \n", "1 65.0 17.0 61.0 46 6 3 \n", "2 47.0 20.0 61.0 46 7 3 \n", "3 71.0 4.0 62.0 37 8 9 \n", "4 65.0 9.0 61.0 37 4 3 " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#进行数据编码转换\n", "for i in df.columns:\n", " if df[i].dtypes=='object':\n", " df[i]=OrdinalEncoder().fit_transform(df[[i]])\n", " \n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 数据建模" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "from sklearn import tree\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import GridSearchCV \n", "from sklearn.model_selection import cross_val_score \n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "from sklearn.tree import DecisionTreeClassifier" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "#确定特征矩阵和标签\n", "X = df.iloc[:,1:]\n", "y = df['resp_flag']" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "#划分数据集\n", "Xtrain,Xtest,Ytrain,Ytest=train_test_split(X,y,test_size=0.3,\n", " random_state=420)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5946564885496183" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = tree.DecisionTreeClassifier()\n", "clf = clf.fit(Xtrain,Ytrain)\n", "score = clf.score(Xtest,Ytest)\n", "score" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 网格搜索" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'min_samples_leaf': 1500, 'min_samples_split': 4000}" ] }, "execution_count": 47, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "#尝试使用最小叶节点样本数量和最小分割样本数量进行调参\n", "param_grid = {'min_samples_leaf':list(range(1000,6000,100)),\n", " 'min_samples_split':list(range(4000,6000,100))}\n", "GR = GridSearchCV(tree.DecisionTreeClassifier(),param_grid,cv=5)\n", "GR.fit(Xtrain,Ytrain)\n", "\n", "GR.best_params_" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6417557251908397" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = tree.DecisionTreeClassifier(criterion='gini',max_depth=4,\n", " min_samples_leaf=1500,\n", " min_samples_split=4000)\n", "clf.fit(Xtrain,Ytrain)\n", "clf.score(Xtest,Ytest)\n", "#模型能力提高了" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "import graphviz\n", "features = list(df.columns[1:])\n", "\n", "dot_data = tree.export_graphviz(clf,feature_names=features,\n", " class_names=['NP','P'],filled=True,rounded=True)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "Tree\r\n", "\r\n", "\r\n", "0\r\n", "\r\n", "STATE_NAME <= 0.5\r\n", "gini = 0.481\r\n", "samples = 30566\r\n", "value = [18295, 12271]\r\n", "class = NP\r\n", "\r\n", "\r\n", "1\r\n", "\r\n", "ilor <= 8.5\r\n", "gini = 0.348\r\n", "samples = 8147\r\n", "value = [6320, 1827]\r\n", "class = NP\r\n", "\r\n", "\r\n", "0->1\r\n", "\r\n", "\r\n", "True\r\n", "\r\n", "\r\n", "6\r\n", "\r\n", "N2NCY <= 0.5\r\n", "gini = 0.498\r\n", "samples = 22419\r\n", "value = [11975, 10444]\r\n", "class = NP\r\n", "\r\n", "\r\n", "0->6\r\n", "\r\n", "\r\n", "False\r\n", "\r\n", "\r\n", "2\r\n", "\r\n", "gini = 0.412\r\n", "samples = 2402\r\n", "value = [1705, 697]\r\n", "class = NP\r\n", "\r\n", "\r\n", "1->2\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "3\r\n", "\r\n", "tins <= 7.5\r\n", "gini = 0.316\r\n", "samples = 5745\r\n", "value = [4615, 
1130]\r\n", "class = NP\r\n", "\r\n", "\r\n", "1->3\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "4\r\n", "\r\n", "gini = 0.24\r\n", "samples = 2853\r\n", "value = [2455, 398]\r\n", "class = NP\r\n", "\r\n", "\r\n", "3->4\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "5\r\n", "\r\n", "gini = 0.378\r\n", "samples = 2892\r\n", "value = [2160, 732]\r\n", "class = NP\r\n", "\r\n", "\r\n", "3->5\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "7\r\n", "\r\n", "age <= 70.5\r\n", "gini = 0.5\r\n", "samples = 11539\r\n", "value = [5613, 5926]\r\n", "class = P\r\n", "\r\n", "\r\n", "6->7\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "14\r\n", "\r\n", "ilor <= 7.5\r\n", "gini = 0.486\r\n", "samples = 10880\r\n", "value = [6362, 4518]\r\n", "class = NP\r\n", "\r\n", "\r\n", "6->14\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "8\r\n", "\r\n", "STATE_NAME <= 7.5\r\n", "gini = 0.479\r\n", "samples = 5170\r\n", "value = [2052, 3118]\r\n", "class = P\r\n", "\r\n", "\r\n", "7->8\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "11\r\n", "\r\n", "tins <= 5.5\r\n", "gini = 0.493\r\n", "samples = 6369\r\n", "value = [3561, 2808]\r\n", "class = NP\r\n", "\r\n", "\r\n", "7->11\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "9\r\n", "\r\n", "gini = 0.493\r\n", "samples = 2444\r\n", "value = [1364, 1080]\r\n", "class = NP\r\n", "\r\n", "\r\n", "8->9\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "10\r\n", "\r\n", "gini = 0.377\r\n", "samples = 2726\r\n", "value = [688, 2038]\r\n", "class = P\r\n", "\r\n", "\r\n", "8->10\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "12\r\n", "\r\n", "gini = 0.439\r\n", "samples = 1734\r\n", "value = [1171, 563]\r\n", "class = NP\r\n", "\r\n", "\r\n", "11->12\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "13\r\n", "\r\n", "gini = 0.5\r\n", "samples = 4635\r\n", "value = [2390, 2245]\r\n", "class = NP\r\n", "\r\n", "\r\n", "11->13\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "15\r\n", "\r\n", "gini = 0.5\r\n", "samples = 2985\r\n", "value = [1451, 1534]\r\n", "class = P\r\n", "\r\n", "\r\n", "14->15\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "16\r\n", 
"\r\n", "HINSUB <= 1.5\r\n", "gini = 0.47\r\n", "samples = 7895\r\n", "value = [4911, 2984]\r\n", "class = NP\r\n", "\r\n", "\r\n", "14->16\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "17\r\n", "\r\n", "gini = 0.495\r\n", "samples = 2245\r\n", "value = [1240, 1005]\r\n", "class = NP\r\n", "\r\n", "\r\n", "16->17\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "18\r\n", "\r\n", "gini = 0.455\r\n", "samples = 5650\r\n", "value = [3671, 1979]\r\n", "class = NP\r\n", "\r\n", "\r\n", "16->18\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n" ], "text/plain": [ "" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 对数据进行PCA压缩" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "from sklearn.decomposition import PCA \n", "pca = PCA(n_components=20)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(30566, 74)" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Xtrain.shape" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(30566, 20)" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Xtrain_pca = pca.fit_transform(Xtrain)\n", "Xtrain_pca.shape" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.9153591782418453,\n", " 0.04623390351316754,\n", " 0.009614205126099036,\n", " 0.0059733295547924215,\n", " 0.005376298245309462,\n", " 0.0038206947441833477,\n", " 0.0032810657682490097,\n", " 0.002282561885296771,\n", " 0.0016778924397305934,\n", " 0.0010966357436341806,\n", " 0.0009433417755888605,\n", " 0.0009092016983611149,\n", " 0.0007054275771638145,\n", " 0.0004457010640798597,\n", " 0.0003960043714565213,\n", " 0.00030893456867792167,\n", 
" 0.0002769756111792194,\n", " 0.00022580708084060145,\n", " 0.00021132827352552525,\n", " 0.00017942417200120434]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Explained-variance ratio of each principal component (per component, not cumulative)\n", "list(pca.explained_variance_ratio_)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9993179114551825" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(pca.explained_variance_ratio_).sum()\n", "# The 20 components together retain about 99.9% of the variance; the first component alone explains about 91.5%.\n", "# NOTE(review): the features are not standardized before PCA, so large-valued columns probably dominate the first component - confirm." ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "Xtest_pca = pca.transform(Xtest)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6115267175572519" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = tree.DecisionTreeClassifier(criterion='gini',max_depth=4,\n", " min_samples_leaf=1500,\n", " min_samples_split=4000)\n", "clf.fit(Xtrain_pca,Ytrain)\n", "clf.score(Xtest_pca,Ytest)  # test accuracy dropped from 0.642 (original features) to 0.612 after PCA" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }