{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 导入数据,观察数据" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
KBM_INDV_IDresp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
02814780M40511NNNNNNNNNNNNNNNNNNNSANNYNNYYYYNNNNNN1.0538YYCCA67.09911.010174.07190.0738.011164.00514526571.02279.015.064.04288
12904850M00000NNNNNNNNNNNNNNNNNNNPANNNNNNYNNNNNNNN4.0011UYUCA76.0986.015269.06984.0494.09756.00415448199.03765.017.061.04663
22999490F00000NNNNNNNNNNNNNNNNNNNMANNNNNNYYNNNNNNN3.0011UYUCA67.088NaN261232.04450.0516.08350.00417384462.04447.020.061.04673
33146350F04000NNNNNNNNNNNNYNNNNNNSBNNNYYNYYYNNNNNN1.0145YYCCA71.096NaN15482.082103.0473.010552.00414457199.03971.04.062.03789
43637020F00000NNNNNNNNNNNNNYNNNNYMBNNNNNNYYNYYNYYN3.0011UUACA75.088NaN91238.04755.0523.08950.010429321336.01565.09.0NaN3743
\n", "
" ], "text/plain": [ " KBM_INDV_ID resp_flag GEND CA00 CA03 CA06 CA11 CA16 AART ADBT ADEP \\\n", "0 281478 0 M 4 0 5 1 1 N N N \n", "1 290485 0 M 0 0 0 0 0 N N N \n", "2 299949 0 F 0 0 0 0 0 N N N \n", "3 314635 0 F 0 4 0 0 0 N N N \n", "4 363702 0 F 0 0 0 0 0 N N N \n", "\n", " AHBP AHCH ARES AHRT AASN ADGS AHRL ASKN AVIS BANK COLLEGE FINI INLI INMEDI \\\n", "0 N N N N N N N N N N N N N N \n", "1 N N N N N N N N N N N N N N \n", "2 N N N N N N N N N N N N N N \n", "3 N N N N N N N N N Y N N N N \n", "4 N N N N N N N N N N Y N N N \n", "\n", " INVE IOLP MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 N5N59 N6N64 N65P ONLA POEP \\\n", "0 N N S A N N Y N N Y Y Y Y \n", "1 N N P A N N N N N N Y N N \n", "2 N N M A N N N N N N Y Y N \n", "3 N N S B N N N Y Y N Y Y Y \n", "4 N Y M B N N N N N N Y Y N \n", "\n", " SGFA SGLL SGOE SGSE SGTC U18 LIVEWELL NOC19 NAH19 NPH19 POC19 HOMSTAT \\\n", "0 N N N N N N 1.0 5 3 8 Y Y \n", "1 N N N N N N 4.0 0 1 1 U Y \n", "2 N N N N N N 3.0 0 1 1 U Y \n", "3 N N N N N N 1.0 1 4 5 Y Y \n", "4 Y Y N Y Y N 3.0 0 1 1 U U \n", "\n", " HINSUB STATE_NAME age c210apvt c210b200 c210blu c210bpvt c210cip \\\n", "0 C CA 67.0 99 11.0 10 1 74.0 \n", "1 U CA 76.0 98 6.0 15 2 69.0 \n", "2 U CA 67.0 88 NaN 26 12 32.0 \n", "3 C CA 71.0 96 NaN 15 4 82.0 \n", "4 A CA 75.0 88 NaN 9 12 38.0 \n", "\n", " c210ebi c210hmi c210hva c210kses c210mah c210mob c210mys c210pdv \\\n", "0 71 90.0 738.0 111 64.0 0 5 14 \n", "1 69 84.0 494.0 97 56.0 0 4 15 \n", "2 44 50.0 516.0 83 50.0 0 4 17 \n", "3 82 103.0 473.0 105 52.0 0 4 14 \n", "4 47 55.0 523.0 89 50.0 10 4 29 \n", "\n", " c210pmr c210poo c210psu c210pwc c210wht ilor meda pdpe tins zhip19 \n", "0 52 65 71.0 22 79.0 15.0 64.0 42 8 8 \n", "1 44 81 99.0 37 65.0 17.0 61.0 46 6 3 \n", "2 38 44 62.0 44 47.0 20.0 61.0 46 7 3 \n", "3 45 71 99.0 39 71.0 4.0 62.0 37 8 9 \n", "4 32 13 36.0 15 65.0 9.0 NaN 37 4 3 " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = 
pd.read_csv('./data/ma_resp_data_temp.csv',header=0)\n", "# Use the fully qualified option key: the bare 'max_columns' pattern matches more than one option on pandas >= 1.4 and raises OptionError\n", "pd.set_option('display.max_columns', 100)  # show up to 100 columns when displaying frames\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(43666, 76)" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 43666 entries, 0 to 43665\n", "Data columns (total 76 columns):\n", "KBM_INDV_ID 43666 non-null int64\n", "resp_flag 43666 non-null int64\n", "GEND 43666 non-null object\n", "CA00 43666 non-null int64\n", "CA03 43666 non-null int64\n", "CA06 43666 non-null int64\n", "CA11 43666 non-null int64\n", "CA16 43666 non-null int64\n", "AART 43666 non-null object\n", "ADBT 43666 non-null object\n", "ADEP 43666 non-null object\n", "AHBP 43666 non-null object\n", "AHCH 43666 non-null object\n", "ARES 43666 non-null object\n", "AHRT 43666 non-null object\n", "AASN 43656 non-null object\n", "ADGS 43666 non-null object\n", "AHRL 43666 non-null object\n", "ASKN 43658 non-null object\n", "AVIS 43666 non-null object\n", "BANK 43666 non-null object\n", "COLLEGE 43658 non-null object\n", "FINI 43666 non-null object\n", "INLI 43666 non-null object\n", "INMEDI 43666 non-null object\n", "INVE 43666 non-null object\n", "IOLP 43666 non-null object\n", "MOBPLUS 43659 non-null object\n", "N2NCY 43656 non-null object\n", "NY8Y9 43657 non-null object\n", "N2N29 43666 non-null object\n", "N3N39 43666 non-null object\n", "N4N49 43666 non-null object\n", "N5N59 43666 non-null object\n", "N6N64 43666 non-null object\n", "N65P 43666 non-null object\n", "ONLA 43666 non-null object\n", "POEP 43658 non-null object\n", "SGFA 43666 non-null object\n", "SGLL 43666 non-null object\n", "SGOE 43666 non-null object\n", "SGSE 43666 non-null object\n", "SGTC 43666 non-null object\n", "U18 43666 non-null 
object\n", "LIVEWELL 43661 non-null float64\n", "NOC19 43666 non-null int64\n", "NAH19 43666 non-null int64\n", "NPH19 43666 non-null int64\n", "POC19 43666 non-null object\n", "HOMSTAT 43656 non-null object\n", "HINSUB 43655 non-null object\n", "STATE_NAME 43666 non-null object\n", "age 43662 non-null float64\n", "c210apvt 43666 non-null int64\n", "c210b200 43661 non-null float64\n", "c210blu 43666 non-null int64\n", "c210bpvt 43666 non-null int64\n", "c210cip 43664 non-null float64\n", "c210ebi 43666 non-null int64\n", "c210hmi 43659 non-null float64\n", "c210hva 43651 non-null float64\n", "c210kses 43666 non-null int64\n", "c210mah 43651 non-null float64\n", "c210mob 43666 non-null int64\n", "c210mys 43666 non-null int64\n", "c210pdv 43666 non-null int64\n", "c210pmr 43666 non-null int64\n", "c210poo 43666 non-null int64\n", "c210psu 43643 non-null float64\n", "c210pwc 43666 non-null int64\n", "c210wht 43652 non-null float64\n", "ilor 43660 non-null float64\n", "meda 43651 non-null float64\n", "pdpe 43666 non-null int64\n", "tins 43666 non-null int64\n", "zhip19 43666 non-null int64\n", "dtypes: float64(11), int64(24), object(41)\n", "memory usage: 25.3+ MB\n" ] } ], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# Cast the ID column to object dtype so it is handled as a label rather than a numeric feature\n", "df['KBM_INDV_ID'] = df.KBM_INDV_ID.astype(object)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countmeanstdmin25%50%75%max
resp_flag43666.00.4005180.4900090.00.00.01.01.0
CA0043666.00.2678060.9964590.00.00.00.06.0
CA0343666.00.2140340.8869140.00.00.00.07.0
CA0643666.00.3819681.1782810.00.00.00.07.0
CA1143666.00.3131961.0472930.00.00.00.07.0
CA1643666.00.2243160.8582030.00.00.00.07.0
LIVEWELL43661.02.8421251.1722551.02.03.04.06.0
NOC1943666.00.4657171.0450870.00.00.00.09.0
NAH1943666.02.0808181.2158520.01.02.03.09.0
NPH1943666.02.5465351.9036320.01.02.03.016.0
age43662.071.2255514.31824465.067.071.075.0101.0
c210apvt43666.088.78152311.1788660.085.093.097.099.0
c210b20043661.014.78484215.7493440.04.010.019.099.0
c210blu43666.019.4025799.9707880.012.019.026.067.0
c210bpvt43666.011.05917610.7053400.03.07.015.099.0
c210cip43664.054.15880428.3928861.030.057.079.099.0
c210ebi43666.055.59233723.1206280.040.052.068.0303.0
c210hmi43659.068.35454332.4321290.046.063.085.0255.0
c210hva43651.0317.276007239.5434740.0140.0233.0429.0999.0
c210kses43666.089.26989019.45274260.075.085.099.0170.0
c210mah43651.052.9523956.4920130.049.053.057.085.0
c210mob43666.02.7133938.8085660.00.00.00.099.0
c210mys43666.04.1428571.0713130.03.04.05.08.0
c210pdv43666.015.8034865.4604220.012.015.019.055.0
c210pmr43666.044.28434013.7637550.035.045.055.082.0
c210poo43666.060.23608824.9374120.042.066.081.099.0
c210psu43643.068.04559730.4998820.049.078.095.099.0
c210pwc43666.033.28358411.9621880.026.033.040.099.0
c210wht43652.061.52845215.3201290.051.061.073.099.0
ilor43660.018.01495619.1747410.07.015.019.099.0
meda43651.050.83487215.6759646.039.051.061.093.0
pdpe43666.055.57493213.20480626.046.054.065.099.0
tins43666.07.8651353.5625921.05.08.011.019.0
zhip1943666.04.4067923.0898490.01.04.07.09.0
\n", "
" ], "text/plain": [ " count mean std min 25% 50% 75% max\n", "resp_flag 43666.0 0.400518 0.490009 0.0 0.0 0.0 1.0 1.0\n", "CA00 43666.0 0.267806 0.996459 0.0 0.0 0.0 0.0 6.0\n", "CA03 43666.0 0.214034 0.886914 0.0 0.0 0.0 0.0 7.0\n", "CA06 43666.0 0.381968 1.178281 0.0 0.0 0.0 0.0 7.0\n", "CA11 43666.0 0.313196 1.047293 0.0 0.0 0.0 0.0 7.0\n", "CA16 43666.0 0.224316 0.858203 0.0 0.0 0.0 0.0 7.0\n", "LIVEWELL 43661.0 2.842125 1.172255 1.0 2.0 3.0 4.0 6.0\n", "NOC19 43666.0 0.465717 1.045087 0.0 0.0 0.0 0.0 9.0\n", "NAH19 43666.0 2.080818 1.215852 0.0 1.0 2.0 3.0 9.0\n", "NPH19 43666.0 2.546535 1.903632 0.0 1.0 2.0 3.0 16.0\n", "age 43662.0 71.225551 4.318244 65.0 67.0 71.0 75.0 101.0\n", "c210apvt 43666.0 88.781523 11.178866 0.0 85.0 93.0 97.0 99.0\n", "c210b200 43661.0 14.784842 15.749344 0.0 4.0 10.0 19.0 99.0\n", "c210blu 43666.0 19.402579 9.970788 0.0 12.0 19.0 26.0 67.0\n", "c210bpvt 43666.0 11.059176 10.705340 0.0 3.0 7.0 15.0 99.0\n", "c210cip 43664.0 54.158804 28.392886 1.0 30.0 57.0 79.0 99.0\n", "c210ebi 43666.0 55.592337 23.120628 0.0 40.0 52.0 68.0 303.0\n", "c210hmi 43659.0 68.354543 32.432129 0.0 46.0 63.0 85.0 255.0\n", "c210hva 43651.0 317.276007 239.543474 0.0 140.0 233.0 429.0 999.0\n", "c210kses 43666.0 89.269890 19.452742 60.0 75.0 85.0 99.0 170.0\n", "c210mah 43651.0 52.952395 6.492013 0.0 49.0 53.0 57.0 85.0\n", "c210mob 43666.0 2.713393 8.808566 0.0 0.0 0.0 0.0 99.0\n", "c210mys 43666.0 4.142857 1.071313 0.0 3.0 4.0 5.0 8.0\n", "c210pdv 43666.0 15.803486 5.460422 0.0 12.0 15.0 19.0 55.0\n", "c210pmr 43666.0 44.284340 13.763755 0.0 35.0 45.0 55.0 82.0\n", "c210poo 43666.0 60.236088 24.937412 0.0 42.0 66.0 81.0 99.0\n", "c210psu 43643.0 68.045597 30.499882 0.0 49.0 78.0 95.0 99.0\n", "c210pwc 43666.0 33.283584 11.962188 0.0 26.0 33.0 40.0 99.0\n", "c210wht 43652.0 61.528452 15.320129 0.0 51.0 61.0 73.0 99.0\n", "ilor 43660.0 18.014956 19.174741 0.0 7.0 15.0 19.0 99.0\n", "meda 43651.0 50.834872 15.675964 6.0 39.0 51.0 61.0 93.0\n", "pdpe 
43666.0 55.574932 13.204806 26.0 46.0 54.0 65.0 99.0\n", "tins 43666.0 7.865135 3.562592 1.0 5.0 8.0 11.0 19.0\n", "zhip19 43666.0 4.406792 3.089849 0.0 1.0 4.0 7.0 9.0" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.describe().transpose()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "20" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Number of columns that contain at least one missing value\n", "df.shape[1] - df.dropna(axis='columns').shape[1]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# Count the missing values in every column\n", "NA = df.isna().sum()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
index0
0KBM_INDV_ID0
1resp_flag0
2GEND0
3CA000
4CA030
\n", "
" ], "text/plain": [ " index 0\n", "0 KBM_INDV_ID 0\n", "1 resp_flag 0\n", "2 GEND 0\n", "3 CA00 0\n", "4 CA03 0" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#重置索引\n", "NA = NA.reset_index()\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
VarNA_count
0KBM_INDV_ID0
1resp_flag0
2GEND0
3CA000
4CA030
\n", "
" ], "text/plain": [ " Var NA_count\n", "0 KBM_INDV_ID 0\n", "1 resp_flag 0\n", "2 GEND 0\n", "3 CA00 0\n", "4 CA03 0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#重置列名\n", "NA.columns = ['Var','NA_count']\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
VarNA_count
0AASN10
1ASKN8
2COLLEGE8
3MOBPLUS7
4N2NCY10
\n", "
" ], "text/plain": [ " Var NA_count\n", "0 AASN 10\n", "1 ASKN 8\n", "2 COLLEGE 8\n", "3 MOBPLUS 7\n", "4 N2NCY 10" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#过滤出大于0的数据\n", "NA = NA[NA.NA_count>0].reset_index(drop=True)#重置索引,并把之前的索引删除\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 0.000229\n", "1 0.000183\n", "2 0.000183\n", "3 0.000160\n", "4 0.000229\n", "5 0.000206\n", "6 0.000183\n", "7 0.000115\n", "8 0.000229\n", "9 0.000252\n", "10 0.000092\n", "11 0.000115\n", "12 0.000046\n", "13 0.000160\n", "14 0.000344\n", "15 0.000344\n", "16 0.000527\n", "17 0.000321\n", "18 0.000137\n", "19 0.000344\n", "Name: NA_count, dtype: float64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#把空值个数换化为比例\n", "NA.NA_count/df.shape[0]" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
KBM_INDV_IDresp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [KBM_INDV_ID, resp_flag, GEND, CA00, CA03, CA06, CA11, CA16, AART, ADBT, ADEP, AHBP, AHCH, ARES, AHRT, AASN, ADGS, AHRL, ASKN, AVIS, BANK, COLLEGE, FINI, INLI, INMEDI, INVE, IOLP, MOBPLUS, N2NCY, NY8Y9, N2N29, N3N39, N4N49, N5N59, N6N64, N65P, ONLA, POEP, SGFA, SGLL, SGOE, SGSE, SGTC, U18, LIVEWELL, NOC19, NAH19, NPH19, POC19, HOMSTAT, HINSUB, STATE_NAME, age, c210apvt, c210b200, c210blu, c210bpvt, c210cip, c210ebi, c210hmi, c210hva, c210kses, c210mah, c210mob, c210mys, c210pdv, c210pmr, c210poo, c210psu, c210pwc, c210wht, ilor, meda, pdpe, tins, zhip19]\n", "Index: []" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#查看数据中是否有重复值\n", "df[df.duplicated()]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 探索数据及数据可视化分析" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "plt.style.use('seaborn')" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "#支持中文\n", "plt.rcParams['font.sans-serif']=['SimHei']\n", "plt.rcParams['axes.unicode_minus']=False" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索样本分类是否均衡" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAlUAAADMCAYAAABa8FYQAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAADJNJREFUeJzt3X2sZHdZB/DvZRcKLIuWcG3EotgEHkGhGF5seZGFIgKCyIuiYJQQ/gEhGBMFJfgSC/ElUVEChLcGEa0ohJiglEpKaGgQKETln0caLBheZBOaLmikab3+MUO73t6lM7u/szNz7+eTbO7Mb+6e89x5MnO+8ztnztna2dkJAABn5k6rLgAAYD8QqgAABhCqAAAGEKoAAAYQqgAABhCqAAAGOLzqAm6++ZadG27471WXwS7nnnv36Mt60ZP1pC/rR0/W037py/b20a1TPbbymarDhw+tugT2oC/rR0/Wk76sHz1ZTwehLysPVQAA+4FQBQAwgFAFADCAUAUAMIBQBQAwgFAFADDAys9T9bxfe9eqSwAANtzrfvUnV12CmSoAgBGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAYQqAIABhCoAgAGEKgCAAQ5PsdCqeluSByV5f3dfOsU6AADWyfCZqqp6VpJD3X1xkguq6v6j1wEAsG6m2P13LMm757c/mOQxE6wDAGCtTBGqjiT54vz215KcN8E6AADWyhSh6htJ7ja/fY+J1gEAsFamCDzX5rZdfhcmuX6CdQAArJUpvv33viRXV9V9kjwlyUUTrAMAYK0Mn6nq7hOZHaz+sSSP7+4bR68DAGDdTHKequ6+Ibd9AxAAYN9zEDkAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAC4eqqjpnykIAADbZMjNVl1fVQ6pqK0mq6her6rUT1QUAsFGWCVXnJPnjJH86v//sJA8cXhEAwAZaJlR9Z3dfkuTh8/tHkxwZXxIAwOZZJlQdr6orkxyqqpcm+YEkO9OUBQCwWQ4v8bs/k1mQ+nySJyc5luRRE9QEALBxFp6p6u5vJvliknsmuSbJvbv7sqkKAwDYJAvPVFXVG5L8+PzuVpIbk/zwFEUBAGyaZY6p+sEkD0vyiSQXJvnPSSoCANhAy4SquyS5d5J7JLk5yfYkFQEAbKBlQtVvJHlEksuTfCnJRyepCABgAy18TFV3X3XS3b+YoBYAgI3lgsoAAAPc4UxVVX3vqR7r7i+MLQcAYDNt7ex8+5OiV9VVmZ05fWvXQzvd/YQBNewcP/71AYthpO3to9GX9aIn60lf1o+erKf90pft7aO789Ct7nCmqrsfP7YcAID9xzFVAAAD3GGoqqpL5j9POd0FAHDQLTJT9ar5zw9NWQgAwCZb5DxVR+azVedW1Y+e/EB3f2SasgAANssioeq3kjwvyXlJXpDbvgW4k0SoAgDIYt/++0CSD1TVVd39wrNQEwDAxlnm23/P2GuwqvYcBwA4SBYOVd194hQPvXxQLQAAG2vEeaqcagEAOPBGhKpvf50bAIADwBnVAQAGsPsPAGCARc5TdauqekCSC5Jc193XJS64DACQLDFTVVWvTvKeJL+Q5H1V9crJqgIA2DDLzFQ9PclDu/uWqjqc5Jokv3emBbzgMmdkAFi1P3zapasuATbeMsdUfSHJ+fPb5yf53PhyAAA20zKh6nFJPltVX0ry2STHqkqwAgDIErv/unt7ykIAADbZwqGqqh6c5O5JTiR5RZJ3dPdVUxUGALBJltn998YkNyS5NMmVSX5/kooAADbQMqHq5u7+tyR
Hu/tdSf5nopoAADbOMqHqa1X1ySTXVNVzM5u1AgAgy52n6ueSPKi7P11VD03y/IlqAgDYOAvPVHX3N5N8taouTnJj7P4DALjVMpepeUWSK5K8JcklSd46VVEAAJtmmWOqnpHkwUmOd/dbkzxgmpIAADbPUt/+y+zyNDtVdSTJTdOUBACweZY5UP3VmV1E+V5JPpHkJZNUBACwgZYJVXft7vtW1XZ3H5+sIgCADbTM7r9Lq2pLoAIAuL1lQtXvJvmT+fFUAACcZJndf6+b/3x6VW0l2enuCyaoCQBg4ywcqrr7+6csBABgky2z+w8AgFMQqgAABhCqAAAGEKoAAAYQqgAABhCqAAAGEKoAAAYQqgAABhCqAAAGEKoAAAYQqgAABpgsVFXVeVV19VTLBwBYJ5OEqqo6N8k7khyZYvkAAOtmqpmqW5I8N8mJiZYPALBWDk+x0O4+kSRVNcXiAQDWjgPVAQAGEKoAAAYQqgAABpg0VHX3sSmXDwCwLsxUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMIFQBAAwgVAEADCBUAQAMsLWzs7PqGnaOH//6qmtgl+3to9GX9aIn60lf1o+erKf90pft7aNbp3rMTBUAwABCFQDAAEIVAMAAQhUAwABCFQDAAEIVAMAAQhUAwADrcJ4qAICNZ6YKAGAAoQoAYAChCgBgAKEKAGAAoQoAYAChCgBgAKEKAGCAw6tceVW9LcmDkry/uy9dZS0HRVUdTvK5+b8keVmS5yR5apKPd/cvzX/vdxYZ48xU1XlJ/ra7H1tVd07y3iT3SvK27n77mYyt5A/aJ3b15XuS/FOS6+YP/3R3H9/r/WvRMZZTVd+R5PIkh5L8V5LnJnljTvP515Mzd4qeXJeTti3d/a+Lbkv2y/ZlZTNVVfWsJIe6++IkF1TV/VdVywHzkCR/1d3HuvtYkrskeUySRyb5alU9saoetsjYasrfP6rq3CTvSHJkPvSyJNd296OTPKeqjp7hGKdhj778SJLXfOs1Mw9Ut3v/WnRsFX/TPvD8JH/U3U9K8pUkP5vTfP71ZJjdPXllTtq2zAPVQtuS/bR9WeXuv2NJ3j2//cHMnlCmd1GSp1XVx+ef1i5J8p7u3klyRZLHJnncgmOcmVsy+3R3Yn7/WG57TXwkycPPcIzTs7svFyV5UVV9qqpeOx87ltu/fy06xpK6+w3dfeX87naSn8/pP/97jbGkPXpyc07atsz3iiy6Ldk325dVhqojSb44v/21JOetsJaD5BNJntjdj0xy5yR3y+37sFdv9Guw7j7R3TeeNLTo864/E9qjL/+Q2Yb4EUkurqqHRF9WoqouTnJukv+I18paOKknV+b/b1uemgPYk1WGqm9ktkFPknusuJaD5F+6+8vz25/M3n1YdIyxzqQX+jOda7r76919S5JPJ7l/9OWsq6p7JfmzJC+M18pa2NWT3duWA/k6WWXh1+a2adcLk1y/ulIOlHdW1YVVdSjJT2X2CWF3H/bqjX5Nb9HnXX/Oriuq6rur6u5JnpTkM9GXs6qq7pLkb5L8end/Pl4rK7dHT3ZvW/45B7AnWzs7OytZcVXdM8nVST6U5ClJLto15c4EquqHkvxlkq0kf5fk1Zn14ZNJnjz/9/lFxrr73892/ftRVX24u49V1fcl+fsk/5jkUZkdy3P+6Y7NZ1Y4TSf15fGZfdPspiRv7u7X7/X+lWRnkTHvc8urqhcneW1mG+okuSzJr+Q0nv+9xvRkeXv05Kokz85829Ldr6qqO+WAbV9WFqqSW79l82NJPtLdX1lZIQdcVd0tyU8k+VR3f26ZMcaqqvtk9ontim+90Z/JGNPZ6/1r0THO3Jk8/3py9hy07ct
KQxUAwH6xsQeDAQCsE6EKAGAAoQo40Krql1ddA7A/CFXAQSdUAUM4UB1Ye/NLXrwpyQMzuxD8S5LcNckfZPbh8Mru/s2qekGS+3X3b8//3/Xdfb+q+nCSD2T2de3zkjwzs/PhvDizr9l/bL6M15zFPwvYZ8xUAZvgRUluml8w+iVJnpDknZld1PVRSR5eVU+6g2V81/wi4pcleWZ3//X8/lfmF4AVqIAzIlQBm+DBSa5Jku6+NsmfJ/nf7r5+fhHWqzObebrV/AzoJ3v7/OdXk5wzbbnAQSRUAZvgM5ntpktVPTTJe5NsVdV9q2oryaMzO7PzTUm25//nGbuW8Y1TLPtO8+VujS4aOFgOr7oAgAW8Ncmbq+rq+f2XZ3ZM1eW57ZiqD1bVdpKXVtXrk3x570Xdzluq6qNJDmUe3ABOhwPVAQAGsPsPAGAAoQoAYAChCgBgAKEKAGAAoQoAYAChCgBgAKEKAGCA/wOy4AvcEyp18gAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10,3))\n", "sns.countplot(y='resp_flag',data=df)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.4005175651536665" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#计算购买用户和未购买的比例\n", "df.resp_flag.sum()/df.resp_flag.shape[0]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5994824348463335" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "1-df.resp_flag.sum()/df.resp_flag.shape[0]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 用户年龄分布情况" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAXgAAAECCAYAAAD0JMwBAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3WtwXOd93/Hv2St2sbhjAZAESZEU9VAXipREyaYt25RtOXYS26maxGoydpNJZhIndTKdNm3cOPWMq04ST+O8SGvXduVOJnGccTIZxa5sV3Vq1bIsRRZ1oyjy0YUkeMONuC+AxWIvfbG7JAXhsgAWOAfn/D4zlJaLZ7l/HIA/PHzOc3FKpRIiIuI/IbcLEBGRjaGAFxHxKQW8iIhPKeBFRHxKAS8i4lMRtwuoGh6e2tTpPG1tScbGZjbzLWum2lbPq3WBalsrr9bmtbrS6SZnqY8FtgcfiYTdLmFJqm31vFoXqLa18mptXq1rMYENeBERv1PAi4j4lAJeRMSnFPAiIj6lgBcR8SkFvIiITyngRUR8SgEvIuJTCngREZ/yzFYFm+17T51jKpNd9euOHd5R/2JERDaAevAiIj6lgBcR8SkFvIiITyngRUR8SgEvIuJTCngREZ9SwIuI+JQCXkTEpwK70GmtHn/h0ppepwVSIrLZ1IMXEfEpBbyIiE8p4EVEfEoBv4hcvsDzrw5zqm/M7VJERNZMN1kX6BuY4plTg8zOFQBoS8Xp6Ui6XJWIyOqpB3+dgdEZ/t8Ll5mbL2J2teIAT50cIF8oul2aiMiqKeCv0zcwBcB9d+zgbbd0c/MNbUzNzPPi6yMuVyYisnoK+IpSqcTFoQyxSIhtlSGZQzd2kkpEeeXcKAOjMy5XKCKyOgr4ivFMjulsnu3pRkIhB4BoJMQ7D/bgAD947hJjU3PuFikisgoK+IqLQxkAetOpNz3f3Z7kHQe3MZ8v8o/PXmR6dt6N8kREVk0BX3FxOIPjwI7Oxrd8bO/2Zu40aWbm8jz+/CUKRd10FRHvU8ADs3N5hsezdLUmiMfCi7a59YY29u1oZmRyjuN2eJMrFBFZPQU8cPnKNAA7ulJLtnEch3tu7qYlFeN03zjnB6c2qzwRkTVRwAOXhssB35t+6/DM9aKREO85tJ1wyOHHJwbI5gqbUZ6IyJoo4IErE1ni0TAtjbEV27Y2xTm8v5Ncvsipc6ObUJ2IyNoEPuCzuTyZ2Xk6WhpwHKem15hdrSTiYU71jZHN5Te4QhGRtQl8wI9MlOe2d7Y01PyaSDjEbXs6yBdKnDyrDclExJsU8JNZADpWEfAAN+1sIRGPYM+PMZNVL15EvEcBP1EJ+ObVBXw4HOLgvnbyhRLfevIsL58ZYXp2ntm5PPN5zZMXEfcFfrvgkYksiXiYZMPqL4XZ2UqhUOLEmRGee/UKz716BYBoOMR779pBd/u1bYZXc5ZrU6qBqUz5B4/OchWRtaop1YwxDwO3AI9aax+qpY0xpg34OtAFHLfW/kadaq6bmWyembk8vcvMf1+O4zjcuqed/b0tnO4bY3w6R6kE5weneOKlfj78zhuIRxdfOCUistFWHKIxxjwAhK21R4G9xpj9Nbb5OPB1a+0RoMkYc6TOta/baGX8vbM5vq4/JxYNc/uNnbz70Hbec3g7h/Z1MJPN8/TJQUqlUj1KFRFZtVp68MeAb1YePwbcC7xWQ5sR4DZjTCuwE7iw3Ju0tSWJRDaxt/v6CJOz5Zujvd3NNKVWNwa/nKOHdjA4lqVvYIq+bc0c3Ne56j+jWk863VS3uurFizWBd+sC1bZWXq3Nq3UtVEvANwLVAeRR4M4a23wD+Bngd4BTleeXNDa2+fut918p7yCZjIeujnnXy9Hbunn0x3388PlLzM7Oc/MNbTW/9vox+OFhb22JkE43ea4m8G5doNrWyqu1ea2u5X7Y1DKLJgMkKo9TS7xmsTafBX7TWvs54DTwqzXWuyl
KpRIjE1kaGyI0xOp/rzmViPKBe3aSiIf5yekhjtshZrLaalhENk8tAX+c8pALwCHgXI1t2oCDxpgw8DbAU4PRs3N5srkC7aucHrkabU1xPvi2XaQSUU6eHePvHj/Dd5/uY2omt2HvKSJSVUvAPwJ83BjzBeAXgZPGmIUzaRa2eRT4I+ArwATQTnnIxjMylYM7GhMbO1O0KRnjZ47u5p6bu+huTzA8nuX5ynRKEZGNtGK6WWsnjTHHgPuBz1trB4AXV2gzATwD3Fr3iuukejJTMr7xSwHisTAHdrdhdrXyv37cR9/gFNPZeRobohv+3iISXDWlm7V2jGuzZNbcxkumK9sLrGWB01o5jsOB3a089fIg9vw4d96U3rT3FpHgCexWBdUefGITevDX27OtmXg0zGsXJsgXtKWBiGycwAd8Mr65wySRcIibdrYwN1/g7OXJTX1vEQmW4AZ8ZcriZg7RVJldrTgO2Avjm/7eIhIcwQ342XkiYYdoZPMvQbIhyraOJKOTc1dn84iI1FugAz7p4iyW6gZnF4cyrtUgIv4WyO2C5/NFsrkCran1bTK2HjvTKZ5hiAtDGQ7sXnobg9VsM3w9bTMsIoHswU9Ml4/pS8Td28q3MRGlvTnO4OgMuXzBtTpExL8CGfDjmfJWAW7cYL1ebzpFsQSXr2z+Rmsi4n/BDPipcg9+s6dILrRT4/AisoECGfBjmcoQjcs9+PbmOMl4hIvDGYpFT+3FJiI+EMiAH89Ue/DuHqfnOA69XSly80UuXZl2tRYR8Z9gBrxHhmigvOgJ4MQbIzreT0TqKpgBX7nJmmhw/0DstqY4u7pTXJnI0j+im60iUj8BDfg5GmJhwiFvfPq37+sA4MXXr6gXLyJ1442E22TjmTkaE+4Pz1S1NzfQ25VieDzLwKh68SJSH4EL+Gwuz+xcwXOHbVR78c+eHqZQ1DbCIrJ+gQv46vi7l3rwAJ0tDezvbWFsao5nTw25XY6I+EDwAr4yg6bR5Tnwi7nrQJrGhgjHTw8yMpF1uxwR2eICF/DVRU5e68EDxCJhjt7WQ6kEPzrRz9y89qgRkbULXMBfXeTkwYAH2N7ZyO03djKRyfH9Zy+SqzHki8USl4aneeG1K8zO5Te4ShHZCrw3TrHBZiqHbTfE3J8Dv5R7D21neibHG5cn+T/PXqS1McbIZJZsrkCxWCISCXFgdxsHdrUyk81zqm+Ms/2T5ObLN2cvDGU4dnjHpp83KyLeErgEqPZu41HvBrzjOBw92EOxVOJs/xQjE1kiYYdkQ5RwyCEzO89zdpiXz4xcDfVEPMyBXa3ls177p/jiIy/zuz9/O5Hw6v6RttL+802pBqYyb70/oP3nRbwnsAEfjYSh5N3piCHH4Z23b+OmXa3Eo2GaG2OEHAeAufkCr5wdxV4Yp7OlgZtvaGN3dxOhkEOxWCKXL3Ly7Chf/oeT/OpPH3D15CoRcU8AA748ph2PhsnlvBvwUA757rbkW56PR8PccVOaO25Kv/U1IYd3H9rO8dNDHH91mLMDkzz43v04jsN0dp7b9rTT3tywGeWLiMsCGPCVHnw0RC7ncjEbJBoJ8Xu/dAeP/riPbz15ji8+8vLVj6USUT75c7dx8zLHBIqIPwQy4OOx8NXhDr8Kh0J85N49HNzXwXOvDpNKRMnNF/jWk+f40795gY+970bef1cvjs+vg0iQBS7gZ+byJAM0u2TPtmb2bGu++vsDu9v4b39/gm98/zXOD07xiZ8y5fsRIuI7gZsHn80VAj19cH9vK//xV+5md08TT54Y4I+//jw/fPEyZ/sntQeOiM8EKulKpRKzc3m62xJul+Kq9uYGPv3Ld/IX3zvNUycHOds/CcBNO1u55+YuQiEN24j4QaACPpcvUiiWAt2Dr4pFw/z6z97CB+7exbmBSZ45NcSpvjEiYYcjB7rcLk9E6iBQQzTZygwaBXyZ4zjs7mniPYd38K8eOEh3e5JXzo1xcSjjdmkiUgeBCvi
ZqwGvm4oLJeIRPvnRWwmFHJ48MUA2p/1sRLa6QAV8dZGTevCL29XdxOEbO5ibL/DaxQm3yxGRdQpWwFd6pYmYAn4pN+1sJRJ2ePX8OMWizocV2cpqCnhjzMPGmKeMMZ9ZbRtjzBeNMR9eb6H1MJvVGPxKYtEwe7c3M53Nc3FYY/EiW9mKAW+MeQAIW2uPAnuNMftrbWOMeRfQY639dp3rXpNZ3WStidlV3sbg9PlxlysRkfWopQd/DPhm5fFjwL21tDHGRIGvAueMMR9dX5n1MZurjsHrJuty2pridLcnGBiZuXpAiohsPbV0ZRuB6ibho8CdNbb5BPAK8HngU8aYXdbaP1/qTdrakkQ2eMl8qLI3+rauZvpHpmlKeXdXxfXWlk43ret977ipi+893cerFyZ43927Vqxtre9XT16oYSmqbW28WptX61qoloDPANWlnykW7/Uv1uYO4CvW2gFjzF8B/xlYMuDHxmZqrXnNrlTeYy5b3kZysYMrvGCpQzVWY3h4ak2vq75vZ0uc1lSM031j7NveTEdLw7K1rfX96iWdbnK9hqWotrXxam1eq2u5Hza1DNEc59qwzCHgXI1tXgf2Vp47AvTV8F4bSmPwtQs5DnffXF7R+uzpIUolzagR2WpqSbpHgCeMMduBDwEPGmMestZ+Zpk2bweKwNeMMQ8CUeDn61v66ingV2dbRyO9XSkuDmU4P5hhd8/W+GepiJStmHTW2kljzDHgfuDz1toB4MUV2lRXyfxCfctdn+pN1qRustbsiElzeTjDcTtMb7rR7XJEZBVq6spaa8e4NktmzW3cNjuXJxxyVn0QdZA1N8Y4sLuNV86NcapvjKO3v/UIQRHxpkAl3excnkQ8olOMVun2fR3Eo2FOvDHKdHbe7XJEpEaBC/ggneZUL7FomMP7O5gvFHnm5IDb5YhIjQIW8AUaNP6+Jvt7W2lNxXjl7Cijk96cXioibxaYgC8Ui8zNF9SDX6NQyOEuU542+eLrIy5XIyK1CEzAZyszaBq0k+Sabe9M0tOR5MJQhpEJ9eJFvC4wAa+dJNfPcRzuuaUHgBdfv+JyNSKyksAEfPU0Jw3RrE9vV4qutgQXh6e5Mj7rdjkisozApN3VIRrdZF0Xx3E4fGMnj/3kAs+cGuKn7tlJOBzi8RcurfziRRw7vKPOFYpIlXrwsmo9HUlu6GniykSWJ17qp6h9akQ8KTABr31o6uudB3vobk9wfjDDT05pMzIRLwpMwGcrAa8hmvoIh0Pcd8cO2pri2PPjPHt6WCEv4jGBCXgN0dRfLBrm/Ud6aUnFONU3xjPqyYt4SmACfnauelyfAr6eEvEIH7h7J62pGPb8OC+9oUVQIl4RnIDPVcbgtdCp7hLxCB+4ZxeJeIQTZ0aZmsm5XZKIEKSA103WDdUQC3OXSVMsljhuh90uR0QIUMBnrw7R6CbrRtmzrYl0awPnBzP0j0y7XY5I4AUm4Ks3WbUXzcZxHId7bu4G4J9ODpKbL7hckUiwBSbtZufyNMTChEI67GMjdbQ0cMsN5ROgfvDcJd5/pJfwMidoaQWsyMYJTA++epqTbLw7TZrd3SkGx2Z54qV+8oWi2yWJBFJgAj6bK9AQ0/j7Zgg5Dvce2kZPe5Lzgxn+9gdv8PTJASanNbtGZDMp4GVDhEMh7rtzB7fv6yAaDvHqhQkefapPN19FNlEgAj5fKJIvFHWDdZNFIyEO7+/kgWN7ecdtPRQKJb7/7EXeuDThdmkigRCIgL92mpN68G4IOQ439rZw/929RMMhnjwxwA9fuHx1bYKIbIxAdGmzueoUSQW8m7rbk3zo7bv58cv9nBuY4tKVaTqaGwiHHTpbGji4t0OznETqKGA9+ED8PPO0llSMD75tF/fc0kXIcRgYneHS8DQvvj7CD56/xHxeM25E6iUQiTenIRpPcRyHA7vaOLCrjWKxRC5f4IkX+7k0PM1jz5zn/XfvJB7V10pkvQLVg48r4D0nFHJoiEV
431297NvRzMjkHC+8pgO9ReohIAGvbQq8LhRyOHprD82NMV49P87Y1JzbJYlseQEJeA3RbAWhkMORA2lKwLOndXiIyHop4MVTetMptnc20j8yw8VhLYoSWY+ABLyGaLaSIwfSOA4896rOeRVZj4AEvHrwW0lrKs6ebc1MZHJcGMq4XY7IlqWAF0+6bW87ACfOjKoXL7JGAQl4rWTdalpTcXZ1pxiZyNI/MuN2OSJbUkACXitZt6KDezsAOHFmxOVKRLammhLPGPMwcAvwqLX2odW0McZ0A9+z1t5Rh3rXREM0W1NHSwPbOpL0j8wwkcnRkoq5XZLIlrJiD94Y8wAQttYeBfYaY/avss1/ARL1Kngtsrk8IcchGgnEP1h8ZXdPEwADo5oyKbJatfTgjwHfrDx+DLgXeK2WNsaY9wLTwMBKb9LWliQS2ZgedqEIiYYIXV3N1558fYSmVMOGvF89rLe2dLppw953M6/bvp1tPH1ykCsTcxy57n0X+/zW+jlvBtW2Nl6tzat1LVRLwDcC1ZORR4E7a2ljjIkBfwj8M+CRld5kbGzjbqRlZnLEoyGGh6fe9PxUJrth77keTamGdde28HOt1UrvW4/aViNUKpFsiHBxKMPk1CyOU95OeOHnl043rflz3miqbW28WpvX6lruh00tYxYZrg2xpJZ4zWJtfh/4orV2vOZKN0g2V9DuhFuU4zhsa08yN1/Q/jQiq1RLwB+nPOQCcAg4V2Ob9wO/bYx5HDhsjPkf6yl0PbK5vGbQbGE9HUkABkY1XVJkNWpJvUeAJ4wx24EPAQ8aYx6y1n5mmTZvt9b+dfWDxpjHrbW/Xs/Ca1U+j7WkGTRbWE97JeBHZrjlhnaXqxHZOlbswVtrJynfRH0auM9a++KCcF+szcSCjx+rU72rpimSW19jIkpTMsrg6CzFola1itSqpnELa+0Y12bJrLmNG7TRmD/0tCd57eIEo5NZOltdnXUrsmX4fmL41R58XD34rUzj8CKrF5yA1xDNltbZUp4Dr5k0IrXz/bjF1SGagE2TfPyFSys32kIaE1HCIYeJ6ZzbpYhsGf7vwc9pozE/CDkOzY0xJqdz2j5YpEb+D3gN0fhGS2OMfKHEdDbvdikiW4LvA35uvnqTVT34ra66m+RERsM0IrXwfcDrsA//aEnFAZiY1o1WkVoEIOA1ROMXLY3qwYushv8DvnKTVZuNbX3NjVEc0EwakRr5P+CrQzQag9/ywqEQqWRUPXiRGgUg4DVE4yctqThz8wUmZxTyIivxf8BXZtEkFPC+UB2H77+iI/xEVuL/gM/lCYccImHff6qB0FqZKtk/oj1pRFbi+9TL5go0xMJXj3qTra3ag788oh68yEr8H/BzBY2/+0izevAiNfN/wOfyxLUPjW/EImGS8Qj96sGLrCgAAa8evN80p2KMTs4xV5khJSKL83XA5wtFCkWdx+o3zcnyMM3gmIZpRJbj64C/NgdeQzR+Ur3RqtOdRJbn74Cf00ZjftSsgBepib8DXqtYfam5MQoo4EVWEpCA1xCNnzQmokTCIQY0VVJkWb4O+FntBe9LIcehuy3B4NiMju8TWYavA346Ow9AY4N68H7T055kdq7ApLYOFlmSrwN+pnJ2Z2Mi6nIlUm89HUlA4/Aiy/F1wE/PVnvwCni/6W4rB3y/Al5kSf4O+EoPPqkhGt+p9uAHFfAiS/J5wFd68Bqi8Z2e9soQjWbSiCzJ3wE/W+7Bp9SD951UIkoqEdUYvMgy/B3w2XkcR+ex+lVPe5Lh8Sz5QtHtUkQ8ydcBP5PNk4xHCOmwD1/qaU9SLJUYHp91uxQRT/J1wGey8xp/97HqjVYd/iGyON8GfKlUYno2rymSPra9sxGASzqAW2RRvg34XL5IvlDUKlYf601XAn4443IlIt7k24DXKlb/62huIBEPc3FYPXiRxdTUvTXGPAzcAjxqrX2oljbGmBbgb4AwMA18zFq7aRuHXFvFqh68XzmOw47
OFGcuTzKf1/F9Igut2IM3xjwAhK21R4G9xpj9Nbb5ZeAL1toPAAPAB+tb+vKqi5ySGoP3td50I8VSiYtDGqYRWaiW7u0x4JuVx48B9wKvrdTGWvvF6z6eBoaWe5O2tiSRSP229X19oPwXvrszRTrdtEiDEZpSDXV7v3pTbcurfk3Nng4ef+Ey5/onue+unS5XtbRFvwc9QrWtnlfrWqiWgG8ELlUejwJ3rqaNMeYo0GatfXq5Nxmr8wHK/YOT5QeFAsPDU4u2mcpk6/qe9dKUalBtK6h+TVsT5W/hvv7JJb/Obkunm1TbGni1Nq/VtdwPm1oCPgMkKo9TLD6ss2gbY0w78OfAP6+x1rrRRmPBsCOdAuBc/6TLlYh4Ty2zaI5THpYBOAScq6WNMSYG/C3waWtt3zrrXLVrh31oDN7PUokorakYfQPe6VGJeEUtAf8I8HFjzBeAXwROGmMWzqRZ2OZR4NcoD9X8gTHmcWPMx+pY94qmNU0yMHrTKa6MzzJT+aEuImUrjl9YayeNMceA+4HPW2sHgBdXaDMBfKnyyxWaJhkcvekUL58d5eLwNDftbHW7HBHPqCn9rLVjXJsls+Y2m0nnsQbHjutWtCrgRa7x7UrW6WyeWDREtI5TL8Wbeis3WrWiVeTN/Bvws/O6wRoQ2zuThEIOF7TYSeRN/Bvw2byGZwIiGgmzsyvFhaEMxWLJ7XJEPMOXAV8slpid01bBQbKvt5W5+QKDdV4wJ7KV+TLgZ+Y0RTJo9u5oAeD8oIZpRKp8GfDVKZJaxRoc1YDvG9SCJ5EqfwZ8ZZFTSkM0gbF3e7UHr4AXqfJpwFfmwCfUgw+KxkSUrtYE5wczlEq60SoCfg34We0FH0S7ulNkZucZnZxzuxQRT/BnwFf3odEYfKDs6i5vm6phGpEynwZ8dYhGPfgg2d1TDnjdaBUp82fAz6oHH0TXevCaKikCfg147QUfSC2NMVpSMfXgRSp8GfBjU+WbbM3JmMuVyGbb3d3E2NQck9M5t0sRcZ0vA35obJbWVIx4TDtJBs2ebc0AnLmsI/xEfBfw8/kCo5NZutuSbpciLti3oxzwb1yecLkSEff5LuCHxrOUgK62xIptxX/2qgcvcpX/An60vJtgd7t68EGUbIiyrSPJmf5JbR0sgee7eYSDY7MAdKsH72uPv3DpTb9vSjUwlckC5dlT/SMz/MOTZ2hranhTu2OHd2xajSJu818PvrIfuMbgg6uztRzqw+NZlysRcZfvAr7ag0+rBx9Y6dby1354fNblSkTc5buAHxqboa0pTjyqKZJB1ZKKEQ2HuKIevAScrwK+PEVyTuPvARdyHDpaG5iYzjE3X3C7HBHX+Crgh8ZmK1MkNf4edOmW8ji8evESZL4KeM2gkarqOLwO4ZYg81XAD1UCXj146W5PEg45OuFJAs1XAV/trXW3qwcfdNFIiB3pRianc0xktPGYBJO/Ar6yirWrVQEv5Z0lQQeASHD5K+DHZmlrihPTFEkBdnQ1Ego59A0o4CWYfBPwM9l5xqY0RVKuiUXCbO9sZDyTYyKjg7gleHwT8D96qR+AW/e0u1yJeMnu7hQAfTrGTwLIFwFfLJb4/vGLxCIh3qPNpOQ6O7tShBw4e1m7S0rw+CLgn3/tClcmsrzjth5SCZ3DKtfEomH2bm9hYjrHS2+MuF2OyKbyRcB//9kLALzvyE6XKxEvOnIgTWNDhBNnRnTSkwTKlg/484NT2Avj3LqnnR2djW6XIx4Ui4Z558FtlErw1W+/wpB2mZSAqOnAD2PMw8AtwKPW2odqbVPL69ZraGwWB/jQ23ZtxB8vPtHTkeTWPW2cPDvGp7/8FHcf6OLw/k7aUnFSyRghp7xJmRNyCAGO4+A45f+HQuXHAJTe9D+orJItvfm3lceV30QijE5mr36stOAPufraRf68t7xfneVwGB2tbTsHp/Kf8v+dq89de/6660T52l3X9E1WWlxcKpVwouXrtnSbmsout63
DFXTKnymlSJjRiXJdi31uS75+QeNCocjU7Dwz2Tx7tjWTbKj/+UvOSsu4jTEPAB+x1v6KMeZrwB9Za19bqQ1wcKXXXW94eGpNX4FSqcTkzDwtjbFVve746yNXTwDymutPJ/Iar9ZWS12lUolELMJ3nj7PxWHNqhHv+MDdO3nwffvX9Np0umnJHzO1/Mg4Bnyz8vgx4F5gYVAv1uaOGl5XU5Er6VrDaz6Yblrr28kW9+Fja/uLJLLV1DIG3whUD8AcBbprbFPL60REZIPUEvAZoLo8NLXEaxZrU8vrRERkg9QSuscpD68AHALO1dimlteJiMgGqeUmazPwBPCPwIeAB4FfsNZ+Zpk2b6d84/9Nz1lrNQlZRGSTrBjwAMaYNuB+4IfW2oFa29TyOhER2Rg1BbyIiGw9uvEpIuJT9V865WHGmC8C3638OlP5BfApa+0Jl2r6JPCxym9bgX+i/HXZ0BXAa6ztOOUhNy9ctzbg65SXQRy31v7GZqycXm1dwG/jne+1PcB/BZqBZ6y1/8YL12yx2oB/jweumzGmG/g7a+27jDFR4O+BduBha+3XFntus2tcTmB68MaYdwE91tpvA7cD37DWHqv8cuUvHIC19kvVOijflH4DCFtrjwJ7jTGurcpZpLYv45HrBnwc+Lq19gjQZIz5d3jjur2pLuBOvHPN/gT4T9badwG9lRXoXrhmb6kN+B1cvm6VH9Z/QXlND8CnKHcm3gn8vDGmaYnnPCMQAV/5KftV4Jwx5qOUZ/n8rDHmGWPMw8YY1/8lY4zZQXkxWC9vXQHsqutqO4J3rtsIcJsxphXYCezBG9dtYV0fwjvX7CbgucrjIeBP8cY1g7fWVsD961ag/C/Yycrvj3Htev2Q8t+HxZ7zjEAEPPAJ4BXg88A9QAfwfmvtPUAU+GkXa6v6beBLeHMFcLW2n+Cd6/YjYDflnt4pIIY3rtvCur6Dd67Z3wGfNcZ8GPgg8H/xxjWDt9b2HC5fN2vt5IKp3VtuxX5QAv4O4CuVqZp/Bdxpre2vfOxZwNXNSYwxIeA+4HE8tgJ4QW0veei6fRb4TWvt54DTwC/hjeu2sC7PfK9Vxti/C/w65aEHz3yvLVLbM165btfthO0HAAACEUlEQVTZciv2PVXMBnod2Ft5fATAGHPIGBMGfg540a3CKt4F/JO1toT3VgBfX9tfeui6tQEHK7W8DfhjvHHd3lKXh64ZwAvALuALeO977fravPS9VrXlVuy7Pva8SR4GvmaMeZDyP/d+H/hLyltZf8ta+303iwN+ivL4HcAjwBPGmO1cWxXsputr+xzw13jjuv0R8D8pD4c8BfwZ3rhuC+t6F965ZgC/B3zBWjtjjPHa99r1tXnpe63qL4DvVCZs3EJ5xtulRZ7zDC108iCtAF4bXbfV0zVbncoPw3uB/10dn1/sOa9QwIuI+FRQxuBFRAJHAS8i4lMKeBERn1LAi4j4lAJeRMSngjIPXmRZlb1OqvPXY5S3Z9gF/CGQByzwJPAN4GuUdw+cA/6FtXbMjZpFVqIevEhZF+UdM99DebvfXwP+JfC7wL8Fmq21/x34A+A5a+19lfafdKdckZWpBy9SlqMc7h+pPJ4Engceorya8j9U2t0G9Bhj7geSlHv1Ip6kHrxI2SeAC9banwV+UHnufuA+a+27rbU/qjz3MvBnlT3yfwsFvHiYVrKKAMaY2ykPzYxT3l8kDVwGbqW8x/tFyr34LOWzBTopj9X/lrX2BTdqFlmJAl5kEcaYOPDtym/ngTDwr621p9yrSmR1FPAiIj6lMXgREZ9SwIuI+JQCXkTEpxTwIiI+pYAXEfGp/w+WfVNSDFPvVQAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.distplot(df['age'],bins=20)#分成20个区间段" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索用户年龄和购买商业医疗保险之间的关系" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, 'Density')" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAECCAYAAAALqiumAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAIABJREFUeJzt3Xd0nOd94PvvO/NOL6iDDnbyISmREimKIiWRomgVy92yLTvO2htnc2+SzUlyNjd34028m7VXG984e52ztq8dF7mrWFaXKVm9i5QoFpFieQl2EERvU4Gp948ZgBQFEANyOn6fc3A0HDwz7+/RC8wPT9dSqRRCCCHEVEzFDkAIIUTpkiQhhBBiWpIkhBBCTEuShBBCiGlJkhBCCDEtvdgBXKp4PJEaHg4XO4y8qalxIvUrX5Vcv0quG1R+/Xw+jzab8mXbktB1c7FDyCupX3mr5PpVct2g8us3W2WbJIQQQuSfJAkhhBDTkiQhhBBiWpIkhBBCTEuShBBCiGnlZQqsUuoeYCWwzTCMu6cp0wg8ZBjGpsy/LcAjQC1wj2EYP81HbEIIIbKX85aEUupOwGwYxkZgkVJq6RRlaoBfAK7znv5LYJdhGDcAn1VKeXIdmxBCiNnJR0tiC/Bg5vGzwI1AxwVlEsDngccveN1XM49fBdYBL13sQj5fZeeR2dQvkUgSjMSoctvyGFFuyf0rX5VcN6j8+s1GPpKEC+jKPB4C1l5YwDAMP4BS6mKva5zpQv39gcuJs6T5fJ6s6jfkH+PVd8/y2rvdjIaj/NVnVrF6cX0BIrw82davXFVy/Sq5blB+9RsaGuRrX/s7vv/9n2RVfrYJMB9JIgg4Mo/dZN+lNfG60czrgrkPrXIkkyl+/vRh3nivm1QKrBYTGvBvjx/g7790DW0+d7FDFELkmd/v5+67/ztjY5G8XSMfSWIX6S6mHcBVgDHL1z2Ued2OPMRWMQ6eGuL1/d3Uem1cqxpYMb+GY2f9PPnmSb7z0D6+9uV1eF3WYocpxJzw4ItH2Xm4L6fvee3yBu7auuSiZcxmE9/4xjf56lf/JqfXPl8+psA+BnxJKfVt4C7ggFJqyhlOF/gF8HWl1P8mPTPqrTzEVjFe39cNwB3XzeeqJfVYLWZWzK/hhlVNDIyO8b1H9hOLJ4ocpRAin1wuN253fnsNct6SMAzDr5TaAtwKfMswjB7g3WnKbjnv8Sml1K2kWxP/zTAM+YSbRmgsxu4j/dR6bbTUOd/3veuvaGLIP86hU8P85sWj/Lvb1DTvIoTIlbu2Lpnxr/5ylZd1EoZhDHNuhtNsXnf2Ul4317x9sJd4IsWqhXVo2vt3/dU0jTuum0fvcJiX93Rxy7p2mmqd07yTEEJcnKy4LkOv7+9B02Dlgtopv6+bTWxe3UIyBY++erzA0QkhKokkiTLTNRDiRLefhc1ePE7LtOWWtlXRXOdk5+E+Tvb4CxihEKLQvve9H+XtvSVJlJk3MgPWqxZO3YqYoGkam69qAeDh
l4/lPS4hRGWSJFFG4okkbx7owW41s7i1asby8xs9LGjycODkMAdPDhUgQiFEpZEkUUbeOz6EPxRl5YJadHN2t26yNfHKMVKpVD7DE0JUIEkSZWR3Rz8AV0wzYD2Vplonal41J7oD7D7Sn6/QhBAVSpJEGTndE0A3azTWOGYufJ5Nq5rRNHj4leMkksk8RSeEqESSJMpEPJGkayCEr9qByaTN/ILz1HrtrF5UR89QmDf39+QpQiFEJZIkUSa6+kMkkikaZtmKmHD9lU3oZo3HXj8h23UIUSG++c1v8Kd/+hV+/vPsdoC9FHlZcS1y73RveuvixppLWz3tcVpZu8zH24f6eHF3F7evn5fL8ISY0x45+jv29O3P6XuuaVjFnUs+Nu33X3nlRZLJJD/84c/4p3/6Op2dp2lvz/3vtbQkysTp3vTO6bMdjzjfdSsasVnMbNt+kvBYPEeRCSGKYc+eXWzdegsA69dvYN++vXm5jrQkysSpvgCaBvVVl54kHDad61Y08Oq+bn7/9inu3Lw4hxEKMXfdueRjF/2rPx8ikQj19Q0AeL1VGMbhvFxHWhJlIJlK0dkbpM5rx6Jf3i1bq3y4HRZ+/9ZpeofDOYpQCFFoDoeT8fFxACKRMKlUfmYuSpIoA33DEcZjiUsetD6fVTezdW0r8USKe589IgvshChTSi2f7GI6erSDpqaWvFxHkkQZuNxB6wup9moWNHl478QQuwxZYCdEOdq8eQvPPPMU3/3ut3nxxee4/vob83IdSRJl4FQmSeSiJQHpzf9uWdeG2aRx//MdjEVlEFuIcuNyufnud3/IypWr+M53/i1vJ9RJkigDuZjZdKFaj531KxoZDo7zxBsnc/a+QojC8Xq9fOhDt1JXV5+3a0iSKHGpVIrTvQGqXFbs1txORtuwspEql5Vnd3Zy7OxoTt9bCFEZJEmUuJFglEA4lrOupvNZdBMfXj+PVCrF//fIfkaD4zm/hhCivEmSKHGncjxofaH5TR42r25hJBjl+4+9RzwhGwAKIc6RJFHiTud40Hoq61c0oOZV03FmlPtf6MjbdYQQ5UeSRInLx6D1hTRN447r5uGrtvPS7i5e3tOVt2sJIcqLJIkSd7o3gNOm43ZY8nodq27m05sW4bCZ+eUzBi/sOpPX6wkhyoMkiRIWHoszMDpGQ40DTZvdGRKXotpt4wtbl+Ky69z73BG2bT+Z92sKIUqbJIkSNjAaAaDGYyvYNX3VDv7glqV4nRYefuU4v335qGzdIcQcJkmihA2OjgHgdVkLet1aj50v3rKMWo+Np3ec5rsP7yc0FitoDEKI0iBJooQN+NNJoqrASQLSiemLtyxlfqObvUcH+PrPdnKyx1/wOIQQxSVJooRNtiSchU8SAE67hc9tWcLGK5oYGB3jn361i+ff6SQp3U9CzBmSJErYoL843U3nM5k0Nq1u5nNbFmPRzdz3fAf/+pu9DGViE0JUNkkSJWxwdAyzScNlL/4BggubvXzljuUsavFy4OQw/+2et9lxsKfYYQkh8kySRAkb9I/hcVoLMv01G26Hhc9sXsRt17YTSyT50RMHefz1EzL7SYgKJkmiRI3HEgTCMapc+V1EN1uapnH1knr+/YcVVS4rj79+godeOSaJQogKJUmiRPVnzp8u5njExaSnyS6l1pueJnvf8x0yoC1EBcpLZ7dS6h5gJbDNMIy7symjlKoB7gUagF2GYfxpPmIrF33D6YV0pZokADxOK3+wdSkPvnyUF3adwWzS+MKHlhY7LCFEDuW8JaGUuhMwG4axEViklPrAp8Y0Zb4E3GsYxjrAo5Ral+vYyslES6KqSNNfs+VyWPjC1qXUee08u7OTAyeHih2SECKH8tGS2AI8mHn8LHAjcOH+01OVGQSuVEpVA+1A50wX8vk8lx9tiep7J73BXkujl+rq7M+SSKVShOMhUoBJM6FrOlZzfhNNNfCF2xQ/eGQfv/i9wff+9mZcWWxIWMn3Dyq7fpVc
N6j8+s1GPpKEC5jYa3oIWJtlmfuBjwJ/BRzKPH9R/f2By421ZPVlWhKmVJKRkfBFy/pjo+we2cFAtJfB8X6iqej7vu/Vq2m0N9Nsa2WFdzVWU+73gnJZTGxY2cib7/XwvQf38McfWXHR8j6fp6LvXyXXr5LrBnOjfrORjyQRBCYOP3AzdZfWVGX+EfgzwzD8Sqm/Ab4C/CgP8ZWF/syYhOcif5GnUikOB9/j1f5niaaiaGi4dS91egMmTKRIEUtGGY0N0xE8REfwEDuH32R97Y1c4b0as2bOacwbr2jiWNcor+/r5pplPq5akr/D2YUQhZGPJLGLdPfRDuAqwMiyzLXAKqXUDuA64Pk8xFY2+obCuB0WzOaph41iySjP923jaOgwumZhTfV1tDsWTPnBn0qlCCdCdEZO0BE8xCsDz7J3dCdb6m9nnnNhzmI2mzQ+smE+v3jG4OdPH+bu/+M6XPbSmsIrhJidfEyBfQz4klLq28BdwAGl1IUznC4ssw34JumWwyhQS7r7aU5KJJMM+scuurHfG4MvcTR0mDqrj62+O1jgXDxty0DTNFy6m+WeVdzW8AkWOZfij43wePcDPNv7BJHExbuzZsNX7eCGK5sYDUV58o2TOXtfIURx5Lwlkeku2gLcCnzLMIwe4N0ZyowCbwNX5DqecjQSiJJMpqad/toZPsl+/268ehU31G2dVbeRzWznquprme9azJ6RtzGCBzgRPsoy90qWe66kydZ62Su8r13ewP7jg7yw6ww3r2mlsTb7gXchRGnJyzoJwzCGOTd76ZLLzFWTG/tNMf01mhznhf5taGisrd54yeMK1ZZabqq/jeOhI3QED/Gefw/v+ffg0b0029smv+qtDbNOGrrZxE1Xt/L46yd48KWj/OVnVl9SjEKI4iv+znHiAy522NBrAy8QiPtR7iupsdZe1nVMmokl7uUsdi2jb7yXzsgJesbOciR4kCPBgwA4zS7mOxfRap9HlaWWaksNdrODRCpBMpXAYrJi0j7Ya7msrYo2n4s9HQMcOjXMivk1lxWrEKI4JEmUoHOHDb1/0Ldn7CwHA+9Spdew3JO7njlNM9Fob6bR3kwqlSKYCDAUHaB/vJe+8W4OBfZzKLB/ytfaTQ5WVa1hlfcaXLr7vPfUuHlNG7961uCBFzr4xz+6FpOpNDYqFEJkT5JECZquJXEwkB7aucJ7NaYcT1+doGkaHt2LR/cy37mIVCrFSGyYkdggoXiQYCJALBnDrJnQMDEU7Wfn8JvsGt7BmurruL5uy+R7Ndc5uWJBDQdODvPGe91sWt2Sl5iFEPkjSaIETTUmEUvG6AgcxGFy0mBrLFgsmqZRY62dtmsrnozTGTnBkeAhdo1sp8ZaxwrPqsnvb76qBaNzhEdeOc61yxuwW+VHTohyIrvAlqDB0TGcNh2r5Vxr4XjoCNFUlHbnQrQpxgCKRTfpLHQt5ca6reiahZf7n2E4Ojj5fY/TyvoVjYyGojy943QRIxVCXIrS+bQRQHrh26B/jGrP+7fOOBTYB8D8HC5+yyWX7mZN9XriqRi/732MeDI++b31KxpwOyz8/u3TcuypEGVGkkSJCURixOLJ9yWJQGyUzshJaq0+3Lq3iNFdXJtjPgucixmI9vHm0EuTz1t1M5tXNxOLJ3n4lWNFjFAIMVuSJErMxKB1tftckjgcfA+A+Y7SbEWcb5X3Gty6l32juxgc7598/oqFtTTWONh+oJfjZ/1FjFAIMRuSJErMZJLItCRSqRSH/Psxa2ZaHfOLGVpWdJPOKu8aUqR4ffCFyec1TePmta0APPBihxx3KkSZkCRRYiZmNlV77AD0R3sZjQ/TYm/HYiqPzfIabS34rE2cjpzgVPhc99K8Bg9L26o4emaUN/adLWKEQohsSZIoMZNJwp2e/to7lv4wrS/gtNfLpWkaq6rWAPD6wIskU8nJ7225uhWTSePnvztILJ4oVohCiCxJkigxI8H0gUFeV7q7qXe8G4Aa
S13RYroUVZYa5jsXMxQb4IB/7+TzNR4ba5fW0zsU5vnM6XtCiNIlSaLE+EPpJDFx/GffeDdmzYynhGc1TWelZzW6pvPm4EuMxkYmn7/+yiacdp0n3zw5WV8hRGmS5a8lxh+K4rDpmE0asWSUoegAtdb6KTfRK3V2s4PVVevYPbKDZ3uf4DOt/w6TZsJu1fnQtfN48rXjPPb6Cb58u7ro+4RjYQ4NdTA6PkowFiYUCxFLxoklYyRSSWxmK3azHZfFyXxvG4uqFuCyTL89eSwZJxyLYDaZcFtcua62EBVFkkSJ8YeiuOzp29I/3kuKFNWWy9vttZjmORbSN97Nmcgp3h5+jQ21NwGwfkUjb7zbxSt7u9i6tpU2n/t9r0umkuztf4+dPXs4MHiYRGp24xcNznoaHPXU2mtx6Hb6wv30hPsYjAwRTcYA0NBYXruUDU3XsNp3JVZzeUwMEKKQJEmUkFg8SXg8Tn11emZTuY5HnE/TNK6uupah6AA7h9+k1T6fducCzGYTN69p5eFXjnP/8x387Reunjy34pS/kweMRzkdSI9Z1NtrWVqzmFp7DQ7djt1sx2LWMWtmTJqJWDLGeCJKKBaiO9RLd7CXvkg/feGB98ViNVmoslVh123YzTaCsRCHho5waOgILt3JHQtvYVPrBnST/FoIMUF+G0pIIJwZj7CfG48AqLGWb5IAsJisrKu5ntcGnueJ7t+wvvZGPlS1lUXNXha1eDl0apjtB3pYttjGs6de5M2zO0mRQtUs4drGNdQ5Lt6ScpBOqj5HHQu88yafH4+P448GGEuMU22rwm1xfeAApeGxEQ4OGezrP8hDHU/wcufrfHzR7axpWI3ZlJ+ddoUoJ5IkSshoZhDXmelu6h3rxqJZcZndF3tZWaiz+thYu4XdIzvYMfQqnePHWe5czbIrzZwZ7+Pejr1ofenutTp7DVvabqTNc3lbi9t0Gz7ddtEyNfZqbmi5jrUNq3m7Zzf7+g/ys4P38/jxp7m57UY2tqzHodsvKw4hypkkiRIykSRcdp1IPMJofJgGW9NlnzldKhrtzXyo4aPsG32HztBJukLp7iTz4vT3LfFqbl68lqU1iy75WNZL5dAd3NR2A1f5rmR33z4ODR7h4aO/4/enXuLzyz7F2obVFXMfhJgNSRIlZHL6q91Cd7gLoKwHradizXQ9ray/gv7AIPFkjFgyxrFDTka7q7A3N2KuLV43T7Wtiq3tm9jYfC37+g/wTu8efnrgXnb3vctdyz5Nlc1TtNiEKIbym1dZwfzntSTOhtJJopwHrS+mwdHIfOciFrsVy71XslHNQ9M0fv/qELFYcuY3yDOHbue65mv44vLP0eJqYm//e/zLO99hdDxQ7NCEKChJEiXEPzkmYeFspiVR7oPW2aqt1lm+2M5IIM5TrwyWzAaANfYqPrv0E6xvXMvw+Cg/3v9LYuedlSFEpZMkUUL84fe3JGwmO3aTo8hRFc7qlQ58dTqHjoV5c89oscOZpGkaG5rXoWqWcMJ/ivsPP1wySUyIfJMkUUImt6iwRAnE/NRY6ubUYKnZpLFpvRunw8RrO0cxjoeLHdIkTdO4Zd5NNDp9vNWzixc6Xy12SEIUhCSJEjIaimK3mgnEhwHwWKqKHFHh2W0mbtrgRjfDky8N0DtQOns76Sadjy26HZfFyePHnqYr2F3skITIO0kSJSS9JYcFfzzd1eIyz819hWqqdDZe4yYeT/HA73o5fbZ0zsV2W1zc0n4TyVSSXx16kERStjsXlU2SRImIJ5KExuI47TqBTJJwztEkAdDeYuW6q12MRZM8sK2XfYeDxQ5p0oKqeayoXUZnoIvnT79S7HCEyCtJEiXi/Omv/sy22k69/FdaX47FC2xsvd6DbtZ46pVBXtoxTCJRGgPGm1s34tSdbDvxHD2h3mKHI0TeSJIoEf7z9m3yT7Ykpt/ueq5o9Fm47SYvHreJt9718+MHz3LoWKjos4vsup2t7ZtIpBL8Urqd
RAWTJFEi3t+SGMVhdmDWZEE8gNdt5vbNXpYtsjEaiPP48wP88tEejp2OkEwWL1ksrl6AqlnCKX8nz556qWhxCJFP8ilUIib2bXLYzQTjfurs9UWOqLRYrSbWrXahFtl591CE011Rfvt0Hx6XmdXL3axWbqo8hf9xvrntRrqC3Tx14nlW1inme9sLHoMQ+SQtiRIx0ZIw28ZJksRlmdvjEdPxuM3ceK2bD2/xsmSBjch4gjd2jfKD+7p48KleDh8PFXTcwqbbuG3+zSRJ8ouDDxBNlM6UXSFyQVoSJcIfSp+WlrREYBzckiQuqrZaZ/3VOmuudHK6K8qxU+Mc7xzjeOcYToeJq5a7WbfKi8uR/80C2z2trPGtYk//fh45uo0vqE/n/ZpCFEpekoRS6h5gJbDNMIy7Z1NGKfV94GnDMJ7MR2ylajQ0DkDCnJ7q6ZrjM5uyZdE1Fs+3sXi+jRF/nGOnxjlxOsr2PX527guwermb9Vd5qc5zV9T1Les5Hejita7tLKleyB2+TXm9nhCFklV3k1Iq670hlFJ3AmbDMDYCi5RSS7Mto5TaBDTNtQQB57qbxrV0knBbZEvq2ar26lyzysWnPlzNutVObFaN3QcC/PD+Lh55tp+TXZG8zYrSTTofXXgrFpOFew89RJe/Jy/XEaLQsv3z6rBS6l7gJ4ZhnJ2h7BbgwczjZ4EbgY6ZyiilTgI/Bp5SSn3SMIzHZwrK56ucD9LQeAKHTSdqDgHp7ia3tbJPRHO781e/tasdXH1liqMnI+w/FOTIiTBHToRpqLOybnU1VyoPtVXWnF6zGief0G7l4YNP8e03fsT/vPXvsM9wMl65qqTfvalUev1mI9sksQ74PPCAUmoQ+KFhGL+fpqwL6Mo8HgLWZlnmy8BB4FvAXyql5hmG8d2LBdXfXzl7+w/7x3DYzAyEBoF0d1MwWDrbUeSa220vSP1aGkw0+zwMDMU5cmKczq4oT73Ux1Mv9dFYb2XZQgcLWx001lsxmy9/M8U2WztX+a7k3f73+N4bv+TLKz5fcZs0+nyeivrdu9BcqN9sZJUkDMMIKKXuA5LAfwG+qpT6c8MwPjlF8SAwsb+1m6m7tKYqswb4kWEYPUqpXwP/E7hokqgU8USSYCRGu8eNPz6K3eTAbDIDsWKHVhE0TcNXZ8FXZ2FsVZIz3VE6z8bo6Y/SOxDltZ2jWCwa7U025rXYmddsv6yksallAwNjA7zds5slVQu5ofW6HNdIiMLJKkkopX4CbCbdRXSrYRgnlVKvTVN8F+kuph3AVYCRZZkwsCjz/XXAqSzrUPYC4XQycNhNDMb9FXsaXSmw20wsWWBnyQI70WiS7r4YvQNx+gZik7OjACwWjXnNdq5e4WbJfMesWgNmk5nPXvlRfvj2r3nwyOO0e1uZ52nLV5WEyKtsu5t+D/yZYRjnH8l10zRlHwNeU0q1AHcAX1BK3W0YxtcuUmYD6VbKT5VSXwAswGdnUY+yNjFobXXGSJHCqc/djf0KyWo1Mb/Nxvy29LhBZCxJ38C5pHHsdIRjpyPU11i47movKxe7sm5dVNu93D5/K48ff5p79v+av7v2r3Fa5s4BUqJyZJskHjYMY3JaiFLqDsMwnp6qoGEYfqXUFuBW4FuGYfQA785QZuIYss/NMv6KMLFvk9keAcBllumvxeCwvz9pjPjjHOwY49SZKNteGuStvX7uvM1HbbUlq/dbUDWPaxvXsLN3Dz87cB9/tvqPMt2IQpSPbJPEC8DW8/7998CUSQLAMIxhzs1euuQyc8VoMLNK15pOEnN5i/BSUu3Vuf4aN1etSPCeMcaxU+P8/JFuPr61nqULstt8cUPzOvoiAxwcMnjAeJQvLv9MxQ1ki8p20SShlJoHLASqlVKbM097kO08cmqiJZE0hyAhW4SXGpfTzHVrXDTU67y9N8TDz/Rzw9oqblxXNeMHvkkz8ZEFt/JwxxO82f02tfZq7lh4S4EiF+LyzfRhvwb4I6Ap
89+vAB8B/jKvUc0xE2MSMVN6IZ20JErTwnYbt23y4nKaeGP3KNteHsxqF1qr2cInFt+B1+rhdyee5Y2utwoQrRC5cdGWRGZB2+NKqZcMw/jjAsU050wkiTHSScIh50iUrJpqnQ9v8fLy9gDvHQkRj6f4+Nb6GQe0XRYnn1x8B7898gT3G4+gaSaub7m2QFELcemy7Taaaj2EyJGJbcJDST8OkxOzJoObpcxmNbH1ei++Op3Dx8M8+lw/8fjMLYpaew13LvkYNrON+w4/xJtndxYgWiEuT1ZJwjAMf74Dmcv84Sg2i0YoEZDpr2XCYtG4eaOHJp/O0VMRHn6mL6tE4XPWSaIQZeWiSUIptTrz380XfhUmvLnBH4ricMdJkZKupjKi6xo3bfDQ0mjhxJkxHn2uP6uzLCRRiHIyU0vijsx/v3LB1x/lMaY5JZFMEgzHsLnSq67tJllwVU7MZo1N6900NegcOx3h8eclUYjKMtPA9T9n/vuVwoQz9wTDMVKA1Z5ezG43S5IoN2azxub1Hl7ZEeDIyQi/e2mAL905c4twIlE8cvR33Hf4IZKpBDe2bihAxEJkT9Y7FJk/s2+T2ZYevLabKnt78Eql6xqbN3jw1eocOhbm0Wd6sjq7YiJR2HUb9xuP8OyplwoQrRDZy/rQIaWURyllUkptUUrJ6GqOTCyk06zpk+mkJVG+LLrGlo0eaqvM7Nw3wqtvj2T1Op+zjs8u/QQei5vHjz3NY0efytvhSELMVrYtiV+T3pbjW8A/AA/lLaI5JpCZ/prS07uP2mRMoqxZLBpbrvdQ5TGzfa+ft/dlNzGw1l7DZ5d9ghpbFc+dfpkf7v8F/mjlnmkgyke2SaIts7DuKsMwbgVkCk6OTHQ3JU3pJCEtifJnt5n4yIfqcNg1Xtw+zHtHglm9zmv18Nmln6TN3cL+gYPc/da32dv/Xp6jFeLisk0SVqXUXwF9SqlW0lt5ixwIZLqbYloEEyYsmvyvrQQet87N13uwWjS2vTzI0VPhrF7ntDi4c8nHuKn1esbjY/x4/y/53t6fcMrfmeeIhZhatkniPwGLga+RXn39tYsXF9maSBJRwtjNszvcRpS2aq/OTRvcmEzw2HMDnOnJ7rhWTdO4umEVf7D8M7S5Wzg0dIRvvfNd/m3fzzkxejrPUQvxftkeX7qD9ClyAN/PXzhzjz8UA1JEkmGqLTXFDkfkmK/Owqb1bl7ZEeS3T/fxh59ooqHOmtVra+01fGbpx+kMdLG9eyf7Bw6yf+Agi6sW8qF5m1hRq7CapeUp8ivb40v/GvhzwApoQMowjEUXf5XIRiAcxWSJkSIp4xEVqqXRyoa1LrbvCvHgU3188RON1FZl/+He7mmlzd3CmeBZdvW9y7HRExzbfwJd01lUNZ9F1Qtw6HZ0k47DbKfN00KTs0EOOBI5ke2hQ/8nsMkwjP58BjMX+cNRHK44SWS1dSVb2G4jGk2xa3+Yex/v4fMfbcy6RQHpLqh2TyvtnlYGIkMcGjLoDHRxZOQYR0aOfaC8xaSz0Dufu9SnaHY15rIqYo7JNkmcALLrUBWzEgjHcNbHiQB2syykq2Rc7vfzAAAY4UlEQVRqsR1Ng3f2hbn3iR7uuqOR1ibbrN+n3lHLptaNAETiEfojg8STcRLJBOH4GP2RAfrCAxwZOcY/7/wOn1/2KTY0r5PxLnFJsk0SXcAepdQjkD70wDCMb+QtqjkiGkswFk1Q5YgRQdZIzAXLFtmxWDR27A5x/7ZePnObj4Xtl37fHbqDeZ62Kb93dOQ4z59+hV8f/i0dI8f5w+WflS4oMWvZJok3M18ihwITW3JYM1tyyJjEnLCw3YZF13h9Z3ow+9Yba1mz0pPz6yypXoTPUc/TJ5/nrZ5duC0u7lz6sZxfR1S2bM+T+AXwPHAEeBW4N59BzRUf2JJD9m2aM9qarWy9wYPFovHMa0M8/8ZQVkehzlaVzcunl3yMGls1L3S+ys6ePTm/hqhs2e7d
9J+BZ4AfAx8CfpLPoOaKiTUS6LJv01zUUGfh9pu8VHnMvPNegN8+3cfYeCLn17GZrXx80e1YTVbuPfxbTgfO5PwaonJlu5juU8AqoN8wjJ8Ay/IX0twx0d2UME/s2yQtibnG7TJz22bv5MFF9/y2m87u3M8RqbFXc/uCrcSScX68/5eMxcdzfg1RmbJNEnGgDUhldoCN5i+kuWOiuymuRbCZbJg02bl9LrJYNDZvcLNquYNgKMF9T/by2s6RnHc/Laqaz7rGqxkaG+E52ZJcZCnbT6X/CrwBrAd2Av89XwHNJYFQuiUxngrLzKY5zqRprFru4JZNHpx2E2/sHuUXj/ZwvDOS023D1zeuxW1x8fzpVxmMDOXsfUXlmumM62ql1C+BB4EW0ru/bgd2FSC2iucPR8GUIE5U1kgIIL2Nxx03e1nQbqV3IMqDT/Vx7xO9nD47lpNkYTFbuKHlOuKpOI8e3ZaDiEWlm6kl8R2gE1gLOIBW4DTwv/Ic15zgD0fRLOm+YWlJiAlWq4nrr3Fzx81eWpssnOkZ574ne/nhA2d5cfswZ3ouL2GomiU0uxrZ07+fjuEPrtYW4nwzrZNYYBjGl8/7dzfwdaXUK3mMac4IhGPoNlkjIaZWU6Vz0wYPA8NxDh8d42xvlLf3pQ8yslo0GuutNNZbaaq30tZko8qjZ7WqWtM0Nrdez2+OPMpDHU/y1Wv/WlZji2nNlCRalFJ/M9Xz+QhmrgmEothccRLIGgkxvfoanRuvdZNIpOjpj3GmO0b/YIzO7nE6u8/NUnI7zbQ321i6wMmKxc6LfvA3uRpYWr2IjpHjHBs9yZLqhYWoiihDMyWJXwFTLQX9dR5imVNSqRT+cBRPfSydJKQlIWZgNmu0NllpbUpvDBiLpxgZjTM4kqB/MEb/YJxDx8IcOhZmx14LWzfUsKBt+p+rVfUr6Rg5zvazOyVJiGldNEkYhvH1QgUy14xFE8QTqXPdTTImIWbJomv46iz46iwsX2xP/+ERSHKgI8LJzigPbOtjUbudO26qw+P64K96m7sFr9XD7r53+dyyT2DXpTUrPkgm5hfJ5JYcEwPXMrtJXCZN06jymrn+Gjcf3uKlsV7neOcY9z/ZSyjywZXcmqaxsk4RTcbY1fduESIW5SAvSUIpdY9SartSatpjTqcro5RqVEpV/AYzE6utU5aJfZukJSFyp7ZaZ+sNHlYssTM0Guc323qn3PJjZa0CYPvZdwodoigTOU8SSqk7AbNhGBuBRUqppbMs879IT7etaIFQuiWRNI1h1szoWrYb8gqRHU3TuPoKB0sW2OgbjPHgU32MR5PvK+OxupnvaeeE/xQ9od4iRSpKWT4+mbaQXnwH8CxwI9CRTRml1FYgBPRkcyGfL/fbKxdK6tggAAlzBKfuxOP5YF50uyu7C0rqVxg332BH00boOBHhdy8N8cd3tb9v5tO181Zz6kAne0be5csLPpPVe5bz7142Kr1+s5GPJOEifUgRwBDphXgzllFKWUlv//Fp4LFsLtTfH7i8SIuoqzcApBhPRXBp9QSD79/Uze22f+C5SiL1K6xrVtkJhuN0nAzx+tv9rFLuye816s3YzXZeOb6D21tumXEPMZ/PU9a/ezOZC/WbjXyMSQQ5113knuYaU5X5KvB9wzBG8hBTyQmEomCJAilZIyHyzmTSWH+VE7MZXtox/L7xCd1kZmHVPAKxIGeCZ4sYpShF+UgSu0h3HwFcBZzMsswtwF8opV4GrlZKVfSZFYFIbHJmk6yREIXgcpq5UjkIjyV59e33/y02cQTq4cELe4bFXJePJPEY8CWl1LeBu4ADSqm7ZyizzTCMzYZhbDEMYwuw1zCMP8lDbCXDHzq3b5PMbBKFsnyJHa/bxO6DQXr6z63WnkgSh4YlSYj3y3mSMAzDT3pgegdws2EY7xqG8bUZyoxe8P0tuY6r1ATCUSz29DRY2QFWFIrZpLHuKhcAz7w+NLlRoNPioN5Rx7GRE0QT
clyMOCcv8y4Nwxjm3OylSy5TyfzhGFZfjDiyA6worCafhfmtVk51RTnQEeLKZelB7PmeNgYigxwdOcHKOlXkKEWpkBXXRZBMpQiGo5hlB1hRJFdf4UADdr13bhZP+8S4xJB0OYlzJEkUQXgsTjIFJuvEmIR0N4nCcjnNtDRZ6O6P0t2X/jlsdTdh1swclnEJcR5JEkXgz6y2Rh8HNGwmW1HjEXPT0oXpn7vdB9KtCd2k0+JuoivYzeh45a4TELMjSaIIApnN/RLmMWwmG9oMi5eEyIfmBgsel4lDx8JExtLrJiZmORnSmhAZ8ulUBP5wDEgR1yIyHiGKRtM0liy0EU+k2GeEgPPWS8i4hMiQJFEE/lAUTAmSWlzWSIiiWjTPhtkEew4GSKVS+Bx1OHQ7xnDHZZ2jLSqHJIkiCISjaBOD1rJGQhSRzWpifpuVEX+cE51jaJpGi6uZkXE/Q2NzYoccMQNJEkXgD8dktbUoGUsXpv9Q2ZUZwG5xNQJwfPRksUISJUSSRBGMBsflRDpRMupqdGqrzRzvjBAZS9DsbgLg+OipIkcmSoEkiSIYDoxjssrZ1qJ0tLdYSaXg6KkIPkc9Zs3MCWlJCCRJFMVwcBybM7NvkyQJUQLamq0AHDkZRjeZaXD6OBPsZixeOmdiiOKQJFFgiWQSfyiKbpvY3E+ShCi+Ko8Zr9vEic4xYrEkLa5GUqQ46e8sdmiiyCRJFNhoMEoqxeTsJptsySFKRFuLlXgixYkzYzS7JsYlThY3KFF0kiQKbCSYHotI6ePomgXdlJeNeIWYtfO7nJonZzjJ4PVcJ0miwIYD6RZEwiSrrUVpqas247BrHD0VwW62U22r4sToKZKpZLFDE0UkSaLARoLjoCWJa2Oy+6soKZqm0dZsZWw8SWf3OM2uRsYS43SHeosdmigiSRIFNhwYB13OkRClqf28LqcWl6yXEJIkCm44cN5COmlJiBLTUK9jtWgcORGmydkAyOD1XCdJosBGguPn9m2SNRKixJhMGi2NFgKhBLGgC5vZKi2JOU6SRIGNBMexOSbWSEhLQpSeiVlOR0+N0eRqZCAyiD8qhxDNVZIkCmw4MI7VIautRelqbrCgaXD8TETGJYQkiUKKjMcZiybQ7ekkYZOBa1GCLBYNX61Od1+UWt0HyLjEXCZJooBGgumxCM0ysbmfdDeJ0tTSaAEgPORBQ+P4iLQk5ipJEgU0kllIl9LH0DBhNdmKHJEQU2vOJIlTnXF8jjpOB84QS8SKHJUoBkkSBTQcPG+1tcmOpmlFjkiIqVV706uvj5+J0OxqJJFK0BnsKnZYoggkSRRQekuOFDEtIocNiZKmaRrNDRYiY0nsiToAjo2cLG5QoigkSRTQSCAKepQkCRxmZ7HDEeKiWhrTU2HDg1UAnJAZTnOSJIkCGg6Oo9nSh7g4za4iRyPExTX5dDQNzpzWcFtcHBs9SSqVKnZYosAkSRTQSHAcsz0CSJIQpc9qNVFfq3O2L0qDo5FgLER/ZLDYYYkCkyRRQMOBcWyu9PRXSRKiHLQ0pGc5WcZrAelymoskSRRIMpliNDiOxZGe4SRJQpSDiamwwX4vAMdkUd2cI0miQPzhKMkUmDJjEg5dkoQofTVVZuw2ja6TVnSTLkliDpIkUSATJ9IlLWHMmo5VsxY5IiFmpmnpXWEjY1BnaaAn1MvomL/YYYkCyssBy0qpe4CVwDbDMO7OpoxSqgp4ADADIeDzhmFE8xFfMUysto6bQjjNLllIJ8pGc6OF46ej6GN1oJ3lYH8HS+zLih2WKJCctySUUncCZsMwNgKLlFJLsyzzh8C3DcO4DegBPpzr2IppJDgO5hgJLSrjEaKsNPksaIC/pxqAA31HihuQKKh8tCS2AA9mHj8L3Ah0zFTGMIzvn/d9H9A304V8Ps/lxFlQ40nQrOnxiCq7F7d75hXX2ZQpZ1K/8uAGGnwh+roceFp1DvZ18CfXlM/v3qUop8+WfMtHknABE5u8
DAFrZ1NGKbURqDEMY8dMF+rvL5+DULp6/Wi29BoJS9JOMDh20fJu98xlypnUr7w01uv09sfwaj7O+Ls53tWNx+oudlh54fN5yuqzZbZmmwDzMXAdBCYOSnBPc40pyyilaoHvAn+ch7iKaiQwjmbNLKTTZUsOUV4mtg4nmN7H6cjwsSJGIwopH0liF+kuJoCrgJPZlFFKWYHfAv/FMIyKW7EzHIxicaYHrx0yJiHKzMRU2KGu9F+hHSPHixyRKJR8JInHgC8ppb4N3AUcUEpdOMPpwjLbgP9AutvpH5RSLyulPp+H2IpmJDCObpd9m0R50jSNpgYL4WEPuqbTIS2JOSPnYxKGYfiVUluAW4FvGYbRA7w7Q5lR4AeZr4ozHksQHo/jsY1hwiRnW4uy1NJo4WRnFI/WQE/4LP5oAK9VBngrXV7WSRiGMcy52UuXXKZSDIykxyJSljAOs1PWSIiy1NyQngobG6kB71k6ho9xTePVxQ5L5JmsuC6AroEQaAkSpjHpahJly2Y1UVtjZuhMuvVwRMYl5gRJEgVwdiA0eY6EDFqLctbSaCUR9KJj4fDQhcufRCWSJFEAXf2h86a/SpIQ5au1yQKYsEZ9DEQG6QsPFDskkWeSJAqgayA0Of1VuptEOaupMuN2mQn2pM+XODhkFDkikW+SJPIsFk/SNxzG6YkBkiREedM0jYXtdqJD6UV1hwYlSVQ6SRJ51jMUJpkCi1PWSIjKsHCenVTUgTXhwRg+RiwRK3ZIIo8kSeRZ10AQIDMmoeEwy5Ycorw1+qw47BrRwXpiyRhHR08UOySRR5Ik8qyrPwRAzBTCYXJg0uR/uShvmqbR1mxlPNPldFC6nCqafGLl2dmBEJjiRFJBXHpl7pop5p72FivJQC1ayixJosJJksizroEQjup0a6LaUlvkaITIjYY6HZvFTCpQR0+4j8HIcLFDEnkiSSKPorEE/cMRnDVhQJKEqBwmk0Zrk2VylpNMha1ckiTyqHswTArQ3ekDTKotNcUNSIgcam+xkhz1AXBg8FCRoxH5IkkijyZmNsWtw5g1HbcuO2aKytHks6AnXDDm4dBgB5F45ZzEJ86RJJFHXQMhMCUIayNUW2rQZGaTqCBms0Z7i4XYQCPxVJz9AweLHZLIA/nUyqOz/SE0RwBISVeTqEiL59tIDDUBsKdvf5GjEfkgSSKPugZC2KvTXU5VMmgtKlB9rY7XUkUy7ObgoCFdThVIkkSejEcTDIyOYfemk4TMbBKVSNO0ydaEdDlVJkkSeXJ2ML02IuUYxYQZj+4tckRC5MeCdiupkYkup31FjkbkmiSJPOnqD4GWJGoeocpSLdtxiIplt5loraklGXZzQLqcKo58cuVJ10AQzREgpaWkq0lUvCWZLqdEKiFdThVGkkQepFIp3j06iO6RRXRibmj06VjDLQBsP/tOkaMRuSRJIg9O9wbpGQpTXZ8+srTaKi0JUdk0TWNJcx0Jfy1HRo5yNthT7JBEjkiSyIPtB9K/IGZ3AA0TXr2qyBEJkX+L59tI9S0A4LlTrxY3GJEzkiRyLJlM8dahXuyucUZTfdRY6zBp5mKHJUTeOewmlvnaSY45ead3D/5ooNghiRyQJJFjh08PMxqMUrewH4AFzkVFjkiIwlm5xIk2sIAkCZ478XqxwxE5IEkix3Yc6AWShF0n0DULrfb5xQ5JiIKxWDRW1i8hFbfw6pntROX867InSSKHYvEEu4w+XI3DjKVCtDsWoJv0YoclREEtW+DCNDyPuDbGc8e2FzsccZkkSeTQu0cHiUQTuFq7AVjgWlzkiIQoPJNJ48r65aQSZn5/+lmG5NS6siZJIoe2H+gByxgBvYtqS60sohNz1uIWL7bBlSRNUf51xy9JppLFDklcIkkSOdLVH2T/8UGq2nuAFAucS4odkhBFo2kaN6uV4G9gKNXFD7f/rtghiUskSSIHugdD/MsDe0nYR0jWH0fXdNocMmAt5janw8zmlo2kYlb2
h9/gyb17ih2SuASSJC5T73CYf7l/DwH6ca3cRYIYa6rXYzFZih2aEEVX53Wx2n0dminF0wO/4fuvP8lYNF7ssMQsSJK4DAOjEf7lvj2MJtMJIqnFuKZ6I22OBcUOTYiSsaSunRX6jZAwcyD6Gn/71P/moR17GZdkURbyMj9TKXUPsBLYZhjG3dmWyeZ1peTlPWcZCozTfE0Xo1qMa6o30O5cUOywhCg5yxvm0RKp442+Nxjz9vJS+D5efN6JK9aKz15Ps7eOOmcVLpsNu9WCXbdgNpnRTSbMJjOaBiZNQ9M0TKRnUGmahqZ98Fqp1IX/Tj+RIvWBMuc/N/FwJB5ieDhMKpUilUqXTWXe59zjJKkUJCefmyib/i8XxDCd1AUFtakqNIPpXqIx9Tdu8S2f1fvnPEkope4EzIZhbFRK/VQptdQwjI6ZygCrZnpdqbl9fTutPhd2zyLOhruos9Xn7L29dgfEZv8DUy6kfuXrUuvmtjn4TNXHOTpyjIP9xwlYuwnbOzhFB6ciQCT3sYoPuuXaH8yqfD5aEluABzOPnwVuBC78sJ+qzJosXnc+zefzXH60l8EHLJpfl/nXqmKGIkQZWV/sAMQs5GNMwgV0ZR4PAY1ZlsnmdUIIIQooH0kiCDgyj93TXGOqMtm8TgghRAHl44N4F+muIoCrgJNZlsnmdUIIIQpIS104FeAyKaW8wGvAC8AdwBeAzxmG8bWLlNlAej7A+54zDGM0p8EJIYSYlZwnCQClVA1wK/CqYRhTnmM4VZlsXieEEKJw8pIkhBBCVAYZHBZCCDGtsjsRRyn1feBpwzCeLLcV2jOZqFvm63jmC+AvDcPYX7TALpNS6s+Bz2f+WQ28RfpnryLu3RT120W627RS7l8NcC/QAOwyDONPK+V378K6AX9BZf3uLQS+B3iBtw3D+L9me+/KqiWhlNoENGUSxOSqbWBRZtV22Tq/bsBq4H7DMLZkvsr2hxTAMIwfTNSF9OSEY1TQvZuifj+kgu4f8CXgXsMw1gEepdR/pnLu3/vqBqylsu7dPwP/wzCMTUDbpXxulk2SUEpZgB8DJ5VSn2TqVdtlaYq6bQA+ppR6Wyl1j1Kq7Fp8U1FKtZJeJNlGhdy7851Xv3VU1v0bBK5USlUD7cBCKuf+XVi3O6ise7cM2J153Af8v8zy3pVNkgC+DBwEvkV6Xf9fUDkrtC+sWx1wi2EY6wEL8JEixpZLfwH8gMpdXT9Rv51U1v17HZgP/BVwCLBSOffvwro9RWXdu4eAf1RKfRz4MPAis7x35ZQk1gA/ykyN/TXwKpWzQvvCuq01DKM78713gHJuzgOglDIBNwMvU4Gr6y+o374Ku3//CPyZYRjfAA4DX6Ry7t+Fdauo373MmMPTwJ8Av+ASfvfK6eYeBRZlHq8DFlA5K7QvrBtKqauUUmbgU8C7xQoshzYBbxmGkaIyV9efX79fVdj9qwFWZepzHfD/UDn37wN1q7B7B7AXmAd8m0v43Sun/rZ7gJ8qpb5Auhm4BXhCKdXCuVXb5erCun0V+BWgAU8YhvF8MYPLkdtJt/4AHgNeq5B7N+H8+n0DuI/KuX/fBH5GultmO/CvVM79u7Bum6isewfwfwPfNgwjrJSa9e9eWS+mkxXa5UvuXXmT+1e+ZnvvyjpJCCGEyK9yGpMQQghRYJIkhBBCTEuShBBCiGlJkhBCCDEtSRJCCCGmVU7rJIQoGZk9fSbm11tJb8kxD/ivQBwwgDeA+4GfArXAOPAHhmEMFyNmIS6FtCSEuDQNpHd8vYn0VtP/Afj3wF8Dfwt4DcP4N+AfgN2GYdycKf/nxQlXiEsjLQkhLk2UdIL4ROaxH9gD3E16te7fZ8pdCTQppW4FnKRbF0KUDWlJCHFpvgx0GobxMeClzHO3AjcbhrHZMIzXM8+9B/xr5qyJ/4gkCVFmZMW1EJdAKbWadDfTCOmtl33AWeAK0mcUnCHdmhgjfVZIPemxi/9oGMbeYsQsxKWQ
JCFEDiilbMCTmX/GADPwnwzDOFS8qIS4fJIkhBBCTEvGJIQQQkxLkoQQQohpSZIQQggxLUkSQgghpiVJQgghxLT+f+MdAFQF9MKdAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#两类样本的年龄分布 核密度估计kdeplot 直观的看到数据样本本身的分布特征\n", "sns.kdeplot(df.age[df.resp_flag==1],label='1',shade=True)\n", "sns.kdeplot(df.age[df.resp_flag==0],label='0',shade=True)\n", "plt.xlim([60,90])\n", "plt.xlabel('age')\n", "plt.ylabel('Density')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索用户性别以及性别和购买保险之间的关系" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAADMCAYAAAC1Mj0uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAACzZJREFUeJzt3X+MbHdZx/HPci8I1KtpzVpFDERpnviLmkKxBdGrYEUEtYhpo/7RGGICSjBGDKQBS0JJtIk/YlMNCAUBrRUbY6LSFqW2EbTSGrCJ+ZqGtjGmhaqNLZJYWtc/5pRut9vOep+dnZm7r1dys7Nnb898b5/snPeeMzuzsbW1FQAATtxTlr0AAIB1J6gAAJoEFQBAk6ACAGgSVAAATYIKAKDp6DLv/KGHHt66774vLnMJzHHqqc+MGa0u81l9ZrT6zGi1rdJ8NjePbTzR15Z6huro0SPLvHv2wIxWm/msPjNafWa02tZlPi75AQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGha6utQ/eQvf3iZdw/AAfutN//IspcAC+EMFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE2CCgCgSVABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBp34Oqqt5fVVdPt6+qqvfv930AAKySRZ2hOnPHRwCAk9aigurBqvqaJF9a0P4BAFbGooLq00kumD4CAJzUFhVUtya5aPoIAHBSW2RQnR1BBQAcAosKqjuT/EuSuxa0fwCAlXF0v3c4xrhoulnTx4t2/5sAACcHL+wJANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE2CCgCgSVABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaNrY2tp60r9QVWckeVmS05L8R5Lrxhh37NP9b9177wP7tCsWYXPzWMxodZnP6jOj1WdGq22V5rO5eWzjib72pGeoqur8JDcmeUGSU5KcleRvquoH93WFAABr7Oicr785yVljjLsf2VBVpye5Osm1i1wYAMC6mPccqv/ZHlNJMsb4XJKHF7ckAID1Mu8M1fdU1f1Jdl4zfPqC1gMAsHaeNKjGGEcOaiEAAOvqSYOqqu5IstuvAW6NMb55MUsCAFgv8y75Hd92+zlJ3pLk25L86qIWBACwbp70SeljjLsyi663JXlPkj9N8rwxxhUHsDYAgLUw73WoPpTkn5M8K8nbk/x7kldX1WsOYG0AAGth3iW/LyX58HT7Fdu2byW5ZiErAgBYM/
OC6jfGGJ/ZubGqLlzQegAA1s68F/b8zUduVNVfb9v+s4tZDgDA+pkXVBt7uA0AcKjNu+R3tKpOzSy8HnN74SsDAFgT88Lo2UlumW5vbLsNAMBkXlCdn+SyJL+U5PLMAitJvrgfd37RlW/aj90AAIfYZa9657KXMPc5VJcnuTjJbUkeSvK8JBcmuX3B6wIAWBvzgmprjHHzGOPhJL8yfbw5yemLXxoAwHqYd8nvY1V1fZKPJrmvqs5K8uokH1/4ygAA1sS89/J7R2ZvOXMsydlJvi7JZWOMtx7A2gAA1sLclz8YY3wyyScPYC0AAGtp3nOoAACYQ1ABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE1H93uHVXVJkguSfG7a9JExxuX7fT8AAKti34NqcukY40ML2jcAwEpxyQ8AoGlRQXVxVd1QVVcsaP8AACvDJT8AgCaX/AAAmgQVAEDTvl/yG2Ncst/7BABYZc5QAQA0CSoAgCZBBQDQJKgAAJoEFQBAk6ACAGgSVAAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE2CCgCgSVABADQJKgCAJkEFANAkqAAAmgQVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQJKgAAJo2tra2lnn/W/fe+8Ay7585NjePxYxWl/msPjNafWa02lZpPpubxzae6GvOUAEANAkqAIAmQQUA0CSoAACaBBUAQJOgAgBoElQAAE3Lfh0qAIC15wwVAECToAIAaBJUAABNggoAoElQAQA0CSoAgCZBBQDQdHRZd1xV703yrUn+fIzxzmWt47CqqqNJPjv9SZI3JnltklcmuXmM8XPT33vHXraxv6rq9CQfGWO8tKqemuSaJKclee8Y432dbUv5B51kdsznG5L8fZLbpy//xBjj3t0e4/a6jRNXVV+d5KokR5L8d5ILkvxOTnAW5rP/nmBGt2fb8WiM8U97Pf6syjFpKWeoquo1SY6MMc5N8k1VdcYy1nHIPT/JH44xjo8xjid5WpLvTvKiJJ+vqpdX1Qv2sm05yz95VdWpST6Q5JRp0xuT3DLGeEmS11bVseY2GnaZz3clufSR76Upph73GLfXbcv4N51kfirJr48xzktyT5ILc4KzMJ+F2Tmjt2Tb8WiKqT0df1bpmLSsS37Hk1w93b4us/8ZHKxzkryqqm6efgJ7WZI/GWNsJbk2yUuTfO8et7G/Hs7sJ7b7p8+P59HvlxuTvLC5jZ6d8zknyeuq6taqete07Xge/xi31200jDGuGGNcP326meSnc+Kz2G0bTbvM6KFsOx5NV1D2evxZmWPSsoLqlCT/Nt3+zySnL2kdh9k/JHn5GONFSZ6a5Bl5/Ex2m5PZLdgY4/4xxn9t27TXOZjXAdhlPn+Z2YH37CTnVtXzYz5LV1XnJjk1yb/G989K2jaj6/PY49Ers4YzWlZQfSGzA3iSfOUS13GYfWaMcfd0+1PZfSZ73cZidWZjXov3iTHGA2OMh5P8Y5IzYj5LVVWnJfntJD8T3z8raceMdh6P1vJ7aFl3fEsePXV6ZpI7l7SOw+yDVXVmVR1J8mOZVf7Omew2J7M7eHudg3ktx7VV9fVV9cwk5yW5LeazNFX1tCR/nOStY4y74vtn5ewyo53Ho09nDWe0sbW1deB3WlVfleSmJH+V5IeSnLPjFDoLVlXfnuQPkmwk+bMkb8tsJp9K8orpz1172TbGuOOg138YVNUNY4zjVfWcJH+R5GNJXpzZc3aefaLbpjMpNG2bz/dl9ltkDyZ59xjj8t0e45Js7WWbx8Keqnp9kndldlBOkiuT/GJOYBa7bTOfvl1m9PEkP57peDTGuLiqnpI1OyYtJaiSL/+mzA8kuXGMcc
9SFsFjVNUzkvxwklvHGJ/9/2xjsarqWZn9FHbtIw/onW0s3m6PcXvdxv7qzMJ8lmfdjklLCyoAgJOFJ9gBADQJKgCAJkEFHFpV9QvLXgNwchBUwGEmqIB94UnpwEqb3obid5N8S2Zv6P6GJE9P8muZ/VB4/Rjj7VV1UZLnjjEumf67O8cYz62qG5J8NLNfqT49yfmZvV7N6zP71fi/m/Zx6QH+s4CTjDNUwKp7XZIHpzd4fkOS70/ywczeYPXFSV5YVefN2cfXTm8CfmWS88cYfzR9fs/0ZqxiCmgRVMCq+44kn0iSMcYtSX4/yf+OMe6c3hD1pszOOH3Z9Krl271v+vj5JF+x2OUCh5GgAlbdbZldmktVfWeSa5JsVNU3VtVGkpdk9orLD2b2zvVJ8qM79vGFJ9j3U6b9buz3ooHD5eiyFwAwx+8leXdV3TR9/qbMnkN1VR59DtV1VbWZ5Oer6vIkd+++q8d5T1X9bZIjmaIN4ER4UjoAQJNLfgAATYIKAKBJUAEANAkqAIAmQQUA0CSoAACaBBUAQNP/AePrwPsJ3AvHAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# 查看性别比例\n", "plt.figure(figsize=(10,3))\n", "sns.countplot(y = 'GEND',data=df)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYkAAAECCAYAAAALqiumAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAErZJREFUeJzt3XuQnXV9x/F32EQn2QS74BoMIBAHvxphIopKTCIpQpWKl6It1htUVKxiRUcsDlopjYOCWpEURSdaWq1KvXG/eCFctKJGLo4434I1yCREN2YNiQGFZPvHeWLWkN/ybPac55wk79fMzj7n+5w9z/dkds8nv99zmzQyMoIkSduzR7cbkCT1LkNCklRkSEiSigwJSVKRISFJKprc7QbaaWhovYdqSdI4DQ7OmFRa50hCklRkSEiSigwJSVKRISFJKjIkJElFhoQkqciQkCQVGRKSpCJDQpJUZEhIUo8655yzWbz4A1x99RV/rJ188usa7WGXuiyHtCt7x3mXdbuFnnH+6S/tdguNuPvuu1i69D+72oMhIUk76MQT/5ZnPetw+vunc/LJp3D77bdy5ZWX8fvfP8jhhz+Xl7zk5Vx00b+xbt1vWbNmDe9+9xlcfvk3uPPOn7LffvsxZcpjOPXU0x7xuvfdt4qvfvUShoZ+zZIlH+e4417GgQce9Ijn3XzzDVx22TeYMmUyxx33cubNm8/553+UtWvXsGnTJhYsOJIXvejFE3qPTjdJ0g5aufJeXvGKEzj55FMA+NSnlrDnno9j5sx9uO22HwPw85/fRcTTOOmkk9lzz8cB8IIXHMM73/ke7r33HtasWfOI133iE2dx6qmnMTj4BE499bTtBgTAvvvuz7HHvpjBwZksW/ZtAG69dTnvf/+/sGrVqgkHBBgSkrTDDjjgIPbdd79RlRFOOumNvPnNb+PQQ+cCcOKJb+Sgg2bzpS99gR/96AcAPPTQQwA8/PAm9tijeAHWR7VkycfZvHkzCxceyebNm9m8eTOzZu3LOeec/cfgmiinmySpTd70preyePE/MWnSHixadBQA11//LTZu/B0bN/6OffZ5Ipk/4+abb+AnP7md2bOfzF577b3D29tvv/24447b2LBhAxs2rOfhhx9m9er7iHgat99+KwcdNJtZs/ad0HuaNDKy69yCwftJaFfmjuutduYd10uXXsRTnzqH+fMXArBmzRA333zDnzzn2c8+YpsRSj2//OUKvvzl/2Lfffdj9er7mD//+Tz3ufMe9efGup+EISHtJAyJrXbmkOhF3nRIkrRDDAlJUpEhIUkq8ugmSWqDdu8z6pX9Lo4kJElFjiQkaSd2zjlns2LFL5g3bz4nnfTGtr9+R0YSETEzIm7apnZIRHyzWp4SEZdHxHcj4g3jqUmSWm644Tts3ryZiy76HKtWreTee3/Z9m20PSQiYgC4GOgfVZsEfAyYUpXeDizPzPnAKyNixjhqkiRa12k66qijAXjOc47gjjtua/s2OjHdtAk4Abh0VO3vgOuBF1aPFwFnVMs3AoePo3Z9acMDA9OYPLlvgu1L6nWDg7v+/xf
rvMeRkYd5ylMOYnBwBvvvvw933nln2/9t2h4SmXk/QERQfd8beC2tgNgSEv3Aymp5LTBzHLWi4eGN7XgLknrc0ND6brfQcXXe46RJU7jvvrXsvfd6Vq/+DevXP7BD/zZjBUsTO64/BLw3Mx/aEhzABmAqsA6YXj2uW5OkntONQ1Yjnsodd9zGIYccyt1338X++x/Q9m00cQjskcCHI2IZ8IyIWAwsBxZU6+cCK8ZRkyQBz3/+Iq699iouuOBjfOc73+R5z1vw6D80Th0fSWTmU7YsR8SyzHxfRBwAXBURC4E5wC20ppXq1CRJQH//dC644CJ++MNbePWrX8/06dPbvo2uXQU2ImbRGiVcm5nrxlMr8Sqw2pV5FditeuVs5F3FWFeB7drJdJm5CrhkR2qSpGZ4WQ5JUpGX5ZCkNjj9ive19fXOO25xW19vRzmSkKSd2Nq1v+Gtb23/NZu2MCQkaSd1//33s3jxWTz44AMd24YhIUk7qb6+PTj77HOYNq3/0Z+8g9wnIUk7qf7+9p8XsS1HEpKkIkNCklTkdJMktUGvHLLabo4kJGknt2TJpzv22oaEJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUUdOeM6ImYCX8nMhRHxJOA/gM3A3cAp1Xa/BuwFLM3Mz0bElDq1TvQrSdq+to8kImIAuBjYcu3aU4C/z8yjgP2BQ4G3A8szcz7wyoiYMY6aJKkhnRhJbAJOAC4FyMwzR63bG1gDLALOqGo3AoePo3Z9acMDA9OYPLlv4u9AUk8bHPT/i01pe0hk5v0AEfEn9Yg4AfhpZq6KiH5gZbVqLTCT1sijTq1oeHhjG96BpF43NLS+2y3sUsYK3UZ2XEfEbODdwGlVaQMwtVqeXvVRtyZJakjHP3SrfRRfBN6Qmeuq8nJgQbU8F1gxjpokqSFN3E/iDOBJwAXVFNQHaO3YvioiFgJzgFtoTSvVqUmSGjJpZGSkKxuOiFm0RgnXbhlh1K2VDA2t786bkRrwjvMu63YLPeP801/a7RZ2KYODMyaV1nXtznSZuQq4ZEdqkqRmuCNYklRkSEiSigwJSVKRISFJKjIkJElFhoQkqahrh8D2Ko9F38pj0SU5kpAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUWGhCSpyJCQJBUZEpKkIi/LIWmnc/oV7+t2Cz3jvOMWd/T1HUlIkoo6MpKIiJnAVzJzYURMAb4G7AUszczPTqTWiX4lSdvX9pFERAwAFwP9VentwPLMnA+8MiJmTLAmSWpIJ0YSm4ATgEurx4uAM6rlG4HDJ1i7vrThgYFpTJ7cN+E3oJbBQTNZ6nWd/jtte0hk5v0AEbGl1A+srJbXAjMnWCsaHt444f611dDQ+m63IOlRtOPvdKygaWLH9QZgarU8vdrmRGqSpIY08aG7HFhQLc8FVkywJklqSBPnSVwMXBURC4E5wC20ppB2tCZJakjHRhKZuaj6fg9wDPBd4OjM3DSRWqf6lSQ9UiNnXGfmKuCSdtUkSc1wR7AkqciQkCQVGRKSpCJDQpJUZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqSiWiEREe/d5vH3OtOOJKmX1B1JHLPN483tbkSS1HvGvApsRLwDOA2YGRH/B0yqvj7TQG+SpC4bMyQy83zg/Ii4PjP/vKGeJEk9ou500yc72oUkqSfVvenQbRHxj8BjtxQy8+zOtCRJ6hV1RxJfp7Wz+p5RX5KkXVzdkcSvM/O8HdlARAwAXwCeACzPzFMiYikwB7gyMxdXz6tVkyQ1p+5I4pqI+EhEzImIJ0XEk8axjdcBX8jMw4EZEfEeoC8z5wGzI+LgiDi+Tm08b0ySNHF1RxIvqr4/q/o+AhxV82d/AxwSEX8G7A+sAy6p1l0HLAAOq1m7a6wNDQxMY/Lkvppt6dEMDs7odguSHkWn/05rhcQED3+9GXgx8A/Az4DHACurdWu
BZwL9NWtjGh7eOIE2ta2hofXdbkHSo2jH3+lYQVMrJCLiF7RGD1uMZOaTa27/A8BbMvP+iHgX8EG2now3ndaU1wZgao2aJKlBtT54M/OgzJwNPB04E1g6jm0MAIdGRB/wXOBDtKaOAOYCK4DlNWuSpAbV3ScBQGY+AHwxIj4xjh87B/gccADwP8C/AjdFxCzgWOAIWqOUOjVJUoPqTjd9gK3TTU8AnlF3A5n5A1ojkNGvt4jWRQPPzcx146lJkppTdySxYtTyXcBZE9loZg6z9cilcdUkSc2pu0/iYlpHGj0eGMrMNR3tSpLUE+redOgs4J3AFOAd1WNJ0i6u7nTTMZk5HyAiJtE69+GsTjUlSeoNdc89GImI/avlWfzpOROSpF1U3ZHEe4BlEbGJ1p3pTuxcS+oVp1/xvm630DPOO87rS2r3VHck8QCtGw8tAH4FPNixjiRJPaNuSFwIfDszfw2cSuuEOEnSLq5uSGzKzFsBMvM2WlNOkqRdXN19Ej+LiCXA92hdHuPuzrUkSeoVdUcSbwF+AjwPuBN4c8c6kiT1jLr3k9gEXNThXiRJPcZ7NEiSigwJSVKRISFJKjIkJElFhoQkqciQkCQVGRKSpCJDQpJUVPeyHBMWERcCV2fm5RGxFJgDXJmZi6v1tWqSpOY0MpKIiIXAPlVAHA/0ZeY8YHZEHFy31kSvkqStOj6SiIgpwGeAqyLiZcAi4JJq9XW07lFxWM3aXWNta2BgGpMn97WzfQmAwcEZ3W5B2q5O/242Md30eloXBTwXeDvwNmBptW4t8EygH1hZozam4eGNbWtaGm1oaH23W5C2qx2/m2MFTRPTTYcBn87M1cDngRuBqdW66VUPG2rWJEkNauKD925gdrV8OHAgrakjgLnACmB5zZokqUFNTDctBT4bEa8CptDaJ3FZRMwCjqV1E6MR4KYaNUlSgzoeEpm5Hvjr0bWIWAQcA5ybmevGU5MkNaex8yRGy8xhth65NK6aJKk57gyWJBUZEpKkIkNCklRkSEiSigwJSVKRISFJKjIkJElFhoQkqciQkCQVGRKSpCJDQpJUZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqSixm5fGhEzgWsy87CIWArMAa7MzMXV+lo1SVJzmhxJfASYGhHHA32ZOQ+YHREH16012KskiYZGEhFxFPA7YDWwCLikWnUdsAA4rGbtrrG2MzAwjcmT+9rZugTA4OCMbrcgbVenfzc7HhIR8Rjg/cBfAd8A+oGV1eq1wDPHURvT8PDGtvUtjTY0tL7bLUjb1Y7fzbGCponppjOACzPzt9XjDcDUanl61UPdmiSpQU188B4NvC0ilgHPAF5Ca+oIYC6wAlhesyZJalDHp5sy8/lblqugeClwU0TMAo4FjgBGatYkSQ1qdAonMxdl5v20dl5/H/jzzFxXt9Zkr5KkBs+TGC0zh9l65NK4apKk5rgzWJJUZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUWGhCSpyJCQJBUZEpKkIkNCklRkSEiSijp++9KIeBzwJaAP+B1wAvBJYA5wZWYurp63tE5NktScJkYSrwE+lpl/AawGXgX0ZeY8YHZEHBwRx9epNdCrJGmUjo8kMvPCUQ8HgdcCH68eXwcsAA4DLqlRu2usbQ0MTGPy5L72NC6NMjg4o9stSNvV6d/NjofEFhExDxgAVgArq/Ja4JlAf83amIaHN7avYWmUoaH13W5B2q52/G6OFTSN7LiOiL2AC4A3ABuAqdWq6VUPdWuSpAZ1/IM3Ih4D/Dfw3sy8B1hOa+oIYC6tkUXdmiSpQU1MN51Ma6rozIg4E/gc8LqImAUcCxwBjAA31ahJkhrUxI7rT9I65PWPIuIy4Bjg3MxcV9UW1alJkprT2I7r0TJzmK1HLo2rJklqjjuDJUlFhoQkqciQkCQVGRKSpCJDQpJ
UZEhIkooMCUlSkSEhSSoyJCRJRYaEJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUpEhIUkqMiQkSUWGhCSpyJCQJBV15R7X4xERS4E5wJWZubjb/UjS7qSnRxIRcTzQl5nzgNkRcXC3e5Kk3cmkkZGRbvdQFBGfAK7JzKsi4lXA1Mz8XLf7kqTdRU+PJIB+YGW1vBaY2cVeJGm30+shsQGYWi1Pp/f7laRdSq9/6C4HFlTLc4EV3WtFknY/vX500zeAmyJiFnAscESX+5Gk3UpP77gGiIgB4Bjgxsxc3e1+JGl30vMhIUnqnl7fJyFJ6qJe3yehLoiIfwemZebfRMSXgAcz86TudiVBRJwFnAD8qip9JTOXdK+jXZ8hoZK5o77f0s1GpG18MDM/3+0mdhdON6nkDxGxN/BQtxuR1D2GhEpupzWsv73bjUjbODMilkXEhd1uZHfgdJNKfgycBHyRrVNPUi9wuqlBjiRU8mPg2dV3SbspQ0IlK4D/Be7pch+SusiT6SRJRY4kJElFhoQkqciQkCQVGRKSpCLPk5AmqLqe0NG0/tP1NuDrtG67u+Vs9dOqrymZ+ZqIOBBYlpkHRsQm4GbgscC5mfm1htuXxuRIQpqAiHgesDAzFwDvAs6rVr0sMxdVX7dVtRMi4uBtXuKBzDwSOB64MCKimc6legwJaWJeCFxTLf8IePcYz/0W8N7trcjMVcAVwAva2p00QU43SRMzE7g3Io4E/pmtf1OXRsRDAJm5qKpdDJwNHFB4rV8De3WuVWn8DAlpYtYBe2bmDRFxNHB3VX9ZZq7Z5rkPAR8Fzii81uOBWzvTprRjnG6SJmYZ8JKI2AN4Vo3nfxZ4+rbFiJgJ/CVwXVu7kybIkYQ0AZl5dUQsAr5P625pDwBTGTXdBHx61PP/EBEfBk6vSlMjYhnQB7w5M3/eVO9SHV67SZJU5HSTJKnIkJAkFRkSkqQiQ0KSVGRISJKKDAlJUtH/Ax2+5WB2jOGDAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "sns.countplot('GEND',hue='resp_flag',data=df)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 探索用户的学历情况,以及学历与购买保险之间的关系" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "4 18597\n", "3 12437\n", "6 7493\n", "5 4474\n", "2 462\n", "7 130\n", "0 60\n", "1 9\n", "8 4\n", "Name: c210mys, dtype: int64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.c210mys.value_counts()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAlQAAADMCAYAAAC1Mj0uAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAEPFJREFUeJzt3X+wXGV9x/F3CEEQMyZ0rrHYqMMM/Y4gIIpohEiS8hsKgth0BnH8QYuBom2nM/6IPwoDOsOotcUhggQElIBQpPijJDqDkurwK6hgtd+OUhhBUy+CJOCMKNz+cU7kcrN77zbPHvbc3fdr5g67Z3fP89zvHM795DnPPmfOxMQEkiRJ2nE7DboDkiRJs52BSpIkqZCBSpIkqZCBSpIkqZCBSpIkqZCBSpIkqdDOg2z8979/auLRR38zyC602sKFz8f6dGZtpmd9urM207M+3Vmb7kalNmNj8+d0e22gI1Q77zx3kM23nvXpztpMz/p0Z22mZ326szbdWZsBB6rxNV+A6/9tkF2QJEkq5hwqSZKkQgYqSZKkQgYqSZKkQo18yy8i1gL7AF/LzPOaaEOSJKkt+j5CFREnA3MzcwmwV0Ts3e82JEmS2qSJS37LgC/VjzcAhzbQhiRJUms0Eah2Bx6qHz8CLGqgDUmSpNZoIlA9DuxWP35BQ21IkiS1RhNhZxPPXOY7ALi/gTYkSZJao4lv+d0IbIyIPYFjgNc30IYkSVJr9H2EKjO3UE1Mvw1YnpmP9bsNSZKkNmlkHarMfJRnvuknSZI01JwwLkmSVKiREapeja16K+PjWwfZBUmSpGKOUEmSJBUyUEmSJBUyUEmSJBUaaKC6Z80Jg2xekiSpLxyhkiRJKmSgkiRJKmSgkiRJKtRIoIqIRRGxsYl9S5IktU3fA1VELASuAHbv974lSZLaqIkRqqeAlcCWBvYtSZLUOn2/9UxmbgGIiH7vWpIkqZWclC5JklTIQCVJklTIQCVJklSosUCVmcua2rckSVKbOEIlSZJUyEAlSZJUaKCBav9VNw2yeUmSpL5whEqSJKmQgUqSJKmQgUqSJKlQ32898/9x1eeP2qHPHX3c9X3uiSRJ0o5zhEqSJKmQgUqSJKmQgUqSJKlQ3+dQRcQLgWuAucATwMrMfLLf7UiSJLVFEyNUpwKfyswjgc3A0Q20IUmS1Bp9H6HKzIsmPR0DftnvNiRJktqksTlUEbEEWJiZtzXVhiRJUhs0sg5VROwBXAi8uYn9S5IktUnfR6giYhfgOuADmflAv
/cvSZLUNk1c8nsX8GpgdUR8KyJWNtCGJElSazQxKX0NsKbf+5UkSWorF/aUJEkqZKCSJEkq1Mi3/Hp12tvXMz6+dZBdkCRJKuYIlSRJUiEDlSRJUiEDlSRJUqGBzqF6x5e9b7IktcEFh1436C5Is5ojVJIkSYUMVJIkSYWavDnya4DvZebDTbQhSZLUFk3cHHkh8FXgYOCWiBjrdxuSJElt0sQlv/2Bv8/M84H1VDdKliRJGlpN3Bz52wAR8UaqUapz+92GJElSmzQyKT0i5gArgUeB3zXRhiRJUls0EqgycyIzzwLuAU5oog1JkqS26OmSX0SsAOYCTwAfAy7OzHVd3vs+4BeZeSWwAPh1n/oqSZLUSr2OUJ0P3At8EFgN/N00770EOC0ibqUKYRuKeihJktRyvU5K/x3wMDAvM78TEU92e2NmPgoc0Y/OSZIkzQa9jlD9N/Ag8JWIOBP4aXNdkiRJml16ClSZeTrwisz8DHAT8M5GeyVJkjSL9Dop/a+ALwJk5oP9avzyk25mfHxrv3Y3dMbG5lufLqzN9KxPd9ZGUhN6veT3MuDOiFgTEQc02SFJkqTZptdLfh8CXgncCNwQEbdFxJGN9kySJGmW6PWS32KqeVMrgW8DVwKfxCURJEmSel424atU60styczHACJiTWnjx91wUekuJEnSiPv80tMG3YWeA9U7gH2BEyMCgMy8tKlOSZIkzSa9BqprgauBp+vnE810R5IkafbpNVBtAP4HuK/BvkiSJM1KvQaqo4AXAIfVzyeAW7u9OSIWATdn5oFl3ZMkSWq/XgPV7cDl9D5C9Qlgtx3qkSRJ0izTa6DaE/jIlG0rOr0xIlYATwCbC/olSZI0a8wYqCLiT4DLqELVHKpRqq92ee8uwIeBk6gWAZUkSRp6066UHhGrqC71LQIeAZ4ETgDujogXdvjI+4GLMvPX/e6oJElSW800QvXWzDxk6saI+DRwLLBuykuHAysi4izgVRFxaWae3p+uSpIktdNM9/KbExFHTd4QES8B3gDcPfXNmfnGzFyWmcuA7xumJEnSKJhphOovgEsj4vPAQ8CuwK+A8zIzp/tgHaokSZKG3rSBKjMfBI6OiJ2A5wO/ycynp/uMJEnSqOlp2YQ6RD3ecF8kSZJmpWkDVUS8sdtrmdl1pXRJkqRRMtMI1fuAxcBdVGtQbTPtrWd69bWTz2R8fGvpbobW2Nh869OFtZme9enO2kzP+nRnbbqzNjMHqjdRLdD5ocz8+XPQH0mSpFln2mUTMvN3VAt5ehsZSZKkLnqZlL4v8L/1DwARsTwzbylt/ITrO97BRiNi7WGHDboLkiT1xUy3nvk01X38NkbEJyNiXv3SOY33TJIkaZaYaaX0AzPzIGAf4GFgQ0Qs5NkT1CVJkkbaTJf8nhcR8+q5VB+PiLuB9cD85rsmSZI0O8wUqP4JuBY4GSAz10fEFuC6Tm+OiJ2B++ofgLMz894+9VWSJKmVZrr1zLURcWNEHAgsBz4NXED1zb9O9gfWZeb7+ttNSZKk9pppDhWZ+VvgIuCW+hY0Z1ONXHXyeuD4iLgjItbWI1aSJElDbcZAVXsqM78HkJnfp/uk9DuBwzPzYGAecGx5FyVJktqt1xGkH0fEZ4DvUo1C/aTL++6pR7Sgul3N3oX9kyRJar1eR6jeDdwLvAH4EfDXXd53VUQcEBFzqW5b84PyLkqSJLVbTyNUmfkUcHEPbz0XuJrqkuBNmfnNgr5JkiTNCn2dNJ6ZP6T6pp8kSdLI6PWSnyRJkrowUEmSJBUyUEmSJBUa6MKbN51yPOPjWwfZhVYbG5tvfSRJmgUcoZIkSSpkoJIkSSo00Et+7/nyzwbZ/Czw60F3oMWsTTcfPnTBoLsgSSPHESpJkqRCBipJkqRCBipJkqRCjQWqiLgoIv68qf1LkiS1RSOBKiKWAi/OzK80sX9JkqQ26Xugioh5wOeA+yPixH7vX5IkqW2aGKF6G/Aj4ALg4Ig4u
4E2JEmSWqOJQHUgcElmbga+ACxvoA1JkqTWaCJQ/QTYq358EPBAA21IkiS1RhMrpa8FLouIvwTmAac00IYkSVJr9D1QZeZW4C393q8kSVJbubCnJElSIQOVJElSIQOVJElSoSYmpffsX05azPj41kF2odXGxuZbny6sjSSpTRyhkiRJKmSgkiRJKjTQS37fuXL8Wc//9JhdB9QTSZKkHecIlSRJUiEDlSRJUiEDlSRJUiEDlSRJUqG+T0qPiFXAyvrpAuD2zDyj3+1IkiS1RRM3R14DrAGIiAuBK/rdhiRJUps0dskvIl4CLMrMu5pqQ5IkqQ2anEN1FvVIlSRJ0jBrJFBFxE7AcuBbTexfkiSpTZoaoVpKNRl9oqH9S5IktUZTgeoo4NaG9i1JktQqjdzLLzM/2MR+JUmS2siFPSVJkgoZqCRJkgo1csmvV4e8bYzx8a2D7IIkSVIxR6gkSZIKGagkSZIKGagkSZIKGagkSZIKGagkSZIKGagkSZIKGagkSZIK9X0dqohYCHwReBGwKTPP6HcbkiRJbdLECNVpwBcz8yBgfkQc1EAbkiRJrdFEoPoV8MqIWAAsBn7WQBuSJEmt0USg+g/gZcB7gB8DjzTQhiRJUms0Eag+Crw7M88F/gt4RwNtSJIktUYTgWohsF9EzAVeB0w00IYkSVJrNBGoPg5cAjwG7AGsa6ANSZKk1uj7sgmZeQewb7/3K0mS1FYu7ClJklTIQCVJklTIQCVJklTIQCVJklRozsSEqxpIkiSVcIRKkiSpkIFKkiSpkIFKkiSpkIFKkiSpkIFKkiSpkIFKkiSpkIFKkiSpUN9vjtyriFgL7AN8LTPPG1Q/BiEiXghcA8wFngBWAj8B7qvfcnZm3hsR5wDHAndk5ln1Z7fbNkwiYmeqOvyhFsAp9FCHYa8NQESsojpeABYAm4Aj8NghIhYB12fm0oiYB9wA7AGszczLSrYN5Bfqoym1eSlwJfA01XnnDGBP4Pb6OcBbMnO803l62M7dU2rzEgrqMGy1ge3qcw5wWP3Si4ErqI6lkTx2phrICFVEnAzMzcwlwF4Rsfcg+jFApwKfyswjgc3A+4F1mbms/rk3Il4DHAocDPwyIg7vtG1Qv0CD9mdSLYBd6KEOI1IbMnPNpNpsBC7GY4eIWEh1ct+93nQ2sCkzDwFOiYj5hdtmrQ61OQNYlZkrgMXAfsDrgPMnHUfjnc7Tw3bu7lCbHa7DsNUGtq9PZn500vnnh1RhaiSPnU4GdclvGfCl+vEGqhP9yMjMizLzG/XTMeD3wPERcUdErK1HaQ4D/jUzJ4D1wNIu24bN65lUC+DP6K0Oo1CbP6j/Jb0IOAiPHYCnqEbuttTPl/HMOeZWqjqVbJvNnlWbzFydmT+uX/sj4GGq/+9Oj4i7I+Jj9WvL2P483WnbbDb1uCmpQ6dts93U+gAQEa8FHszMhxjdY2c7gwpUuwMP1Y8fofrDMHIiYgmwEPgGcHhmHgzMo7os06lGo1C3O3l2LXajtzqMQm0mOwtYw/b1GsljJzO3ZOZjkzb1WoOhr1WH2gAQESuB/8zMnwP/TvUH77XAkojYn9GsTUkdhqo20P3YAd4LXFg/Hsljp5NBzaF6nOoPJcALGMHJ8RGxB9UB+WZgc2b+tn7pLmBvOtdoFOp2z5RabAtVMH0dRqE2AETETsByYDWwi8dOR9t+38eoft/HC7cNlYjYC/gHYNul3+9uO44i4nuM7nFUUodhrw0AEbEAeFFm/rTe5LFTG9QvtIlnhvsOAO4fUD8GIiJ2Aa4DPpCZDwBXRcQBETEXeBPwAzrXaBTqNrUWu9NbHUahNtssBW6vL9957HTWaw1Grlb1vJh1wDsnjT6sj4g/jojnA0dSzY8ZudpQVodhr802JwJfn/TcY6c2qBGqG4GNEbEncAzVNdhR8i7g1cDqiFgN3AJcBcwBbsrMb9ajEB+PiH8Gjq5/HuiwbdicC1xNXQvgPKpjZaY6jEJtt
jmKam4PTKnXiB87k10BfD0illJ9q+h2qssNO7ptmLwfeClwYUQAfBQ4h+o89CTw2czMiPgF25+nJzpsGyYldRj22mxzFPCJSc89dmpzJiYmBtJw/a+kI4BbM3PzQDrRchGxG3AccHdm3tdt27DrtQ6jWJturA/UJ+5DgfXbRmJKto2iTufpUTx391qHUaxNN6NYn4EFKkmSpGExdJPCJEmSnmsGKkmSpEIGKkkjKyL+dtB9kDQcDFSSRpmBSlJfOCldUqvVt9P5LPAKqqVezgR2BS6g+kfhNzLzIxHxduDlmfmP9efuz8yXR8S3gJuplopYBJxEtQ7OKqqvbt9W7+P85/DXkjRkHKGS1HanA0/WNys+E1hBtW7bqcAbgIMi4sgZ9vGi+oaulwMnZea19fPN9Q1dDVOSihioJLXdfsB3ATJzE9Ud7p/OzPvr1eI3Uo04/UG9avNkl9X//SXwvGa7K2kUGagktd0PqVdVjohXATcAcyJicUTMAQ6huuXOk8BY/ZkTp+yj2/34dqr3O6ffnZY0WgZ16xlJ6tWlwCURsbF+/l6qOVTX8Mwcqg0RMQb8TUR8BvhFj/v+XER8B5jLEN4KQ9Jzx0npkiRJhbzkJ0mSVMhAJUmSVMhAJUmSVMhAJUmSVMhAJUmSVMhAJUmSVMhAJUmSVOj/ANTw5ICdallaAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(10,3))\n", "sns.countplot(y = 'c210mys',data=df)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, '购买数量')" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEFCAYAAAASWssjAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAFkJJREFUeJzt3X+cXXV95/FXMgkYEoKhDMEAC6TL4yMsGPkhkEJo+NWFGtCiPqCyCGuguPwoSo3FFUqbpo/sg3TVFAqFGpAqilRbDD9luyuYuCgQ+bVVPxvYBmkgdULCjxCIQGb/OCdhDGTOzcy999yZeT0fj3nk3O+Ze8/nMsN9z/d8z/d7RvX29iJJUn9G112AJKnzGRaSpEqGhSSpkmEhSapkWEiSKo2pu4BW6el52cu8JGkbdHfvOGpr++xZSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkioZFpLU4ebPn8u8eVdw9913bG6bPfvMttYwbJf7kFrp4gWLB/zchXNOaWIlGgmefHI5ixZ9rdYaDAtJGqSzzvp9DjnkUMaPn8Ds2efx2GOPcOedi9mw4TUOPfRwTj75w1x33V/z4osvsHr1aj772Uu5/fbb+OlP/5k99tiDsWO348ILP/22133uuWf5zndupafnl1x99ZeZNetD7L33Pm/7vqVL72fx4tsYO3YMs2Z9mOnTj2Thwv/OmjWrefPNNznqqN/mxBM/OKj36GkoSRqklSuf4SMfOY3Zs88D4G/+5momTtyJyZN349FHfwLAU08tJ2I/zj57NhMn7gTAccedwGc+8zmeeeZpVq9e/bbXfc97pnDhhZ+mu3tXLrzw0+8YFAC7774nJ530Qbq7J3Pfff8TgEceWcbll/85zz777KCDAgwLSRq0vfbah91336NPSy9nn30Of/AHF3DggdMAOOusc9hnn6nccsvNPPzwgwC8/vrrALzxxpuMHr3VBV8rXX31l9m4cSMzZvw2GzduZOPGjUyZsjvz58/dHGCD5WkoSWqyc889n3nz/oRRo0Yzc+axAHz/+//E+vWvsH79K+y223vI/BlLl97PE088xtSpv8nOO//GgI+3xx578Pjjj7Ju3TrWrXuZN954g1WrniNiPx577BH22WcqU6bsPqj3NKq3d3je9sH7WaiVHODWYC1adB3vfe/+HHnkDABWr+5h6dL7f+17PvCBI7bosTTmF79Ywbe+9Q12330PVq16jiOPPJrDD59e+bz+7mdhWEgDYFhoOPLmR5KkQTEsJEmVDAtJUiWvhpKkJhvMmNY76YRxLnsWkqRK9iwkaRiYP38uK1b8C9OnH8nZZ5/T9Ne3ZyFJQ9z99/8vNm7cyHXX3cizz67kmWd+0fRjGBaSNMQ98sgyjj32eAAOO+wIHn/80aYfw7CQpCHu1VdfZZdddgVg4sSdWLNmTdOPYVhI0hA3btwObNiwAYBXX11Pb+/Gph/DAW5JarJ2X+oa8V4ef/xRDjjgQJ58cjl77rlX049hWEjSEHf00TM5//xzef75Hn70o//Ndd
d9tenHMCwkaYgbP34CV111HQ899GM+/vFPMGHChKYfo+VhERGTgW9n5oyIGAv8A7AzsCgzbxhMW6trl6ShYuLEiRx33Akte/2WDnBHxCTgJmB82XQRsCwzjwQ+GhE7DrJNktQGre5ZvAmcBny3fDwTuLTc/gFw6CDbvr+1A0+atANjxnQN+g1Izdbd7d85GnpaGhaZ+RJARGxqGg+sLLfXAJMH2bZVa9euH3T9Uiv09LxcdwlqsTl3XNbU11swa15TX29r+vtDpt3zLNYB48rtCeXxB9MmSSqtWfM855/f/HWhoP0fuMuAo8rtacCKQbZJkoCXXnqJefP+lNdee7Ulr9/uS2dvAu6KiBnA/sCPKU4tDbRNkgR0dY1m7tz5XHrpJS15/bb0LDJzZvnv08AJwA+B4zPzzcG0taN2SRoKxo+f0JL5FZu0fVJeZj4L3NqsNklS6zlILEmq5HIfktRk7brUtZ3sWUjSMHL11de35HUNC0lSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSpTHtPFhETAJuBnYFlmXmeRGxCNgfuDMz55Xf11CbJKk92t2zOBO4OTMPBXaMiM8BXZk5HZgaEftGxKmNtLW5bkka0draswCeBw6IiHcDewIvAreW++4FjgIOarBteX8HmjRpB8aM6Wpq8VIzdHfvWHcJ0jZrd1gsBT4I/CHwM2A7YGW5bw1wMDC+wbZ+rV27vmlFS83U0/Ny3SVI76i/P2TafRrqCuBTmTkX+DnwcWBcuW9CWc+6BtskSW3S7g/dScCBEdEFHA78N4pTSgDTgBXAsgbbJElt0u7TUPOBG4G9gAeALwFLImIKcBJwBNDbYJskqU3aGhaZ+SDwH/q2RcRM4ATgysx8cVvaJEnt0e6exdtk5lreutJpm9okSe3hQLEkqZJhIUmqZFhIkioZFpKkSoaFJKlS7VdDSSPNnDsuG/BzF8xywWXVw56FJKmSYSFJquRpKHW8ixcsHvBzF845pYmVSCOXPQtJUiXDQpJUybCQJFUyLCRJlRoOi4iYtpX2XZtXjiSpE21Lz+LKLRsi4t8D32xeOZKkTlR56WxEPA38FHghIs4F5gA7AF8Hfgt4oaUVSpJq10jPYk/gDmAjMJYiLJYDzwFvtq40SVKnaPQ0VO8W271btEmShrFtmcHdC4wC5gNTgH8H7AK83IK6JEkdZFsGuEdRBMZ/BR4FrgIeKb8kScPYQOZZeBpKkkaYRsPiDKCLondxJXAIxcD32BbVJUnqIJVjFpk5GiAivgdcD3y13DUKOAa4uFXFSZI6w7YMcN+Sma8Dr/dpuz0iVjS1IklSx2noNFRE7JOZN25l97NNrEeS1IEa7Vl8PSK+RhEMy4GfZ2ZvRHQBiyPiosz8ScuqlCTVqtGw2Ai8CuwHnAgcUC4DMh6416CQpOGt37CIiPkU60KNz8ybImIUMA34XeB0ihC5rOVVSpJqVTVm8T2gG/hFRPwceAX4LMWkvIMpQmNRRGy/LQeNiGsi4uRye1FEPBARl/XZ31CbJKk9qsJiX2AJxQS8DwDfBp6nWOpjR+AoYCnFJbQNiYgZwG6ZeXtEnAp0ZeZ0YGpE7Nto27a9TUnSYFSNWXwfOBbYGfgz4IHMvDYingJOAyYD78vMNxo5WESMBf4WuCsiPgTMBG4td99LET4HNdi2vL9jTZq0A2PGdDVSloax7u4d6y6hqYbb+9HQURUW36U49RTA9sAxEdEDPAWcA/wQ+H3gaw0e7xMUYyBXAhcBFwCLyn1rKE5tjQdWNtDWr7Vr1zdYkoaznp7htc7lcHs/6iz9/TFSdRrqEODzFCvLbgBOpjgFdQRwA8WH/ekR8a4GazkIuD4zV1HcPOkHwLhy34Synn
UNtkmS2qTqQ/dz5b8rKYLhu8DfAcuAuRSnqW6juJy2EU8CU8vtQ4G9KU4pQXGV1YrytRtpkyS1Sb+noTJzLkBEfCQz/zUiPkVxG9U/zswHy33fyMxXGjzeIuCGiDidYhHCmRST+qYAJ1H0WHqBJQ20SZLapJF7cE/IzH8FyMyHyrkWO/f5ltcaPVhmvgx8bIvXnwmcAFyZmS9uS5skqT0amcH9y4i4DfizzEyKU1efA+6JiMnA3RHxgcwc0P24M3Mtb13ptE1tkqT2aGSg+GHgFmA7gDIUNl0quwBYONCgkCQNDY30LN4AlpR/2W8WEdOAKZl5U0sqkyR1jEbCYhTFkh7vA/6NYjLcZIplQC5qYW2SpA7RyGmo3sw8lWIi3DHAlyjC4iqKlWglScNcozc/2gW4i+IS1rXAExT35b61vKeFJGkYayQsRgF/BMwpvz5K0dv4CfAtisl6kqRhrJGwGA18AXiEYhLdl/vsuwY4q/llSZI6Sb9hUU7A+4fM3JiZr1HMnh4FvAsgM1+lWEF2fMsrlSTVpmq5j15gYUS8H/gVxWW0rwNnRMSuwKuZeXnry5QGZs4dA79X1oJZ85pYiTS0NXoP7nuB/0ExMa+rfN4YYHJE3J+Zl7SoPklSB2h0qe+nMvOMzPwYsBB4DngIOAz4vVYVJ0nqDFVjFpvuhNFbPt6bYlnxycCXymU+jm9hfZKkDlDVs5gTEUuBiRExFbie4r4Sz1PelyIzn2ptiZKkuvUbFpn5J8ApwFcobp16DsW9uCdQ3OpUkjQC9DvAHRFjgL+iuApqFcXd8UYBL2Xmkoi4BBifmX/e8kolSbWpuhqqi6IXcQWwC8W9sNcCL0XEmcBvAWe2tEJJUu0auXR2PbAb8EmKORa7UNwpbxUwv5yYJ0kaxhq5dHYGcB7FHIvxFCvNrgRuAi6PiEtbV54kqRNUzeDeEBGHU/Qoesuv0eXzeoB/pFhYUJI0jFWehsrMZyu+xftiS3pHFy9YPODnLpxzShMr0WA1OoNbkjSCGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmq1OhtVZsqIiYD92TmQRGxCNgfuDMz55X7G2qTJLVHXT2LvwTGRcSpQFdmTgemRsS+jbbVVLckjUht71lExLHAKxSr1s7kreVC7qW4C99BDbYt7+84kybtwJgxXc0sXSNMd/eO1d/UZp1YU6uMpPc6FLQ1LCJiO+By4PeA2yhWsV1Z7l4DHLwNbf1au3Z90+rWyNTT83LdJbxNJ9bUKiPpvXaK/gK63aehLgWuycwXysfrgHHl9oSynkbbJElt0u4P3eOBCyLiPuD9wMkUp5QApgErgGUNtkmS2qStp6Ey8+hN22VgnAIsiYgpwEnAERT3zGikTZLUJrWdzsnMmZn5EsUg94+AYzLzxUbb6qlakkamWuZZ9JWZa9niBkqNtkmS2sOBYklSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSpdpXnZWkdzLnjssG/NwFs+Y1sRKBPQtJUgMMC0lSJcNCklTJMQtpmLh4weIBP3fhnFOaWImGI3sWkqRKhoUkqZJhIUmqZFhIkioZFpKkSoaFJKmSYSFJqmRYSJIqGRaSpEqGhSSpUluX+4iInYBbgC7gFeA04Fpgf+DOzJxXft+iRtokSe3R7p7FGcAXM/N3gFXA6UBXZk4HpkbEvhFxaiNtba5bkka0tvYsMvOaPg+7gf8EfLl8fC9wFHAQcGsDbcv7O9akSTswZkxXcwrXiNTdvWPdJbxNq2rqxPc6GMPt/XSCWladjYjpwCRgBbCybF4DHAyMb7CtX2vXrm9ewRqRenperruEt2lVTZ34XgdjuL2fdukvZNs+wB0ROwNXAZ8E1gHjyl0TynoabZMktUlbP3QjYjvg74HPZ+bTwDKKU0oA0yh6Go22SZLapN2noWZTnEL6QkR8Ab
gRODMipgAnAUcAvcCSBtokNcmcOy4b8HMXzPLixJGg3QPc11JcKrtZRCwGTgCuzMwXy7aZjbRJktqj9tuqZuZa3rrSaZvaJEnt4UCxJKmSYSFJqmRYSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkioZFpKkSoaFJKmSYSFJqmRYSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkiqNqbsAdZaLFywe8HMXzjmliZVI6iT2LCRJlQwLSVIlw0KSVMmwkCRVGlID3BGxCNgfuDMz59Vdj37dnDsuG9DzFszyRyl1uiETFhFxKtCVmdMj4oaI2Dczlzfy3MFc4bPdfg8O+Ll+CEqdqY7PhKH+eTCqt7e37hoaEhF/BdyTmXdFxOnAuMy8se66JGkkGEpjFuOBleX2GmByjbVI0ogylMJiHTCu3J7A0Kpdkoa0ofSBuww4qtyeBqyorxRJGlmGzAA3cBuwJCKmACcBR9RcjySNGENmgBsgIiYBJwA/yMxVddcjSSPFkAoLSVI9htJpKGmbRMTOwCHAI5m5uu56pKHMnkU/OnXGeERMBr6dmTM6oJadgFuALuAV4LTM/FW9VW0+ZXln+XU6cGxm9tRbVaH8+d2TmQd1QC1jgP9XfgFclJlP1FjSZhFxDXB3Zt5edy0AEfFfgNPKh+8GfpyZ59VY0qbf85uBXYFlraxnKF0N1VZ9Z4wDUyNi37prgs2/HDdRzDvpBGcAX8zM3wFWASfWXM8m7wMuycy/AL4HHFxzPX39JW9dBl639wHfzMyZ5VenBMUMYLdOCQqAzLx2038nYAnwtzWXBHAmcHNmHgrsGBGHtupAnobaupnAreX2vRSX7Ta0vEiLvUnx18136y4EIDOv6fOwG/hlXbX0lZn3A0TE0cBhwNx6KypExLEUPbBOuUDjCGBWRBwDPAGcl5lv1FlQRIyl+CC+KyI+lJkd8bu+SUTsDkzOzIfrrgV4HjggIt4N7Ak806oD2bPYuo6cMZ6ZL2Xmi3XXsaWImA5Myswf1V3LJhExiiJY1wKv11wOEbEdcDlwad219PEQcHxmHgaMBX635noAPgH8FLgSOCwiLqq5ni1dAFxbdxGlpcBewB8CP6P4rGoJw2LrnDHeoHIg+Srgk3XX0ldm9mbmBcDjQCfc8/VS4JrMfKHuQvp4PDOfK7cfBjrhdOtBwPXl5fFfB46puZ7NImI0RT331VzKJlcAn8rMucDPgf/cqgP5Abh1zhhvQPnX8t8Dn8/Mp+uuZ5OI+OOI+ET58N1AJ3xAHw9cEBH3Ae+PiK/UXA/A1yJiWkR0AR8GHqu7IOBJYGq5fSjQMb9XwAyKge1OuTJoEnBg+fM7HGhZXY5ZbJ0zxhszm2Lw+AsR8QXg2sz8Vs01AVwP3BoR5wD/h2LcqVaZefSm7Yi4LzPPqbOe0lzgG8AoYHFm/lPN9QAsAm4oV5ceC3y05nr6+o/AD+ouoo/5wI0Up6IeAL7ZqgN56Ww/nDEuSQXDQpJUyTELSVIlw0KSVMmwkCRVMiykFomI7SPiXX0e/2ad9UiDYVhITRQRx0XEF8uHFwHXbGoH7ikX7mvkdca2qERpQJxnITXXEuCSiDgcWAh8qZy4eAUwe9O6SxFxCvAV3lrtdUs/o4WzcaVt5aWzUpOUC8x95x12bQcExUJ9UMwCPgr4eGaeWz73NIpl1Gtd8lraGnsWUvOMBV4rl7B+RxHxJMWSDL1Ab0SMzczXKf5fbNkicNJg2bOQmiQitgf2pug1fIpiKXIobgw1DvhTitVU/4ViMbqPAb+iWEpmMkWA/BswEfi/mfnh9lUv9c+ehdQkmbkByPJrEUBEHEkxNvEAxZ3Mnivbd6bohXymfLwA+GFm3hYRJ9MZS4VLm3k1lNQkETGqXMK6r/kUK/J+MjOf67O/m18/7XQM8ONy+zfwlJQ6jD0LqXneT7Fa6oY+be8FroiITTc8Gl
v2HA4EHgSIiBOBnj73lXgPnbGkurSZYxZSC0XEPcCFmflkn7bRwFO8db/yu4FTM/PRiNgV+Gvgzsz8arvrlbbGnoXUAuWH/qnAfsCGLXafSjGQ/RLFqac/ysxHy33fobjUtvb7b0h92bOQWqCcqT0H+OfMXPwO+3fOzDURsX05MC51NMNCklTJq6EkSZUMC0lSJcNCklTJsJAkVfr/Pem//4fniZQAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#不同学历情况下,购买保险的情况\n", "sns.countplot(x='c210mys',hue='resp_flag',data=df)\n", "plt.xlabel('学历')\n", "plt.ylabel('购买数量')" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Text(0, 0.5, '购买数量')" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYsAAAEECAYAAADZBhiGAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvnQurowAAF8lJREFUeJzt3XuUXWWZ5/FvSEKTVBKsSBEMIJBp5lEGiShKYhIIl4go0BLpBYIILdj0IAg6RKOALpWe9BC7FUNrRw2It0YaGQiXAWZarq0iHbnYTfsM0BPEhLQVUuQKSJKaP/YORDS1T6XqnF1V+X7WqpVdzzlV+zm1ss7vvPvd+93Duru7kSSpJzvV3YAkaeAzLCRJlQwLSVIlw0KSVMmwkCRVGlF3A83S2bnW07wkqRc6OsYO29ZjjiwkSZUMC0lSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUqenXWUTEBOD6zJyxVe1m4NLMfDgiRgI3AOOBRZl5VaO1ZvcuSSo0dWQREe3ANUDbVrXTgCcz8+GydD6wJDOnASdFxNhe1CRJLdDskcUm4GTgJoCIGA/8NfC1iDgiM+8CZgJzy+ffCxzSi9pd29pxe/toRowY3n+vRJJq8ulPf5pNmzYxZcoUTjzxRABmz57NDTfc0LIemhoWmbkGICK2lD4G/AOwEJhXjg7agGXl46uACb2obVNX14Yee7tg/uLGX0iTXDHnhLpbkDQI/Mu/PMaiRd8BoLNzLQAbN25+ebu/dHRs+4BNq9eGOhi4KDNXRMR1wCxgHTAKWA2MKb9vtCZJtTvjjPfz1rceQlvbGM466xweeeQhbr11MS+++AKHHHIoxx//XhYu/FtWr36OlStXctFFc7n55ht57LF/Za+99mLkyJ0577wLf+/3PvPMcn74w+vo7PwNV175ZY477k/Yd9/9fu95999/D4sX38jIkSM47rj3MnXqNK644q9ZtWolmzZtYvr0w3nXu97Tp9fY6rOhngAmlduHAE8BS4DpZW0ysLQXNUmq3bJlT/O+953MWWedA8Df/d2VjBu3KxMm7MHDD/8cgCeffJyIN3LmmWcxbtyuABx11Cw+9rFP8PTTT7Fy5crf+72ve91EzjvvQjo6due88y78g0EBsOeee3Psse+ho2MCd9/9jwA89NASLr30CyxfvrzPQQGtH1lcDnwzIi4GNgCzKc5uui0iZgAHAA9QHG5qpCZJtdtnn/3Yc8+9tqp0c+aZZ7PLLrtwyy03AXDGGWezadNGrr32e7zznccC8NJLLwGwceMmdtppmwu+Vrryyi/z7ncfx4wZh3P77beyefNmJk7ck3nzPv9ygPVVS8IiM2eW/y4H3v2qh9dGxCyKUcNnMnMT8FSDNUkacD784XO57LLPMGzYTsyceSQAd931f9iwYT0bNqxnjz1eR+a/cf/99/CLXzzCpEn/ifHjX7vd+9trr7149NGHWbduHevWrWXjxo2sWPEMEW/kkUceYr/9JjFx4p59ek3DuruH5m0fqu5n
4QS3pDotWrSQN7zhAKZNKy5BW7myk/vvv+d3nvO2t0151YilMb/61VJ+8IPvs+eee7FixTNMm3YYhx46tfLnerqfhWFRI8NC0kDizY8kSX1iWEiSKhkWkqRKrT51VpKGvP6eEx0I85uOLCRJlRxZSNIQMG/e51m69P8xdeo0zjzz7H7//Y4sJGmQu+eeH7F582YWLrya5cuX8fTTv+r3fRgWkjTIPfTQEo488mgA3v72KTz66MMVP9F7hoUkDXLPP/88u+22OwDjxu3KqlWr+n0fhoUkDXKjRo3mxRdfBOD55zfQ3b253/fhBLck9bNWn+oa8QYeffRhDjzwTTzxxOPsvfc+/b4Pw0KSBrnDDpvJued+mGef7eSnP/0xCxd+q9/3YVhI0iDX1jaGBQsW8uCDD3DqqR9kzJgx/b4Pw0KShoBx48Zx1FGzmvb7neCWJFUyLCRJlZp+GCoiJgDXZ+aMrWoHAl/KzFkRMRK4geJe3Isy86pGa83uXZK2x5xbLunX3zf/uMv69fdtj6aOLCKiHbgGaNuqNgz4G2BkWTofWJKZ04CTImJsL2qSpNKqVc9y7rn9vy4UNH9ksQk4Gbhpq9qfAXcBx5TfzwTmltv3Aof0onbXtnbc3j6aESOG97H95uroMO8kVWvkvWL16tVcfvkX2Ljxt015b2lqWGTmGoCIoPz3tcAHKIJiS1i0AcvK7VXAhF7Utqmra0N/vISm6uxcW3cLkgaBRt4r1q/fwCWXXMbcuR/f7veWnkKm1RPcfwV8KjNf2qq2DhhVbo8pe2q0JkmiuNaiGddXbNHqN9zDgf8REXcDb46Iy4AlwPTy8cnA0l7UJEkt0NKL8jLzP2/Zjoi7M/OSiNgHuC0iZgAHAA9QHG5qpCZJaoGWhEVmztxWLTOfiohZFKOGz2TmJqDRmiQNOAPhVNf+Nqy7u7vuHpqis3Ntjy+sv2+ovj0Gwk3YJWmLjo6xw7b1mJPEkqRKhoUkqZJhIUmqZFhIkioZFpKkSoaFJKmSYSFJqmRYSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkioZFpKkSk2/rWpETACuz8wZEfF64NvAZuAJ4JyyhxuA8cCizLwqIkY2Umt275KkQlNHFhHRDlwDtJWlc4D/mplHAnsDbwLOB5Zk5jTgpIgY24uaJKkFmj2y2AScDNwEkJkXb/XYa4GVwExgblm7FzikF7W7trXj9vbRjBgxvO+voIk6Osw7SYNDU8MiM9cARMTv1CPiZOBfM3N5RLQBy8qHVgETKEYijdS2qatrQz+8gubq7FxbdwuS9LKePsC2fII7IiYBFwEXlqV1wKhye0zZU6M1SVILtPQNt5zD+HvgQ5m5uiwvAaaX25OBpb2oSZJaoOlnQ73KXOD1wILy0NRnKSbAb4uIGcABwAMUh5saqUmSWmBYd3d33T0QERMpRg13bBlxNFrbls7OtT2+sAvmL+6P1vvkijkn1N2CJL2so2PssG091uqRxR+UmcuB67anJklqPieJJUmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSJcNCklRpQKwNtaOac8sldbcAwPzjLqu7BUkDnCMLSVIlw0KSVMmwkCRVMiwkSZUMC0lSpaafDRURE4DrM3NGRIwEbgDGA4sy86q+1JrduySp0NSRRUS0A9cAbWXpfGBJZk4DToqIsX2sSZJaoNkji03AycBN5fczgbnl9r3AIX2s3bWtHbe3j2bEiOF9fgE7go4Oc1dSz5oaFpm5BiAitpTagGXl9ipgQh9r29TVtaHP/e8oOjvX1t2CpAGgpw+ODR+GiojJ26jv3ote1gGjyu0x5f77UpMktUBv3nAvf3UhIv4Y+Pte/I4lwPRyezKwtI81SVILVB6GioingMeA5yLiw8AcYDTwXeAdwHO92N81wG0RMQM4AHiA4tDS9tYkSS3QyMhib+AWYDMw
kiIsHgeeoZjArpSZM8t/nwJmAf8EHJ2Zm/pSa/hVSpL6pNEJ7u5XbXe/qtawzFwOXNdfNUlS8/XmbKhuYBgwD5gIvB7YDfBUGkka4nozwT2MIjA+DTwMLAAeKr8kSUPY9px+2qfDUJKkwafRsDgNGE4xurgceCvFxPfIJvUlSRpAKucsMnMngIi4A/g68K3yoWHAEcAFzWpOkjQw9GaC+9rMfAl4aavazRGxtF87kiQNOA0dhoqI/TLz6m08vLwf+5EkDUCNjiy+GxHfoQiGx4FfZmZ3RAwHFkfE+Zn586Z1KUmqVaNhsRl4Hngj8C7gwHIZkDbgToNCkoa2HsMiIuZRrAvVlpnXRMQwikX83g2cQhEilzS9S0lSrarmLO4AOoBfRcQvgfXARRQX5b2FIjQWRcQfNbVLSVKtqsJif+A+igvw3gZcDzxLsdTHWIolw++nOIVWkjREVc1Z3AUcCYwHPgf8JDO/FhFPUtwudQJwUGZubG6bkqQ6VYXFTRSHngL4I+CIiOgEngTOplgu/P3Ad5rZpCSpXlWHod4KfIpiZdkXgeMpDkFNAa4CPgKcEhG7NLNJSVK9qsLiE+W/yyiC4Sbg2xS3OP08xWGqGylOp5UkDVE9HobKzM8DRMT7MvPXEfEXFLdR/WRm/qx87PuZub75rUqS6tLIPbjHZOavATLzwfJai/FbPeWFRncWEe3A94DdgSWZeU5ELKK4p/atmXlZ+byGapKk1mhkbajfRMT3IyK2+plPAETEBODBctmPRpwOfC8zDwHGRsQngOGZORWYFBH7R8TsRmq9eI2SpD5qZLmPfwauBXYGyMxNEbHlVNn5wBWZuanB/T1LsVTIayjuh7GaV+6pfSfFdRsHN1h7vKcdtbePZsSIRjNsx9bRMbbuFiQNcI2ExUbgvszs2roYEZOBiZl5TS/2dz/wHuCjwL9RBNCy8rFVFFeFtzVY61FX14ZetLVj6+z0NuqSev7g2EhYDKNY0uMg4D8oPtFPoFgG5Pxe9vJZ4C8yc01EfBz4S+Ab5WNjKA5xrQNGNVCTJLVII2+63Zk5m+LT/BHAlyjCYgHFSrS90Q68qZzjOBT4K4pDSlAsULiU4rTcRmqSpBZpaInyiNiN4nqKI4Au4BfAJ4HrImJqL+Ys5gFXA/sAP6EInvsiYiJwLMXFft0N1jSAXDB/cd0tcMWcE+puQRqyGj0M9d+AOeXXCxSjjZ9HxA8oLtb7SiM7K6/N+C9b1yJiJjALuDwzV/emJklqjUbCYifgYorJ6M9RXK295Yrtr1JMWjcUFn9IOXF+3fbUJEmt0eOcRXkB3g2ZuTkzX6A4BDQM2AUgM58HbouItqZ3KkmqTdVyH93AFRHxZuC3FKfRvgScFhG7A89n5qXNb1OSVKdG78F9J/C/KQ5FDS9/bgQwISLuycyPN6k/SdIA0Oj1Ck9m5mmZ+afAFcAzwIPA24ETm9WcJGlgqJqz2HI5X3f5/b7AvhTXWXypPGX26Cb2J0kaAKpGFnMi4n5gXERMAr5OcXHcs8AkgMx8srktSpLq1mNYZOZngBOAb1LcOvVsitNnx1Cs1yRJ2gH0OMEdESMorqHYCKyguDveMGBNZt5Xru/UlplfaHqnkqTaVJ0NNZxiFPFZYDeKBf26gDURcTrwDop7VEiShrBGTp3dAOwBfIjiGovdKO6UtwKYV16YJ0kawho5dXYGcA7FNRZtFCvNLgOuAS6NiLnNa0+SNBBUXcH9YkQcSjGi6C6/dip/rhP4n8BJzW5SklSvysNQmbm84iku7idJQ5x3nJMkVTIsJEmVDAtJUiXDQpJUqdElyvtVRHwV+F+ZeXNELAIOAG7NzMvKxxuqSZJao+Uji4iYAexRBsVsYHhmTgUmRcT+jdZa3bck7chaOrKIiJHANyhuxfonwExeOfX2TooVbQ9usPZ4T/tqbx/NiBHD+7P9IaujY2z1kwaBofI6pIGo1YehPgg8BlwOnA98BFhUPrYKeAvF
VeLLGqj1qKtrQ781PdR1dq6tu4V+MVReh1SXnj5wtfow1MHA1zNzBfBd4F5gVPnYmLKfdQ3WJEkt0uo33Scob5oEHEJx173p5feTgaXAkgZrkqQWafVhqEXAVRFxCjCSYs5icURMBI4FplCsP3VfAzVJUou0NCwycy3wp1vXImImMAu4PDNX96YmSWqNWq6z2FpmdvGqxQgbrUmSWsOJYklSJcNCklTJsJAkVTIsJEmVDAtJUqXaz4aS+sucWy6puwUA5h/nosgaehxZSJIqGRaSpEqGhSSpkmEhSapkWEiSKhkWkqRKhoUkqZJhIUmqZFhIkioZFpKkSoaFJKlSLWtDRcQE4PbMPDgiFgEHALdm5mXl4w3VJEmtUdfI4ovAqIiYDQzPzKnApIjYv9FaTX1L0g6p5SOLiDgSWA+sAGbyyn217wSmAwc3WHu8p/20t49mxIjh/dn6kNXRMbbuFoYU/54ailoaFhGxM3ApcCJwI9AGLCsfXgW8pRe1HnV1bei3voe6zs61dbcwpPj31GDV0wedVh+Gmgt8NTOfK79fB4wqt8eU/TRakyS1SKvfdI8GPhIRdwNvBo6nOKQEMBlYCixpsCZJapGWHobKzMO2bJeBcQJwX0RMBI4FpgDdDdYkSS1S2+GczJyZmWsoJrl/ChyRmasbrdXTtSTtmGq/B3dmdvHKmU69qkmSWsOJYklSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSJcNCklTJsJAkVar9fhaSftcF8xfX3QIAV8w5oe4WNIA4spAkVWrpyCIidgWuBYYD64GTga8BBwC3ZuZl5fMWNVKTJLVGq0cWpwF/k5nvBFYApwDDM3MqMCki9o+I2Y3UWty3JO3QWjqyyMyvbvVtB/AB4Mvl93cC04GDeeVe2z3VHu9pX+3toxkxYnj/ND7EdXSMrbuFIWWo/D2HyutQ/6hlgjsipgLtwFJgWVleBbwFaGuw1qOurg391/AQ19m5tu4WhpSh8vccKq9DjevpA0LLJ7gjYjywAPgQsA4YVT40puyn0ZokqUVa+qYbETsD/wB8KjOfApZQHFICmEwx0mi0JklqkVYfhjqL4hDSxRFxMXA1cHpETASOBaYA3cB9DdQkSS3S6gnur1GcKvuyiFgMzAIuz8zVZW1mIzVJzTPnlkvqboH5x3mW/EBR+xXcmdnFK2c69aomSWoNJ4olSZUMC0lSJcNCklTJsJAkVTIsJEmVDAtJUiXDQpJUybCQJFUyLCRJlQwLSVIlw0KSVMmwkCRVMiwkSZUMC0lSJcNCklSp9vtZSFIzXTB/cd0tALDzG39Wdwt9upmUIwtJUqVBNbKIiEXAAcCtmen9FiWpRQbNyCIiZgPDM3MqMCki9q+7J0naUQzr7u6uu4eGRMRXgNsz87aIOAUYlZlX192XJO0IBs3IAmgDlpXbq4AJNfYiSTuUwRQW64BR5fYYBlfvkjSoDaY33CXA9HJ7MrC0vlYkaccymM6GuhG4LyImAscCU2ruR5J2GINmghsgItqBWcC9mbmi7n4kaUcxqMJCklSPwTRnIUmqyWCasxhSImI88BTQkZkv1N3PYBYR36I46WE18B/AqZm5qdamBrmI+AJwFMXf8/TMXFdzS4NS+X/zzcBGYGFmfqPejrafI4v6zAJ2AQ6ru5Eh4vzMnAl0Ae+suZdBLSLeAcwApgF3An9eb0eD3nnAMcBnI+KgupvZXoZFfd4F/G35r/rPbsD6upsY5I4BbsvMbuAO4PGa+xn0MvNZ4FYG8YdDD0PVZyrFdSP/WHcjQ8SCiBhFcXX/T+puZpCbAPwzQGb+O/Dv9bYzZDwLvKbuJraXI4salEPR3YDrgX0jYu+aWxoKzqdYkfhBYG7NvQx2ayhWSSAi3h4Rc2ruZ6gYT/FhZlAyLOpxDPDfy2PsXym/Vx9l5maKOYuxdfcyyP0TxZwawOHA
8zX2MiRExGsoLib+Ud29bC8PQ9XjGOCicvtHwEeAb9bXzpCwICI2lNun1trJ4LcYODoifgysBN5fcz+D3QLgReCTmfnLupvZXl6UJ0mq5GEoSVIlw0KSVMmwkCRVcoJbqklEnEoxmfw8sLm8CE4akJzglvpZRHyT4n4rz21V3hV4KDM/WD5nd4p7tMwAPgecAfya4urzqcDrMnNNK/uWeuLIQup/21oY8rdbbV8KXAicC9xGcWXvAmA5cIdBoYHGsJCa46LMvH3LNxExE/hAuX08cDTFKrknAN8CRlKEyTuAe1vcq1TJCW6pOb4YEfdv+aIYNWzxMPBJ4CCKC7XWAuOADmAP4PDygjhpwDAspOa4KDOnb/miWLsKgMx8utx8ErgzIoYB+wEfy8xrKNa2uqvlHUs9cIJb6icRcRgwj2LV1ucobniz5aZB4ygWj1wGXAd8Efg5xaJ9sylWH/6/wNeB04GrM/Onrexf6olhITVBRJwHHJSZf17eFfHjwGcyc3NEDKcIlJ2BI8ofmQhcTXEq7frMPLyOvqVtMSykflQGwUcpbvP6UWA0xZlOV1KcGvtnFKfGLgQeAR4D3gscCewFXAv8BnhfOZchDQiGhdSPIuKPgXso5iOeATrLr5UUN7u6A/h2+fRu4IfAfOA95ePnAAcCfwl8MDN/1sr+pW0xLKQWKUcdf/BK7YiYBvx4y2MRsR+wLDN/++rnSnUwLCRJlTx1VpJUybCQJFUyLCRJlQwLSVIlw0KSVOn/A9B4vS0107PmAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Insurance purchases broken down by county level\n", "sns.countplot(data=df, x='N2NCY', hue='resp_flag')\n", "plt.xlabel('县级')\n", "plt.ylabel('购买数量')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 数据预处理" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 空值填充" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "# Collect the dtype of every column listed in NA.Var (consumed by the next cell as `temp`)\n", "temp = [df[col].dtypes for col in NA.Var]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
VarNA_count数据类型
0AASN10object
1ASKN8object
2COLLEGE8object
3MOBPLUS7object
4N2NCY10object
\n", "
" ], "text/plain": [ " Var NA_count 数据类型\n", "0 AASN 10 object\n", "1 ASKN 8 object\n", "2 COLLEGE 8 object\n", "3 MOBPLUS 7 object\n", "4 N2NCY 10 object" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "NA['数据类型']=temp\n", "NA.head()" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [], "source": [ "# Imputation strategy\n", "\n", "# Categorical variables: the most frequent level is also the most likely value, so fill with the mode\n", "\n", "# Numeric-coded variables (happiness index, income rank) are really categorical levels, so they get the mode as well" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 AASN\n", "1 ASKN\n", "2 COLLEGE\n", "3 MOBPLUS\n", "4 N2NCY\n", "5 NY8Y9\n", "6 POEP\n", "7 LIVEWELL\n", "8 HOMSTAT\n", "9 HINSUB\n", "11 c210b200\n", "12 c210cip\n", "13 c210hmi\n", "14 c210hva\n", "15 c210mah\n", "16 c210psu\n", "17 c210wht\n", "18 ilor\n", "19 meda\n", "Name: Var, dtype: object" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Columns that contain missing values, excluding age\n", "NA[NA.Var != 'age'].Var" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# Fill every column with missing values (except age) with its mode.\n", "# Assign the result back instead of calling fillna(inplace=True) on df[i]:\n", "# the in-place call acts on a selected Series and does not update df under pandas Copy-on-Write.\n", "for i in NA[NA.Var != 'age'].Var:\n", "    df[i] = df[i].fillna(df[i].mode()[0])" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "# Age is filled with the column mean (assigned back for the same reason as above)\n", "df['age'] = df['age'].fillna(df['age'].mean())" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": true }, "outputs": [ { "data": { "text/plain": [ "KBM_INDV_ID 0\n", "resp_flag 0\n", "GEND 0\n", "CA00 0\n", "CA03 0\n", "CA06 0\n", "CA11 0\n", "CA16 0\n", "AART 0\n", "ADBT 0\n", "ADEP 0\n", "AHBP 0\n", "AHCH 0\n", "ARES 0\n", "AHRT 0\n", "AASN 0\n", "ADGS 0\n", "AHRL 0\n", "ASKN 0\n", "AVIS 0\n", "BANK 0\n", "COLLEGE 0\n", "FINI 0\n", "INLI 0\n", "INMEDI 0\n", "INVE 0\n", "IOLP 0\n", "MOBPLUS 0\n", "N2NCY 0\n", "NY8Y9 0\n", " ..\n", "NAH19 0\n", "NPH19 0\n", "POC19 0\n", "HOMSTAT 0\n", "HINSUB 0\n", "STATE_NAME 0\n", "age 0\n", "c210apvt 0\n", 
"c210b200 0\n", "c210blu 0\n", "c210bpvt 0\n", "c210cip 0\n", "c210ebi 0\n", "c210hmi 0\n", "c210hva 0\n", "c210kses 0\n", "c210mah 0\n", "c210mob 0\n", "c210mys 0\n", "c210pdv 0\n", "c210pmr 0\n", "c210poo 0\n", "c210psu 0\n", "c210pwc 0\n", "c210wht 0\n", "ilor 0\n", "meda 0\n", "pdpe 0\n", "tins 0\n", "zhip19 0\n", "Length: 76, dtype: int64" ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.isnull().sum()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 变量编码" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "#把无效特征用户ID删掉\n", "del df['KBM_INDV_ID']" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
resp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
00M40511NNNNNNNNNNNNNNNNNNNSANNYNNYYYYNNNNNN1.0538YYCCA67.09911.010174.07190.0738.011164.00514526571.02279.015.064.04288
10M00000NNNNNNNNNNNNNNNNNNNPANNNNNNYNNNNNNNN4.0011UYUCA76.0986.015269.06984.0494.09756.00415448199.03765.017.061.04663
20F00000NNNNNNNNNNNNNNNNNNNMANNNNNNYYNNNNNNN3.0011UYUCA67.0884.0261232.04450.0516.08350.00417384462.04447.020.061.04673
30F04000NNNNNNNNNNNNYNNNNNNSBNNNYYNYYYNNNNNN1.0145YYCCA71.0964.015482.082103.0473.010552.00414457199.03971.04.062.03789
40F00000NNNNNNNNNNNNNYNNNNYMBNNNNNNYYNYYNYYN3.0011UUACA75.0884.091238.04755.0523.08950.010429321336.01565.09.061.03743
\n", "
" ], "text/plain": [ " resp_flag GEND CA00 CA03 CA06 CA11 CA16 AART ADBT ADEP AHBP AHCH ARES \\\n", "0 0 M 4 0 5 1 1 N N N N N N \n", "1 0 M 0 0 0 0 0 N N N N N N \n", "2 0 F 0 0 0 0 0 N N N N N N \n", "3 0 F 0 4 0 0 0 N N N N N N \n", "4 0 F 0 0 0 0 0 N N N N N N \n", "\n", " AHRT AASN ADGS AHRL ASKN AVIS BANK COLLEGE FINI INLI INMEDI INVE IOLP \\\n", "0 N N N N N N N N N N N N N \n", "1 N N N N N N N N N N N N N \n", "2 N N N N N N N N N N N N N \n", "3 N N N N N N Y N N N N N N \n", "4 N N N N N N N Y N N N N Y \n", "\n", " MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 N5N59 N6N64 N65P ONLA POEP SGFA SGLL \\\n", "0 S A N N Y N N Y Y Y Y N N \n", "1 P A N N N N N N Y N N N N \n", "2 M A N N N N N N Y Y N N N \n", "3 S B N N N Y Y N Y Y Y N N \n", "4 M B N N N N N N Y Y N Y Y \n", "\n", " SGOE SGSE SGTC U18 LIVEWELL NOC19 NAH19 NPH19 POC19 HOMSTAT HINSUB \\\n", "0 N N N N 1.0 5 3 8 Y Y C \n", "1 N N N N 4.0 0 1 1 U Y U \n", "2 N N N N 3.0 0 1 1 U Y U \n", "3 N N N N 1.0 1 4 5 Y Y C \n", "4 N Y Y N 3.0 0 1 1 U U A \n", "\n", " STATE_NAME age c210apvt c210b200 c210blu c210bpvt c210cip c210ebi \\\n", "0 CA 67.0 99 11.0 10 1 74.0 71 \n", "1 CA 76.0 98 6.0 15 2 69.0 69 \n", "2 CA 67.0 88 4.0 26 12 32.0 44 \n", "3 CA 71.0 96 4.0 15 4 82.0 82 \n", "4 CA 75.0 88 4.0 9 12 38.0 47 \n", "\n", " c210hmi c210hva c210kses c210mah c210mob c210mys c210pdv c210pmr \\\n", "0 90.0 738.0 111 64.0 0 5 14 52 \n", "1 84.0 494.0 97 56.0 0 4 15 44 \n", "2 50.0 516.0 83 50.0 0 4 17 38 \n", "3 103.0 473.0 105 52.0 0 4 14 45 \n", "4 55.0 523.0 89 50.0 10 4 29 32 \n", "\n", " c210poo c210psu c210pwc c210wht ilor meda pdpe tins zhip19 \n", "0 65 71.0 22 79.0 15.0 64.0 42 8 8 \n", "1 81 99.0 37 65.0 17.0 61.0 46 6 3 \n", "2 44 62.0 44 47.0 20.0 61.0 46 7 3 \n", "3 71 99.0 39 71.0 4.0 62.0 37 8 9 \n", "4 13 36.0 15 65.0 9.0 61.0 37 4 3 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": 
{}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
GENDAARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18POC19HOMSTATHINSUBSTATE_NAME
0MNNNNNNNNNNNNNNNNNNNSANNYNNYYYYNNNNNNYYCCA
1MNNNNNNNNNNNNNNNNNNNPANNNNNNYNNNNNNNNUYUCA
2FNNNNNNNNNNNNNNNNNNNMANNNNNNYYNNNNNNNUYUCA
3FNNNNNNNNNNNNYNNNNNNSBNNNYYNYYYNNNNNNYYCCA
4FNNNNNNNNNNNNNYNNNNYMBNNNNNNYYNYYNYYNUUACA
\n", "
" ], "text/plain": [ " GEND AART ADBT ADEP AHBP AHCH ARES AHRT AASN ADGS AHRL ASKN AVIS BANK \\\n", "0 M N N N N N N N N N N N N N \n", "1 M N N N N N N N N N N N N N \n", "2 F N N N N N N N N N N N N N \n", "3 F N N N N N N N N N N N N Y \n", "4 F N N N N N N N N N N N N N \n", "\n", " COLLEGE FINI INLI INMEDI INVE IOLP MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 \\\n", "0 N N N N N N S A N N Y N \n", "1 N N N N N N P A N N N N \n", "2 N N N N N N M A N N N N \n", "3 N N N N N N S B N N N Y \n", "4 Y N N N N Y M B N N N N \n", "\n", " N5N59 N6N64 N65P ONLA POEP SGFA SGLL SGOE SGSE SGTC U18 POC19 HOMSTAT \\\n", "0 N Y Y Y Y N N N N N N Y Y \n", "1 N N Y N N N N N N N N U Y \n", "2 N N Y Y N N N N N N N U Y \n", "3 Y N Y Y Y N N N N N N Y Y \n", "4 N N Y Y N Y Y N Y Y N U U \n", "\n", " HINSUB STATE_NAME \n", "0 C CA \n", "1 U CA \n", "2 U CA \n", "3 C CA \n", "4 A CA " ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#把类型object的提取出来,数值化\n", "df_object = df.select_dtypes('object')\n", "df_object.head()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1., 0., 0., ..., 4., 2., 0.],\n", " [1., 0., 0., ..., 4., 3., 0.],\n", " [0., 0., 0., ..., 4., 3., 0.],\n", " ...,\n", " [1., 0., 0., ..., 4., 3., 0.],\n", " [1., 0., 0., ..., 4., 2., 4.],\n", " [1., 0., 0., ..., 0., 1., 5.]])" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import OrdinalEncoder #对二维特征数值型编码\n", "df_object = OrdinalEncoder().fit_transform(df_object)\n", "df_object" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
resp_flagGENDCA00CA03CA06CA11CA16AARTADBTADEPAHBPAHCHARESAHRTAASNADGSAHRLASKNAVISBANKCOLLEGEFINIINLIINMEDIINVEIOLPMOBPLUSN2NCYNY8Y9N2N29N3N39N4N49N5N59N6N64N65PONLAPOEPSGFASGLLSGOESGSESGTCU18LIVEWELLNOC19NAH19NPH19POC19HOMSTATHINSUBSTATE_NAMEagec210apvtc210b200c210bluc210bpvtc210cipc210ebic210hmic210hvac210ksesc210mahc210mobc210mysc210pdvc210pmrc210pooc210psuc210pwcc210whtilormedapdpetinszhip19
001.0405110.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.02.00.00.00.01.00.00.02.01.01.01.00.00.00.00.00.00.01.05382.04.02.00.067.09911.010174.07190.0738.011164.00514526571.02279.015.064.04288
101.0000000.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.01.01.00.00.00.00.00.00.00.00.04.00111.04.03.00.076.0986.015269.06984.0494.09756.00415448199.03765.017.061.04663
200.0000000.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.00.01.01.01.00.00.00.00.00.00.00.03.00111.04.03.00.067.0884.0261232.04450.0516.08350.00417384462.04447.020.061.04673
300.0040000.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.00.00.02.01.00.00.00.01.01.01.01.01.01.00.00.00.00.00.00.01.01452.04.02.00.071.0964.015482.082103.0473.010552.00414457199.03971.04.062.03789
400.0000000.00.00.00.00.00.00.00.00.00.00.00.00.01.00.00.00.00.01.00.01.00.00.00.00.00.01.01.01.00.01.01.00.01.01.00.03.00111.03.00.00.075.0884.091238.04755.0523.08950.010429321336.01565.09.061.03743
\n", "
" ], "text/plain": [ " resp_flag GEND CA00 CA03 CA06 CA11 CA16 AART ADBT ADEP AHBP \\\n", "0 0 1.0 4 0 5 1 1 0.0 0.0 0.0 0.0 \n", "1 0 1.0 0 0 0 0 0 0.0 0.0 0.0 0.0 \n", "2 0 0.0 0 0 0 0 0 0.0 0.0 0.0 0.0 \n", "3 0 0.0 0 4 0 0 0 0.0 0.0 0.0 0.0 \n", "4 0 0.0 0 0 0 0 0 0.0 0.0 0.0 0.0 \n", "\n", " AHCH ARES AHRT AASN ADGS AHRL ASKN AVIS BANK COLLEGE FINI INLI \\\n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 0.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "\n", " INMEDI INVE IOLP MOBPLUS N2NCY NY8Y9 N2N29 N3N39 N4N49 N5N59 \\\n", "0 0.0 0.0 0.0 2.0 0.0 0.0 0.0 1.0 0.0 0.0 \n", "1 0.0 0.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 \n", "3 0.0 0.0 0.0 2.0 1.0 0.0 0.0 0.0 1.0 1.0 \n", "4 0.0 0.0 1.0 0.0 1.0 0.0 0.0 0.0 0.0 0.0 \n", "\n", " N6N64 N65P ONLA POEP SGFA SGLL SGOE SGSE SGTC U18 LIVEWELL \\\n", "0 2.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "1 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 4.0 \n", "2 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 3.0 \n", "3 1.0 1.0 1.0 1.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "4 1.0 1.0 1.0 0.0 1.0 1.0 0.0 1.0 1.0 0.0 3.0 \n", "\n", " NOC19 NAH19 NPH19 POC19 HOMSTAT HINSUB STATE_NAME age c210apvt \\\n", "0 5 3 8 2.0 4.0 2.0 0.0 67.0 99 \n", "1 0 1 1 1.0 4.0 3.0 0.0 76.0 98 \n", "2 0 1 1 1.0 4.0 3.0 0.0 67.0 88 \n", "3 1 4 5 2.0 4.0 2.0 0.0 71.0 96 \n", "4 0 1 1 1.0 3.0 0.0 0.0 75.0 88 \n", "\n", " c210b200 c210blu c210bpvt c210cip c210ebi c210hmi c210hva c210kses \\\n", "0 11.0 10 1 74.0 71 90.0 738.0 111 \n", "1 6.0 15 2 69.0 69 84.0 494.0 97 \n", "2 4.0 26 12 32.0 44 50.0 516.0 83 \n", "3 4.0 15 4 82.0 82 103.0 473.0 105 \n", "4 4.0 9 12 38.0 47 55.0 523.0 89 \n", "\n", " c210mah c210mob c210mys c210pdv c210pmr c210poo c210psu c210pwc \\\n", "0 64.0 0 5 14 52 65 71.0 22 \n", "1 56.0 0 4 15 44 81 99.0 37 \n", "2 
50.0 0 4 17 38 44 62.0 44 \n", "3 52.0 0 4 14 45 71 99.0 39 \n", "4 50.0 10 4 29 32 13 36.0 15 \n", "\n", " c210wht ilor meda pdpe tins zhip19 \n", "0 79.0 15.0 64.0 42 8 8 \n", "1 65.0 17.0 61.0 46 6 3 \n", "2 47.0 20.0 61.0 46 7 3 \n", "3 71.0 4.0 62.0 37 8 9 \n", "4 65.0 9.0 61.0 37 4 3 " ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#进行数据编码转换\n", "for i in df.columns:\n", " if df[i].dtypes=='object':\n", " df[i]=OrdinalEncoder().fit_transform(df[[i]])\n", " \n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 数据建模" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [], "source": [ "from sklearn import tree\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.model_selection import GridSearchCV \n", "from sklearn.model_selection import cross_val_score \n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "from sklearn.tree import DecisionTreeClassifier" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "#确定特征矩阵和标签\n", "X = df.iloc[:,1:]\n", "y = df['resp_flag']" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [], "source": [ "#划分数据集\n", "Xtrain,Xtest,Ytrain,Ytest=train_test_split(X,y,test_size=0.3,\n", " random_state=420)" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.5946564885496183" ] }, "execution_count": 46, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = tree.DecisionTreeClassifier()\n", "clf = clf.fit(Xtrain,Ytrain)\n", "score = clf.score(Xtest,Ytest)\n", "score" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 网格搜索" ] }, { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'min_samples_leaf': 1500, 'min_samples_split': 4000}" ] }, "execution_count": 47, "metadata": {}, "output_type": 
"execute_result" } ], "source": [ "#尝试使用最小叶节点样本数量和最小分割样本数量进行调参\n", "param_grid = {'min_samples_leaf':list(range(1000,6000,100)),\n", " 'min_samples_split':list(range(4000,6000,100))}\n", "GR = GridSearchCV(tree.DecisionTreeClassifier(),param_grid,cv=5)\n", "GR.fit(Xtrain,Ytrain)\n", "\n", "GR.best_params_" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6417557251908397" ] }, "execution_count": 48, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = tree.DecisionTreeClassifier(criterion='gini',max_depth=4,\n", " min_samples_leaf=1500,\n", " min_samples_split=4000)\n", "clf.fit(Xtrain,Ytrain)\n", "clf.score(Xtest,Ytest)\n", "#模型能力提高了" ] }, { "cell_type": "code", "execution_count": 49, "metadata": {}, "outputs": [], "source": [ "import graphviz\n", "features = list(df.columns[1:])\n", "\n", "dot_data = tree.export_graphviz(clf,feature_names=features,\n", " class_names=['NP','P'],filled=True,rounded=True)" ] }, { "cell_type": "code", "execution_count": 50, "metadata": {}, "outputs": [ { "data": { "image/svg+xml": [ "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "Tree\r\n", "\r\n", "\r\n", "0\r\n", "\r\n", "STATE_NAME <= 0.5\r\n", "gini = 0.481\r\n", "samples = 30566\r\n", "value = [18295, 12271]\r\n", "class = NP\r\n", "\r\n", "\r\n", "1\r\n", "\r\n", "ilor <= 8.5\r\n", "gini = 0.348\r\n", "samples = 8147\r\n", "value = [6320, 1827]\r\n", "class = NP\r\n", "\r\n", "\r\n", "0->1\r\n", "\r\n", "\r\n", "True\r\n", "\r\n", "\r\n", "6\r\n", "\r\n", "N2NCY <= 0.5\r\n", "gini = 0.498\r\n", "samples = 22419\r\n", "value = [11975, 10444]\r\n", "class = NP\r\n", "\r\n", "\r\n", "0->6\r\n", "\r\n", "\r\n", "False\r\n", "\r\n", "\r\n", "2\r\n", "\r\n", "gini = 0.412\r\n", "samples = 2402\r\n", "value = [1705, 697]\r\n", "class = NP\r\n", "\r\n", "\r\n", "1->2\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "3\r\n", "\r\n", "tins <= 7.5\r\n", "gini = 0.316\r\n", "samples = 5745\r\n", "value = [4615, 
1130]\r\n", "class = NP\r\n", "\r\n", "\r\n", "1->3\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "4\r\n", "\r\n", "gini = 0.24\r\n", "samples = 2853\r\n", "value = [2455, 398]\r\n", "class = NP\r\n", "\r\n", "\r\n", "3->4\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "5\r\n", "\r\n", "gini = 0.378\r\n", "samples = 2892\r\n", "value = [2160, 732]\r\n", "class = NP\r\n", "\r\n", "\r\n", "3->5\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "7\r\n", "\r\n", "age <= 70.5\r\n", "gini = 0.5\r\n", "samples = 11539\r\n", "value = [5613, 5926]\r\n", "class = P\r\n", "\r\n", "\r\n", "6->7\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "14\r\n", "\r\n", "ilor <= 7.5\r\n", "gini = 0.486\r\n", "samples = 10880\r\n", "value = [6362, 4518]\r\n", "class = NP\r\n", "\r\n", "\r\n", "6->14\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "8\r\n", "\r\n", "STATE_NAME <= 7.5\r\n", "gini = 0.479\r\n", "samples = 5170\r\n", "value = [2052, 3118]\r\n", "class = P\r\n", "\r\n", "\r\n", "7->8\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "11\r\n", "\r\n", "tins <= 5.5\r\n", "gini = 0.493\r\n", "samples = 6369\r\n", "value = [3561, 2808]\r\n", "class = NP\r\n", "\r\n", "\r\n", "7->11\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "9\r\n", "\r\n", "gini = 0.493\r\n", "samples = 2444\r\n", "value = [1364, 1080]\r\n", "class = NP\r\n", "\r\n", "\r\n", "8->9\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "10\r\n", "\r\n", "gini = 0.377\r\n", "samples = 2726\r\n", "value = [688, 2038]\r\n", "class = P\r\n", "\r\n", "\r\n", "8->10\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "12\r\n", "\r\n", "gini = 0.439\r\n", "samples = 1734\r\n", "value = [1171, 563]\r\n", "class = NP\r\n", "\r\n", "\r\n", "11->12\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "13\r\n", "\r\n", "gini = 0.5\r\n", "samples = 4635\r\n", "value = [2390, 2245]\r\n", "class = NP\r\n", "\r\n", "\r\n", "11->13\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "15\r\n", "\r\n", "gini = 0.5\r\n", "samples = 2985\r\n", "value = [1451, 1534]\r\n", "class = P\r\n", "\r\n", "\r\n", "14->15\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "16\r\n", 
"\r\n", "HINSUB <= 1.5\r\n", "gini = 0.47\r\n", "samples = 7895\r\n", "value = [4911, 2984]\r\n", "class = NP\r\n", "\r\n", "\r\n", "14->16\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "17\r\n", "\r\n", "gini = 0.495\r\n", "samples = 2245\r\n", "value = [1240, 1005]\r\n", "class = NP\r\n", "\r\n", "\r\n", "16->17\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "18\r\n", "\r\n", "gini = 0.455\r\n", "samples = 5650\r\n", "value = [3671, 1979]\r\n", "class = NP\r\n", "\r\n", "\r\n", "16->18\r\n", "\r\n", "\r\n", "\r\n", "\r\n", "\r\n" ], "text/plain": [ "" ] }, "execution_count": 50, "metadata": {}, "output_type": "execute_result" } ], "source": [ "graph = graphviz.Source(dot_data)\n", "graph" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 对数据进行PCA压缩" ] }, { "cell_type": "code", "execution_count": 51, "metadata": {}, "outputs": [], "source": [ "from sklearn.decomposition import PCA \n", "pca = PCA(n_components=20)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(30566, 74)" ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Xtrain.shape" ] }, { "cell_type": "code", "execution_count": 53, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(30566, 20)" ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "Xtrain_pca = pca.fit_transform(Xtrain)\n", "Xtrain_pca.shape" ] }, { "cell_type": "code", "execution_count": 54, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[0.9153591782418453,\n", " 0.04623390351316754,\n", " 0.009614205126099036,\n", " 0.0059733295547924215,\n", " 0.005376298245309462,\n", " 0.0038206947441833477,\n", " 0.0032810657682490097,\n", " 0.002282561885296771,\n", " 0.0016778924397305934,\n", " 0.0010966357436341806,\n", " 0.0009433417755888605,\n", " 0.0009092016983611149,\n", " 0.0007054275771638145,\n", " 0.0004457010640798597,\n", " 0.0003960043714565213,\n", " 0.00030893456867792167,\n", 
" 0.0002769756111792194,\n", " 0.00022580708084060145,\n", " 0.00021132827352552525,\n", " 0.00017942417200120434]" ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#各主成分的方差贡献率(逐个成分,不是累计值)\n", "list(pca.explained_variance_ratio_)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.9993179114551825" ] }, "execution_count": 55, "metadata": {}, "output_type": "execute_result" } ], "source": [ "(pca.explained_variance_ratio_).sum()\n", "#前20个主成分累计解释约99.9%的方差,其中第一主成分单独解释约0.915" ] }, { "cell_type": "code", "execution_count": 56, "metadata": {}, "outputs": [], "source": [ "Xtest_pca = pca.transform(Xtest)" ] }, { "cell_type": "code", "execution_count": 57, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6115267175572519" ] }, "execution_count": 57, "metadata": {}, "output_type": "execute_result" } ], "source": [ "clf = tree.DecisionTreeClassifier(criterion='gini',max_depth=4,\n", " min_samples_leaf=1500,\n", " min_samples_split=4000)\n", "clf.fit(Xtrain_pca,Ytrain)\n", "clf.score(Xtest_pca,Ytest)#准确率由未降维时的0.6418降到0.6115,PCA压缩损失了部分对分类有用的信息" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.3" }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": false, "title_cell": "Table of Contents", "title_sidebar": "Contents", "toc_cell": false, "toc_position": {}, "toc_section_display": true, "toc_window_display": false } }, "nbformat": 4, "nbformat_minor": 2 }