{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 處理 outliers\n", "* 新增欄位註記\n", "* outliers 或 NA 填補\n", " 1. 平均數 (mean)\n", " 2. 中位數 (median, or Q50)\n", " 3. 最大/最小值 (max/min, Q100, Q0)\n", " 4. 分位數 (quantile)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Import 需要的套件\n", "import os\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", "\n", "# 設定 data_path\n", "dir_data = './data/'" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Path of read in data: ./data/application_train.csv\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITY...FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
01000021Cash loansMNY0202500.0406597.524700.5...00000.00.00.00.00.01.0
11000030Cash loansFNN0270000.01293502.535698.5...00000.00.00.00.00.00.0
21000040Revolving loansMYY067500.0135000.06750.0...00000.00.00.00.00.00.0
31000060Cash loansFNY0135000.0312682.529686.5...0000NaNNaNNaNNaNNaNNaN
41000070Cash loansMNY0121500.0513000.021865.5...00000.00.00.00.00.00.0
\n", "

5 rows × 122 columns

\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR \\\n", "0 100002 1 Cash loans M N \n", "1 100003 0 Cash loans F N \n", "2 100004 0 Revolving loans M Y \n", "3 100006 0 Cash loans F N \n", "4 100007 0 Cash loans M N \n", "\n", " FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY \\\n", "0 Y 0 202500.0 406597.5 24700.5 \n", "1 N 0 270000.0 1293502.5 35698.5 \n", "2 Y 0 67500.0 135000.0 6750.0 \n", "3 Y 0 135000.0 312682.5 29686.5 \n", "4 Y 0 121500.0 513000.0 21865.5 \n", "\n", " ... FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 \\\n", "0 ... 0 0 0 0 \n", "1 ... 0 0 0 0 \n", "2 ... 0 0 0 0 \n", "3 ... 0 0 0 0 \n", "4 ... 0 0 0 0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0.0 1.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "\n", "[5 rows x 122 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f_app = os.path.join(dir_data, 'application_train.csv')\n", "print('Path of read in data: %s' % (f_app))\n", "app_train = pd.read_csv(f_app)\n", "app_train.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 265992.000000\n", "mean 0.006402\n", "std 0.083849\n", "min 0.000000\n", "25% 0.000000\n", "50% 0.000000\n", "75% 0.000000\n", "max 4.000000\n", "Name: AMT_REQ_CREDIT_BUREAU_HOUR, dtype: float64" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "app_train['AMT_REQ_CREDIT_BUREAU_HOUR'].describe()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 307499.000000\n", "mean 27108.573909\n", "std 14493.737315\n", "min 1615.500000\n", "25% 16524.000000\n", "50% 24903.000000\n", "75% 34596.000000\n", "max 258025.500000\n", "Name: AMT_ANNUITY, dtype: float64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 如果欄位中有 NA, describe 會有問題\n", "app_train['AMT_ANNUITY'].describe()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1615.5, 16524.0, 24903.0, 34596.0, 258025.5]\n" ] } ], "source": [ "# Ignore NA, 計算五值\n", "five_num = [0, 25, 50, 75, 100]\n", "quantile_5s = [np.percentile(app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'], q = i) for i in five_num]\n", "print(quantile_5s)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFBlJREFUeJzt3X+sZHV5x/H300WoQS2LlJsNu+nFZv+QlhTxBtbQmFtNlwX/WEywgZCyKMkaC6kmNHGxf0BEE2yiJqSKrnEjNFag/ggbXbvdUCaNiSBLS4EV6V7pVq67YUMXkdWkdu3TP+Z78bDfmXtn7t57Z+7M+5VM5swz33PmPHuG++H8mJnITCRJavqtQa+AJGn4GA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqnDboFVisc845JycnJ/ua5xe/+AVnnnnm8qzQkBmnXmG8+rXX0bUS/T7++OMvZubvLjRu1YbD5OQk+/fv72ueVqvF9PT08qzQkBmnXmG8+rXX0bUS/UbEf/UyzsNKkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqTKqv2E9GoxueM7r04fuvM9A1wTSeqdew6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqLBgOEbEhIh6OiGci4kBEfLjUb4+In0bEE+V2ZWOeWyNiJiKejYjLG/UtpTYTETsa9fMj4tGIOBgR90fE6UvdqCSpd73sOZwAbsnMtwKbgJsi4oLy3Gcz86Jy2wNQnrsG+ANgC/D5iFgTEWuAzwFXABcA1zaW86myrI3AS8CNS9SfJGkRFgyHzDySmf9apl8BngHOm2eWrcB9mfk/mfmfwAxwSbnNZOZzmfkr4D5ga0QE8C7g62X+e4CrFtuQJOnU9fV7DhExCbwNeBS4DLg5Iq4H9tPeu3iJdnA80phtlt+EyfMn1S8F3gz8LDNPdBi/avi7DZJGSc/hEBFvAL4BfCQzfx4RdwN3AFnuPw18AIgOsyed91JynvGd1mE7sB1gYmKCVqvV6+oDcPz48b7n6dUtF554dbr5Gt3qy205ex1G49SvvY6uYeq3p3CIiNfRDoavZuY3ATLzhcbzXwK+XR7OAhsas68HDpfpTvUXgbMi4rSy99Ac/xqZuRPYCTA1NZXT09O9rP6rWq0W/c7Tqxuaew7XTS9YX27L2eswGqd+7XV0DVO/vVytFMCXgWcy8zON+rrGsPcCT5fp3cA1EXFGRJwPbAR+ADwGbCxXJp1O+6T17sxM4GHg6jL/NuDBU2tLknQqetlzuAz4c+CpiHii1D5G+2qji2gfAjoEfBAgMw9ExAPAD2lf6XRTZv4aICJuBvYCa4BdmXmgLO+jwH0R8Qng32iHkSRpQBYMh8z8Hp3PC+yZZ55PAp/sUN/Tab7MfI721UySpCHgJ6QlSRXDQZJUMRwkSZW+PgSn3jQ/ECdJq5F7DpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkip9z6IE/5CNp3LjnIEmquOfQJ/ciJI0D9xwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRU/IT0gftJa0jBzz0GSVDEcJEkVw0GSVDEcJEkVw0GSVFkwHCJiQ0Q8HBHPRMSBiPhwqZ8dEfsi4mC5X1vqERF3RcRMRDwZERc3lrWtjD8YEdsa9bdHxFNlnrsiIpaj2aU2ueM7r94kaZT0sudwArglM98KbAJuiogLgB3AQ5m5EXioPAa4AthYbtuBu6EdJsBtwKXAJcBtc4FSxmxvzLfl1FuTJC3WguGQmUcy81/L9CvAM8B5wFbgnjLsHuCqMr0VuDfbHgHOioh1wOXAvsw8lpkvAfuALeW5N2Xm9zMzgXsby5IkDUBf5xwiYhJ4G/AoMJGZR6AdIMC5Zdh5wPON2WZLbb76bIe6JGlAev6EdES8AfgG8JHM/Pk8pwU6PZGLqHdah+20Dz8xMTFBq9VaYK1f6/jx433PA3DLhSf6nqeT5ms3l7mYdVrIYntdrcapX3sdXcPUb0/hEBGvox0MX83Mb5byCxGxLjOPlENDR0t9FtjQmH09cLjUp0+qt0p9fYfxlczcCewEmJqayunp6U7Dumq1WvQ7D8ANS3TC+dB1v3nt5jKb9aWy2F5Xq3Hq115H1zD128vVSgF8GXgmMz/TeGo3MHfF0TbgwUb9+nLV0ibg5XLYaS+wOSLWlhPRm4G95blXImJTea3rG8uSJA1AL3sOlwF/DjwVEU+U2seAO4EHIuJG4CfA+8pze4ArgRngl8D7ATLzWETcATxWxn08M4+V6Q8BXwFeD3y33CRJA7JgOGTm9+h8XgDg3R3GJ3BTl2XtAnZ1qO8H/nChdZEkrQw/IS1JqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqRKzz/2o1M3uUS/CyFJy809B0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFX8EFxD80Nqh+58zwDXRJIGyz0HSVLFcJAkVQwHSVLFcJAkVQwHSVLFq5W68Ou1JY0z9xwkSZUFwyEidkXE0Yh4ulG7PSJ+GhFPlNuVjedujYiZiHg2Ii5v1LeU2kxE7GjUz4+IRyPiYETcHxGnL2WDkqT+9bLn8BVgS4f6ZzPzonLbAxARFwDXAH9Q5vl8RKyJiDXA54ArgAuAa8tYgE+VZW0EXgJuPJWGJEmnbsFwyMx/AY71uLytwH2Z+T+Z+Z/ADHBJuc1k5nOZ+SvgPmBrRATwLuDrZf57gKv67EGStMRO5ZzDzRHxZDnstLbUzgOeb4yZLbVu9TcDP8vMEyfVJUkDtNirle4G7gCy3H8a+AAQHcYmnUMo5xnfUURsB7YDTExM0Gq1+lrp48ePzzvPLRee6Prccuq3j14s1OuoGad+7XV0DVO/iwqHzHxhbjoivgR8uzycBTY0hq4HDpfpTvUXgbMi4rSy99Ac3+l1dwI7AaampnJ6erqv9W61Wsw3zw0Dunz10HXTS77MhXodNePUr72OrmHqd1GHlSJiXePhe4G5K5l2A9dExBkRcT6wEfgB8BiwsVyZdDrtk9a7MzOBh4Gry/zbgAcXs06SpKWz4J5DRHwNmAbOiYhZ4DZgOiIuon0I6BDwQYDMPBARDwA/BE4AN2Xmr8tybgb2AmuAXZl5oLzER4H7IuITwL8BX16y7iRJi7JgOGTmtR3KXf+AZ+YngU92qO8B9nSoP0f7aiZJ0pDw6zOGgD8yJGnY+PUZkqSK4SBJqhgOkqSK4SBJqhgOkqSKVysNmZN/ZMirlyQNgnsOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSKn5Aecv7Wg6RBcM9BklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJlQXDISJ2RcTRiHi6UTs7IvZFxMFyv7bUIyLuioiZiHgyIi5uzLOtjD8YEdsa9bdHxFNlnrsiIpa6SUlSf3rZc/gKsOWk2g7goczcCDxUHgNcAWwst+3A3dAOE+A24FLgEuC2uUApY7Y35jv5tSRJK2zBcMjMfwGOnVTeCtxTpu8BrmrU7822R4CzImIdcDmwLzOPZeZLwD5gS3nuTZn5/cxM4N7GsiRJA7LYcw4TmXkEoNyfW+rnAc83xs2W2nz12Q51SdIALfWP/XQ6X5CLqHdeeMR22oegmJiYoNVq9bVyx48fn3eeWy480dfyVlo//S7U66gZp37tdXQNU7+LDYcXImJdZh4ph4aOlvossKExbj1wuNSnT6q3Sn19h/EdZeZOYCfA1NRUTk9PdxvaUavVYr55bmj86towOnTddM9jF+p11IxTv/Y6uoap38UeVtoNzF1xtA14sFG/vly1tAl4uRx22gtsjoi15UT0ZmBvee6ViNhUrlK6vrEsSdKALLjnEBFfo/1//edExCztq47uBB6IiBuBnwDvK8P3AFcCM8AvgfcDZOaxiLgDeKyM+3hmzp3k/hDtK6JeD3y33CRJA7RgOGTmtV2eeneHsQnc1GU5u4BdHer7gT9caD0kSSvHT0hLkiqGgySpYjhIkiqGgySpYjhIkiqGgySpYjhIkipL/d1KWkaTja/3OHTnewa4JpJGnXsOkqSKew6rlHsRkpaTew6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrfrTQC/J4lSUvNPQdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUuWUwiEiDkXEUxHxRETsL7WzI2JfRBws92tLPSLiroiYiYgnI+LixnK2lfEHI2LbqbU03iZ3fIfJHd/hqZ++POhVkbSKLcWew59k5kWZOVUe7wAeysyNwEPlMcAVwMZy2w7cDe0wAW4DLgUuAW6bCxRJ0mAsx2GlrcA9Zfoe4KpG/d5sewQ4KyLWAZcD+zLzWGa+BOwDtizDekmSenSq4ZDAP0XE4xGxvdQmMvMIQLk/t9TPA55vzDtbat3qkqQBOdXvVrosMw9HxLnAvoj40Txjo0Mt56nXC2gH0HaAiYkJWq1WXyt7/Pjxeee55cITfS1vmE28nr7/fVazhbbtKLHX0TVM/Z5SOGTm4XJ/NCK+RfucwQsRsS4zj5TDRkfL8FlgQ2P29cDhUp8+qd7q8no7gZ0AU1NTOT093WlYV61Wi/nmuaHxBXar3S0XnuDP+vz3Wc0W2rajxF5H1zD1u+jDShFxZkS8cW4a2Aw8DewG5q442gY8WKZ3A9eXq5Y2AS+Xw057gc0RsbaciN5capKkATmVPYcJ4FsRMbecv8/Mf4yIx4AHIuJG4CfA+8r4PcCVwAzwS+D9AJl5LCLuAB4r4z6emcdOYb1U+FXekhZr0eGQmc8Bf9Sh/t/AuzvUE7ipy7J2AbsWuy6SpKXlJ6QlSRXDQZJUMRwkSRXDQZJUMRwkSZVT/YT0qjc5Qh98k6SlMvbhMC78zIOkfnhYSZJUMRwkSRXDQZJU8ZzDGPL8g6SFuOcgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkih+C06v8cJykOYbDmPMryyV14mElSVLFPQctyMNN0vhxz0GSVDEcJEkVDyupLx5iksaDew6SpIp7DurIS1yl8WY4aNE8xCSNrqE5rBQRWyLi2YiYiYgdg14fSRpnQ7HnEBFrgM8BfwrMAo9FxO7M/OFyvJ6HTJaeexHSaBmKcAAuAWYy8zmAiLgP2AosSzhoefUSvgaINNyGJRzOA55vPJ4FLh3QumgFLOXeWzNouu3BGFhSf4YlHKJDLatBEduB7eXh8Yh4ts/XOQd4sc95VqW/HKNe41NAh35Lvd/lrAZjs20Zr15hZfr9vV4GDUs4zAIbGo/XA4dPHpSZO4Gdi32RiNifmVOLnX81GadeYbz6tdfRNUz9DsvVSo8BGyPi/Ig4HbgG2D3gdZKksTUUew6ZeSIibgb2AmuAXZl5YMCrJUljayjCASAz9wB7lvllFn1IahUap15hvPq119E1NP1GZnXeV5I05oblnIMkaYiMTTis1q/niIhDEfFURDwREftL7eyI2BcRB8v92lKPiLir9PhkRFzcWM62Mv5gRGxr1N9elj9T5u10WfFy9rcrIo5GxNON2rL31+01BtDr7RHx07J9n4iIKxvP3VrW+9mIuLxR7/heLhd0PFp6ur9c3EFEnFEez5TnJ1eg1w0R8XBEPBMRByLiw6U+qtu2W7+rd/tm5sjfaJ/k/jHwFuB04N+BCwa9Xj2u+yHgnJNqfwPsKNM7gE+V6SuB79L+3Mgm4NFSPxt4rtyvLdNry3M/AN5R5vkucMUK9/dO4GLg6ZXsr9trDKDX24G/6jD2gvI+PQM4v7x/18z3XgYeAK4p018APlSm/wL4Qpm+Brh/BXpdB1xcpt8I/EfpaVS3bbd+V+32XbE/AoO8lTfQ3sbjW4FbB71ePa77IepweBZYV6bXAc+W6S8C1548DrgW+GKj/sVSWwf8qFF/zbgV7HGS1/7BXPb+ur3GAHrt9sfjNe9R2lfyvaPbe7n8gXwROK3UXx03N2+ZPq2MixXexg/S/u60kd22Xfpdtdt3XA4rdfp6jvMGtC79SuCfIuLxaH9CHGAiM48AlPtzS71bn/PVZzvUB20l+uv2GoNwczmUsqtxCKTfXt8M/CwzT5xUf82yyvMvl/ErohzmeBvwKGOwbU/qF1bp9h2XcOjp6zmG1GWZeTFwBXBTRLxznrHd+uy3PqxGsb+7gd8HLgKOAJ8u9aXsdWD/DhHxBuAbwEcy8+fzDe1QW3XbtkO/q3b7jks49PT1HMMoMw+X+6PAt2h/g+0LEbEOoNwfLcO79TlffX2H+qCtRH/dXmNFZeYLmfnrzPw/4Eu0ty/03+uLwFkRcdpJ9dcsqzz/O8Cxpe/mtSLidbT/UH41M79ZyiO7bTv1u5q377iEw6r8eo6IODMi3jg3DWwGnqa97nNXbWyjfXyTUr++XPmxCXi57FbvBTZHxNqyW7uZ9vHKI8ArEbGpXOlxfWNZg7QS/XV7jRU190eseC/t7Qvt9bumXIlyPrCR9gnYju/lbB9wfhi4usx/8r/bXK9XA/9cxi+b8u/9ZeCZzPxM46mR3Lbd+l3V23elT9QM6kb7aoj/oH0lwF8Pen16XOe30L5a4d+BA3PrTft44kPAwXJ/dqkH7R9N+jHwFDDVWNYHgJlye3+jPlXesD8G/paVP1H5Ndq72/9L+/+AblyJ/rq9xgB6/bvSy5O0/yNf1xj/12W9n6VxFVm393J5v/yg/Bv8A3BGqf92eTxTnn/LCvT6x7QPbTwJPFFuV47wtu3W76rdvn5CWpJUGZfDSpKkPhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqTK/wMa8bDNlfPtDAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#'~' 波浪符號為取反的意思\n", "app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'].hist(bins = 100)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAD8CAYAAACcjGjIAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFBlJREFUeJzt3X+sZHV5x/H300WoQS2LlJsNu+nFZv+QlhTxBtbQmFtNlwX/WEywgZCyKMkaC6kmNHGxf0BEE2yiJqSKrnEjNFag/ggbXbvdUCaNiSBLS4EV6V7pVq67YUMXkdWkdu3TP+Z78bDfmXtn7t57Z+7M+5VM5swz33PmPHuG++H8mJnITCRJavqtQa+AJGn4GA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqnDboFVisc845JycnJ/ua5xe/+AVnnnnm8qzQkBmnXmG8+rXX0bUS/T7++OMvZubvLjRu1YbD5OQk+/fv72ueVqvF9PT08qzQkBmnXmG8+rXX0bUS/UbEf/UyzsNKkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqTKqv2E9GoxueM7r04fuvM9A1wTSeqdew6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqLBgOEbEhIh6OiGci4kBEfLjUb4+In0bEE+V2ZWOeWyNiJiKejYjLG/UtpTYTETsa9fMj4tGIOBgR90fE6UvdqCSpd73sOZwAbsnMtwKbgJsi4oLy3Gcz86Jy2wNQnrsG+ANgC/D5iFgTEWuAzwFXABcA1zaW86myrI3AS8CNS9SfJGkRFgyHzDySmf9apl8BngHOm2eWrcB9mfk/mfmfwAxwSbnNZOZzmfkr4D5ga0QE8C7g62X+e4CrFtuQJOnU9fV7DhExCbwNeBS4DLg5Iq4H9tPeu3iJdnA80phtlt+EyfMn1S8F3gz8LDNPdBi/avi7DZJGSc/hEBFvAL4BfCQzfx4RdwN3AFnuPw18AIgOsyed91JynvGd1mE7sB1gYmKCVqvV6+oDcPz48b7n6dUtF554dbr5Gt3qy205ex1G49SvvY6uYeq3p3CIiNfRDoavZuY3ATLzhcbzXwK+XR7OAhsas68HDpfpTvUXgbMi4rSy99Ac/xqZuRPYCTA1NZXT09O9rP6rWq0W/c7Tqxuaew7XTS9YX27L2eswGqd+7XV0DVO/vVytFMCXgWcy8zON+rrGsPcCT5fp3cA1EXFGRJwPbAR+ADwGbCxXJp1O+6T17sxM4GHg6jL/NuDBU2tLknQqetlzuAz4c+CpiHii1D5G+2qji2gfAjoEfBAgMw9ExAPAD2lf6XRTZv4aICJuBvYCa4BdmXmgLO+jwH0R8Qng32iHkSRpQBYMh8z8Hp3PC+yZZ55PAp/sUN/Tab7MfI721UySpCHgJ6QlSRXDQZJUMRwkSZW+PgSn3jQ/ECdJq5F7DpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkip9z6IE/5CNp3LjnIEmquOfQJ/ciJI0D9xwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRXDQZJUMRwkSRU/IT0gftJa0jBzz0GSVDEcJEkVw0GSVDEcJEkVw0GSVFkwHCJiQ0Q8HBHPRMSBiPhwqZ8dEfsi4mC5X1vqERF3RcRMRDwZERc3lrWtjD8YEdsa9bdHxFNlnrsiIpaj2aU2ueM7r94kaZT0sudwArglM98KbAJuiogLgB3AQ5m5EXioPAa4AthYbtuBu6EdJsBtwKXAJcBtc4FSxmxvzLfl1FuTJC3WguGQmUcy81/L9CvAM8B5wFbgnjLsHuCqMr0VuDfbHgHOioh1wOXAvsw8lpkvAfuALeW5N2Xm9zMzgXsby5IkDUBf5xwiYhJ4G/AoMJGZR6AdIMC5Zdh5wPON2WZLbb76bIe6JGlAev6EdES8AfgG8JHM/Pk8pwU6PZGLqHdah+20Dz8xMTFBq9VaYK1f6/jx433PA3DLhSf6nqeT5ms3l7mYdVrIYntdrcapX3sdXcPUb0/hEBGvox0MX83Mb5byCxGxLjOPlENDR0t9FtjQmH09cLjUp0+qt0p9fYfxlczcCewEmJqayunp6U7Dumq1WvQ7D8ANS3TC+dB1v3nt5jKb9aWy2F5Xq3Hq115H1zD128vVSgF8GXgmMz/TeGo3MHfF0TbgwUb9+nLV0ibg5XLYaS+wOSLWlhPRm4G95blXImJTea3rG8uSJA1AL3sOlwF/DjwVEU+U2seAO4EHIuJG4CfA+8pze4ArgRngl8D7ATLzWETcATxWxn08M4+V6Q8BXwFeD3y33CRJA7JgOGTm9+h8XgDg3R3GJ3BTl2XtAnZ1qO8H/nChdZEkrQw/IS1JqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqRKzz/2o1M3uUS/CyFJy809B0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFUMB0lSxXCQJFX8EFxD80Nqh+58zwDXRJIGyz0HSVLFcJAkVQwHSVLFcJAkVQwHSVLFq5W68Ou1JY0z9xwkSZUFwyEidkXE0Yh4ulG7PSJ+GhFPlNuVjedujYiZiHg2Ii5v1LeU2kxE7GjUz4+IRyPiYETcHxGnL2WDkqT+9bLn8BVgS4f6ZzPzonLbAxARFwDXAH9Q5vl8RKyJiDXA54ArgAuAa8tYgE+VZW0EXgJuPJWGJEmnbsFwyMx/AY71uLytwH2Z+T+Z+Z/ADHBJuc1k5nOZ+SvgPmBrRATwLuDrZf57gKv67EGStMRO5ZzDzRHxZDnstLbUzgOeb4yZLbVu9TcDP8vMEyfVJUkDtNirle4G7gCy3H8a+AAQHcYmnUMo5xnfUURsB7YDTExM0Gq1+lrp48ePzzvPLRee6Prccuq3j14s1OuoGad+7XV0DVO/iwqHzHxhbjoivgR8uzycBTY0hq4HDpfpTvUXgbMi4rSy99Ac3+l1dwI7AaampnJ6erqv9W61Wsw3zw0Dunz10HXTS77MhXodNePUr72OrmHqd1GHlSJiXePhe4G5K5l2A9dExBkRcT6wEfgB8BiwsVyZdDrtk9a7MzOBh4Gry/zbgAcXs06SpKWz4J5DRHwNmAbOiYhZ4DZgOiIuon0I6BDwQYDMPBARDwA/BE4AN2Xmr8tybgb2AmuAXZl5oLzER4H7IuITwL8BX16y7iRJi7JgOGTmtR3KXf+AZ+YngU92qO8B9nSoP0f7aiZJ0pDw6zOGgD8yJGnY+PUZkqSK4SBJqhgOkqSK4SBJqhgOkqSKVysNmZN/ZMirlyQNgnsOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSKn5Aecv7Wg6RBcM9BklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklQxHCRJlQXDISJ2RcTRiHi6UTs7IvZFxMFyv7bUIyLuioiZiHgyIi5uzLOtjD8YEdsa9bdHxFNlnrsiIpa6SUlSf3rZc/gKsOWk2g7goczcCDxUHgNcAWwst+3A3dAOE+A24FLgEuC2uUApY7Y35jv5tSRJK2zBcMjMfwGOnVTeCtxTpu8BrmrU7822R4CzImIdcDmwLzOPZeZLwD5gS3nuTZn5/cxM4N7GsiRJA7LYcw4TmXkEoNyfW+rnAc83xs2W2nz12Q51SdIALfWP/XQ6X5CLqHdeeMR22oegmJiYoNVq9bVyx48fn3eeWy480dfyVlo//S7U66gZp37tdXQNU7+LDYcXImJdZh4ph4aOlvossKExbj1wuNSnT6q3Sn19h/EdZeZOYCfA1NRUTk9PdxvaUavVYr55bmj86towOnTddM9jF+p11IxTv/Y6uoap38UeVtoNzF1xtA14sFG/vly1tAl4uRx22gtsjoi15UT0ZmBvee6ViNhUrlK6vrEsSdKALLjnEBFfo/1//edExCztq47uBB6IiBuBnwDvK8P3AFcCM8AvgfcDZOaxiLgDeKyM+3hmzp3k/hDtK6JeD3y33CRJA7RgOGTmtV2eeneHsQnc1GU5u4BdHer7gT9caD0kSSvHT0hLkiqGgySpYjhIkiqGgySpYjhIkiqGgySpYjhIkipL/d1KWkaTja/3OHTnewa4JpJGnXsOkqSKew6rlHsRkpaTew6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrfrTQC/J4lSUvNPQdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUuWUwiEiDkXEUxHxRETsL7WzI2JfRBws92tLPSLiroiYiYgnI+LixnK2lfEHI2LbqbU03iZ3fIfJHd/hqZ++POhVkbSKLcWew59k5kWZOVUe7wAeysyNwEPlMcAVwMZy2w7cDe0wAW4DLgUuAW6bCxRJ0mAsx2GlrcA9Zfoe4KpG/d5sewQ4KyLWAZcD+zLzWGa+BOwDtizDekmSenSq4ZDAP0XE4xGxvdQmMvMIQLk/t9TPA55vzDtbat3qkqQBOdXvVrosMw9HxLnAvoj40Txjo0Mt56nXC2gH0HaAiYkJWq1WXyt7/Pjxeee55cITfS1vmE28nr7/fVazhbbtKLHX0TVM/Z5SOGTm4XJ/NCK+RfucwQsRsS4zj5TDRkfL8FlgQ2P29cDhUp8+qd7q8no7gZ0AU1NTOT093WlYV61Wi/nmuaHxBXar3S0XnuDP+vz3Wc0W2rajxF5H1zD1u+jDShFxZkS8cW4a2Aw8DewG5q442gY8WKZ3A9eXq5Y2AS+Xw057gc0RsbaciN5capKkATmVPYcJ4FsRMbecv8/Mf4yIx4AHIuJG4CfA+8r4PcCVwAzwS+D9AJl5LCLuAB4r4z6emcdOYb1U+FXekhZr0eGQmc8Bf9Sh/t/AuzvUE7ipy7J2AbsWuy6SpKXlJ6QlSRXDQZJUMRwkSRXDQZJUMRwkSZVT/YT0qjc5Qh98k6SlMvbhMC78zIOkfnhYSZJUMRwkSRXDQZJU8ZzDGPL8g6SFuOcgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkih+C06v8cJykOYbDmPMryyV14mElSVLFPQctyMNN0vhxz0GSVDEcJEkVDyupLx5iksaDew6SpIp7DurIS1yl8WY4aNE8xCSNrqE5rBQRWyLi2YiYiYgdg14fSRpnQ7HnEBFrgM8BfwrMAo9FxO7M/OFyvJ6HTJaeexHSaBmKcAAuAWYy8zmAiLgP2AosSzhoefUSvgaINNyGJRzOA55vPJ4FLh3QumgFLOXeWzNouu3BGFhSf4YlHKJDLatBEduB7eXh8Yh4ts/XOQd4sc95VqW/HKNe41NAh35Lvd/lrAZjs20Zr15hZfr9vV4GDUs4zAIbGo/XA4dPHpSZO4Gdi32RiNifmVOLnX81GadeYbz6tdfRNUz9DsvVSo8BGyPi/Ig4HbgG2D3gdZKksTUUew6ZeSIibgb2AmuAXZl5YMCrJUljayjCASAz9wB7lvllFn1IahUap15hvPq119E1NP1GZnXeV5I05oblnIMkaYiMTTis1q/niIhDEfFURDwREftL7eyI2BcRB8v92lKPiLir9PhkRFzcWM62Mv5gRGxr1N9elj9T5u10WfFy9rcrIo5GxNON2rL31+01BtDr7RHx07J9n4iIKxvP3VrW+9mIuLxR7/heLhd0PFp6ur9c3EFEnFEez5TnJ1eg1w0R8XBEPBMRByLiw6U+qtu2W7+rd/tm5sjfaJ/k/jHwFuB04N+BCwa9Xj2u+yHgnJNqfwPsKNM7gE+V6SuB79L+3Mgm4NFSPxt4rtyvLdNry3M/AN5R5vkucMUK9/dO4GLg6ZXsr9trDKDX24G/6jD2gvI+PQM4v7x/18z3XgYeAK4p018APlSm/wL4Qpm+Brh/BXpdB1xcpt8I/EfpaVS3bbd+V+32XbE/AoO8lTfQ3sbjW4FbB71ePa77IepweBZYV6bXAc+W6S8C1548DrgW+GKj/sVSWwf8qFF/zbgV7HGS1/7BXPb+ur3GAHrt9sfjNe9R2lfyvaPbe7n8gXwROK3UXx03N2+ZPq2MixXexg/S/u60kd22Xfpdtdt3XA4rdfp6jvMGtC79SuCfIuLxaH9CHGAiM48AlPtzS71bn/PVZzvUB20l+uv2GoNwczmUsqtxCKTfXt8M/CwzT5xUf82yyvMvl/ErohzmeBvwKGOwbU/qF1bp9h2XcOjp6zmG1GWZeTFwBXBTRLxznrHd+uy3PqxGsb+7gd8HLgKOAJ8u9aXsdWD/DhHxBuAbwEcy8+fzDe1QW3XbtkO/q3b7jks49PT1HMMoMw+X+6PAt2h/g+0LEbEOoNwfLcO79TlffX2H+qCtRH/dXmNFZeYLmfnrzPw/4Eu0ty/03+uLwFkRcdpJ9dcsqzz/O8Cxpe/mtSLidbT/UH41M79ZyiO7bTv1u5q377iEw6r8eo6IODMi3jg3DWwGnqa97nNXbWyjfXyTUr++XPmxCXi57FbvBTZHxNqyW7uZ9vHKI8ArEbGpXOlxfWNZg7QS/XV7jRU190eseC/t7Qvt9bumXIlyPrCR9gnYju/lbB9wfhi4usx/8r/bXK9XA/9cxi+b8u/9ZeCZzPxM46mR3Lbd+l3V23elT9QM6kb7aoj/oH0lwF8Pen16XOe30L5a4d+BA3PrTft44kPAwXJ/dqkH7R9N+jHwFDDVWNYHgJlye3+jPlXesD8G/paVP1H5Ndq72/9L+/+AblyJ/rq9xgB6/bvSy5O0/yNf1xj/12W9n6VxFVm393J5v/yg/Bv8A3BGqf92eTxTnn/LCvT6x7QPbTwJPFFuV47wtu3W76rdvn5CWpJUGZfDSpKkPhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqTK/wMa8bDNlfPtDAAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "#等價上一行代碼\n", "app_train[app_train['AMT_ANNUITY'].notnull()]['AMT_ANNUITY'].hist(bins = 100)\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# 試著將 max 取代為 q99\n", "app_train[app_train['AMT_ANNUITY'] == app_train['AMT_ANNUITY'].max()] = np.percentile(app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'], q = 99)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "70006.5" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.percentile(app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'], q = 99)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1615.5, 16524.0, 24903.0, 34596.0, 230161.5]\n" ] } ], "source": [ "five_num = [0, 25, 50, 75, 100]\n", "quantile_5s = [np.percentile(app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'], q = i) for i in five_num]\n", "print(quantile_5s)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "可以看到原本的\n", "
max 258025.5\n", "
被取代後變為\n", "
max 230161.5" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "24903.0" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 得到 median 的另外一種方法\n", "np.median(app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'])" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "ModeResult(mode=array([9000.]), count=array([6385]))\n", "Elapsed time: 5.396 secs\n" ] } ], "source": [ "# 計算眾數 (mode)\n", "from scipy.stats import mode\n", "import time\n", "\n", "start_time = time.time()\n", "mode_get = mode(app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'])\n", "print(mode_get)\n", "print(\"Elapsed time: %.3f secs\" % (time.time() - start_time))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(9000.0, 6385)\n", "Elapsed time: 0.300 secs\n" ] } ], "source": [ "# 計算眾數 (mode)\n", "# 較快速的方式\n", "from collections import defaultdict\n", "\n", "start_time = time.time()\n", "mode_dict = defaultdict(lambda:0)\n", "\n", "for value in app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY']:\n", " mode_dict[value] += 1\n", " \n", "mode_get = sorted(mode_dict.items(), key=lambda kv: kv[1], reverse=True)\n", "print(mode_get[0])\n", "print(\"Elapsed time: %.3f secs\" % (time.time() - start_time))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 連續值標準化\n", "### 1. Z-transform: $ \\frac{(x - mean(x))}{std(x)} $\n", "### 2. Range (0 ~ 1): $ \\frac{x - min(x)}{max(x) - min(x)} $\n", "### 3. Range (-1 ~ 1): $ (\\frac{x - min(x)}{max(x) - min(x)} - 0.5) * 2 $" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEICAYAAAC0+DhzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAF05JREFUeJzt3XuQnNV55/HvE8QtyCBhbJUWURYOSmKCyhhmQbY3WWEIFuAENoVTYokRXlJKOZByduUNwi7bkJgqnJjgZWM7VoIWfKkITOygcAlhgUmKKq6KDULGmAEULNBCEUBG3BzhZ/94z+BmTo+me6ZneqT5fqq6pvu8p8/79OmZ/s176e7ITCRJavVz/S5AkjT9GA6SpIrhIEmqGA6SpIrhIEmqGA6SpIrhIHUhIj4ZEX/d674djJURcVgvxpI6Eb7PQTNZRJwNrAJ+Afgx8B3ggsx8oZ91jRQRCSzKzKF+16KZwS0HzVgRsQr4PPA/gQOAJcA7gFsiYq82/WdNbYVS/xgOmpEiYn/gIuAPMvMfMvPfM3Mz8Ns0AfE7EXFhRFwbEd+IiB8DZ5e2b7SMc1ZE/GtE/FtEfDoiNkfECWXZG30jYmHZNbQiIp6IiGcj4lMt4xwTEXdGxAsRsTUi/qJdQElTxXDQTPU+YB/g262NmbkduAn49dJ0KnAtMAf4ZmvfiDgc+DJwJjCfZuvj4DHW+5+AXwKOBz4TEe8q7a8D/x04CHhvWf7743hcUk8YDpqpDgKezcwdbZZtLcsB7szMv8vMn2bmKyP6nQ78fWbekZk/AT4DjHUQ76LMfCUz7wfuB94NkJkbMvOuzNxRtmC+Cvzn8T00aeLch6qZ6lngoIiY1SYg5pflAD/ayRj/oXV5Zr4cEf82xnr/X8v1l4HZABHxi8CfAwPAz9P8bW4Y60FIk8UtB81UdwKvAb/V2hgR+wEnAbeWpp1tCWwFFrTcd1/greOs5yvAD2jOSNof+CQQ4xxLmjDDQTNSZm6jOSD9vyNiWUTsGRELgW8BW4CvdzDMtcBvRMT7ysHjixj/C/pbaE6l3R4Rvwx8bJzjSD1hOGjGysw/pfkP/Qs0L8x30+wmOj4zX+vg/puAPwDW0WxFvAg8Q7NF0q1PAP+1jPFXwNXjGEPqGd8EJ/VIRMwGXqDZNfR4v+uRJsItB2kCIuI3IuLny7GKLwAbgc39rUqaOMNBmphTgafKZRGwPN0c127A3UqSpIpbDpKkyi77JriDDjooFy5c2FHfl156if32229yCxoH6+qOdXXHurozE+rasGHDs5n5to46Z+YueTn66KOzU7fffnvHfaeSdXXHurpjXd2ZCXUB92WHr7HuVpIkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVXbZj8+YSgtX39C2ffMlp0xxJZI0NdxykCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVDAdJUsVwkCRVOg6HiNgjIr4bEdeX24dGxN0R8UhEXB0Re5X2vcvtobJ8YcsYF5T2hyPigy3ty0rbUESs7t3DkySNRzdbDh8HHmq5/XngssxcBDwPnFPazwGez8zDgMtKPyLicGA58CvAMuDLJXD2AL4EnAQcDpxR+kqS+qSjcIiIBcApwF+X2wF8ALi2dLkKOK1cP7Xcpiw/vvQ/FViXma9l5uPAEHBMuQxl5mOZ+RNgXekrSeqTTrccvgj8EfDTcvutwAuZuaPc3gIcXK4fDPwIoCzfVvq/0T7iPqO1S5L6ZNZYHSLiQ8AzmbkhIpYON7fpmmMsG629XUBlmzYiYiWwEmDevHkMDg6OXniL7du3d9y3nVWLd7Rtn8iYMPG6Jot1dce6umNd3elXXWOGA/B+4Dcj4mRgH2B/mi2JORExq2wdLACeKv23AIcAWyJiFnAA8FxL+7DW+4zW/iaZuQZYAzAwMJBLly7toPzmRbzTvu2cvfqGtu2bzxz/mDDxuiaLdXXHurpjXd3pV11j7lbKzAsyc0FmLqQ5oHxbZp4J3A6cXrqtAK4r19eX25Tlt2Vmlvbl5WymQ4FFwD3AvcCicvbTXmUd63vy6CRJ49LJlsNozgfWRcTngO8CV5T2K4CvR8QQzRbDcoDM3BQR1wDfB3YA52bm6wARcR5wM7AHsDYzN02gLknSBHUVDpk5CAyW64/RnGk0ss+rwIdHuf/FwMVt2m8EbuymFknS5PEd0pKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSarM6ncBM8nC1Te86faqxTs4e/UNbL7klD5VJEntueUgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaoYDpKkiuEgSaqMGQ4RsU9E3BMR90fEpoi4qLQfGhF3R8QjEXF1ROxV2vcut4fK8oUtY11Q2h+OiA+2tC8rbUMRsbr3D1OS1I1OthxeAz6Qme8GjgSWRcQS4PPAZZm5CHgeOKf0Pwd4PjMPAy4r/YiIw4HlwK8Ay4AvR8QeEbEH8CXgJOBw4IzSV5LUJ2OGQza2l5t7lksCHwCuLe1XAaeV66eW25Tlx0dElPZ1mflaZj4ODAHHlMtQZj6WmT8B1pW+kqQ+icwcu1Pz3/0G4DCa//L/DLirbB0QEYcAN2XmERHxILAsM7eUZY8CxwIXlvt8o7RfAdxUVrEsM3+3tH8EODYzz2tTx0pgJcC8efOOXrduXUcPcvv27cyePbujvu1sfHJb2/bFBx8woXHm7QtPv9L9OJNtovM1WayrO9bVnZlQ13HHHbchMwc66dvRN8Fl5uvAkRExB/gO8K523crPGGXZaO3ttl7aJlZmrgHWAAwMDOTSpUt3XngxODhIp33bOXvEN7gN23xmd2OOHGfV4h1cunFW1+NMtonO12Sxru5YV3es6826OlspM18ABoElwJyIGA6XBcBT5foW4BCAsvwA4LnW9hH3Ga1dktQnnZyt9LayxUBE7AucADwE3A6cXrqtAK4r19eX25Tlt2Wz72o9sLyczXQosAi4B7gXWFTOftqL5qD1+l48OEnS+HSyW2k+cFU57vBzwDWZeX1EfB9YFxGfA74LXFH6XwF8PSKGaLYYlgNk5qaIuAb4PrADOLfsriIizgNuBvYA1mbmpp49wi4sHGX3kSTNNGOGQ2Y+ALynTftjNGcajWx/FfjwKGNdDFzcpv1G4MYO6pUkTQHfIS1JqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqSK4SBJqhgOkqRKR1/2o/ZG+xTXzZecMsWVSFJvGQ7TgCEjabpxt5IkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqhoMkqWI4SJIqfk3oNObXh0rqF7ccJEkVw0GSVDEcJEmVMcMhIg6JiNsj4qGI2BQRHy/tB0bELRHxSPk5t7RHRFweEUMR8UBEHNUy1orS/5GIWNHSfnREbCz3uTwiYjIerCSpM51sOewAVmXmu4AlwLkRcTiwGrg1MxcBt5bbACcBi8plJfAVaMIE+CxwLHAM8NnhQCl9Vrbcb9nEH5okabzGDIfM3JqZ/1Kuvwg8BBwMnApcVbpdBZxWrp8KfC0bdwFzImI+8EHglsx8LjOfB24BlpVl+2fmnZmZwNdaxpIk9UE0r8cddo5YCPwzcATwRGbOaVn2fGbOjYjrgUsy847SfitwPrAU2CczP1faPw28AgyW/ieU9l8Fzs/MD7VZ/0qaLQzmzZt39Lp16zqqe/v27cyePXvMfhuf3NbReGNZfPABHY0/b194+pXejd8rnc7XVLOu7lhXd2ZCXccdd9yGzBzopG/H73OIiNnA3wJ/mJk/3slhgXYLchztdWPmGmANwMDAQC5dunSMqhuDg4N00vfsUd5X0K3NZ7Zf18jxVy3ewaUbu3+ryWjj90qn8zXVrKs71tUd63qzjs5Wiog9aYLhm5n57dL8dNklRPn5TGnfAhzScvcFwFNjtC9o0y5J6pNOzlYK4Argocz885ZF64HhM45WANe1tJ9VzlpaAmzLzK3AzcCJETG3HIg+Ebi5LHsxIpaUdZ3VMpYkqQ862afxfuAjwMaI+F5p+yRwCXBNRJwDPAF8uCy7ETgZGAJeBj4KkJnPRcSfAPeWfn+cmc+V6x8DrgT2BW4qF0lSn4wZDuXA8mgHGI5v0z+Bc0cZay2wtk37fTQHuSVJ04DvkJYkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVfwO6d2M3zstqRfccpAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFcJAkVQwHSVLFD97bBY324XqS1CtuOUiSKoaDJKliOEiSKoaDJKniAelJ4AFjSbs6w2GG8OtDJXXD3UqSpIrhIEmqGA6SpIrHHNSWxyikmc0tB0lSxXCQJFUMB0lSxXCQJFUMB0lSZcxwiIi1EfFMRDzY0nZgRNwSEY+Un3NLe0TE5RExFBEPRMRRLfdZUfo/EhErWtqPjoiN5T6XR0T0+kFKkrrTyZbDlcCyEW2rgVszcxFwa7kNcBKwqFxWAl+BJkyAzwLHAscAnx0OlNJnZcv9Rq5LkjTFxnyfQ2b+c0QsHNF8KrC0XL8KGATOL+1fy8wE7oqIORExv/S9JTOfA4iIW4BlETEI7J+Zd5b2rwGnATdN5EGNxQ/Gk6Sdi+Z1fIxOTThcn5lHlNsvZOacluXPZ+bciLgeuCQz7yjtt9KExlJgn8z8XGn/NPAKTahckpknlPZfBc7PzA+NUsdKmq0M5s2bd/S6des6epDbt29n9uzZb9ze+OS2ju432ebtC0+/0t8aFh98QNW2fft2Ht/2esf9p8rI53G6sK7uWFd3elnXcccdtyEzBzrp2+t3SLc7XpDjaG8rM9cAawAGBgZy6dKlHRU1ODhIa9+zp8mWw6rFO7h0Y3/fpL75zKVV2+DgIJfe8VLH/afKyOdxurCu7lhXd/pV13jPVnq67C6i/HymtG8BDmnptwB4aoz2BW3aJUl9NN5wWA8Mn3G0Ariupf2sctbSEmBbZm4FbgZOjIi55UD0icDNZdmLEbGknKV0VstYkqQ+GXOfRkT8Dc0xg4MiYgvNWUeXANdExDnAE8CHS/cbgZOBIeBl4KMAmflcRPwJcG/p98fDB6eBj9GcEbUvzYHoST0YLUkaWydnK50xyqLj2/RN4NxRxlkLrG3Tfh9wxFh1SJKmju+QliRVDAdJUsVwkCRVDAdJUsWvCVVXuv36UL9uVNo1ueUgSaq45TDDtfvPftXiHfirIc1sbjlIkiqGgySpYjhIkiqGgySpYjhIkiqGgySpYjhIkiqGgySpYjhIkiqGgySpYjhIkiqGgySp4qerqSdG+2huSbsmw0F94fc8SNObu5UkSRXDQZJUMRwkSRXDQZJU8YC0dgkLV9/AqsU7OHvEgWwPYEuTwy0HSVLFcJAkVQwHSVLFcJAkVTwgrWllKj6Gw3dnS2Nzy0GSVHHLQbu0nW1puCUgjZ9bDpKkilsO2m1N9vGL4fFHvjnPLRbtDtxykCRVpk04RMSyiHg4IoYiYnW/65GkmWxa7FaKiD2ALwG/DmwB7o2I9Zn5/f5WppmkX99m56m1mo6mRTgAxwBDmfkYQESsA04FDAftcnoVMhMdZ/hYyHhCxsBSZGa/ayAiTgeWZebvltsfAY7NzPNG9FsJrCw3fwl4uMNVHAQ826Nye8m6umNd3bGu7syEut6RmW/rpON02XKINm1VamXmGmBN14NH3JeZA+MpbDJZV3esqzvW1R3rerPpckB6C3BIy+0FwFN9qkWSZrzpEg73Aosi4tCI2AtYDqzvc02SNGNNi91KmbkjIs4Dbgb2ANZm5qYerqLrXVFTxLq6Y13dsa7uWFeLaXFAWpI0vUyX3UqSpGnEcJAk1TJzt74Ay2jeDzEErO7huJuBjcD3gPtK24HALcAj5efc0h7A5aWGB4CjWsZZUfo/AqxoaT+6jD9U7hujrQNYCzwDPNhy/77U0rKOTcAO4NXhdQAXAk+WOfsecHLLOi4o4z8MfHCs5w84FLi7rPtqYK/Svne5PVSWLxyxjs3Ay8ATpcaPT5P5WlvqehV4tKWufs/Zp0tNr5a5u2iC89+rer9VanoJ+GFLXVcCj7fM15FT/Dy2ruMHwPXTZL7armOnr3FT+UI91Reag9uPAu8E9gLuBw7v0dibgYNGtP3p8JMFrAY+X66fDNxUfnmWAHe3/JI9Vn7OLdeHf9HuAd5b7nMTcNJo6wB+DTiKN4dDX2ppWcddpaZHW9ZxIfCJNnN5eHlu9i6/4I+W527U5w+4Blherv8l8LFy/feBvyzXlwNXj1jHO4APlXEPoHlhOXwazNdtZb6W0Jy9N1zXdJizA1vWcXepcbxj9axeYHZZxzUtdV0JnN5mvqbqeRxex/+ieePa9ROc+0l7fsd8jev3C/hkXsoTe3PL7QuAC3o09mbqcHgYmF+uzwceLte/Cpwxsh9wBvDVlvavlrb5wA9a2t/ot5N1LOTN4dDPWobvuxB4sGUdF9L+he5NzwvNWWvvHe35o/njexaYNfJ5Hr5vuT6r9IudrOM6ms/06vt8jVjHP5S6ptOc3VJqO7aH89+rev+l1HUl7cNhKn/vFwC3Aj8qc9bL39eePb9jvcbt7sccDqZ5goZtKW29kMA/RsSG8rEeAPMycytA+fn2MerYWfuWUeoebR0j9bOWnc37eRHxQESsjYi546zprcALmbmjzfhv3Kcs31b6txvrSOA9NP91Tqf5erbUdne53dc5i4g9IuJ7wFLgIZr/XHsx/xOqt3xg5300//XfkZnD83Vxma/LImLvcc7XRJ7HLwJ/RLOrd59OHstUzFebdezU7h4OHX0sxzi9PzOPAk4Czo2IXxtHHd2298JU1DLafb4C/ALNC99W4NJJqKnT+8wCPgH8YWb+uM19hk3pfEXEbGAx8IVSV9/nLDNfz8wjgXXAYcC7xjtWL+ttqetfgfdExBE0/0X/MvAfaULj/B7XtTMBvA94JjM3jGgfbawpm69Rlo1qdw+HSftYjsx8qvx8BvgOzSfLPh0R8wHKz2fGqGNn7QtGqXu0dYzUz1rajpWZT5c/6J8Cf0UzZ+Op6VlgTkTMGtH+prHK8gOA50a07wmcQrM/+NvTZb5KXX9Lc6D1mwDTZc6KtwN30uyzn+hYvaz3LcD/pfnwzq3ZeA34PxOYr/E+j+8HfjMiNgPvpjmo/cVpNl/D69ip3T0cJuVjOSJiv4h4y/B14ESafevrac58oPy8rlxfD5wVjSXAtrI5ejNwYkTMLbsLTqTZT7gVeDEilkREAGeNGKvdOkbqZy3rSz+AfYfXMfwHVfyXMmfD/ZdHxN4RcSiwiOZgYNvnL5udp7cDp4+y7uGaTgduK/3fWAfNmRt7Av9jms3XFcDzwObh3RbTYM7OjIi3l3X8Is1/5Q9NZP57VO/vRcScsvyfgBOAH7S8aAdw2oj5morncS+aF+LlNGcN3ZaZZ06D+Wq3jp0b66DErn6hOYPghzT7ST/VozHfSXOWwP00p0N+qrS/leZA1CPl54GlPWi+zOhRmlPjBlrG+m80p5gNAR9taR+g+cV+FPgLfnYaXbUO4G9odjn8O81/Cef0q5aWdfyQ5lTWBJ4uNX29rPOB8gs7v2UdnyrjP0w5K2Rnz195Du4ptX4L2Lu071NuD5Xl7xyxjidLTY/TcnroNJivb5e6XilzMFxXv+fscuA1fnYq62cmOP+9qvcfS10vl/GG67qtzNeDwDeA2VP8PLau4/f42dlK/Z6vtuvY2cWPz5AkVXb33UqSpHEwHCRJFcNBklQxHCRJFcNBklQxHCRJFcNBklT5/+lu8KKEgQf6AAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEICAYAAAC0+DhzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAF3VJREFUeJzt3X+UZGV95/H3xxlRHFdBMRNliINxYkQ5EZ2DRBOdI/4YxIh7oisEBTy4E11MdM/sKppdySrskrP+iOQYXSIICisS1JVFXELExpgoAmLEAZERR2YEQfklAxEd/e4f9bQp+lZN/5juru7p9+ucOl33uU/d+zxV3fWp57m3b6WqkCSp30NG3QBJ0sJjOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNw0IKV5M+TnNPu/0aS7UmWzfI+tiR54S5u4/NJjt3J+rOSnLwr+1jIkpyc5MdJfjjqtmj2GA5LWHtjvC3Jir6y1ycZG2GzBqqqm6vqkVX1i1G3ZaKqOqyqzgZIclySL890W0k+3EKw/3Z/kkryvCGPGVn4JNkP2AgcUFW/Poo2aG4YDloOvHlXN5Ief592UVW9oYXgr27Ap4AvAv84k20mWT6rjXywJwJ3VNXt033gHLdLu8g/Zv1P4D8l2WvQyiTPSXJlknvaz+f0rRtLckqSfwTuB57Uyk5O8k/tU+//TfLYJOcm+Unbxuq+bXwgyda27uokvz+kHavbp+flSX53wifrnybZ0uo9JMmJSb6b5I4k5yd5TN92Xpvk+23dnw17UpLsn+Tu8cBL8pEkt/etPyfJW/qeh9cneSrwYWC8fXf3bXLvJJ9Lcm+SK5L85tBX5MHteCPwAuCoQaOmJBuAo4G3jj/frXxLkrcl+SZwX3vexp+Xe5Ncl+Tf9m3nuCRfTvKeJHcl+V6Swyasv6k99ntJjm7TcZcCT2j7PqvVfXmSTe35G2vPCztp15Yk/znJN5Pcl+SMJCvTm667N8nfJ9l7Ks+XZlFVeVuiN2AL8ELg08DJrez1wFi7/xjgLuC19EYYR7Xlx7b1Y8DNwNPa+oe2ss3AbwKPBq4DvtP2sxz4GPDRvja8BnhsW7cR+CHw8Lbuz4Fz2v3VQAHLJ/RhfJ//oy2/BfgqsAp4GPC/gE+0dQcA24HntXXvA3YALxzy/NwMPKvdvwG4CXhq37qD+p6H17f7xwFfnrCds4A7gYNbP88FzpvC67O2tff3Jql31vjrN+G1/QawH7BnK3sV8AR6HwpfDdwHPL6v3T8H/j2wDHgjcAsQYAXwE+Apre7jgae1++uAbX37/a223Re11+at7fdhj520a0t7zVYC+wK3A18HDmqv02XASaP+e1lqN0cOAngn8CdJHjeh/HDgxqr6eFXtqKpPAN8G/qCvzllVtamt/3kr+2hVfbeq7gE+D3y3qv6+qnYAf0vvjx6Aqjqnqu5oj38vvTeDp0yj7afRezMaHwX8MfBnVbWtqh6gFzCvbFMYrwQuqqovtXX/FfjlTrZ9OfD8JONz6Re05f2BRwH/PI12frqqvtaeg3OBZ+yschvtXAC8s6pmegzjtKraWlX/AlBVf1tVt1TVL6vqk8CN9AJr3Per6m+qN0I5m14IrGzrfgk8PcmeVXVrVW0ass9XA5+rqkvb78N7gD2B5/TVeVC7mr+qqtuq6gfAPwBXVNU17XX6DH2/M5ofhoOoqm8BFwEnTlj1BOD7E8q+T+/T3bitAzZ5W9/9fxmw/MjxhSQbk1zfpq3upjfa2Gcq7U7yx/Q+uf5RVY2/yT8R+Eyb0rgbuB74Bb03uSf0t7eq7gPu2MkuLm/bfx7wJXojhOe32z/07XMq+s/kuZ++52BAvwKcA1xdVe+bsO4dfdNpH55knw96bZIck+Qbfc/N03nwc/2rNlbV/e3uI9vz9GrgDcCtbXrst4fs80G/M+052sos/s5ofhgOGncSvSmF/j/iW+i92fb7DeAHfcszvqxvO77wNuDfAXtX1V7APfSmMqby2HcDR7QRyritwGFVtVff7eHtE+mt9KYzxrfxCHpTWsNcDvw+vYC4HPgy8Fx64XD5kMfMxmWO/wvwZOB1nY1X/ff614PVb5hkn78qT/JE4G+AN9GbFtwL+BZTeK7bfi+pqhfRG018u21rkAf9zrSg249Z+p3R/DEcBEBVbQY+CfxpX/HFwG8l+aN24PDV9ObtL5ql3f4benP+PwKWJ3knvemanUrv9MlPAsdU1XcmrP4wcEp7MyTJ45Ic0dZdALwsye8l2QN4Fzv5G6iqG+l9an0N8KWq+gm9T7R/yPBwuA1Y1bY/be0g71uBP2z7m4rbgCdNUmcFvTflH7X9vI7eyGEqbVrZDjKvAB6gdxxk2CnF5wOHJzk0yUPpHUd6APinqexLC4fhoH7vovcmAkBV3QG8jN4f+B303rReVlU/nqX9XULvmMR36E1F/JTBUw4THQr8OnBB3xTL+Bz4B4ALgb9Lci+9A53Pbv3ZBJwA/G96o4i7gG2T7Otyeqdq3ty3HOCaIfUvAzYBP0wyk+fpHfTm6L+S7v87HD3kMWcAB7Tpov8zqEJVXQe8F/gKvTA5kKmfGvsQer8Dt9A7sP584D8M2c8N9ML0r4Af0zs+9QdV9bMp7ksLRKoc4UmSHsyRgySpw3CQJHUYDpKkDsNBktSxaC98tc8++9Tq1atH3Yw5dd9997FixYrJK+5G7PPSsBT7DKPv99VXX/3jqpp4JYSBFm04rF69mquuumrUzZhTY2NjrFu3btTNmFf2eWlYin2G0fc7ycQrHgzltJIkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKlj0f6H9HxafeLnBpZvOfXweW6JJM0PRw6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqWPK4ZBkWZJrklzUlvdPckWSG5N8MskerfxhbXlzW7+6bxtvb+U3JHlJX/n6VrY5yYmz1z1J0kxMZ+TwZuD6vuW/AN5fVWuAu4DjW/nxwF1V9WTg/a0eSQ4AjgSeBqwH/roFzjLgg8BhwAHAUa2uJGlEphQOSVYBhwMfacsBXgBc0KqcDbyi3T+iLdPWH9rqHwGcV1UPVNX3gM3Awe22uapuqqqfAee1upKkEZnqyOEvgbcCv2zLjwXurqodbXkbsG+7vy+wFaCtv6fV/1X5hMcMK5ckjcjyySokeRlwe1VdnWTdePGAqjXJumHlgwKqBpSRZAOwAWDlypWMjY0Nb/gs2njgjoHlc73/7du3z1sfFwr7vDQsxT7D4ur3pOEAPBd4eZKXAg8HHkVvJLFXkuVtdLAKuKXV3wbsB2xLshx4NHBnX/m4/scMK3+QqjodOB1g7dq1tW7duik0f9cdd+LnBpZvOXpu9z82NsZ89XGhsM9Lw1LsMyyufk86rVRVb6+qVVW1mt4B5cuq6mjgi8ArW7Vjgc+2+xe2Zdr6y6qqWvmR7Wym/YE1wNeAK4E17eynPdo+LpyV3kmSZmQqI4dh3gacl+Rk4BrgjFZ+BvDxJJvpjRiOBKiqTUnOB64DdgAnVNUvAJK8CbgEWAacWVWbdqFdkqRdNK1wqKoxYKzdv4nemUYT6/wUeNWQx58CnDKg/GLg4um0RZI0d/wPaUlSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpI7lo27AUrL6xM8NLN9y6uHz3BJJ2jlHDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeqYNBySPDzJ15L8c5JNSf5bK98/yRVJbkzyySR7tPKHteXNbf3qvm29vZXfkOQlfeXrW9nmJCfOfjclSdMxlZHDA8ALqup3gGcA65McAvwF8P6qWgPcBRzf6h8P3FVVTwbe3+qR5ADgSOBpwHrgr5MsS7IM+CBwGHAAcFSrK0kakUnDoXq2t8WHtlsBLwAuaOVnA69o949oy7T1hyZJKz+vqh6oqu8Bm4GD221zVd1UVT8Dzmt1JUkjMqUv+2mf7q8GnkzvU/53gburakersg3Yt93fF9gKUFU7ktwDPLaVf7Vvs/2P2Tqh/NlD2rEB2ACwcuVKxsbGptL8XbbxwB0Dy6e7/+luZ/v27fPWx4XCPi8NS7HPsLj6PaVwqKpfAM9IshfwGeCpg6q1nxmyblj5oNFLDSijqk4HTgdYu3ZtrVu3bucNnyXHDfsGt6Ont//pbmdsbIz56uNCYZ+XhqXYZ1hc/Z7W2UpVdTcwBhwC7JVkPFxWAbe0+9uA/QDa+kcDd/aXT3jMsHJJ0ohM5Wylx7URA0n2BF4IXA98EXhlq3Ys8Nl2/8K2TFt/WVVVKz+ync20P7AG+BpwJbCmnf20B72D1hfORuckSTMzlWmlxwNnt+MODwHOr6qLklwHnJfkZOAa4IxW/wzg40k20xsxHAlQVZuSnA9cB+wATmjTVSR5E3AJsAw4s6o2zVoPp2H1kGkfSVpqJg2HqvomcNCA8pvonWk0sfynwKuGbOsU4JQB5RcDF0+hvZKkeeB/SEuSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1GA6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHVP6sh8NNuwqrltOPXyeWyJJs8twWACGhcxZ61fMc0skqcdpJUlSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6vBrQhewa39wD8cN+ApRv6Na0lxz5CBJ6jAcJEkdhoMkqWPScEiyX5IvJrk+yaYkb27lj0lyaZIb28+9W3mSnJZkc5JvJnlm37aObfVvTHJsX/mzklzbHnNaksxFZyVJUzOVkcMOYGNVPRU4BDghyQHAicAXqmoN8IW2DHAYsKbdNgAfgl6YACcBzwYOBk4aD5RWZ0Pf49bvetckSTM1aThU1a1V9fV2/17gemBf4Ajg7FbtbOAV7f4RwMeq56vAXkkeD7wEuLSq7qyqu4BLgfVt3aOq6itVVcDH+rYlSRqBaZ3KmmQ1cBBwBbCyqm6FXoAk+bVWbV9ga9/DtrWynZVvG1A+aP8b6I0wWLlyJWNjY9Np/qQ2HrhjVrYzrF3T3f7KPQc/Zrb7vZBs3759t+7fIPZ56VhM/Z5yOCR5JPAp4C1V9ZOdHBYYtKJmUN4trDodOB1g7dq1tW7duklaPT2D/qdgJrYcvW5Wtr/xwB2899ruSzRs+7uDsbExZvt1Xejs89KxmPo9pbOVkjyUXjCcW1WfbsW3tSkh2s/bW/k2YL++h68CbpmkfNWAcknSiEzlbKUAZwDXV9X7+lZdCIyfcXQs8Nm+8mPaWUuHAPe06adLgBcn2bsdiH4xcElbd2+SQ9q+junbliRpBKYyrfRc4LXAtUm+0creAZwKnJ/keOBm4FVt3cXAS4HNwP3A6wCq6s4k7waubPXeVVV3tvtvBM4C9gQ+326SpBGZNByq6ssMPi4AcOiA+gWcMGRbZwJnDii/Cnj6ZG2RJM0P/0NaktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcNwkCR1+B3Su5nVQ67f5PdOS5oORw6SpA7DQZLUYThIkjoMB0lSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1GE4SJI6DAdJUocX3luEhl1cT5JmiyMHSVKH4SBJ6jAcJEkdhoMkqcMD0nPAA8aSFjvDYYnw60MlTYfTSpKkDsNBktRhOEiSOjzmoIE8RiEtbY4cJEkdhoMkqcNwkCR1GA6SpA7DQZLUMWk4JDkzye1JvtVX9pgklya5sf3cu5UnyWlJNif5ZpJn9j3m2Fb/xiTH9pU/K8m17TGnJclsd1KSND1TGTmcBayfUHYi8IWqWgN8oS0DHAasabcNwIegFybAScCzgYOBk8YDpdXZ0Pe4ifuSJM2zSf/Poaq+lGT1hOIjgHXt/tnAGPC2Vv6xqirgq0n2SvL4VvfSqroTIMmlwPokY8CjquorrfxjwCuAz+9KpybjhfEkaedm+k9wK6vqVoCqujXJr7XyfYGtffW2tbKdlW8bUD5Qkg30RhmsXLmSsbGxGTV+44E7ZvS4+bZyz7lv67DncNh+Z/qcT9X27dvnfB8LjX1eOhZTv2f7P6QHHS+oGZQPVFWnA6cDrF27ttatWzeDJsJxi2TksPHAHbz32rn9J/YtR68bWD7sORpWf7aMjY0x09d1sbLPS8di6vdMz1a6rU0X0X7e3sq3Afv11VsF3DJJ+aoB5ZKkEZppOFwIjJ9xdCzw2b7yY9pZS4cA97Tpp0uAFyfZux2IfjFwSVt3b5JD2llKx/RtS5I0IpPOWST5BL0Dyvsk2UbvrKNTgfOTHA/cDLyqVb8YeCmwGbgfeB1AVd2Z5N3Ala3eu8YPTgNvpHdG1J70DkTP6cFoSdLkpnK20lFDVh06oG4BJwzZzpnAmQPKrwKePlk7JEnzx/+QliR1GA6SpA7DQZLUYThIkjr8mlBNy3S/PtSvG5UWJ0cOkqQORw5LnBchlDSIIwdJUofhIEnqMBwkSR2GgySpw3CQJHUYDpKkDsNBktRhOEiSOgwHSVKH4SBJ6jAcJEkdhoMkqcML72lWeAE/afdiOGgkhoXJWetXzHNLJA3itJIkqcNwkCR1GA6SpA7DQZLU4QFpLQrDDmBvOfXweW6JtDQ4cpAkdRgOkqQOw0GS1GE4SJI6PCCtBeXaH9zDcXN8KQ4PbkuTc+QgSepw5KBFbWcX/HMkIM2cIwdJUocjB+225voy4h670O7MkYMkqWPBhEOS9UluSLI5yYmjbo8kLWULYlopyTLgg8CLgG3AlUkurKrrRtsyLSWj+ja7YafvOj2lUVoQ4QAcDGyuqpsAkpwHHAEYDlp0phsyGw+cne0MM5OQ8XiKUlWjbgNJXgmsr6rXt+XXAs+uqjdNqLcB2NAWnwLcMK8NnX/7AD8edSPmmX1eGpZin2H0/X5iVT1uKhUXysghA8o6qVVVpwOnz31zFoYkV1XV2lG3Yz7Z56VhKfYZFle/F8oB6W3Afn3Lq4BbRtQWSVryFko4XAmsSbJ/kj2AI4ELR9wmSVqyFsS0UlXtSPIm4BJgGXBmVW0acbMWgiUzhdbHPi8NS7HPsIj6vSAOSEuSFpaFMq0kSVpADAdJUofhsAAtxUuJJNkvyReTXJ9kU5I3j7pN8yXJsiTXJLlo1G2ZD0n2SnJBkm+31/t3R92muZbkP7bf628l+USSh4+6TZMxHBaYvkuJHAYcAByV5IDRtmpe7AA2VtVTgUOAE5ZIvwHeDFw/6kbMow8A/6+qfhv4HXbzvifZF/hTYG1VPZ3eSTdHjrZVkzMcFp5fXUqkqn4GjF9KZLdWVbdW1dfb/XvpvWHsO9pWzb0kq4DDgY+Mui3zIcmjgOcBZwBU1c+q6u7RtmpeLAf2TLIceASL4P+4DIeFZ19ga9/yNpbAm2S/JKuBg4ArRtuSefGXwFuBX466IfPkScCPgI+2qbSPJFkx6kbNpar6AfAe4GbgVuCeqvq70bZqcobDwjOlS4nsrpI8EvgU8Jaq+smo2zOXkrwMuL2qrh51W+bRcuCZwIeq6iDgPmC3Pq6WZG96o//9gScAK5K8ZrStmpzhsPAs2UuJJHkovWA4t6o+Per2zIPnAi9PsoXe9OELkpwz2ibNuW3AtqoaHxVeQC8sdmcvBL5XVT+qqp8DnwaeM+I2TcpwWHiW5KVEkoTePPT1VfW+UbdnPlTV26tqVVWtpvc6X1ZVC/4T5a6oqh8CW5M8pRUdyu5/af6bgUOSPKL9nh/KIjgIvyAun6F/tYQvJfJc4LXAtUm+0creUVUXj7BNmht/ApzbPvzcBLxuxO2ZU1V1RZILgK/TOyvvGhbBZTS8fIYkqcNpJUlSh+EgSeowHCRJHYaDJKnDcJAkdRgOkqQOw0GS1PH/AQMOiyEIuQGhAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" }, { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYcAAAEICAYAAAC0+DhzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDMuMC4yLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvOIA7rQAAFppJREFUeJzt3X+0ZWV93/H3R0bAgAiCThWQ0Timoix/ZJaQH00mYnUEA/6BLYYIWJLJsmhtQ6pYm2BV1sK4jAld/uikEMAYEU0TRsUQKt7aGEEgVnEghAFHmEAlCkwcjeLot3+cZ+hxnnPnnnvn/r7v11p33bOf/ey9n++dO+dzn733OSdVhSRJwx6z0AOQJC0+hoMkqWM4SJI6hoMkqWM4SJI6hoMkqWM4aMVL8rYkf9wePy3JziT7zfIxtiV5yWzuU5pLhoPmXHti/EaSg4bafi3JxAIOa6SquqeqDq6qH87XMZNcluSd+7D9AUkuTfKPSf5vkt+czfG1Yzw3ybVJvpnEF0etAIaD5ssq4I37upMM+Hv7494GrAWOAX4JeFOSDbN8jB8AVwHnzPJ+tUj5n0zz5d3AbyU5dNTKJD+b5KYkO9r3nx1aN5HkwiSfB74LPKO1vTPJX7fTQJ9IcniSD7e/oG9KsmZoH3+Q5N627pYk/2KScaxJUklWJfmZtu/dX99Lsq31e0yS85PcleRbSa5K8sSh/bwmydfburdO9kNJshE4g8ET+s4kn2jtz241PpxkS5JT9vKzPRN4R1U9VFW3A38InL2X/tNWVXdU1SXAltncrxYvw0Hz5WZgAvitPVe0J9VPARcDhwO/B3wqyeFD3V4DbAQeD3y9tZ3e2o8EfhL4AvBHwBOB24ELhra/CXh+W/cnwMeSHLi3AVfVF9oppoOBw4AbgI+01f8OeCXwi8BTgYeA97V6jgU+0Mb21FbTUZMcYxPwYeB327F+OcljgU8Afwk8GXgD8OEkPzXiZ3dYO8aXh5q/DDxnb7VNJklmsp2WH8NB8+l3gDckedIe7ScDd1bVh6pqV1V9BPhb4JeH+lxWVVva+h+0tj+qqruqagfwaeCuqvqfVbUL+Bjwgt0bV9UfV9W32vbvAQ4AuifbvbgY+A6wexbwG8Bbq2p7VX2fwamd05KsAk4DPllVn2vrfhv40TSOdQJwMHBRVT1SVdcDnwRePaLvwe37jqG2HQxCtNOuT1zUZjzbkrwryXOSHJPkQmDkjEorj+GgeVNVX2XwJHf+Hqueyv+fDez2dQYzgt3uHbHLbww9/qcRy7ufOElyXpLb22mrh4EnAEeMM+4kvwGsB36lqnY/yR8D/Fk77fMwg5nKD4HVrZ5Hx1tV3wG+Nc6xmqcC9w4dC/qfx2472/dDhtoOAb49yb6PZxByxwG/ADzC4N/kegbXFf56GuPUMrZqoQegFecC4G+A9wy13cfgyXbY04C/GFqe8R0y7frCm4ETgS1V9aMkDwFTnkJp274D+Pk2Q9ntXuDfVNXnR2xzP/DsoeWfYHBqaTJ71nYfcHSSxwwFxNOAv+s2rHqoHe95wHWt+XlMfm3gr6rqc+3xPQxmNb+9l7FphXLmoHlVVVuBjzI4Z7/bNcCzkvxKuxD8r4FjGfxFOxseD+wC/gFYleR3+PG/tEdKcnQb65lVtecT8weBC5Mc0/o+Kcmpbd3HgVck+fkk+wNvZ+//174BPGNo+UYGf92/Kcljk6xncIrtykm2vwL4z0kOS/LPgV8HLhvVcY/ZyNjaXWIHAvu35QOTHDCTfWlpMBy0EN4OPPqah6r6FvAK4DwGp1/eBLyiqr45S8e7lsE1ib9jcHrme4w+TbWnE4F/Bnx86I6l3X+R/wGwGfjLJN9mcLH6+FbPFuBcBhe+72dwsXr7Xo5zCXBsO0X151X1CHAK8HLgm8D7GQTU306y/QXAXa22/wW8u6r+YpK+M3UMg1N1u+v/J+COWT6GFpH4YT+SpD05c5AkdQwHSVLHcJAkdQwHSVJnyb7O4Ygjjqg1a9bMaNvvfOc7HHTQQVN3XEaseflbafWCNU/XLbfc8s2q2vMdCkZasuGwZs0abr755hltOzExwfr162d3QIucNS9/K61esObpSrLnOxFMytNKkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqSO4SBJ6hgOkqTOkn2F9Hxac/6nRrZvu+jkeR6JJM0PZw6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpM7Y4ZBkvyRfSvLJtvz0JDcmuTPJR5Ps39oPaMtb2/o1Q/t4S2u/I8nLhto3tLatSc6fvfIkSTMxnZnDG4Hbh5bfBby3qtYCDwHntPZzgIeq6pnAe1s/khwLnA48B9gAvL8Fzn7A+4CXA8cCr259JUkLZKxwSHIUcDLw39tygBcDH29dLgde2R6f2pZp609s/U8Frqyq71fV14CtwIva19aquruqHgGubH0lSQtk3JnD7wNvAn7Ulg8HHq6qXW15O3Bke3wkcC9AW7+j9X+0fY9tJmuXJC2QVVN1SPIK4IGquiXJ+t3NI7rWFOsmax8VUDWijSQbgY0Aq1evZmJiYvKB78XOnTunte15x+0a2T7T4y+E6da8HKy0mldavWDNc2nKcAB+DjglyUnAgcAhDGYShyZZ1WYHRwH3tf7bgaOB7UlWAU8AHhxq3214m8naf0xVbQI2Aaxbt67Wr18/xvB7ExMTTGfbs8//1Mj2bWfM7PgLYbo1LwcrreaVVi9Y81ya8rRSVb2lqo6qqjUMLihfX1VnAJ8FTmvdzgKubo83t2Xa+uurqlr76e1upqcDa4EvAjcBa9vdT/u3Y2yeleokSTMyzsxhMm8GrkzyTuBLwCWt/RLgQ0m2MpgxnA5QVVuSXAXcBuwCzq2qHwIkeT1wLbAfcGlVbdmHcUmS9tG0wqGqJoCJ9vhuBnca7dnne8CrJtn+QuDCEe3XANdMZyySpLnjK6QlSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUWbXQA1hJ1pz/qZHt2y46eZ5HIkl758xBktQxHCRJHcNBktQxHCRJHcNBktQxHCRJHcNBktQxHCRJHcNBktQxHCRJHcNBktQxHCRJHcNBktSZMhySHJjki0m+nGRLkv/S2p+e5MYkdyb5aJL9W/sBbXlrW79maF9vae13JHnZUPuG1rY1yfmzX6YkaTrGmTl8H3hxVT0PeD6wIckJwLuA91bVWuAh4JzW/xzgoap6JvDe1o8kxwKnA88BNgDvT7Jfkv2A9wEvB44FXt36SpIWyJThUAM72+Jj21cBLwY+3tovB17ZHp/almnrT0yS1n5lVX2/qr4GbAVe1L62VtXdVfUIcGXrK0laIGN92E/76/4W4JkM/sq/C3i4qna1LtuBI9vjI4F7AapqV5IdwOGt/Yah3Q5vc+8e7cdPMo6NwEaA1atXMzExMc7wOzt37pzWtucdt2tk+3SPP1v7mYnp1rwcrLSaV1q9YM1zaaxwqKofAs9PcijwZ8CzR3Vr3zPJusnaR81eakQbVbUJ2ASwbt26Wr9+/d4HPomJiQmms+3Zk32C2xnTO/5s7WcmplvzcrDSal5p9YI1z6Vp3a1UVQ8DE8AJwKFJdofLUcB97fF24GiAtv4JwIPD7XtsM1m7JGmBjHO30pPajIEkjwNeAtwOfBY4rXU7C7i6Pd7clmnrr6+qau2nt7uZng6sBb4I3ASsbXc/7c/govXm2ShOkjQz45xWegpwebvu8Bjgqqr6ZJLbgCuTvBP4EnBJ638J8KEkWxnMGE4HqKotSa4CbgN2Aee201UkeT1wLbAfcGlVbZm1CqdhzSSnfSRppZkyHKrqK8ALRrTfzeBOoz3bvwe8apJ9XQhcOKL9GuCaMcYrSZoHvkJaktQxHCRJHcNBktQxHCRJHcNBktQxHCRJHcNBktQxHCRJHcNBktQxHCRJHcNBktQxHCRJnbE+7EejTfYurtsuOnmeRyJJs8twWAQMGUmLjaeVJEkdw0GS1DEcJEkdw0GS1DEcJEkdw0GS1DEcJEkdw0GS1DEcJEkdw0GS1DEcJEkdw0GS1DEcJEkdw0GS1DEcJEkdw0GS1DEcJEkdw0GS1PFjQhcxPz5U0kJx5iBJ6hgOkqSO4SBJ6kwZDkmOTvLZJLcn2ZLkja39iUmuS3Jn+35Ya0+Si5NsTfKVJC8c2tdZrf+dSc4aav/pJLe2bS5OkrkoVpI0nnFmDruA86rq2cAJwLlJjgXOBz5TVWuBz7RlgJcDa9vXRuADMAgT4ALgeOBFwAW7A6X12Ti03YZ9L02SNFNThkNV3V9Vf9Mefxu4HTgSOBW4vHW7HHhle3wqcEUN3AAcmuQpwMuA66rqwap6CLgO2NDWHVJVX6iqAq4Y2pckaQFM61bWJGuAFwA3Aqur6n4YBEiSJ7duRwL3Dm22vbXtrX37iPZRx9/IYIbB6tWrmZiYmM7wH7Vz586R25533K4Z7W9Pk41rrve/N5PVvJyttJpXWr1gzXNp7HBIcjDwp8C/r6p/3MtlgVEragbtfWPVJmATwLp162r9+vVTjHq0iYkJRm179iSvK5iubWf0+56P/e/NZDUvZyut5pVWL1jzXBrrbqUkj2UQDB+uqv/Rmr/RTgnRvj/Q2rcDRw9tfhRw3xTtR41olyQtkHHuVgpwCXB7Vf3e0KrNwO47js4Crh5qP7PdtXQCsKOdfroWeGmSw9qF6JcC17Z1305yQjvWmUP7kiQtgHFOK/0c8Brg1iT/p7X9J+Ai4Kok5wD3AK9q664BTgK2At8FXgtQVQ8meQdwU+v39qp6sD1+HXAZ8Djg0+1LkrRApgyHqvorRl8XADhxRP8Czp1kX5cCl45ovxl47lRjkSTND18hLUnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnq+BnSy8xknzt92YaD5nkkkpYyZw6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnqGA6SpI7hIEnq+MZ7S9Bkb64nSbPFmYMkqWM4SJI6hoMkqWM4SJI6XpCeA14wlrTUGQ4rxK1/v4OzR4TWtotOXoDRSFrsPK0kSeoYDpKkjuEgSep4zUEjTXZR3WsU0srgzEGS1DEcJEkdw0GS1DEcJEkdw0GS1JkyHJJcmuSBJF8dantikuuS3Nm+H9bak+TiJFuTfCXJC4e2Oav1vzPJWUPtP53k1rbNxUky20VKkqZnnJnDZcCGPdrOBz5TVWuBz7RlgJcDa9vXRuADMAgT4ALgeOBFwAW7A6X12Ti03Z7HkiTNsylf51BVn0uyZo/mU4H17fHlwATw5tZ+RVUVcEOSQ5M8pfW9rqoeBEhyHbAhyQRwSFV9obVfAbwS+PS+FDWVyd5nSJI0MNMXwa2uqvsBqur+JE9u7UcC9w71297a9ta+fUT7SEk2MphlsHr1aiYmJmY2+MfBecftmtG2S9VkNU/2M5zs5zPTn/lC2Llz55Ia775aafWCNc+l2X6F9KjrBTWD9pGqahOwCWDdunW1fv36GQwR/uuHr+Y9t66sF4efd9yukTVvO2P9yP6Tzawm678YTUxMMNPfkaVopdUL1jyXZnq30jfa6SLa9wda+3bg6KF+RwH3TdF+1Ih2SdICmmk4bAZ233F0FnD1UPuZ7a6lE4Ad7fTTtcBLkxzWLkS/FLi2rft2khPaXUpnDu1LkrRApjy3kuQjDC4oH5FkO4O7ji4CrkpyDnAP8KrW/RrgJGAr8F3gtQBV9WCSdwA3tX5v331xGngdgzuiHsfgQvScXoyWJE1tnLuVXj3JqhNH9C3g3En2cylw6Yj2m4HnTjUOSdL88RXSkqSO4SBJ6hgOkqSO4SBJ6qysV4Jpn03340P9uFFpaXLmIEnqOHNY4Sb7y17SyubMQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3DQZLUMRwkSR3feE+zwjfwk5YXw0ELws95kBY3TytJkjqGgySpYzhIkjqGgySp4wVpLQlewJbmlzMHSVLHcJAkdQwHSVLHcJAkdbwgrUVlPt6Gw4vb0tScOUiSOs4ctKTtbaZx2YaD5nEk0vLizEGS1HHmoGXr1r/fwdlzeA3Daxdazpw5SJI6iyYckmxIckeSrUnOX+jxSNJKtihOKyXZD3gf8C+B7cBNSTZX1W0LOzKtJAv1aXaentJitCjCAXgRsLWq7gZIciVwKmA4aMmZrZCZ7n7OO27XyGssMwkZA0upqoUeA0lOAzZU1a+15dcAx1fV6/fotxHY2BZ/Crhjhoc8AvjmDLddqqx5+Vtp9YI1T9cxVfWkcToulplDRrR1qVVVm4BN+3yw5OaqWrev+1lKrHn5W2n1gjXPpcVyQXo7cPTQ8lHAfQs0Fkla8RZLONwErE3y9CT7A6cDmxd4TJK0Yi2K00pVtSvJ64Frgf2AS6tqyxwecp9PTS1B1rz8rbR6wZrnzKK4IC1JWlwWy2klSdIiYjhIkjrLOhymekuOJAck+Whbf2OSNfM/ytkzRr2/meS2JF9J8pkkxyzEOGfTuG+7kuS0JJVkyd/2OE7NSf5V+7fekuRP5nuMs22M3+2nJflski+13++TFmKcsyXJpUkeSPLVSdYnycXt5/GVJC+c9UFU1bL8YnBh+y7gGcD+wJeBY/fo82+BD7bHpwMfXehxz3G9vwT8RHv8uqVc77g1t36PBz4H3ACsW+hxz8O/81rgS8BhbfnJCz3ueah5E/C69vhYYNtCj3sfa/4F4IXAVydZfxLwaQavETsBuHG2x7CcZw6PviVHVT0C7H5LjmGnApe3xx8HTkwy6gV5S8GU9VbVZ6vqu23xBgavJ1nKxvk3BngH8LvA9+ZzcHNknJp/HXhfVT0EUFUPzPMYZ9s4NRdwSHv8BJb466Sq6nPAg3vpcipwRQ3cABya5CmzOYblHA5HAvcOLW9vbSP7VNUuYAdw+LyMbvaNU++wcxj85bGUTVlzkhcAR1fVJ+dzYHNonH/nZwHPSvL5JDck2TBvo5sb49T8NuBXk2wHrgHeMD9DWzDT/f8+bYvidQ5zZJy35BjrbTuWiLFrSfKrwDrgF+d0RHNvrzUneQzwXuDs+RrQPBjn33kVg1NL6xnMDv93kudW1cNzPLa5Mk7NrwYuq6r3JPkZ4EOt5h/N/fAWxJw/dy3nmcM4b8nxaJ8kqxhMR/c2lVvMxnoLkiQvAd4KnFJV35+nsc2VqWp+PPBcYCLJNgbnZjcv8YvS4/5eX11VP6iqrzF4g8q18zS+uTBOzecAVwFU1ReAAxm8Qd1yNedvObScw2Gct+TYDJzVHp8GXF/tas8SNGW97RTLf2MQDEv9PDRMUXNV7aiqI6pqTVWtYXCd5ZSqunlhhjsrxvm9/nMGNx+Q5AgGp5nuntdRzq5xar4HOBEgybMZhMM/zOso59dm4Mx219IJwI6qun82D7BsTyvVJG/JkeTtwM1VtRm4hMH0cyuDGcPpCzfifTNmve8GDgY+1q6731NVpyzYoPfRmDUvK2PWfC3w0iS3AT8E/mNVfWvhRr1vxqz5POAPk/wHBqdXzl7Cf+iR5CMMTgse0a6jXAA8FqCqPsjguspJwFbgu8BrZ30MS/jnJ0maI8v5tJIkaYYMB0lSx3CQJHUMB0lSx3CQJHUMB0lSx3CQJHX+HzEDS2B1ubMgAAAAAElFTkSuQmCC\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# 以 AMT_CREDIT 為例\n", "app_train['AMT_CREDIT'].hist(bins = 50)\n", "plt.title(\"Original\")\n", "plt.show()\n", "value = app_train['AMT_CREDIT'].values\n", "\n", "app_train['AMT_CREDIT_Norm1'] = ( value - np.mean(value) ) / ( np.std(value) )\n", "app_train['AMT_CREDIT_Norm1'].hist(bins = 50)\n", "plt.title(\"Normalized with Z-transform\")\n", "plt.show()\n", "\n", "app_train['AMT_CREDIT_Norm2'] = ( value - min(value) ) / ( max(value) - min(value) )\n", "app_train['AMT_CREDIT_Norm2'].hist(bins = 50)\n", "plt.title(\"Normalized to 0 ~ 1\")\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 作業 7\n", "## 處理 outliers\n", "* 新增欄位註記\n", "* outliers 或 NA 填補\n", " 1. 平均數 (mean)\n", " 2. 中位數 (median, or Q50)\n", " 3. 最大/最小值 (max/min, Q100, Q0)\n", " 4. 分位數 (quantile)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "# Import 需要的套件\n", "import os\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "\n", "%matplotlib inline\n", "\n", "# 設定 data_path\n", "dir_data = './data/'" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Path of read in data: ./data/application_train.csv\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SK_ID_CURRTARGETNAME_CONTRACT_TYPECODE_GENDERFLAG_OWN_CARFLAG_OWN_REALTYCNT_CHILDRENAMT_INCOME_TOTALAMT_CREDITAMT_ANNUITY...FLAG_DOCUMENT_18FLAG_DOCUMENT_19FLAG_DOCUMENT_20FLAG_DOCUMENT_21AMT_REQ_CREDIT_BUREAU_HOURAMT_REQ_CREDIT_BUREAU_DAYAMT_REQ_CREDIT_BUREAU_WEEKAMT_REQ_CREDIT_BUREAU_MONAMT_REQ_CREDIT_BUREAU_QRTAMT_REQ_CREDIT_BUREAU_YEAR
01000021Cash loansMNY0202500.0406597.524700.5...00000.00.00.00.00.01.0
11000030Cash loansFNN0270000.01293502.535698.5...00000.00.00.00.00.00.0
21000040Revolving loansMYY067500.0135000.06750.0...00000.00.00.00.00.00.0
31000060Cash loansFNY0135000.0312682.529686.5...0000NaNNaNNaNNaNNaNNaN
41000070Cash loansMNY0121500.0513000.021865.5...00000.00.00.00.00.00.0
\n", "

5 rows × 122 columns

\n", "
" ], "text/plain": [ " SK_ID_CURR TARGET NAME_CONTRACT_TYPE CODE_GENDER FLAG_OWN_CAR \\\n", "0 100002 1 Cash loans M N \n", "1 100003 0 Cash loans F N \n", "2 100004 0 Revolving loans M Y \n", "3 100006 0 Cash loans F N \n", "4 100007 0 Cash loans M N \n", "\n", " FLAG_OWN_REALTY CNT_CHILDREN AMT_INCOME_TOTAL AMT_CREDIT AMT_ANNUITY \\\n", "0 Y 0 202500.0 406597.5 24700.5 \n", "1 N 0 270000.0 1293502.5 35698.5 \n", "2 Y 0 67500.0 135000.0 6750.0 \n", "3 Y 0 135000.0 312682.5 29686.5 \n", "4 Y 0 121500.0 513000.0 21865.5 \n", "\n", " ... FLAG_DOCUMENT_18 FLAG_DOCUMENT_19 FLAG_DOCUMENT_20 FLAG_DOCUMENT_21 \\\n", "0 ... 0 0 0 0 \n", "1 ... 0 0 0 0 \n", "2 ... 0 0 0 0 \n", "3 ... 0 0 0 0 \n", "4 ... 0 0 0 0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_HOUR AMT_REQ_CREDIT_BUREAU_DAY \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_WEEK AMT_REQ_CREDIT_BUREAU_MON \\\n", "0 0.0 0.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "\n", " AMT_REQ_CREDIT_BUREAU_QRT AMT_REQ_CREDIT_BUREAU_YEAR \n", "0 0.0 1.0 \n", "1 0.0 0.0 \n", "2 0.0 0.0 \n", "3 NaN NaN \n", "4 0.0 0.0 \n", "\n", "[5 rows x 122 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f_app = os.path.join(dir_data, 'application_train.csv')\n", "print('Path of read in data: %s' % (f_app))\n", "app_train = pd.read_csv(f_app)\n", "app_train.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# It's your turn\n", "### 1. 列出 AMT_ANNUITY 的 q0 - q100\n", "### 2.1 將 AMT_ANNUITY 中的 NAs 暫時以中位數填補\n", "### 2.2 將 AMT_ANNUITY 的數值標準化至 -1 ~ 1 間\n", "### 3. 將 AMT_GOOD_PRICE 的 NAs 以眾數填補" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[1615.5, 6182.910000000001, 6750.0, 7875.0, 8703.0, 9000.0, 9000.0, 9553.5, 10125.0, 10503.0, 11074.5, 11430.0, 11970.0, 12375.0, 12838.5, 13302.0, 13500.0, 13500.0, 13896.0, 14350.5, 14701.5, 15124.5, 15583.5, 15970.5, 16209.0, 16524.0, 16852.5, 17109.0, 17487.0, 17806.5, 18189.0, 18643.5, 19102.5, 19417.5, 19836.0, 20151.0, 20421.0, 20853.0, 21186.0, 21609.0, 21865.5, 22018.5, 22342.5, 22527.0, 22972.5, 23346.0, 23719.5, 23931.0, 24259.5, 24583.589999999953, 24903.0, 25240.5, 25537.5, 25960.5, 26217.0, 26316.0, 26640.0, 26860.5, 27189.0, 27558.0, 28062.0, 28480.5, 28917.0, 29340.0, 29830.5, 30078.0, 30483.0, 30897.0, 31275.0, 31630.5, 32004.0, 32458.5, 32895.0, 33376.5, 33984.0, 34596.0, 35345.16000000005, 35806.5, 36328.5, 36747.0, 37516.5, 37948.5, 38556.0, 39456.0, 40135.5, 40806.0, 41845.5, 42790.5, 43735.5, 44991.0, 45954.0, 47254.5, 48465.0, 49878.0, 51745.5, 53325.0, 55624.5, 58482.0, 62964.0, 70006.5, 258025.5]\n", " q value\n", "0 0 1615.50\n", "1 1 6182.91\n", "2 2 6750.00\n", "3 3 7875.00\n", "4 4 8703.00\n", "5 5 9000.00\n", "6 6 9000.00\n", "7 7 9553.50\n", "8 8 10125.00\n", "9 9 10503.00\n", "10 10 11074.50\n", "11 11 11430.00\n", "12 12 11970.00\n", "13 13 12375.00\n", "14 14 12838.50\n", "15 15 13302.00\n", "16 16 13500.00\n", "17 17 13500.00\n", "18 18 13896.00\n", "19 19 14350.50\n", "20 20 14701.50\n", "21 21 15124.50\n", "22 22 15583.50\n", "23 23 15970.50\n", "24 24 16209.00\n", "25 25 16524.00\n", "26 26 16852.50\n", "27 27 17109.00\n", "28 28 17487.00\n", "29 29 17806.50\n", ".. ... ...\n", "71 71 32458.50\n", "72 72 32895.00\n", "73 73 33376.50\n", "74 74 33984.00\n", "75 75 34596.00\n", "76 76 35345.16\n", "77 77 35806.50\n", "78 78 36328.50\n", "79 79 36747.00\n", "80 80 37516.50\n", "81 81 37948.50\n", "82 82 38556.00\n", "83 83 39456.00\n", "84 84 40135.50\n", "85 85 40806.00\n", "86 86 41845.50\n", "87 87 42790.50\n", "88 88 43735.50\n", "89 89 44991.00\n", "90 90 45954.00\n", "91 91 47254.50\n", "92 92 48465.00\n", "93 93 49878.00\n", "94 94 51745.50\n", "95 95 53325.00\n", "96 96 55624.50\n", "97 97 58482.00\n", "98 98 62964.00\n", "99 99 70006.50\n", "100 100 258025.50\n", "\n", "[101 rows x 2 columns]\n" ] } ], "source": [ "\"\"\"\n", "YOUR CODE HERE\n", "\"\"\"\n", "q_all = list(range(101))\n", "quantile_q_all = [np.percentile(app_train[~app_train['AMT_ANNUITY'].isnull()]['AMT_ANNUITY'], q = i) for i in q_all]\n", "print(quantile_q_all)\n", "\n", "# 1: 計算 AMT_ANNUITY 的 q0 - q100\n", "q_all = pd.DataFrame({'q': list(range(101)),\n", " 'value': quantile_q_all})\n", "\n", "print(q_all)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
qvalue
001615.50
116182.91
226750.00
337875.00
448703.00
559000.00
669000.00
779553.50
8810125.00
9910503.00
101011074.50
111111430.00
121211970.00
131312375.00
141412838.50
151513302.00
161613500.00
171713500.00
181813896.00
191914350.50
202014701.50
212115124.50
222215583.50
232315970.50
242416209.00
252516524.00
262616852.50
272717109.00
282817487.00
292917806.50
.........
717132458.50
727232895.00
737333376.50
747433984.00
757534596.00
767635345.16
777735806.50
787836328.50
797936747.00
808037516.50
818137948.50
828238556.00
838339456.00
848440135.50
858540806.00
868641845.50
878742790.50
888843735.50
898944991.00
909045954.00
919147254.50
929248465.00
939349878.00
949451745.50
959553325.00
969655624.50
979758482.00
989862964.00
999970006.50
100100258025.50
\n", "

101 rows × 2 columns

\n", "
" ], "text/plain": [ " q value\n", "0 0 1615.50\n", "1 1 6182.91\n", "2 2 6750.00\n", "3 3 7875.00\n", "4 4 8703.00\n", "5 5 9000.00\n", "6 6 9000.00\n", "7 7 9553.50\n", "8 8 10125.00\n", "9 9 10503.00\n", "10 10 11074.50\n", "11 11 11430.00\n", "12 12 11970.00\n", "13 13 12375.00\n", "14 14 12838.50\n", "15 15 13302.00\n", "16 16 13500.00\n", "17 17 13500.00\n", "18 18 13896.00\n", "19 19 14350.50\n", "20 20 14701.50\n", "21 21 15124.50\n", "22 22 15583.50\n", "23 23 15970.50\n", "24 24 16209.00\n", "25 25 16524.00\n", "26 26 16852.50\n", "27 27 17109.00\n", "28 28 17487.00\n", "29 29 17806.50\n", ".. ... ...\n", "71 71 32458.50\n", "72 72 32895.00\n", "73 73 33376.50\n", "74 74 33984.00\n", "75 75 34596.00\n", "76 76 35345.16\n", "77 77 35806.50\n", "78 78 36328.50\n", "79 79 36747.00\n", "80 80 37516.50\n", "81 81 37948.50\n", "82 82 38556.00\n", "83 83 39456.00\n", "84 84 40135.50\n", "85 85 40806.00\n", "86 86 41845.50\n", "87 87 42790.50\n", "88 88 43735.50\n", "89 89 44991.00\n", "90 90 45954.00\n", "91 91 47254.50\n", "92 92 48465.00\n", "93 93 49878.00\n", "94 94 51745.50\n", "95 95 53325.00\n", "96 96 55624.50\n", "97 97 58482.00\n", "98 98 62964.00\n", "99 99 70006.50\n", "100 100 258025.50\n", "\n", "[101 rows x 2 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_all" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "也可以使用 [DataFrame.quantile](https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.DataFrame.quantile.html#pandas.DataFrame.quantile) 的方法,得到的答案是一樣的。" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "q_all = app_train['AMT_ANNUITY'].quantile([i/100 for i in range(101)])\n", "q_all = pd.DataFrame({'q': list(range(101)),\n", " 'value': quantile_q_all})" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
qvalue
001615.50
116182.91
226750.00
337875.00
448703.00
559000.00
669000.00
779553.50
8810125.00
9910503.00
101011074.50
111111430.00
121211970.00
131312375.00
141412838.50
151513302.00
161613500.00
171713500.00
181813896.00
191914350.50
202014701.50
212115124.50
222215583.50
232315970.50
242416209.00
252516524.00
262616852.50
272717109.00
282817487.00
292917806.50
.........
717132458.50
727232895.00
737333376.50
747433984.00
757534596.00
767635345.16
777735806.50
787836328.50
797936747.00
808037516.50
818137948.50
828238556.00
838339456.00
848440135.50
858540806.00
868641845.50
878742790.50
888843735.50
898944991.00
909045954.00
919147254.50
929248465.00
939349878.00
949451745.50
959553325.00
969655624.50
979758482.00
989862964.00
999970006.50
100100258025.50
\n", "

101 rows × 2 columns

\n", "
" ], "text/plain": [ " q value\n", "0 0 1615.50\n", "1 1 6182.91\n", "2 2 6750.00\n", "3 3 7875.00\n", "4 4 8703.00\n", "5 5 9000.00\n", "6 6 9000.00\n", "7 7 9553.50\n", "8 8 10125.00\n", "9 9 10503.00\n", "10 10 11074.50\n", "11 11 11430.00\n", "12 12 11970.00\n", "13 13 12375.00\n", "14 14 12838.50\n", "15 15 13302.00\n", "16 16 13500.00\n", "17 17 13500.00\n", "18 18 13896.00\n", "19 19 14350.50\n", "20 20 14701.50\n", "21 21 15124.50\n", "22 22 15583.50\n", "23 23 15970.50\n", "24 24 16209.00\n", "25 25 16524.00\n", "26 26 16852.50\n", "27 27 17109.00\n", "28 28 17487.00\n", "29 29 17806.50\n", ".. ... ...\n", "71 71 32458.50\n", "72 72 32895.00\n", "73 73 33376.50\n", "74 74 33984.00\n", "75 75 34596.00\n", "76 76 35345.16\n", "77 77 35806.50\n", "78 78 36328.50\n", "79 79 36747.00\n", "80 80 37516.50\n", "81 81 37948.50\n", "82 82 38556.00\n", "83 83 39456.00\n", "84 84 40135.50\n", "85 85 40806.00\n", "86 86 41845.50\n", "87 87 42790.50\n", "88 88 43735.50\n", "89 89 44991.00\n", "90 90 45954.00\n", "91 91 47254.50\n", "92 92 48465.00\n", "93 93 49878.00\n", "94 94 51745.50\n", "95 95 53325.00\n", "96 96 55624.50\n", "97 97 58482.00\n", "98 98 62964.00\n", "99 99 70006.50\n", "100 100 258025.50\n", "\n", "[101 rows x 2 columns]" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_all" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Before replace NAs, numbers of row that AMT_ANNUITY is NAs: 12\n", "After replace NAs, numbers of row that AMT_ANNUITY is NAs: 0\n" ] } ], "source": [ "# 2.1 將 NAs 以 q50 填補\n", "print(\"Before replace NAs, numbers of row that AMT_ANNUITY is NAs: %i\" % sum(app_train['AMT_ANNUITY'].isnull()))\n", "\n", "\"\"\"\n", "Your Code Here\n", "\"\"\"\n", "q_50 = app_train['AMT_ANNUITY'].quantile(50/100)\n", "app_train.loc[app_train['AMT_ANNUITY'].isnull(),'AMT_ANNUITY'] = q_50\n", "\n", "print(\"After replace NAs, numbers of row that AMT_ANNUITY is NAs: %i\" % sum(app_train['AMT_ANNUITY'].isnull()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "將NAN填補過後, isnull() 回報總共的 NAN 數量變 0 " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Hints: Normalize function (to -1 ~ 1)\n", "$ y = 2*(\\frac{x - min(x)}{max(x) - min(x)} - 0.5) $" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "== Original data range ==\n", "count 307511.000000\n", "mean 27108.487841\n", "std 14493.461065\n", "min 1615.500000\n", "25% 16524.000000\n", "50% 24903.000000\n", "75% 34596.000000\n", "max 258025.500000\n", "Name: AMT_ANNUITY, dtype: float64\n", "== Normalized data range ==\n" ] }, { "data": { "text/plain": [ "count 307511.000000\n", "mean -0.801154\n", "std 0.113049\n", "min -1.000000\n", "25% -0.883714\n", "50% -0.818357\n", "75% -0.742752\n", "max 1.000000\n", "Name: AMT_ANNUITY_NORMALIZED, dtype: float64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# 2.2 Normalize values to -1 to 1\n", "print(\"== Original data range ==\")\n", "print(app_train['AMT_ANNUITY'].describe())\n", "\n", "def normalize_value(x):\n", " min_x = x.min()\n", " max_x = x.max()\n", " y = 2*((x-min_x)/(max_x-min_x) - 0.5)\n", " return y\n", "\n", "app_train['AMT_ANNUITY_NORMALIZED'] = normalize_value(app_train['AMT_ANNUITY'])\n", "\n", "print(\"== Normalized data range ==\")\n", "app_train['AMT_ANNUITY_NORMALIZED'].describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "也可以用 sklearn 的 MinMaxScaler function 直接套用" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "== Normalized data range ==\n" ] }, { "data": { "text/plain": [ "count 307511.000000\n", "mean -0.801154\n", "std 0.113049\n", "min -1.000000\n", "25% -0.883714\n", "50% -0.818357\n", "75% -0.742752\n", "max 1.000000\n", "Name: AMT_ANNUITY_NORMALIZED, dtype: float64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from sklearn.preprocessing import MinMaxScaler\n", "def normalize_value(x):\n", " mms = MinMaxScaler((-1,1))\n", " x = mms.fit_transform(x.to_frame())\n", " return x\n", "app_train['AMT_ANNUITY_NORMALIZED'] = normalize_value(app_train['AMT_ANNUITY'])\n", "\n", "print(\"== Normalized data range ==\")\n", "app_train['AMT_ANNUITY_NORMALIZED'].describe()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Before replace NAs, numbers of row that AMT_GOODS_PRICE is NAs: 278\n", "(450000.0, 26022)\n", "After replace NAs, numbers of row that AMT_GOODS_PRICE is NAs: 0\n" ] } ], "source": [ "# 3\n", "print(\"Before replace NAs, numbers of row that AMT_GOODS_PRICE is NAs: %i\" % sum(app_train['AMT_GOODS_PRICE'].isnull()))\n", "\n", "# 列出重複最多的數值\n", "\"\"\"\n", "Your Code Here\n", "\"\"\"\n", "from collections import defaultdict\n", "mode_dict = defaultdict(lambda:0)\n", "\n", "for value in app_train[~app_train['AMT_GOODS_PRICE'].isnull()]['AMT_GOODS_PRICE']:\n", " mode_dict[value] += 1\n", " \n", "mode_get = sorted(mode_dict.items(), key=lambda kv: kv[1], reverse=True)\n", "\n", "value_most = mode_get[0]\n", "print(value_most)\n", "\n", "mode_goods_price = list(app_train['AMT_GOODS_PRICE'].value_counts().index)\n", "app_train.loc[app_train['AMT_GOODS_PRICE'].isnull(), 'AMT_GOODS_PRICE'] = mode_goods_price[0]\n", "\n", "print(\"After replace NAs, numbers of row that AMT_GOODS_PRICE is NAs: %i\" % sum(app_train['AMT_GOODS_PRICE'].isnull()))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 1 }