{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# 寻找特征" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "\n", "# Data files are located relative to the notebook's working directory.\n", "data_folder = os.path.join(\".\", \"data\")\n", "adult_filename = os.path.join(data_folder, \"adult.data\")" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# header=None: the file is read without a header row, so column names are given explicitly.\n", "adult = pd.read_csv(\n", "    adult_filename,\n", "    header=None,\n", "    names=[\n", "        \"Age\", \"Work-Class\", \"fnlwgt\", \"Education\", \"Education-Num\",\n", "        \"Marital-Status\", \"Occupation\", \"Relationship\", \"Race\", \"Sex\",\n", "        \"Capital-gain\", \"Capital-loss\", \"Hours-per-week\", \"Native-Country\",\n", "        \"Earnings-Raw\",\n", "    ],\n", ")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Age', 'Work-Class', 'fnlwgt', 'Education', 'Education-Num',\n", " 'Marital-Status', 'Occupation', 'Relationship', 'Race', 'Sex',\n", " 'Capital-gain', 'Capital-loss', 'Hours-per-week', 'Native-Country',\n", " 'Earnings-Raw'],\n", " dtype='object')" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Discard rows in which every column is missing, then list the columns.\n", "adult = adult.dropna(how='all')\n", "adult.columns" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "count 32561.000000\n", "mean 40.437456\n", "std 12.347429\n", "min 1.000000\n", "25% 40.000000\n", "50% 40.000000\n", "75% 45.000000\n", "max 99.000000\n", "Name: Hours-per-week, dtype: float64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Summary statistics of the Hours-per-week column.\n", "adult[\"Hours-per-week\"].describe()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "10.0" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Median of the Education-Num column.\n", "adult[\"Education-Num\"].median()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, 
"outputs": [ { "data": { "text/plain": [ "array([' State-gov', ' Self-emp-not-inc', ' Private', ' Federal-gov',\n", " ' Local-gov', ' ?', ' Self-emp-inc', ' Without-pay', ' Never-worked'], dtype=object)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Distinct values found in the Work-Class column.\n", "adult[\"Work-Class\"].unique()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2],\n", " [ 3, 4, 5],\n", " [ 6, 7, 8],\n", " [ 9, 10, 11],\n", " [12, 13, 14],\n", " [15, 16, 17],\n", " [18, 19, 20],\n", " [21, 22, 23],\n", " [24, 25, 26],\n", " [27, 28, 29]])" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "import numpy as np\n", "\n", "# Toy 10x3 matrix holding the integers 0..29.\n", "X = np.arange(30).reshape(10, 3)\n", "X" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[ 0, 1, 2],\n", " [ 3, 1, 5],\n", " [ 6, 1, 8],\n", " [ 9, 1, 11],\n", " [12, 1, 14],\n", " [15, 1, 17],\n", " [18, 1, 20],\n", " [21, 1, 23],\n", " [24, 1, 26],\n", " [27, 1, 29]])" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Overwrite the middle column with a constant so that its variance becomes zero.\n", "X[:, 1] = 1\n", "X" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 0 2]\n", " [ 3 5]\n", " [ 6 8]\n", " [ 9 11]\n", " [12 14]\n", " [15 17]\n", " [18 20]\n", " [21 23]\n", " [24 26]\n", " [27 29]]\n", "[ 74.25 0. 
74.25]\n" ] } ], "source": [ "from sklearn.feature_selection import VarianceThreshold\n", "\n", "# With its default threshold, VarianceThreshold drops the zero-variance column.\n", "vt = VarianceThreshold()\n", "Xt = vt.fit_transform(X)\n", "print(Xt)\n", "print(vt.variances_)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "# Five numeric columns as features; the target is True where the raw label equals ' >50K'.\n", "X = adult[[\"Age\", \"Education-Num\", \"Capital-gain\", \"Capital-loss\", \"Hours-per-week\"]].values\n", "y = (adult[\"Earnings-Raw\"] == ' >50K').values" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[[ 39 13 2174 0 40]\n", " [ 50 13 0 0 13]\n", " [ 38 9 0 0 40]\n", " ..., \n", " [ 58 9 0 0 40]\n", " [ 22 9 0 0 20]\n", " [ 52 9 15024 0 40]]\n", "[ 8.60061182e+03 2.40142178e+03 8.21924671e+07 1.37214589e+06\n", " 6.47640900e+03]\n" ] } ], "source": [ "from sklearn.feature_selection import SelectKBest\n", "from sklearn.feature_selection import chi2\n", "print(X)\n", "# Keep the three features with the highest chi-squared score.\n", "transformer = SelectKBest(score_func=chi2, k=3)\n", "Xt_chi2 = transformer.fit_transform(X, y)\n", "print(transformer.scores_)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "from scipy.stats import pearsonr\n", "\n", "def multivariate_pearsonr(X, y):\n", "    \"\"\"Score every column of X by its absolute Pearson correlation with y.\n", "\n", "    Returns a ``(scores, pvalues)`` pair of 1-D arrays holding one entry per\n", "    column of X, which is the form SelectKBest is given as ``score_func`` below.\n", "    \"\"\"\n", "    scores, pvalues = [], []\n", "    for column in range(X.shape[1]):\n", "        cur_score, cur_p = pearsonr(X[:, column], y)\n", "        # Rank by strength of the relationship only; the sign is discarded.\n", "        scores.append(abs(cur_score))\n", "        pvalues.append(cur_p)\n", "    return (np.array(scores), np.array(pvalues))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[ 0.2340371 0.33515395 0.22332882 0.15052631 0.22968907]\n" ] } ], "source": [ "# Same top-3 selection, ranked by absolute Pearson correlation instead of chi-squared.\n", "transformer = SelectKBest(score_func=multivariate_pearsonr, k=3)\n", "Xt_pearson = transformer.fit_transform(X, y)\n", "print(transformer.scores_)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ 
"/home/dlinking-lxy/more-space/pyworks/venv/lib/python3.5/site-packages/sklearn/cross_validation.py:44: DeprecationWarning: This module was deprecated in version 0.18 in favor of the model_selection module into which all the refactored classes and functions are moved. Also note that the interface of the new CV iterators are different from that of this module. This module will be removed in 0.20.\n", " \"This module will be removed in 0.20.\", DeprecationWarning)\n" ] } ], "source": [ "from sklearn.tree import DecisionTreeClassifier\n", "# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20; model_selection replaces it.\n", "from sklearn.model_selection import cross_val_score\n", "\n", "# Evaluate both feature subsets with the same cross-validated decision tree.\n", "clf = DecisionTreeClassifier(random_state=14)\n", "scores_chi2 = cross_val_score(clf, Xt_chi2, y, scoring='accuracy')\n", "scores_pearson = cross_val_score(clf, Xt_pearson, y, scoring='accuracy')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Chi2 performance: 0.829\n", "Pearson performance: 0.771\n" ] } ], "source": [ "print(\"Chi2 performance: {0:.3f}\".format(scores_chi2.mean()))\n", "print(\"Pearson performance: {0:.3f}\".format(scores_pearson.mean()))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 创造特征" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/home/dlinking-lxy/more-space/pyworks/venv/lib/python3.5/site-packages/IPython/core/interactiveshell.py:2728: DtypeWarning: Columns (3) have mixed types. Specify dtype option on import or set low_memory=False.\n", " interactivity=interactivity, compiler=compiler, result=result)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ " 0 1 2 3 4 5 6 7 8 9 ... 1549 \\\n", "0 125.0 125.0 1.0000 1.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 \n", "1 57.0 468.0 8.2105 1.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 \n", "2 33.0 230.0 6.9696 1.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 \n", "3 60.0 468.0 7.8000 1.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 
0.0 \n", "4 60.0 468.0 7.8000 1.0 0.0 0.0 0.0 0.0 0.0 0.0 ... 0.0 \n", "\n", " 1550 1551 1552 1553 1554 1555 1556 1557 1558 \n", "0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "1 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "2 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "3 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "4 0.0 0.0 0.0 0.0 0.0 0.0 0.0 0.0 1.0 \n", "\n", "[5 rows x 1559 columns]\n" ] } ], "source": [ "import os\n", "import pandas as pd\n", "import numpy as np\n", "data_folder = os.path.join(\".\", \"data\")\n", "data_filename = os.path.join(data_folder, \"ad.data\")\n", "\n", "def convert_number(x):\n", "    \"\"\"Return x converted to float, or NaN when float(x) raises ValueError.\"\"\"\n", "    try:\n", "        return float(x)\n", "    except ValueError:\n", "        return np.nan\n", "\n", "# Columns 0-1557 go through convert_number; column 1558 is mapped to 1 for \"ad.\" and 0 otherwise.\n", "# This must be a plain dict: a defaultdict(convert_number) would call convert_number()\n", "# with no argument for a missing key, and it never registers the converter for any column.\n", "converters = {i: convert_number for i in range(1558)}\n", "converters[1558] = lambda x: 1 if x.strip() == \"ad.\" else 0\n", "\n", "ads = pd.read_csv(data_filename, header=None, converters=converters)\n", "\n", "# Coerce anything still non-numeric to NaN, then keep only rows without missing values.\n", "ads = ads.apply(pd.to_numeric, errors='coerce')\n", "ads = ads.astype(float).dropna()\n", "print(ads[:5])" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(2359, 1558) (2359,)\n", "[[ 125. 125. 1. ..., 0. 0. 0. ]\n", " [ 57. 468. 8.2105 ..., 0. 0. 0. ]\n", " [ 33. 230. 6.9696 ..., 0. 0. 0. ]\n", " ..., \n", " [ 101. 140. 1.3861 ..., 0. 0. 0. ]\n", " [ 23. 120. 5.2173 ..., 0. 0. 0. ]\n", " [ 40. 40. 1. ..., 0. 0. 0. 
]]\n" ] } ], "source": [ "# Every column except 1558 is a feature; column 1558 is the target.\n", "X = ads.drop(1558, axis=1).values\n", "y = ads[1558]\n", "print(X.shape,y.shape)\n", "print(X)\n" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(2359, 5)\n" ] } ], "source": [ "from sklearn.decomposition import PCA\n", "# Project the feature matrix onto its first five principal components.\n", "pca = PCA(n_components=5)\n", "Xd = pca.fit_transform(X)\n", "print(Xd.shape)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0.854, 0.145, 0.001, 0. , 0. ])" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Print floats with three decimals and without scientific notation from here on.\n", "np.set_printoptions(precision=3, suppress=True)\n", "pca.explained_variance_ratio_" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The average score is 0.9334\n" ] } ], "source": [ "from sklearn.tree import DecisionTreeClassifier\n", "# sklearn.cross_validation was deprecated in 0.18 and removed in 0.20; model_selection replaces it.\n", "from sklearn.model_selection import cross_val_score\n", "\n", "# Baseline: a decision tree trained on the full, unreduced feature matrix.\n", "clf = DecisionTreeClassifier(random_state=14)\n", "scores = cross_val_score(clf, X, y, scoring='accuracy')\n", "print(\"The average score is {:.4f}\".format(np.mean(scores)))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([ 0.092, 0.995, 0.024, ..., 0. , 0. , 0. 
])" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pca.components_[0]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "The average score from the reduced dataset is 0.9326\n" ] } ], "source": [ "clf = DecisionTreeClassifier(random_state=14)\n", "scores_reduced = cross_val_score(clf, Xd, y, scoring='accuracy')\n", "print(\"The average score from the reduced dataset is {:.4f}\".format(np.mean(scores_reduced)))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYIAAAD8CAYAAAB6paOMAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAIABJREFUeJztnX+UXHV58D/Pzu4mGaIJ2UTemGVnsVBLYvxFRGl9Lbqx\nQiygHuXgGWgQz9my0fcNb1sVuq0B326L9bwFfDXRrYCBnYq+VgtoisWI5z2nPaKhYiIgL6lml00R\nwkIisEA2u8/7x72zc2fm3pk7M3dm7sw8n3PumXu/986dZ2bvfp/v9/n1FVXFMAzD6Fy6mi2AYRiG\n0VxMERiGYXQ4pggMwzA6HFMEhmEYHY4pAsMwjA7HFIFhGEaHY4rAMAyjwzFFYBiG0eGYIjAMw+hw\nupstQBhWr16tg4ODzRbDMAyjpXjggQeeVtU15a5rCUUwODjIvn37mi2GYRhGSyEik2GuM9OQYRhG\nhxOJIhCRlSLyTRH5hYg8IiLniMgqEblXRB5zX092rxUR+byIHBSR/SLy5ihkMAzDMKojqhnBTcA9\nqvo7wBuAR4Crgb2qegaw1z0GOB84w92GgV0RyWAYhmFUQc0+AhFZAbwDuBxAVY8Dx0XkIuBc97Ld\nwA+BTwEXAbepU//6R+5sYq2qPlGrLIZhGFEzNzfH9PQ0L730UrNFCWTp0qX09/fT09NT1fujcBaf\nBhwBbhWRNwAPANuBUzyd+6+BU9z9dcDjnvdPu22mCAzDiB3T09O84hWvYHBwEBFptjhFqCozMzNM\nT09z2mmnVXWPKExD3cCbgV2q+ibgBXJmIADc0X9FK+CIyLCI7BORfUeOHIlAzAaQycDgIHR1Oa+Z\nTLMlMgyjRl566SX6+vpiqQQARIS+vr6aZixRKIJpYFpV73ePv4mjGJ4UkbUA7utT7vnDwKme9/e7\nbXmo6riqblLVTWvWlA2DbT6ZDAwPw+QkqDqvw8OmDAyjDYirEshSq3w1KwJV/TXwuIi81m0aAh4G\n7gK2um1bgTvd/buAP3Kjh94GHGsL/8DoKMzO5rfNzjrthmEYMSaqqKH/BmREZD/wRuCvgeuBd4vI\nY8Bm9xhgD/BL4CDw98C2iGRoLlNTlbUbhmFUwD333MNrX/taTj/9dK6//vryb6iASDKLVfVBYJPP\nqSGfaxX4WBSfGysGBhxzkF+7YRhGDczPz/Oxj32Me++9l/7+ft7ylrdw4YUXsn79+kjub5nFUTE2\nBslkflsy6bQbhtE51CFo5Mc//jGnn346r3nNa+jt7eWSSy7hzjvvLP/GkJgiiIp0GsbHIZUCEed1\nfNxpNwyjM6hT0Mjhw4c5
9dRcjE1/fz+HDxfF2FSNKYIoSafh0CFYWHBeTQkYRmfRokEjpggMwzCi\nok5BI+vWrePxx3N5uNPT06xbt66me3oxRWAYhhEVQcEhNQaNvOUtb+Gxxx7jV7/6FcePH+eOO+7g\nwgsvrOmeXkwRGIZhREWdgka6u7v5whe+wHve8x7OPPNMLr74YjZs2FDTPfPuH9mdDMMwOp2sX3B0\n1DEHDQw4SiACf+GWLVvYsmVLzffxwxSBYRhGlKTTLRcoYqYhwzCMDscUgWEYRodjisAwDKPDMUVg\nGIbR4ZgiMAzD6HBMERiGYcScK664gle96lW87nWvq8v9TREYhmHEnMsvv5x77rmnbvc3RWAYhhEh\nmQMZBm8cpOu6LgZvHCRzoPYy1O94xztYtWpVBNL5YwllhmEYEZE5kGH47mFm55wKpJPHJhm+exiA\n9Mb4JpnZjMAwDCMiRveOLiqBLLNzs4zutTLUhmEYHcHUMf9y00HtccEUgWEYRkQMrPAvNx3UHhci\nUQQickhEDojIgyKyz21bJSL3ishj7uvJbruIyOdF5KCI7BeRN0chg2EYRrMZGxoj2ZNfhjrZk2Rs\nqLYy1B/+8Ic555xzePTRR+nv7+fmm2+u6X6FROksfqeqPu05vhrYq6rXi8jV7vGngPOBM9ztrcAu\n99UwDKOlyTqER/eOMnVsioEVA4wNjdXsKP7a174WhXiB1DNq6CLgXHd/N/BDHEVwEXCbqirwIxFZ\nKSJrVfWJOspiGIbRENIb07GOEPIjKh+BAv8iIg+IyLDbdoqnc/81cIq7vw543PPeabctDxEZFpF9\nIrLvyJEjEYlpGIZhFBLVjODtqnpYRF4F3Csiv/CeVFUVEa3khqo6DowDbNq0qaL3GoZhRImqIiLN\nFiMQx8BSPZHMCFT1sPv6FPBt4GzgSRFZC+C+PuVefhg41fP2frfNMAwjdixdupSZmZmaO9t6oarM\nzMywdOnSqu9R84xARE4CulT1OXf/D4DPAHcBW4Hr3dc73bfcBXxcRO7AcRIfM/+AYRhxpb+/n+np\naeJsol66dCn9/f1Vvz8K09ApwLfdaVM38A+qeo+I/AT4hoh8FJgELnav3wNsAQ4Cs8BHIpDBMAyj\nLvT09HDaaac1W4y6UrMiUNVfAm/waZ8BhnzaFfhYrZ9rGIZhRINlFhuGYXQ4pggMwzA6HFMEhmEY\nHY4pAsMwjA7HFIFhGEaHY4rAMAyjwzFFYBiG0eGYIjAMw+hwTBEYhmF0OKYIDMMwOhxTBIZhGB2O\nKQLDMIwOxxSBYRhGh2OKwDAMo8MxRWAYhtHhmCIwDMPocEwRGEVs++42uj/TjVwndH+mm23f3dZs\nkQzDqCNRLFVptBHbvruNXft2LR7P6/zi8c737myWWIZh1BGbERh5jD8wXlG7YRitjykCI495na+o\n3TCM1icyRSAiCRH5qYh8xz0+TUTuF5GDIvJ1Eel125e4xwfd84NRyWDUTkISFbUbhtH6RDkj2A48\n4jn+LHCDqp4OPAt81G3/KPCs236De50RE4bPGq6o3TCM1icSRSAi/cB7ga+4xwK8C/ime8lu4H3u\n/kXuMe75Ifd6IwbsfO9ORjaNLM4AEpJgZNOIOYoNo42JKmroRuCTwCvc4z7gqKqecI+ngXXu/jrg\ncQBVPSEix9zrn/beUESGgWGAgYGBiMQ0wrDzvTut4zeMDqLmGYGI/CHwlKo+EIE8i6jquKpuUtVN\na9asifLWhmEYhocoZgS/B1woIluApcArgZuAlSLS7c4K+oHD7vWHgVOBaRHpBlYAMxHIYRiGYVRB\nzTMCVb1GVftVdRC4BPiBqqaB+4APupdtBe509+9yj3HP/0BVtVY5DMMwjOqoZx7Bp4A/EZGDOD6A\nm932m4E+t/1PgKvrKINhGIZRhkhLTKjqD4Efuvu/BM72ueYl4ENRfq5hGIZRPZZZbBiGEU
cyGRgc\nhK4u5zWTqdtHWdE5wzCMuJHJwPAwzM46x5OTzjFAOh35x9mMwDAMI26MjuaUQJbZWae9DpgiMAzD\niBtTU5W114gpAsMwjLgRVE2hTlUWTBEYhmHEjbExSCbz25JJp70OmCIwDMOIG+k0jI9DKgUizuv4\neF0cxWBRQ4ZhGPEkna5bx1+IzQgMwzA6HFMEhmEYHY4pAsMwjA7HFIFhGEaHY4rAMAyjwzFFYBiG\n0eGYIjAMw+hwTBEYhmF0OKYIDMMwOhxTBIZhGB2OKQLDMIwOxxSBYRhGh1OzIhCRpSLyYxH5mYg8\nJCLXue2nicj9InJQRL4uIr1u+xL3+KB7frBWGQzDMIzqiWJG8DLwLlV9A/BG4DwReRvwWeAGVT0d\neBb4qHv9R4Fn3fYb3OsMwzCMJlGzIlCH593DHndT4F3AN9323cD73P2L3GPc80MiIrXKYRiGYVRH\nJD4CEUmIyIPAU8C9wH8AR1X1hHvJNLDO3V8HPA7gnj8G9Pncc1hE9onIviNHjkQhpmEYhuFDJIpA\nVedV9Y1AP3A28DsR3HNcVTep6qY1a9bULKPRAWQyMDgIXV3OaybTbIkMoyWIdIUyVT0qIvcB5wAr\nRaTbHfX3A4fdyw4DpwLTItINrABmopTD6EAyGRgehtlZ53hy0jmGhq3yZBitShRRQ2tEZKW7vwx4\nN/AIcB/wQfeyrcCd7v5d7jHu+R+oqtYqh9FGVDOyHx3NKYEss7NOeytgsxmjiUQxI1gL7BaRBI5i\n+YaqfkdEHgbuEJG/An4K3OxefzNwu4gcBJ4BLolABqNdqHZkPzVVWXucsNmM0WSkFQbjmzZt0n37\n9jVbDKMRDA46HWEhqRQcOhT9++JAK8tuxBoReUBVN5W7zjKLjXhR7ch+bAySyfy2ZNJpjzutPJsx\n2gJTBJ1EK9ihBwYqa8+STsP4uDOKFnFex8dbw7RS7Xc2jIgwRdApZO3Qk5OgmrNDx00Z1DKyT6cd\nU8rCgvPaCkoAWns2Y7QFpgg6hbhH1WRnK5ddBsuWQV9f643sq6WVZzNGW2CKoFOIsx26cLYyMwMv\nvgi3395aI/taaNXZTBS0gsmyzTFF0CnE2Q4dNFv54z9ujjxG42gVk2WbY4qgU4izHTpoVvLCC7Bt\nW2NlMRpL3E2WHYIpgk4ha4fu89T3W7asefJ4KTUrGR9vnBxG44mzybKDMEXQabz4Ym5/ZiYe0/BS\ns5L5+cbJYTSeOJssOwhTBJ1EXKfh6bTjKPQjkWisLO1AKzlf42yy7CBMEXQScZ6GBzmGszV3jHC0\nmvPVQmdjgSmCTqIR0/BqR6M7d8LISG4GkEg4xzt3RidbJxDXWV8pOjl0NiaYIugkKp2GV9qp1zoa\n3bkTTpxw3nvihCkBqPxvEOdZnxFfVDX221lnnaVGRExMqKZSqiLO68RE8HXJpKrTLTtbMln6+kQi\n//rslkrV6cu0OZX+DVSd39r+BoYLsE9D9LFWhtrwp5LSyIX19AsRcab9RmUE/Q0SCdi929+E4ve3\nSCbN7t6hWBlqozYqMTH42aW9WChgdQT9Debng01u5nw1qsAUgeFPJY7lUvZnCwWsnlIKtJQD2Jyv\nRoWYIjD82bLFGVF6CerUgzqsRMJGo7Xg59z3Yg5gIyJMERjFZDKODdrrPxKBrVv9O/WgaKQgO7YR\njqyZJyiprhkmt1ZKVjNCY4rAKMbP5q8Ke/b4X2926fqRTjsKNQ7Zt62WrGaEpuaoIRE5FbgNOAVQ\nYFxVbxKRVcDXgUHgEHCxqj4rIgLcBGwBZoHLVfXfS32GRQ01mK6u/NlAFov+aR6ZjKOgp6acmcDY\nWOMVbSWRZEYsaGTU0AngT1V1PfA24GMish64GtirqmcAe91jgPOBM9xtGNgVgQxGlFghsPgRBwew\nJau1LTUrAlV9IjuiV9XngEeAdcBFwG73st3A+9z9i4
Db3HyHHwErRWRtrXIYEWKFwAw/bIDQtkTq\nIxCRQeBNwP3AKar6hHvq1zimI3CUxOOet027bYX3GhaRfSKy78iRI1GKaZQjKpt/rY5Fc0zGCxsg\ntC9h0o/DbMBy4AHgA+7x0YLzz7qv3wHe7mnfC2wqdW8rMdGCVFMeIcr3G/UhbIkSIxYQssREJDMC\nEekB/hHIqOq33OYnsyYf9/Upt/0wcKrn7f1um9FOVFIF02/kX0sVTZtJ1I84+CqMyKlZEbhRQDcD\nj6jq33lO3QVsdfe3And62v9IHN4GHNOcCcmIkMyBDIM3DtJ1XReDNw6SOVDHDrGw8/WLLoFix2JQ\nSGLY9/vJYSGOhlERUcwIfg+4DHiXiDzobluA64F3i8hjwGb3GGAP8EvgIPD3gK1OXgcyBzIM3z3M\n5LFJFGXy2CTDdw+XVgbVjqT9Ot/CrOQsXV3hRv7VJlG1Yj1+w2gyVn20TRm8cZDJY8Wj6tSKFIeu\nOlT8hlqqVgbNAET88xG89y9VrK7wfBh5LAfCMBax6qPtTpnR+9QxfxNKUHtNI+kgc41qLvLIb4Rf\nauSfjVSqNHLJQhwNo2JMEbQiIezgAyv8O76g9pqShYI62WzG6cJC8Gh8fh56evLbenpymbOVOiYt\nxNEwKsYUQSsSYvQ+NjRGsie/Q0z2JBkbCugQaxlJh+l8V63yf29fX7E/YW4Otm+vzsFrdY8Mo2JM\nEbQiIUbv6Y1pxi8YJ7UihSCkVqQYv2Cc9MaADrHWkfSyZbn9vr78zjeTgeeeK35PdiZw/HjxuZmZ\n6qN9LMTRMCqiu9kCGFUwMODvnC0Yvac3poM7/kKynWWlhc38nMwvvph/zeiof2c/P+90+EHMzjql\nr73yGYYROTYjaEXqZQcvNZIOck6HcTIHzWAWFoLDTLOUWpbRMIxIMEXQitTTDu7X4ZdyTodxMpfy\nM6iWVwZR5gFY1rFhFBOmDkWzN6s11CCC6vv09eW3ZbdUytmCzpW6b+G2fHnp8yL1+35+9XKspo7R\nBtDIWkNGmxBk5gmy409N+ZupRJxZQ3bEnU7DOecEf25XFyxZUlq2gYHaR/NhcyWsTIXRYVhmsZEj\nKCs3iGyeQLZURLa0hPceyaTTaT/8cPVyJZOO03j37uKOvK8PbropnFksbNaxrcRltAmWWWxUTqlY\n/66CR6Wry5kNeJdQTCSKO9rZ2dqUQCLh+D/27PEvR1FJmGnYXAlbicvoMEwRdAJhTCqZDPzmN8Xt\nvb3wxjcWZwYvLMCtt+abUObno5U7mXRmAel06U44rDM5bLSVlakwOo0wjoRmb+YsroFyDtKsUzTI\nSdvXp5pIlHbkRrX19QU7aEvJmN3COHTDOIFtURyjTSCks7jpnXyYzRRBFZTr4LOdYLloHpHGKIFy\nHe3EhGpvb233qOb3s6gho4UJqwjMWdyO+GX7+pFIlDfn9PXB0aPRm32yiITPYl69unQmMphD1zA8\nmLO4kyj0AWzfXl4JiITr3GdmoLtOlUhSqcrqAT3zTPlrKnXoWoKZYZgiaDkKO65t24pj3suNmsst\nGFPIyy87yqBcBnAhfX2lz1daEiOMs7YSh67lCxgGYIogOrZty3WW3d3OcT3ufeml+R3Xrl3lR/9e\n+voqUwJZTpxwOllVmJgoXkOgkGQSLr64OEony8RE8Syg3OjcL+rHS3Ydg7DYspaG4RDGkdDsLfbO\n4pERf+flyEhNt53YP6Gpv1yusgNNXYVObKzRGTsyUt7hWm5bFG6iuPRE1rGcda6GKT/hvV+5SB2/\nzyyMOqqEMN/TMFoYGhk1BNwCPAX83NO2CrgXeMx9PdltF+DzOIvX7wfeXO7+sVcEQeGViUTVt5zY\nP6HJsaRyLYtb8s8rUAZ+oZhhQjBLbV1dlUXSBEUc+dUNKqc0wkY4VUId/m6GB4u8ajphFUFUpqGv\nAucVtF0N7FXVM4
C97jHA+cAZ7jYM7IpIhuYR5HStIdJmdO8os3P5ZovZXhgdCnmDrJ/g9tsdc0m2\nBEQtLCxUZk8PsterOmau1atz73edvJmNMHgVdO1wXjOvdGX2M+OE/bwg6vB3M1zM/9JSRKIIVPX/\nAoUhHRcBu9393cD7PO23uQrrR8BKEVkbhRxNI2gB9qD2EAQuPr+igptMTjo+haxfIWrK2dPL2fRn\nZuCKK5zOYWCAzEYYvgAmV4KK8zp8kZA5UKLcdZZq1mNIpSprN8Jj/peWop7O4lNU9Ql3/9fAKe7+\nOuBxz3XTblvrMjxcWbuXAAdp4OLzx6oTsW5kC82JwMknF5/3LmHpx/HjTucwNsboZmG2N//0bLcy\nune09Gi/2vUYbKH7+mH1mlqKhkQNubaqikJVRGRYRPaJyL4jR47USbKI2LkTRkZyM4CuLjjpJPjS\nl0rHppeYPvsuPn8cxn4gzmcVFoGLmpNOqvwzjh51OtLVq3MRTuVCWcH53pddxtQr/R+RqWMlyl2P\njFS/LnE7LnQfl7wIq9fUWoRxJITZgEHyncWPAmvd/bXAo+7+l4EP+10XtMXeWeylkjo1ZRykE/sn\nNHVDSuVa0dQNKZ3Y77lHLU7fUlsikYt2alR5CXdLXUWeczy7pf4s4chy0knF77MaQDniVCMpTrJ0\nMDS6xISIDALfUdXXucefA2ZU9XoRuRpYpaqfFJH3Ah8HtgBvBT6vqmeXundLlZiopJZ92Pr4flSa\n3FUNYUpQREjWR+A1DyWPw/jdkD5Q4o1WVsIhbusoeEuUhy0jYkRK2BITkSgCEfkacC6wGngS2AH8\nE/ANYACYBC5W1WdERIAv4EQZzQIfUdWSvXxLKYJKOvda/nGrVQS1du7JpGM+ufTS6u9RgsxGJzJq\naoXjDxnbW0YJQDjF2QnUMrAw2pKwiiAy01A9t5YyDUWYRBVoGpqYqM5sU25d4HJbX19uat9Ak1HZ\nbcmShvxpY08lz57REWBrFjeJSiJRSjgrMwcyDN89zOSxSRRl8tgkw9+4jMzrBS67zH/k52HDlSA7\nctuGK4Hnn6/tu83MODOBehWhq5aXX4bNm5stRfOxKCijSkwRRE2lkSjptGMGKqjC6ZtQ1q1OQlkI\nJfDwKTg53O728CmuMoiCOCZc7d1ryUrtGAVlNARTBPUgoHOvhFoSyhaVgBdXGbQ1nZC5msnkwnML\ns7MhkmfP6DxMETSbCBPKsuagjqXdM1czGScT25ubMTMDH/lI+ytAo66YImgmQQll27YxdufzJI/n\nX5487kTR+FFkDmoHRkbKr2lQyNRUfJKqomZ01MnELmRurr0VYDNp12epAFuqspkEhY+6C8dUEkop\nOyitABTWPwkPfSkCuRtBXx88/bSzH3bpzez7Xnwx/9psyGurm0mCwkPBQkTrgd9z12LPki1V2QoE\nFYJz/9nTB+DQjbBwnfNaNp6+DC2jBJJJuOmm3LHXCVrufdC+xc5KlWcIOtchI9q60EGF80wRNItM\npjHZwXGlp8dZpUzVeS0X6ZJOO/WPgsi+L2hdY2+xs1btHMfGoLe3uD1oZTYrBV0bHVQ4zxRBlFTS\nwYyOlg0DrYT1T0JlZf2azNwc/Ou/OvveSJfs2gl+v+HDDwffLxshU67YWSt3juk03HJLvt+krw9u\nvdXfVNFBI9q60EmF88JknTV7a4nM4lJZwn4rNVWQGTxyPpr4S5QdzuvI+f7Xrb/SuYYdBYXbdgS/\np6mbSPFSlKUKlZW6V9h7dFL2bSUrxBnFtEHhPBq5VGW9t5ZQBEEdTF9f8TrB3d2hO8uR8yvv2MMq\njlhs3qUog5aOzF4TRhFk7xW0RGIndY6dpPTqRYsvtxlWEVjUUFR0dbHtPGV8E8x3QWIB5iHP+NYz\nB8f/urLbdv8lzPssdJaYhxP/sxaB48G282H8rV3M6wKJBRjeBzv/ueCibERMKZ+K
33PsV/0yaMnO\ndqxg2gZRL0ZtWNRQg9n2oSS7znY7bXFfu8gr8zDXA71/XuIe50PXp3P1gV5xjaNU/AhqD0PRusAb\nq79XLWw7H+c3Y2HxN9t1ttOeR9YmW8nSkkG+gC1bOqcej5WcMEJiiqBaChzD42fO+pZ1KDye6/G/\nXbZTVI/yeH5J8McnqgwZ910X+ILmKIPxTfj+RuPe8Yu3k66kqNr27f6O0j17OqtztJITRghMEVSD\nz2hzvoqQnZM/kRv97zob/4QwgaJbq2NCqYbRIYrXBe512htN2dlOIpHfSYcd4WYywUtkTk0Vd47Q\nGuGkrRr2asQeUwRZKg39LBhtVjpCP/kTcDRJnumoFIl5QJ3XkR/72NFDElS0Lkwxu1opNElJgO5M\nLOCM9Hfv9s8n8HTimdfD4I2DdF3XxeCNg2QOZEqHRxaG/mUyTq0erwkpjrV7Wjns1Yg95iwG55/p\n8svhxIlcW3c3fPWr/lNpn1T/rGmnXJmHPELmk6WOOpnFUTB4lWMOqudn+JE5t4/hdz7HrOZq5XTP\nw4msKSyLwsgjJ7Hzwi8HmzFcJ3DmlZMMXyTMdud+2GRPkvE7ZoOzsEdGHPNQ1oH89NPwwgvF13lL\nXDSbTAa2bvUv/92OTm4jMsxZXAlXXpmvBMA5vjKggL9PQsnOf4blPvXAAEcBKKFH/16654MLzVXD\n2F7CF7NbssRRerXgZhCPXrQ8TwkAnEg4v1nRbOfCLwcnlXlGxqND5CkBgNm5WUbf4xNmBbB8uTPL\n8I6q/ZQABJuWGk32+watAdGGWa5G4zFFAMErdwW1+zktgRd8sv8XKedIzqK5bfnL8NV/qr3GUJZs\nEbvZnlznmzpasDh8KpWLOH/pJbjtNt/vGhp35hS0vsILvU4YrF7nvO78XldpE4jHLBdo5lo+7+9U\nXrIkXOG6OOGXHeyl2ixX8zcYHkwRVEM6Tea3ZotCMEutFeCLjxN45azTKep18NzfRKsEstFC2VDN\n5FxxRdPMp7bk29xfT37Bt0rrI504Adu3h19fYdmy0mURPCPgoN97YEXK36kcVIfIj0rLX9eLUiP+\nasNezd9gFNA0RSAi54nIoyJyUESubpYcrjDh2t1RVOb14huCueVRf7NLSTwzAHAcyLLDcSZXyrbz\nnQQ02eG8euPxw0QLZTbC8JGb89dJvnvYUQaHDjmdxu2353ewYZiZYWxojGRP/ig9udDN2A8KfuMg\nU022Q/SMgH3NXD1JxobG/MMmw46ee3vzq582kyCZCyOqKqHaGkRRzCI2bwYRMq8XBv+H0HWt5Jz8\nRtNoiiIQkQTwReB8YD3wYRFZ3wxZAFi71r/d6yMotE37dKp7XuuYWVJHnYiYrNnFtyCcOu16nTML\nAPJ8CFmFUC7hK/EX+SGo3oS2XWc7SWmZjeGihUaHKLLjz87NMnrb1tw/fWEHG1IZpDemGb9gnNSK\nFIKQWpFi/INfJf3J28OZnrIdoscslz7g/t7HBAHnnheMk94Y0DkG5SGMjOQrt1tuqayDraeZJUhm\nv4iqsFRTVTOKWcTmzbB3b3EuS3bAYcqgaTQlakhEzgGuVdX3uMfXAKjq3/hdX9eoIffhLOLVr4bD\nh3PHPT2LDuWuHc4DXIios3aAH4sriLl4F4kpt6hM8niBHd8l8RewkCj93uz7l83BjF8VZ82Fo5b8\nXp8LKE0QdtGYoOcsaHGevC9Q8Nl+pSPCdoq1vDfofvUu4xC1zEG/eakIpGreU4g7ww6MXFuR4tBV\nIe9lhCLuUUPrgMc9x9NuW+PxUwIA//mfuf0NG/KiigJt0yV8BA99KWf71+sqWyQmKOErjBLIvh8C\nzFSSK+tQ8nt5TQfeEfDoqBPaWM6mfvLJ/u1TU+VLXoyPOyWru7udzmTrVqdURDXZslFn2tZS6nnb\nttx36u5zCLICAAARhklEQVR2jhshcyUZ2lki
rM0fODsNCCgw6k9sncUiMiwi+0Rk35EjR5orTEEd\n/IpCMCNickXOBCQ7nBlGJTyTdGYVvgnQblmHLY/6nFe3HZwR4fLlxQlYX/kKPPdcaQGOHvVtzvz+\nqiJ/y6UfcL/np3Fs4bfeCrt25UIo5+ed46COs174mYCq7SA3b27ed6qmBlGEtfmDBhxd0pWfGGg0\njGYpgsPAqZ7jfrdtEVUdV9VNqrppzZo1DRWuHIu26QJfQLURPitn8e+gC/H4ELxmpjAMHCst33yX\n4+PwC3Pd81rP8QsvOIvKeJmb819UPQSjm4v9Ld7vKaPzwbO28fGS984cyBRnHVdLkI181Sr/60t1\nkJlM8Hf68peLmjZ8cQNynSxuG764oYovUECls4xqZhGFDDnTWr+BFMC8zucHKZgyaBjNUgQ/Ac4Q\nkdNEpBe4BLirKZIMBRTZCWp3iXI94Wc/51EGngiiPILyEHxG8IVtebOVIIWjDSg/4eNYnDpRIqSz\nXPJdUJIVjhIYvns4PwLqjkvJvHO1I0elDt4gExCE6yC9n7d1a/DnLCzkybLhixt4+On8GenDTz8c\njTKohCgqmX7/+zA0VDSQSkhxAuDs3Cyje20ltUbRFEWgqieAjwPfAx4BvqGqDzVDluzDmcfQkNO+\nbl3D1hV+9nM5/8HEtwg3QwC63MSwouxlnOO+FwpmK0FfR6rzfVSEN8rE7RgHjtYYrBDQkY/uHWV2\nLr/jnu2F0TfOOKatK64IFwGT7cCDHNrPPFO+gyycTZRQYI7wuQ6wUAmUa68rUfgqvv99UCW9Xzl0\ng7JwrbKg/oW6zGfQOJrmI1DVPar626r6W6ra3GLw7sO5uGWVgNdh3EAqmV3M/5WjPFLH8J01LJ8L\nf7+qfR+JgJIOhczOwqWXOh3mpZfC5GSgmSA02Y780kvzFnYP6kSmVuBvypqddUpXe/F24EEMDOQ6\nyNtvd9ouuyxfOZXLDi4SMp4dYKSmNg+ByYYB7Ub0xNZZ3HB6e50OKrs1UAkULkjT9ekQb3LzEMCJ\nspmMwKxTte9jpU8sYEg+fp5T8sLXJBZkJgtibm7RTBM6k9lD5tUzDI6tznV0X/FZ08CL1wRUKs6+\n0o49pAM2bGccRQfua2qLyI7vm2yYTQw0GkLnVh9twog/W+tnaoXTIY3thX/tD1G11Iv758rmIWST\nc4ocri6po87nZD+3az6XdOa9pzevoVHkleL2yOLd189UcWPVxY7Lax4KyscA/9+x1PWkUvnx/KXi\n7MH/XFeXY2bxUpCD4OcjyLu8J7mYRJc5kGF07yhTx6YYWDGw2JEW/Q49Sba+YSt7HtuTd603Ea/w\nXpNHJ32f0ahi//1kD0wMNEITNo+gMxVBnZSAX0ef7USCOpqXumEh5LxMFuD2b+d3TEHJOdn7b/0p\n7H5TgaIo+JMn5p2chEKZo8Tvt7n0A/grQHXMXdXcM32AxeS1zIEMo3dtZ2puJv98T09R5FNF5bn9\nkqi6usi8Tovl+bk4JqOCpLPMWT2Mvv+VTJ2YYeD5BGPfmyf9m5Rvslg5ZZBakWJsaMy3w1/WvYyZ\nF4srqQqCeh6EQoVSeK9F/5PPfRZ2VLlcnlF3TBGUog4O4HIjysAOO+AfrPCaVEAnHZQNjDpO59Gh\nYEXRe8L5+Lluf5n9OswgklfDi56lNZe9DLPXO/tBv81sD+EVQXYBe4BMhsxnLw3+vfcXPNN+mbnb\nt+eVmg6dLR6QNZx552qGf3emWJ5/6yN939N5MmR+f1XR2gzejjgIuVYCf6/UyhSTx8pkaJchO7of\nvHEw9L06JRu4VWcscc8sbjvKFXULtNWH0EmpY8EhqkE275SbN1DKR3C8O18JQEEWc8hBwqIS8MT/\nv7jEaYfg36YiVJ3s282bYfv2wHtufT/+duvnn8/Z7rdvh4svzgv7DIyY6unLmXcSiVzWcGGUkk8+\nxGyv0w7k
Rdz4rc0wOzfL6F0FzuoKiCLCZurYFJkDmfAKReH548+3fbx/Pf0jccEUQUSUi8GvNgSz\nXNROuUifaj538bt4ymp4y0Cs/oSzZUtCLCoBL5KbIUSWhzDvJpfNzATec76L/H/STMYJFfUuNDMz\n42RDb926GPY59mAfScnvyZM9ScYuvCmXTJUN+/QJNw3Kh/BrD4xompupumDdwG/8RxR9y/qQkA6o\nVctWMXz3cPgPFZh5cabtOsVCfEOR2yzPoTMUQSZD5p2rc2Vvy1T0rIZyMfihwiTdKBlZwH/RGB/K\nRfpUE55Z+F0Kq0XOnORs2ZIQld4vCkrdc3F0PTjohJX6ZT3PzTlLVrqj9PR9TzP+/lvyq6OevJX0\nBaPOPfySyTzhppWEQJaMaAqoUbThqtJTqLF7F3xLct90/k15voAgslE7hR1eGNqtUywkUHG3UZ5D\n+yuCTIbMDR9h+HdnitYPiFIZlBuZezvsoP/L1DHHNr7wGec1bMZyqSzn7Of2vVD8ub0noKdghU6/\nGYifGSaPMgPOoN+mFsopuKm5mfJVTQvCOtMb0xy66hALOxY4tGaM9J/tLn2PmdwIvpIQyLGhseBn\nJSDU9OGVc4H+gWUv+wwInk8s+hxSK/xLhSckkVN6F4zzzIsVLNxTQDt1ioV0Qp5D+yuC0VFG/+tc\n2UVZaiVMDH62w574VmOL1qUPwNOfcz7XK98td8Ktd5bPGwhl2vHJAVj2cu7z/X6bZS+Xfl+57zR+\nt7vkpg+hZiGl4vXDJoG5I3jf9RYCnL/pjWnG/63P/3evoohb1imfNyD4XwuLnx2kpHa/f7ej9K46\nRHpjumzHVsrE1E6dYiGdkOfQ/lFDXV10fVorXj8AETjzTHjkkdBO00ooFWoaN0qFqC6yQN6I1Rs1\nVIpS0UZhqDj+P0tPj1PVNKhMQldXuL+7N5qpEipcx6BUxJBvqG1BiGuYqBffsFGXVy9/NYf/9LB/\nfkaIiKdWp92jhlDV2G9nnXWWVk0qpamrUK4t3lI3pHLXTUyoplKqIs7rxER+O6gmEt5CFO299fYu\n7k9sRJN/7v8bcq1zbmJjifv09NRPzr4+ndg5oqkbUirXiqZuSOnEuX1l37P49y3x3IT6/FSq9H1K\nEfTM+bB+e4+yo+C334GuvzL/b6WgmkyW/35BIu2fyP8t9xffJ8w1LUEFv3+rAuzTEH1sxZ1yM7aa\nFMHEhE6c1VPUkSWv663+AR4ZySmFRMI59hLUaXR1Oa8i9esYCzevnMuX+19TKE+2IxkaWmyb2Iim\nPtGtsgPt+4SzyQ40dVUJJZDtcL3/cCedFE7uML9RUCc8MeF8h1o6R797FG41dLgVMzGh6690Ov/s\ntn57T07WNu/QIieKZ6QFMEXgZWJCJ87t09RVbuf1V33NHcUU/uOG7dRFnM51yRL/TtNPKRV+rt/D\nPzISviPxzpC8ymZoKPw9vIq03Mi93O8R9jeu5h+88B6V/E5R0iGdVkMJ+r+rZYYXQ0wRGP7EbfRY\n6h9yYqLY7NHsf9pys8F6UO9OK27PRJ2Z2D+RGxQWzmhLDS5aEFMERmtQbrQ7MeE/M2jGiHhkxL9D\nrrcyCDKTRdFpddhsY2L/hCbHksE+rg6dEbR/+KgRb8qtfJVOw9NPw8REbatjRUHQ0phllsysmaDl\nMIPaKyFo5bWAxLZWJ3DBoiEqX3qzjWj/8FHDiIpSxQrr+X+0enV+iYwsfX2OkqyFoDDZasNiY07X\ndV2+mdaisHDGROMHF3XGis4ZRtQErcQWdoW2ankmIOM3qL0SghLYqkhsawUCs4RXppydStaxbiNM\nERhGWIYDCrIFtUdFPTvrbEE9L21sIgnMEl6yJXiFuQ7AFIFhhGXnThgZyc0AEgnneOfO+n5uPTvr\ncj6aNiOwFMhn93SUr6SQmnwEIvIh4FrgTOBsVd3nOXcN8FFgHvjvqvo9t/
084CYgAXxFVcsWFDAf\ngdHx+C2u06addVNoU19JWB9Bd7kLyvBz4APAlws+fD1wCbABeDXwfRH5bff0F4F3A9PAT0TkLlUN\nXofPMAyn07eOv34EDYhbIJgmCmpSBKr6CIAUR1NcBNyhqi8DvxKRg8DZ7rmDqvpL9313uNeaIjAM\nw2gS9fIRrAMe9xxPu21B7YZhGEaTKDsjEJHvA//F59Soqt4ZvUiLnzsMDAMMtGkom2EYRhwoqwhU\ndXO5a3w4DJzqOe532yjRXvi548A4OM7iKmQwDMMwQlAv09BdwCUiskRETgPOAH4M/AQ4Q0ROE5Fe\nHIfyXXWSwTAMIxzmLK4eEXk/8L+BNcB3ReRBVX2Pqj4kIt/AcQKfAD6mqvPuez4OfA8nfPQWVX2o\npm9gGIYRBR3S6fthtYYMwzDaFKs1ZBiGYYTCFIFhGEaHY4rAMAyjwzFFYBiG0eG0hLNYRJ4DHm22\nHBWyGqhx1ZCGYvLWn1aTudXkhdaTud7yplR1TbmLai061ygeDeP5jhMisq+VZDZ560+rydxq8kLr\nyRwXec00ZBiG0eGYIjAMw+hwWkURjDdbgCpoNZlN3vrTajK3mrzQejLHQt6WcBYbhmEY9aNVZgSG\nYRhGnYidIhCRz4nIL0Rkv4h8W0RWes5dIyIHReRREXmPp/08t+2giFzdYHk/JCIPiciCiGwqOBc7\nef2ImzwAInKLiDwlIj/3tK0SkXtF5DH39WS3XUTk8678+0XkzU2Q91QRuU9EHnafh+0tIPNSEfmx\niPzMlfk6t/00Ebnfle3rbqVg3GrCX3fb7xeRwUbL7MqREJGfish34i6viBwSkQMi8qCI7HPb4vdM\nqGqsNuAPgG53/7PAZ9399cDPgCXAacB/4FQwTbj7rwF63WvWN1DeM4HXAj8ENnnaYymvj/yxkscj\n1zuANwM/97T9LXC1u3+159nYAvwzIMDbgPubIO9a4M3u/iuA/+c+A3GWWYDl7n4PcL8ryzeAS9z2\nLwEj7v424Evu/iXA15v0bPwJ8A/Ad9zj2MoLHAJWF7TF7plo+B+xwh/x/UDG3b8GuMZz7nvAOe72\nPU973nUNlLVQEcRaXs/nx0qeAtkGCxTBo8Bad38tTn4JwJeBD/td10TZ7wTe3SoyA0ng34G34iQ4\nZQdji89H9hl297vd66TBcvYDe4F3Ad9xO804y+unCGL3TMTONFTAFTgaElpvHeRWkTdu8pTiFFV9\nwt3/NXCKux+r7+CaIN6EM8KOtcyumeVB4CngXpzZ4VFVPeEj16LM7vljQF9jJeZG4JPAgnvcR7zl\nVeBfROQBcZbfhRg+E03JLJYQ6yCLyCjOojaZRsrmRxh5jcaiqioisQt5E5HlwD8CV6nqb0Rk8Vwc\nZVZnwag3ur64bwO/02SRAhGRPwSeUtUHROTcZssTkrer6mEReRVwr4j8wnsyLs9EUxSBllkHWUQu\nB/4QGFJ3jkQE6yBXSzl5A2iavBVSSs648aSIrFXVJ0RkLc4oFmLyHUSkB0cJZFT1W25zrGXOoqpH\nReQ+HNPKShHpdkfRXrmyMk+LSDewAphpoJi/B1woIluApcArgZtiLC+qeth9fUpEvg2cTQyfidiZ\nhkTkPJyp34WqOus51WrrILeKvHGTpxR3AVvd/a04dvhs+x+5URdvA455pt4NQZyh/83AI6r6d55T\ncZZ5jTsTQESW4fg0HgHuAz4YIHP2u3wQ+IFnoFZ3VPUaVe1X1UGc5/QHqpqOq7wicpKIvCK7jxMI\n83Pi+Ew00nES0rlyEMdO9qC7fclzbhTHhvkocL6nfQtOlMZ/4JhrGinv+3FseS8DT5LveI2dvAHf\nIVbyuDJ9DXgCmHN/34/i2Hf3Ao8B3wdWudcK8EVX/gN4nPYNlPftOPbg/Z5nd0vMZX498FNX5p8D\nn3bbX4MzaDkI/B9gidu+1D0+6J5/TR
Ofj3PJRQ3FUl5Xrp+520PZ/604PhOWWWwYhtHhxM40ZBiG\nYTQWUwSGYRgdjikCwzCMDscUgWEYRodjisAwDKPDMUVgGIbR4ZgiMAzD6HBMERiGYXQ4/x+Pg9kt\npmkfOwAAAABJRU5ErkJggg==\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%matplotlib inline\n", "from matplotlib import pyplot as plt\n", "classes = set(y)\n", "colors = ['red', 'green']\n", "for cur_class, color in zip(classes, colors):\n", " mask = (y == cur_class).values\n", " plt.scatter(Xd[mask,0], Xd[mask,1], marker='o', color=color, label=int(cur_class))\n", "plt.legend()\n", "\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# 创造自己的转换器" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "from sklearn.base import TransformerMixin\n", "from sklearn.utils import as_float_array\n", "\n", "class MeanDiscrete(TransformerMixin):\n", " def fit(self, X, y=None):\n", " X = as_float_array(X)\n", " self.mean = np.mean(X, axis=0)\n", " return self\n", "\n", " def transform(self, X):\n", " X = as_float_array(X)\n", " assert X.shape[1] == self.mean.shape[0]\n", " return X > self.mean" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "mean_discrete = MeanDiscrete()\n", "\n", "X_mean = mean_discrete.fit_transform(X)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "#%%file ./data/adult_tests.py\n", "import numpy as np\n", "from numpy.testing import assert_array_equal\n", "\n", "def test_meandiscrete():\n", " X_test = np.array([[ 0, 2],\n", " [ 3, 5],\n", " [ 6, 8],\n", " [ 9, 11],\n", " [12, 14],\n", " [15, 17],\n", " [18, 20],\n", " [21, 23],\n", " [24, 26],\n", " [27, 29]])\n", " mean_discrete = MeanDiscrete()\n", " mean_discrete.fit(X_test)\n", " assert_array_equal(mean_discrete.mean, np.array([13.5, 15.5]))\n", " X_transformed = mean_discrete.transform(X_test)\n", " X_expected = np.array([[ 0, 0],\n", " [ 0, 0],\n", " [ 0, 0],\n", " [ 0, 
0],\n", " [ 0, 0],\n", " [ 1, 1],\n", " [ 1, 1],\n", " [ 1, 1],\n", " [ 1, 1],\n", " [ 1, 1]])\n", " assert_array_equal(X_transformed, X_expected)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "test_meandiscrete()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [], "source": [ "from sklearn.pipeline import Pipeline\n", "\n", "pipeline = Pipeline([('mean_discrete', MeanDiscrete()),\n", " ('classifier', DecisionTreeClassifier(random_state=14))])\n", "scores_mean_discrete = cross_val_score(pipeline, X, y, scoring='accuracy')" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean Discrete performance: 0.917\n" ] } ], "source": [ "print(\"Mean Discrete performance: {0:.3f}\".format(scores_mean_discrete.mean()))" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.5.2" }, "toc": { "colors": { "hover_highlight": "#DAA520", "navigate_num": "#000000", "navigate_text": "#333333", "running_highlight": "#FF0000", "selected_highlight": "#FFD700", "sidebar_border": "#EEEEEE", "wrapper_background": "#FFFFFF" }, "moveMenuLeft": true, "nav_menu": { "height": "50px", "width": "252px" }, "navigate_menu": true, "number_sections": true, "sideBar": true, "threshold": 4, "toc_cell": false, "toc_section_display": "block", "toc_window_display": false, "widenNotebook": false } }, "nbformat": 4, "nbformat_minor": 2 }