{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Bagging ensemble:" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Requirement already satisfied: xgboost in c:\\anaconda3\\lib\\site-packages (0.90)\n", "Requirement already satisfied: scipy in c:\\anaconda3\\lib\\site-packages (from xgboost) (1.3.1)\n", "Requirement already satisfied: numpy in c:\\anaconda3\\lib\\site-packages (from xgboost) (1.16.5)\n" ] } ], "source": [ "# Install once.\n", "# The %pip magic (unlike !pip) installs into the environment of the running kernel.\n", "%pip install xgboost" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import warnings \n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from sklearn.model_selection import train_test_split,GridSearchCV\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier,AdaBoostClassifier, GradientBoostingClassifier\n", "from xgboost import XGBClassifier\n", "from sklearn import metrics\n", "from sklearn.datasets import load_breast_cancer\n", "warnings.filterwarnings(action='ignore') # Turn off the warnings." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.1. 
Read in data:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# Load the breast-cancer dataset bundled with scikit-learn.\n", "data = load_breast_cancer()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['mean radius' 'mean texture' 'mean perimeter' 'mean area'\n", " 'mean smoothness' 'mean compactness' 'mean concavity'\n", " 'mean concave points' 'mean symmetry' 'mean fractal dimension'\n", " 'radius error' 'texture error' 'perimeter error' 'area error'\n", " 'smoothness error' 'compactness error' 'concavity error'\n", " 'concave points error' 'symmetry error' 'fractal dimension error'\n", " 'worst radius' 'worst texture' 'worst perimeter' 'worst area'\n", " 'worst smoothness' 'worst compactness' 'worst concavity'\n", " 'worst concave points' 'worst symmetry' 'worst fractal dimension']\n" ] } ], "source": [ "# Explanatory variables (feature matrix) and their column names.\n", "X, variable_names = data['data'], data['feature_names']\n", "print(variable_names)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(569, 30)" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X.shape" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['benign', 'malignant']\n" ] } ], "source": [ "# Response variable.\n", "# Flip the labels so that 0 is benign and 1 is malignant,\n", "# and reverse the class names so they line up with the flipped labels.\n", "Y = 1 - data['target']\n", "label = list(data['target_names'])[::-1]\n", "print(label)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Hold out 30% of the rows for testing; the seed keeps the split reproducible.\n", "X_train, X_test, Y_train, Y_test = train_test_split(\n", "    X, Y, test_size=0.3, random_state=1234\n", ")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.2. 
Random Forest:" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Random Forest accuracy : 0.924\n" ] } ], "source": [ "# Fit a random forest on the training split and score it on the held-out test split.\n", "RFC = RandomForestClassifier(\n", "    n_estimators=100,\n", "    max_depth=5,\n", "    min_samples_leaf=2,\n", "    random_state=123,\n", ")\n", "Y_pred = RFC.fit(X_train, Y_train).predict(X_test)\n", "rf_accuracy = np.round(metrics.accuracy_score(Y_test, Y_pred), 3)\n", "print(\"Random Forest accuracy : \" + str(rf_accuracy))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Rank the variables by the random forest's importance scores and plot the ten highest.\n", "variable_importance = pd.Series(RFC.feature_importances_, index=variable_names)\n", "top_variables_10 = variable_importance.sort_values(ascending=False).head(10)\n", "sns.barplot(x=top_variables_10.values, y=top_variables_10.index, ci=None)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Boosting Ensemble:" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.1. AdaBoost:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AdaBoost accuracy : 0.924\n" ] } ], "source": [ "# Classification by AdaBoost.\n", "# The base learner is passed positionally: its keyword is `base_estimator` in older\n", "# scikit-learn releases and `estimator` in newer ones, but it is the first parameter in both.\n", "ABC = AdaBoostClassifier(\n", "    DecisionTreeClassifier(max_depth=10),\n", "    n_estimators=100,\n", "    learning_rate=0.01,\n", "    random_state=123,\n", ")\n", "ABC.fit(X_train, Y_train)\n", "Y_pred = ABC.predict(X_test)\n", "print(\"AdaBoost accuracy : \" + str(np.round(metrics.accuracy_score(Y_test, Y_pred), 3)))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Rank the variables by AdaBoost's importance scores and plot only the ten highest.\n", "# Notes from the recorded run: a mean~ variable is the most important one;\n", "# the ranking resembles the previous plot, but that mean~ variable dominates.\n", "variable_importance = pd.Series(ABC.feature_importances_, index=variable_names)\n", "top_variables_10 = variable_importance.sort_values(ascending=False).head(10)\n", "sns.barplot(x=top_variables_10.values, y=top_variables_10.index, ci=None)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.2. Gradient Boosting:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Gradient Boosting accuracy : 0.906\n" ] } ], "source": [ "# Fit gradient boosting on the training split and score it on the held-out test split.\n", "GBC = GradientBoostingClassifier(\n", "    n_estimators=100,\n", "    learning_rate=0.01,\n", "    min_samples_leaf=2,\n", "    max_leaf_nodes=30,\n", "    random_state=123,\n", ")\n", "Y_pred = GBC.fit(X_train, Y_train).predict(X_test)\n", "gb_accuracy = np.round(metrics.accuracy_score(Y_test, Y_pred), 3)\n", "# Note from the recorded run: this scores lower than the models above.\n", "print(\"Gradient Boosting accuracy : \" + str(gb_accuracy))" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAdkAAAD4CAYAAACgwJwlAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjEsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8QZhcZAAAgAElEQVR4nO3de5xeVX3v8c8XiFySGOSiAoLByEVACGYSDHKHYlvbEDQcRUSglggeSEGB6qFFitqSYsurXiFGT+SQNhYQQUoFgVxGAoEk5EYALUJPqTlcKhBuARK+54+9pjwzPnMLs2cmzPf9ej2vWc/a67bXBH7PWnvPs2WbiIiI6HubDfQAIiIi3qwSZCMiImqSIBsREVGTBNmIiIiaJMhGRETUZIuBHkAMLjvssINHjx490MOIiNikLFmy5CnbO3bMT5CNdkaPHs3ixYsHehgREZsUSf/eLD/bxRERETXJSjbaeeCx/2Lc+VcN9DAiIvrVkss+XUu7WclGRETUJEE2IiKiJgmyERERNUmQjYiIqMkmFWQlTZa0z0CPY6BIukTSMd2UOULSwf01poiI6NygDLKSNu/k0GRgyAZZ2xfZvq2bYkcACbIREYNAnwZZSRdImlbSl0u6o6SPlnR1SZ8oaaWkVZKmN9R9vqzUFgETJV0qabWkFZK+XlZnk4DLJC2TNKZD3++QdL2k5eV1cMn/fOlrlaRzSt5oSQ9I+p6k+yXdKmnrcuy9km4rbSyVNEbSCEm3l/crJR1Xyk6X9LmGMVws6Qslfb6ke8v4/6qT+Xpe0t+Vdm+XtGPJHyvp7lL3eklvK/mzJE0p6Ucl/VXDmPaWNBo4Azi3zNGhkk4o575c0oI39AuOiIhe6euV7ALg0JJuAUZIGgYcArRK2hmYDhwFjAXGS5pcyg8HVtk+CFgNHA/sa3t/4Ku2FwI3AufbHmv74Q59fwOYb/sA4APA/ZLGAacBBwEfBE6XdGApvwfwbdv7As8AHyv5s0v+AVQrwjXAOuB42x8AjgT+TpKAOcDHG8bwP4BrJB1b2p9QznOcpMOazNdwYGlpdz7w5ZJ/FfDn5dxXNuR39FSp+13gPNuPAlcAl5c5agUuAj5czmdSs0YkTZW0WNLi9S8+10lXERHRW30dZJdQBZSRwMvAXVTB9lCgFRgPzLP9pO31VAGtLfhsAK4r6bVUgW2mpI8CL/ag76Oogg22N9h+liq4X2/7BdvPAz/m9Q8Bj9he1jDu0WXcu9i+vrSzzvaLgIC/lrQCuA3YBXiH7fuAt0vaWdIBwNO2/y9wbHndBywF9qYKuh29BvyopK8GDpE0CtjW9vyS/8OGOerox43j76TMncAsSacDTbfhbc+w3WK7ZYttRnbSTERE9FaffuOT7VclPUq1elwIrKBa+Y0BHgD27KL6OtsbSjvrJU0AjgY+AZxFFUR7S10ce7khvQHYuovyJwE7AuMaznGrcuxaYArwTqqVbVu/f2P7yl6O170s33YOG+jkd2n7DEkHAR8Blkkaa/u/etlPRERshDpufFoAnFd+tlJdI1xm28Ai4HBJO5Sbm06k2iZtR9IIYJTtm4FzqLZcAZ4DOltq3Q6cWepvLumtZQyTJW0jaTjVFnRrZwO3vRZ4rG0LW9KWkrYBRgFPlAB7JPDuhmpzqD4ITKEKuAC3AH9SzgNJu0h6e5MuNyv1AD4J/KKswJ+W1LbiPpkmc9SFdnMkaYztRbYvAp4Cdu1FWxER8QbUEWRbgZ2Au2w/TrXt2wpgew3wJWAusJzqeuQNTdoYCdxUtmfnA+eW/DnA+ZLu63jjE/BnwJGSVlJtn+5reykwC7iHKsDPLFu8XTkZmFb6Xki1Qp0NtEhaTLWqfbCtsO37y3j/s5wftm8F/hG4q4znWpp/OHgB2FfSEqqV+iUl/xSqG7xWUH3AuKRJ3c78FDi+7can0s5KSauoPnQs70VbERHxBqhaYMZAkPS87REDPY5Gw9+
5u/c+uenN0BERb1pv9AEBkpbYbumYPyj/TjYiIuLNIEF2AA22VWxERPStBNmIiIia5KHt0c773rU9i2t6eHFExFCTlWxERERNEmQjIiJqkiAbERFRk1yTjXZeWXM///eS9w/0MCJigO120cqBHsKbQlayERERNUmQjYiIqEmCbERERE0SZCMiImqSIBsREVGTBNlNiKSZkvbppszk7spERET/SJDdhNj+U9uruyk2GUiQjYgYBPosyEoaLenBstpaJWm2pGMk3SnpV5ImlHLDJf1A0r3l4evHNdRvlbS0vA4u+UdImifp2tL+bElq0v97Jd0maXmpP0aVy8p4Vkr6eHdtShovaWFp5x5JI7sY248k/WHDGGZJ+pikzUu/90paIemzXczXD0uZayVtU44dXeZmZZmrLUv+PEktJf28pK+Vcd4t6R1lXJOoHtS+rMzBNEmrSx9z+ur3HRER3evrlex7gX8A9gf2Bj4JHAKcB/yvUuZC4A7b44EjqQLCcOAJ4PdsfwD4OPCNhnYPBM6hWqG9B/hQk75nA9+2fQBwMLAG+CgwFjgAOKb0tVNnbUp6C/Aj4M9KO8cAL3UxtjnlPaXu0cDNwGeAZ8s5jgdOl7R7kzHvBcywvT+wFvicpK2AWcDHbb+f6gtDzmxSdzhwdxnnAuB02wuBG4HzbY+1/TDwReDA0scZTdpB0lRJiyUt/u0LG5oViYiIjdDXQfYR2yttvwbcD9xu28BKYHQpcyzwRUnLgHnAVsBuwDDge5JWAtfQfsvzHtuPlXaXNbQFgKSRwC62rwewvc72i1QB/p9sb7D9ODCfKuh11uZewBrb95Z21tpe38XY/hU4qqw0/wBYYPulco6fLue4CNge2KPJfP2H7TtL+uoy3r3KPP6y5P8QOKxJ3VeAm0p6Scc5abACmC3pU8D6ZgVsz7DdYrtlu+Gbd9JMRET0Vl9/reLLDenXGt6/1tCXgI/ZfqixoqSLgcepVp2bAes6aXcDvzvu39k+7ia/szYFuEnZc5uNzfY6SfOAD1OtaP+pod+zbd/SRf806cvdjLnRq+UDTOP4m/kIVZCeBPylpH3LB4eIiKjZQNz4dAtwdsM10ANL/iiqVeRrwMlAj5dUttcCj0maXNrcslzfXAB8vFwj3ZEq2NzTRVMPAjtLGl/aGSlpi27GNgc4DTi0nFvbOZ4paVhpZ8+yJd7RbpImlvSJwC/KGEZLem/JP5lqBd5TzwEjS7+bAbvangtcAGwLjOhFWxER8QYMRJD9CtX26wpJq8p7gO8Ap0i6G9gTeKGX7Z4MTJO0AlgIvBO4nmq7dDlwB3CB7f/XWQO2X6FakX5T0nLg51Tb2V2N7Vaq4H1bqQ8wE1gNLC3neCXNV5oPlHZXANsB37W9jipoX1O2p18DrujFPMwBzpd0H9UW9dWlnfuAy20/04u2IiLiDdDrO47RnySNBm6yvd8AD6Wd/XfZ2jd99r3dF4yIN7U8had3JC2x3dIxP38nGxERUZM8T3aA2H4UGFSr2IiI6FtZyUZERNQkK9lo5y077ctuFy0e6GFERLwpZCUbERFRkwTZiIiImiTIRkRE1CTXZKOdB594kA99s9nzFwafO8++s/tCEREDKCvZiIiImiTIRkRE1CRBNiIioiYJshERETVJkI2IiKhJgmwTkiZL2qfG9hf2UTtHSDq4L9qKiIi+N6SDrKTOHgw/GejzINvWn+2+CoxHAL1qqzyEPiIi+sEmGWQlXSBpWklfLumOkj5a0tUlfaKklZJWSZreUPd5SZdIWgRMlHSppNWSVkj6elkZTgIuk7RM0pgOfc+SdIWkVkm/lPRHJX9zSZdJure09dmSf4SkuZL+EVjZNoaGY/Ml/XNp61JJJ0m6p4x9TCm3o6TrStv3SvpQeR7tGcC5ZZyHNitX6l8saYakW4Gravq1REREB5vqqmYB8AXgG0ALsKWkYcAhQKuknYHpwDjgaeBWSZNt/wQYDqyyfZGk7YDvA3v
btqRtbT8j6UaqB6pf20n/o4HDgTHAXEnvBT4NPGt7vKQtgTtLUAOYAOxn+5EmbR0AvA/4LfBrYKbtCZL+DDgbOAf4B+By27+QtBtwi+33SboCeN721wFKIG9XrrRNmYtDbL/UcQCSpgJTAd7ytrd0PusREdErm2qQXQKMkzQSeBlYShVsDwWmAeOBebafBJA0GzgM+AmwAbiutLMWWAfMlPQvwE097P+fbb8G/ErSr4G9gWOB/SVNKWVGAXsArwD3dBJgAe61vaaM82GgLTCvBI4s6WOAfSS11XlrOfeOuip3Y7MAC2B7BjADYMRuI9z5aUdERG9skkHW9quSHgVOAxYCK6gC0hjgAWDPLqqvs72htLNe0gTgaOATwFnAUT0ZQpP3As62fUvjAUlHAC900dbLDenXGt6/xuu/n82AiR2DZEMwpQfluhpDRETUYJO8JlssAM4rP1uprk8us21gEXC4pB3KzUYnAvM7NiBpBDDK9s1U27Jjy6HngGYrxTYnSNqsXDN9D/AQ1dbsmWXbGkl7ShreB+cJ1er2rIZxdzbOzspFRMQA2JSDbCuwE3CX7ceptn1bAcr265eAucByYKntG5q0MRK4SdIKqiB8bsmfA5wv6b6ONz4VD5Xy/wqcYXsdMBNYDSyVtAq4kr7bKZgGtJQbqlZTfaAA+ClwfNuNT12Ui4iIAaBq4Rc9JWkWXd8UtUkbsdsIH3D+AQM9jB7JU3giYrCQtMR2S8f8TXklGxERMahtkjc+DSTbpw70GCIiYtOQIBvt7P32vbMNGxHRR7JdHBERUZME2YiIiJokyEZERNQkQTYiIqImufEp2nnuoYeYf9jh/drn4Qt+58u4IiLeFLKSjYiIqEmCbERERE0SZCMiImqSIBsREVGTBNkaSZosaZ8+bnOepJaSvlnStn3ZfkRE9J0E2T5QnlnbzGSg2yAraaPu8rb9h7af2Zi6ERFRvyEdZCVdIGlaSV8u6Y6SPlrS1SV9oqSVklZJmt5Q93lJl0haBEyUdKmk1eVZrl+XdDAwCbisPO91TIe+Z0n6e0lzgemSJkhaWJ5hu1DSXqXc1pLmlHZ/BGzd0Maj5cH0o8szbNvyz5N0cUlPaxjXnHpmMiIimhnqfye7APgC8A2gBdhS0jDgEKBV0s7AdGAc8DRwq6TJtn8CDAdW2b5I0nbA94G9bVvStrafkXQjXT97dk/gGNsbJL0VOMz2eknHAH8NfAw4E3jR9v6S9geW9vIcvwjsbvvlzraWJU0FpgK8Y8ste9l8RER0ZkivZIElwDhJI4GXgbuogu2hQCswHphn+0nb64HZwGGl7gbgupJeC6wDZkr6KPBiD/u/xvaGkh4FXFNWpJcD+5b8w4CrAWyvAFb08hxXALMlfQpY36yA7Rm2W2y3jBo2rJfNR0REZ4Z0kLX9KvAocBqwkCqwHgmMAR4A1EX1dW0BsgTgCVRBdzLwsx4O4YWG9FeAubb3A/4Y2KpxqN20s572v8vGuh8Bvk21Gl+ysdd/IyKi94Z0kC0WAOeVn63AGcAy2wYWAYeX656bAycCv/MdgJJGAKNs3wycA4wth54DRvZwHKOA/yzpUzuM76TSz37A/k3qPg68XdL2krYE/qiU3wzY1fZc4AJgW2BED8cTERFvUIJsFVh3Au6y/TjVtm8rgO01wJeAucByYKntG5q0MRK4SdIKqiB8bsmfA5xfbmYa06Reo78F/kbSnUDj3crfBUaUti8A7ulYsazIL6H6UHAT8GA5tDlwtaSVwH3A5bkbOSKi/6hasEVU9ho50jMO/EC/9pkHBETEpk7SEtstHfOzko2IiKhJgmxERERNEmQjIiJqkj/niHZG7rVXrpFGRPSRrGQjIiJqkiAbERFRkwTZiIiImiTIRkRE1CQ3PkU7Tzz2LN/6wk9rafusv/vjWtqNiBisspKNiIioSYJsRERETRJkIyIiapIgGxERUZME2YiIiJokyNagPOC97j626PC+R32qkt9
7REQ/yP9se0nSTyQtkXS/pKkN+c9LukTSImCipHGS5peyt0jaqZQ7XdK9kpZLuk7SNk36GC7pB6XcfZKOK/mnSrpG0k+BWyUdIWmupH8EVpYyn5e0qrzOKXmjJT0g6TvAUmDX2icqIiISZDfCn9geB7QA0yRtX/KHA6tsHwQsAr4JTCllfwB8rZT7se3xtg8AHgA+06SPC4E7bI8HjgQukzS8HJsInGL7qPJ+AnCh7X0kjQNOAw4CPgicLunAUm4v4CrbB9r+98bOJE2VtFjS4udffHbjZyYiItrJl1H03jRJx5f0rsAewH8BG4DrSv5ewH7AzyUBbA6sKcf2k/RVYFtgBHBLkz6OBSZJOq+83wrYraR/bvu3DWXvsf1ISR8CXG/7BQBJPwYOBW4E/t323c1OyPYMYAbAbu/cw93OQERE9EiCbC9IOgI4Bpho+0VJ86gCIMA62xvaigL3257YpJlZwGTbyyWdChzRrCvgY7Yf6tD/QcALHco2vlcXw+9YLyIiapbt4t4ZBTxdAuzeVFuyzTwE7ChpIoCkYZL2LcdGAmskDQNO6qT+LcDZKsvghi3f7iwAJkvapmwvHw+09rBuRET0sQTZ3vkZsIWkFcBXgM62X18BpgDTJS0HlgEHl8N/SXXN9ufAg5308xVgGLBC0qryvlu2l1KtlO8pfcy0fV9P6kZERN+TnUtw8brd3rmHLzjp72tpOw8IiIg3K0lLbLd0zM9KNiIioiYJshERETVJkI2IiKhJ/oQn2nn7u0bl2mlERB/JSjYiIqImCbIRERE1SZCNiIioSa7JRjtrHnmYr31qSp+2eeHV1/ZpexERm4qsZCMiImqSIBsREVGTBNmIiIiaJMhGRETUJEE2IiKiJgmyA0zSZEn7DPQ4IiKi7yXI9hNJm3dyaDKw0UFWUv4MKyJikEqQ7YakCyRNK+nLJd1R0kdLurqkT5S0UtIqSdMb6j4v6RJJi4CJki6VtFrSCklfl3QwMAm4TNIySWM69P3HkhZJuk/SbZLeUfIvljRD0q3AVZI2l3SZpHtL258t5UZIul3S0jK+4/pjziIiopJVUPcWAF8AvgG0AFtKGgYcArRK2hmYDowDngZulTTZ9k+A4cAq2xdJ2g74PrC3bUva1vYzkm4EbrLd7BsbfgF8sJT/U+CCMhZKf4fYfknSVOBZ2+MlbQncWQLwfwDH214raQfgbkk32nZjJ6X+VIBR22zdR9MWERFZyXZvCTBO0kjgZeAuqmB7KNAKjAfm2X7S9npgNnBYqbsBuK6k1wLrgJmSPgq82IO+3wXcImklcD6wb8OxG22/VNLHAp+WtAxYBGwP7AEI+GtJK4DbgF2Ad3TsxPYM2y22W4ZvtWUPhhURET2RINsN268CjwKnAQupAuuRwBjgAapA1pl1tjeUdtYDE6iC7mTgZz3o/pvAt2y/H/gssFXDsRca0gLOtj22vHa3fStwErAjMM72WODxDm1ERESNEmR7ZgFwXvnZCpwBLCvbrouAwyXtUG5uOhGY37EBSSOAUbZvBs4BxpZDzwEjO+l3FPCfJX1KF+O7BTizbGMjaU9Jw0v9J2y/KulI4N09PeGIiHjjEmR7phXYCbjL9uNU276tALbXAF8C5gLLgaW2b2jSxkjgprJ1Ox84t+TPAc4vNzeN6VDnYuAaSa3AU12MbyawGlgqaRVwJdX19tlAi6TFVKvaB3t11hER8Yaowz0wMcTtsv3b/Lk/OLpP28xTeCLizU7SEtstHfOzko2IiKhJgmxERERNEmQjIiJqki+jiHZ22n1MrqFGRPSRrGQjIiJqkiAbERFRkwTZiIiImuSabLSzbs1zPPC1Oza6/vsuPKoPRxMRsWnLSjYiIqImCbIRERE1SZCNiIioSYJsRERETRJkIyIiapIg20ckTZa0T2+P9bDtUyXtvPGji4iIgZAg20vlwezNTAY6C6RdHeuJU4FeBVlJ+fOsiIgBNmSCrKQLJE0r6csl3VHSR0u
6uqRPlLRS0ipJ0xvqPi/pEkmLgImSLpW0WtIKSV+XdDAwCbhM0rLGh683O1ZeP5O0RFKrpL1L2RskfbqkPytptqQpQAswu9TfWtKjknYo5VokzSvpiyXNkHQrcJWkzSVdJuneMtbP1jzNERHRYCitdhYAXwC+QRW0tpQ0DDgEaC3bsdOBccDTwK2SJtv+CTAcWGX7IknbAd8H9rZtSdvafkbSjcBNttt9u77thR2PSbodOMP2ryQdBHwHOAqYCtwp6ZEy1g/a/q2ks4DzbC8u9bs6z3HAIbZfkjQVeNb2eElblrZvtf1IY4VSbirATqPe3vuZjYiIpoZSkF0CjJM0EngZWEoVbA8FpgHjgXm2nwSQNBs4DPgJsAG4rrSzFlgHzJT0L8BNvRmEpBHAwcA1DcFySwDbj0u6CJgLHG/7txtxnjfafqmkjwX2L6thgFHAHkC7IGt7BjADYL9d9vJG9BkREU0MmSBr+1VJjwKnAQuBFcCRwBjgAWDPLqqvs72htLNe0gTgaOATwFlUq9Ce2gx4xvbYTo6/H/gvur4Gu57Xt/q36nDshYa0gLNt39KL8UVERB8ZMtdkiwXAeeVnK3AGsMy2gUXA4ZJ2KDc3nQjM79hAWYmOsn0zcA7QFiyfA0Z20u9/H7O9FnhE0gmlPUk6oKQnAH8AHAicJ2n3Ttp+lGpbGOBjXZzvLcCZZVscSXtKGt5F+YiI6ENDLci2AjsBd9l+nGrbtxXA9hrgS1RbtcuBpbZvaNLGSOAmSSuogvC5JX8OcL6k+xpvfOrk2EnAZyQtB+4HjivXTL8H/Int31Bdk/2Bqj3lWcAVbTc+AX8F/IOkVqqt7M7MBFYDSyWtAq5kCO1eREQMNFWLuIjKfrvs5Ws+992Nrp+n8ETEUCRpie2WjvlDbSUbERHRbxJkIyIiapLrc9HOVjuNzJZvREQfyUo2IiKiJgmyERERNUmQjYiIqEmCbERERE1y41O085vf/IaLL764yzLdHY+IiEpWshERETVJkI2IiKhJgmxERERNEmQjIiJqkiDbDySdKqmr58N2V/8cSdv05ZgiIqJ+CbL941S6fgh7d84BehVkJeXO8YiIATYkgqyk0ZIelDRT0ipJsyUdI+lOSb8qD0tH0nBJP5B0b3n263EN9VslLS2vg0v+EZLmSbq2tD+7PP+1se8pQAswu+15sJLGSZovaYmkWyTtJGmL0u8Rpd7fSPqapGlUAXqupLnl2PON7UuaVdKzJP19KTe9s/OJiIj+MZRWO+8FTgCmAvcCnwQOASYB/wuYDFwI3GH7TyRtC9wj6TbgCeD3bK+TtAfwT1SBE+BAYF/gN8CdwIeAX7R1avtaSWcB59leLGkY8E3gONtPSvo48LXS56nAtSWw/j5wkO1XJH0eONL2Uz04zz2BY2xvkPTXzc7H9guNFSRNLfPCqFGjejyhERHRtaEUZB+xvRJA0v3A7bYtaSUwupQ5Fpgk6bzyfitgN6oA+i1JY4ENVIGszT22HyvtLitt/YLO7QXsB/y8LHo3B9YA2L5f0v8BfgpMtP3KRpznNbY3dHM+DzRWsD0DmAGw8847eyP6jIiIJoZSkH25If1aw/vXeH0eBHzM9kONFSVdDDwOHEC1xb6uk3Y30P2cCrjf9sROjr8feAZ4RxdtNAbCrToca1ylNj2fiIjoH0Pimmwv3AKc3XZdVdKBJX8UsMb2a8DJVKvP3ngOGFnSDwE7SppY+hgmad+S/iiwPXAY8I2yxduxPsDjkt4naTPg+I04n4iI6AcJsu19BRgGrJC0qrwH+A5wiqS7qbaKX+ikfmdmAVeU7eTNgSlUNyYtB5YBB0vaAbgU+IztXwLfAv6h1J8B/GvbjU/AF4GbgDsoW829PJ+IiOgHsnMJLl638847e+rUqV2WyQMCIiLak7TEdkvH/KxkIyIiapIgGxERUZME2YiIiJrkmmy009LS4sWLFw/0MCIiNim5JhsREdHPEmQjIiJqkiAbERF
Rk6H0tYrRA08//QD/fM2Epsf+xwn39PNoIiI2bVnJRkRE1CRBNiIioiYJshERETVJkI2IiKhJgmxERERNhkyQlTRZ0j6DYBynStq54f2j5TF3ERHxJvOmC7KSOnug+mRgwIMscCqwc3eFIiJi0zdogqykCyRNK+nLJd1R0kdLurqkT5S0UtIqSdMb6j4v6RJJi4CJki6VtFrSCklfl3QwMAm4TNIySWM69H1CaXO5pAUl71RJP5H0U0mPSDpL0ucl3SfpbknblXJjy/sVkq6X9LbO8iVNAVqA2WUcW5chnC1paTm3vUv9iyX9QNI8Sb9um5ty7FOS7iltXClp8/KaVc5jpaRzS9lpDXMxp+9/cxER0ZlBE2SBBcChJd0CjJA0DDgEaC1brNOBo4CxwHhJk0v54cAq2wcBq4HjgX1t7w981fZC4EbgfNtjbT/coe+LgA/bPoAqGLfZD/gkMAH4GvCi7QOBu4BPlzJXAX9e+loJfLmzfNvXAouBk8o4Xipln7L9AeC7wHkN/e8NfLj0/2VJwyS9D/g48CHbY4ENwEllTnaxvZ/t9wP/u7TxReDAMo4zmk28pKmSFktavHbt+mZFIiJiIwymILsEGCdpJPAyVSBroQq8rcB4YJ7tJ22vB2YDh5W6G4DrSnotsA6YKemjwIs96PtOYJak04HG7ea5tp+z/STwLPDTkr8SGC1pFLCt7fkl/4fAYZ3ld9H/jxvmYHRD/r/Yftn2U8ATwDuAo4FxwL2SlpX37wF+DbxH0jcl/X6ZB4AVVCvnTwFNI6jtGbZbbLe89a35ErCIiL4yaIKs7VeBR4HTgIVUgfVIYAzwAKAuqq+zvaG0s55q5Xcd1XXYn/Wg7zOAvwB2BZZJ2r4cermh2GsN71+jb7+Ssq3dDR3abey/7ZiAH5aV8Fjbe9m+2PbTwAHAPOB/AjNLvY8A36YKzEskJYpGRPSTQRNkiwVU26ULqILsGcAyVw+9XQQcLmmHcnPTicD8jg1IGgGMsn0zcA7VNirAc8DIZp1KGmN7ke2LgKeogm23bD8LPC2pbZv7ZGB+Z/ndjaOHbgemSHp7Gft2kt5d7lDezPZ1wF8CH5C0GbCr7bnABcC2wIg30FdRgbwAAAjHSURBVHdERPTCYFvVtAIXAnfZfkHSupKH7TWSvgTMpVrN3Wz7hiZtjARukLRVKXduyZ8DfK/cQDSlw3XZyyTtUcrfDizn9eDcnVOAKyRtQ7Vle1o3+bNK/kvAxB728d9sr5b0F8CtJYi+SrVyfQn43yUP4EtUW99Xl+1rAZfbfqa3fUZExMZRtUiMqIwZM9x/c+m+TY/lKTwREc1JWmK7pWP+YNsujoiIeNNIkI2IiKhJgmxERERNBtuNTzHA3va29+Xaa0REH8lKNiIioiYJshERETVJkI2IiKhJrslGO6ufXssB197SLm/5lA8P0GgiIjZtWclGRETUJEE2IiKiJgmyERERNUmQjYiIqEmCbERERE0GNMhKmixpn06O7ShpkaT7Gp7LurH9jJb0yR6WW9WDcrMkTSnpmZ2dQ50knSHp0/3db0RE9Fy/BNnykPVmJgOdBaijgQdtH2i7tYftdWY00G2Q3Ri2/9T26jra7qbfK2xf1d/9RkREz3UZZCVdUB5yjqTLJd1R0kdLurqkT5S0UtIqSdMb6j4v6RJJi4CJki6VtFrSCklfl3QwMInqgenLJI1pqDsW+FvgD8uxrZu0d5Gke0u/MySp1H2vpNskLZe0tLR7KXBoaevcsmJtLceXlrF0NQ+S9K0y/n8B3t5wbJ6kloZzni5pSRnDhHL815ImlTKbS7qsjH2FpM+W/CNK2WslPShpdsM5tZu7knexpPPa5kvS3eX49ZLe1jC26ZLukfTLN7ojEBERvdPdSnYB0PY/5hZghKRhwCFAq6SdgenAUcBYYLykyaX8cGCV7YOA1cDxwL629we+anshcCNwvu2xth9u69T2MuAi4Efl2EuN7dn+BfAt2+N
t7wdsDfxRqT4b+LbtA4CDgTXAF4HW0tblwBPA79n+APBx4BvdzMPxwF7A+4HTS7vNDAfm2R4HPAd8Ffi9Uv+SUuYzwLO2xwPjgdMl7V6OHQicQ7W6fw/wIUnbdZy7Jv1eBfx5Ob4S+HLDsS1sTyjtfrlJXSRNlbRY0uL1a5/teiYiIqLHuguyS4BxkkYCLwN3UQXbQ4FWqiAxz/aTttdTBbjDSt0NwHUlvRZYB8yU9FHgxY0Ya2N7AEeWa7YrqYL8vmWcu9i+HsD2OtvN+hoGfK/UvYbOt6zbHAb8k+0Ntn8D3NFJuVeAn5X0SmC+7VdLenTJPxb4tKRlwCJge2CPcuwe24/Zfg1YVup0OXeSRgHb2p5fsn7I678DgB+Xn0saxtCO7Rm2W2y3bPHWUZ1OQkRE9E6XQbYEiEeB04CFVIH1SGAM8ACgLqqvs72htLMemEAVJCfzeiDqjf9uT9JWwHeAKbbfD3wP2Kqb8TQ6F3gcOIDqQ8NbelDHPSjzqu22cq9RfTChBM22r7AUcHZZVY+1vbvtW8uxlxva2kC1Cn2jc9fW5gbyNZoREf2qJzc+LQDOKz9bgTOAZSWYLAIOl7RDuRnpRGB+xwYkjQBG2b6ZattybDn0HDByI8a9Vfn5VGl7CoDttcBjbVvWkraUtE2TfkYBa0rwOxno7kaqBcAnyvXUnag+aGysW4Azy7Y7kvaUNLyzwl3MHQC2nwWebrjeejJNfgcREdH/erKyaQUuBO6y/YKkdSUP22skfQmYS7VCu9n2DU3aGAncUFagolpJAsyh2radRrUqfbhJ3d9h+xlJ36Pahn0UuLfh8MnAlZIuAV4FTgBWAOslLQdmUa2Cr5N0Qhn7C910eT3VlvRK4Je8sSA2k2rbdmm5selJqhVqZzqbu0anAFeUDxS/ptp5iIiIAabXdzcjYJsxe3qP6d9sl5en8EREdE3SEtstHfPzjU8RERE1SZCNiIioSYJsRERETfInHdHOPm97K4tzDTYiok9kJRsREVGT3F0c7Uh6DnhooMexCdgBeGqgB7EJyDz1TOapZwbzPL3b9o4dM7NdHB091Ow29GhP0uLMU/cyTz2TeeqZTXGesl0cERFRkwTZiIiImiTIRkczBnoAm4jMU89knnom89Qzm9w85caniIiImmQlGxERUZME2YiIiJokyA5Rkn5f0kOS/k3SF5sc31LSj8rxRZJG9/8oB14P5ukwSUslrZc0ZSDGOBj0YJ4+L2m1pBWSbpf07oEY50DrwTydIWmlpGWSfiFpn4EY50Drbp4ayk2RZEmD9896bOc1xF5UD6l/GHgP8BZgObBPhzKfA64o6U8APxrocQ/SeRoN7A9cRfVM5AEf9yCdpyOBbUr6zPx76nSe3tqQngT8bKDHPRjnqZQbCSwA7gZaBnrcnb2ykh2aJgD/ZvvXtl8B5gDHdShzHPDDkr4WOLo8ZH4o6XaebD9qewXw2kAMcJDoyTzNtf1ieXs38K5+HuNg0JN5WtvwdjgwFO9M7cn/nwC+AvwtsK4/B9dbCbJD0y7AfzS8f6zkNS1jez3wLLB9v4xu8OjJPEXv5+kzwL/WOqLBqUfzJOl/SnqYKoBM66exDSbdzpOkA4Fdbd/UnwPbGAmyQ1OzFWnHT8w9KfNmlznomR7Pk6RPAS3AZbWOaHDq0TzZ/rbtMcCfA39R+6gGny7nSdJmwOXAF/ptRG9AguzQ9Biwa8P7dwG/6ayMpC2AUcBv+2V0g0dP5il6OE+SjgEuBCbZfrmfxjaY9Pbf0xxgcq0jGpy6m6eRwH7APEmPAh8EbhysNz8lyA5N9wJ7SNpd0luobmy6sUOZG4FTSnoKcIfL3QZDSE/mKXowT2V770qqAPvEAIxxMOjJPO3R8PYjwK/6cXyDRZfzZPtZ2zvYHm17NNU1/km2Fw/McLuWIDsElWusZwG3AA8A/2z7fkmXSJpUin0f2F7SvwGfBzq9jf7NqifzJGm8pMeAE4ArJd0/cCM
eGD3893QZMAK4pvx5ypD7sNLDeTpL0v2SllH9d3dKJ829afVwnjYZ+VrFiIiImmQlGxERUZME2YiIiJokyEZERNQkQTYiIqImCbIRERE1SZCNiIioSYJsRERETf4/bytc80ogoKsAAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Rank the variables by gradient boosting's importance scores and plot the ten highest.\n", "# Author's interpretation of the recorded plot: it shows how the model detects breast cancer,\n", "# namely by relying most on the worst~ (shape) variables.\n", "variable_importance = pd.Series(GBC.feature_importances_, index=variable_names)\n", "top_variables_10 = variable_importance.sort_values(ascending=False).head(10)\n", "sns.barplot(x=top_variables_10.values, y=top_variables_10.index, ci=None)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.3. XGBoost:" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Classification by XGBoost.\n", "# The keyword names must be spelled exactly `n_estimators` and `learning_rate`;\n", "# the earlier spellings (`n_estimator`, `learning_Rate`) did not set those hyperparameters.\n", "# The accuracy recorded before this fix (0.942, the highest of the four models) came from\n", "# that misconfigured run, so re-run this cell to obtain the current figure.\n", "XGBC = XGBClassifier(n_estimators=500, learning_rate=0.1, max_depth=4, random_state=123)\n", "XGBC.fit(X_train, Y_train)\n", "Y_pred = XGBC.predict(X_test)\n", "print(\"XGBoost accuracy : \" + str(np.round(metrics.accuracy_score(Y_test, Y_pred), 3)))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# Rank the variables by XGBoost's importance scores and plot the ten highest.\n", "variable_importance = pd.Series(XGBC.feature_importances_, index=variable_names)\n", "top_variables_10 = variable_importance.sort_values(ascending=False).head(10)\n", "sns.barplot(x=top_variables_10.values, y=top_variables_10.index, ci=None)\n", "plt.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "NOTE: The hyperparameters of XGBClassifier can be optimized using GridSearchCV() and RandomizedSearchCV(). Students are encouraged to explore." ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 2 }