{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zp6fW8MP-mrO"
   },
   "source": [
    "# 트리의 앙상블"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "pv1IwHmU-mrU"
   },
   "source": [
    "구글 코랩에서 실행하기"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "dIaIAizcRSG-"
   },
   "source": [
    "## 랜덤포레스트"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "ioJUlZ0M_uSZ"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# Load the wine dataset from the remote CSV.\n",
    "wine = pd.read_csv('https://bit.ly/wine_csv_data')\n",
    "\n",
    "# Features: the alcohol, sugar and pH columns; target: the 'class' column.\n",
    "data = wine[['alcohol', 'sugar', 'pH']].to_numpy()\n",
    "target = wine['class'].to_numpy()\n",
    "\n",
    "# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.\n",
    "train_input, test_input, train_target, test_target = train_test_split(\n",
    "    data, target, test_size=0.2, random_state=42)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "JDKQudr7_8nu",
    "outputId": "b6625aa7-2118-4543-bdcd-d678e84576e3"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.9973541965122431 0.8905151032797809\n"
     ]
    }
   ],
   "source": [
    "from sklearn.model_selection import cross_validate\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "\n",
    "# Cross-validate a random forest on the training set, also recording the\n",
    "# training scores so they can be compared with the validation scores.\n",
    "rf = RandomForestClassifier(n_jobs=-1, random_state=42)\n",
    "scores = cross_validate(rf, train_input, train_target, return_train_score=True, n_jobs=-1)\n",
    "\n",
    "# Mean training score, then mean validation score.\n",
    "print(np.mean(scores['train_score']), np.mean(scores['test_score']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "XYDbzXNLG8fK",
    "outputId": "17bddd3a-4de6-41c8-9135-af164b021f55"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[0.23167441 0.50039841 0.26792718]\n"
     ]
    }
   ],
   "source": [
    "# Fit the forest on the whole training set and print the feature importances\n",
    "# (column order: alcohol, sugar, pH).\n",
    "rf.fit(train_input, train_target)\n",
    "print(rf.feature_importances_)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "oMc06S1Fa_A-",
    "outputId": "dea1c2c9-c2cd-452e-f3d9-a24b678f02d5"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.8934000384837406\n"
     ]
    }
   ],
   "source": [
    "# Rebuild the forest with oob_score=True so that fitting also produces an\n",
    "# out-of-bag score, then print that score.\n",
    "rf = RandomForestClassifier(oob_score=True, n_jobs=-1, random_state=42)\n",
    "\n",
    "rf.fit(train_input, train_target)\n",
    "print(rf.oob_score_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "KdrVoeQZRU14"
   },
   "source": [
    "## 엑스트라트리"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "noMLdywdOGrE",
    "outputId": "07e18f2b-bb89-46fe-cec2-bf2f43f111b0"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.9974503966084433 0.8887848893166506\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import ExtraTreesClassifier\n",
    "\n",
    "# Same cross-validation as for the random forest, using extra-trees instead.\n",
    "et = ExtraTreesClassifier(n_jobs=-1, random_state=42)\n",
    "scores = cross_validate(et, train_input, train_target, return_train_score=True, n_jobs=-1)\n",
    "\n",
    "# Mean training score, then mean validation score.\n",
    "print(np.mean(scores['train_score']), np.mean(scores['test_score']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "HnB0_mBqfcXL",
    "outputId": "d4fde47e-766d-484a-b1a4-a982db8ccbb0"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[0.20183568 0.52242907 0.27573525]\n"
     ]
    }
   ],
   "source": [
    "# Fit on the whole training set and print the feature importances\n",
    "# (column order: alcohol, sugar, pH).\n",
    "et.fit(train_input, train_target)\n",
    "print(et.feature_importances_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "csKxnaxeRX8s"
   },
   "source": [
    "## 그레이디언트 부스팅"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "_IlNEFkaNsoG",
    "outputId": "9b4b36e3-5b1c-412e-e6c1-d039c389f3e5"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.8881086892152563 0.8720430147331015\n"
     ]
    }
   ],
   "source": [
    "from sklearn.ensemble import GradientBoostingClassifier\n",
    "\n",
    "# Cross-validate gradient boosting with its default hyperparameters.\n",
    "gb = GradientBoostingClassifier(random_state=42)\n",
    "scores = cross_validate(gb, train_input, train_target, return_train_score=True, n_jobs=-1)\n",
    "\n",
    "# Mean training score, then mean validation score.\n",
    "print(np.mean(scores['train_score']), np.mean(scores['test_score']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "pNpeS8EWpeEi",
    "outputId": "69f7971c-5011-4b03-a867-4284cb3336d5"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.9464595437171814 0.8780082549788999\n"
     ]
    }
   ],
   "source": [
    "# Repeat the experiment with 500 estimators and a learning rate of 0.2.\n",
    "gb = GradientBoostingClassifier(n_estimators=500, learning_rate=0.2, random_state=42)\n",
    "scores = cross_validate(gb, train_input, train_target, return_train_score=True, n_jobs=-1)\n",
    "\n",
    "# Mean training score, then mean validation score.\n",
    "print(np.mean(scores['train_score']), np.mean(scores['test_score']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "qD6iWVsGqCAE",
    "outputId": "175b9cf4-5c19-45d4-a622-d84f1f8baa40"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[0.15887763 0.6799705 0.16115187]\n"
     ]
    }
   ],
   "source": [
    "# Fit on the whole training set and print the feature importances\n",
    "# (column order: alcohol, sugar, pH).\n",
    "gb.fit(train_input, train_target)\n",
    "print(gb.feature_importances_)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "BthW_II9RbLa"
   },
   "source": [
    "## 히스토그램 기반 부스팅"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "_3Ct_NNWQbdA",
    "outputId": "2bccc04e-09d7-407a-95bb-9314b3b1ae23"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.9321723946453317 0.8801241948619236\n"
     ]
    }
   ],
   "source": [
    "# On scikit-learn versions below 1.0, uncomment the next line before running.\n",
    "# from sklearn.experimental import enable_hist_gradient_boosting\n",
    "from sklearn.ensemble import HistGradientBoostingClassifier\n",
    "\n",
    "# Cross-validate histogram-based gradient boosting with default hyperparameters.\n",
    "hgb = HistGradientBoostingClassifier(random_state=42)\n",
    "scores = cross_validate(hgb, train_input, train_target, return_train_score=True, n_jobs=-1)\n",
    "\n",
    "# Mean training score, then mean validation score.\n",
    "print(np.mean(scores['train_score']), np.mean(scores['test_score']))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "TvlB0GMTS3hn",
    "outputId": "2b446a17-5593-4f19-bc59-4b9a4d908379"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[0.08876275 0.23438522 0.08027708]\n"
     ]
    }
   ],
   "source": [
    "from sklearn.inspection import permutation_importance\n",
    "\n",
    "# Fit on the whole training set, then compute permutation importance on the\n",
    "# training data with 10 repeats per feature and print the per-feature mean.\n",
    "hgb.fit(train_input, train_target)\n",
    "result = permutation_importance(hgb, train_input, train_target, n_repeats=10,\n",
    "                                random_state=42, n_jobs=-1)\n",
    "print(result.importances_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "S8FfxInn-xBQ",
    "outputId": "cd18c70e-7555-4a4b-80cc-1af876e63304"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "[0.05969231 0.20238462 0.049 ]\n"
     ]
    }
   ],
   "source": [
    "# Same permutation-importance computation, this time on the held-out test set.\n",
    "result = permutation_importance(hgb, test_input, test_target, n_repeats=10,\n",
    "                                random_state=42, n_jobs=-1)\n",
    "print(result.importances_mean)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "pqplZjh0j2nw",
    "outputId": "3d6d6d84-c29e-4110-849a-a8c2b4c3033f"
   },
   "outputs": [
    {
     "output_type": "execute_result",
     "data": {
      "text/plain": [
       "0.8723076923076923"
      ]
     },
     "metadata": {},
     "execution_count": 13
    }
   ],
   "source": [
    "# Score of the fitted model on the held-out test set.\n",
    "hgb.score(test_input, test_target)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "8fz_FrezBezR"
   },
   "source": [
    "#### XGBoost"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "YBYLvOiV6rga",
    "outputId": "a9ad7740-5d4c-48a8-c714-2d6eacfdb20b"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.9558403027491312 0.8782000074035686\n"
     ]
    }
   ],
   "source": [
    "from xgboost import XGBClassifier\n",
    "\n",
    "# Cross-validate XGBoost using its histogram tree method.\n",
    "xgb = XGBClassifier(tree_method='hist', random_state=42)\n",
    "scores = cross_validate(xgb, train_input, train_target, return_train_score=True, n_jobs=-1)\n",
    "\n",
    "# Mean training score, then mean validation score.\n",
    "print(np.mean(scores['train_score']), np.mean(scores['test_score']))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "zl6nh6DOBd-B"
   },
   "source": [
    "#### LightGBM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "maihlDMP7lmY",
    "outputId": "19176c46-8abe-41d7-d6a2-11e9ade8ae9c"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.935828414851749 0.8801251203079884\n"
     ]
    }
   ],
   "source": [
    "from lightgbm import LGBMClassifier\n",
    "\n",
    "# Cross-validate LightGBM with default hyperparameters.\n",
    "lgb = LGBMClassifier(random_state=42)\n",
    "scores = cross_validate(lgb, train_input, train_target, return_train_score=True, n_jobs=-1)\n",
    "\n",
    "# Mean training score, then mean validation score.\n",
    "print(np.mean(scores['train_score']), np.mean(scores['test_score']))"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "5-3 트리의 앙상블.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "default:Python",
   "language": "python",
   "name": "conda-env-default-py"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}