{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "zp6fW8MP-mrO" }, "source": [ "# 트리의 앙상블" ] }, { "cell_type": "markdown", "metadata": { "id": "pv1IwHmU-mrU" }, "source": [ "\n", " \n", "
\n", " 구글 코랩에서 실행하기\n", "
" ] }, { "cell_type": "markdown", "metadata": { "id": "dIaIAizcRSG-" }, "source": [ "## 랜덤포레스트" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "id": "ioJUlZ0M_uSZ" }, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "\n", "wine = pd.read_csv('https://bit.ly/wine_csv_data')\n", "\n", "data = wine[['alcohol', 'sugar', 'pH']].to_numpy()\n", "target = wine['class'].to_numpy()\n", "\n", "train_input, test_input, train_target, test_target = train_test_split(data, target, test_size=0.2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "JDKQudr7_8nu", "outputId": "e9dc5d13-d6ef-4c68-a5c0-52e7f97eaf50" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.9973541965122431 0.8905151032797809\n" ] } ], "source": [ "from sklearn.model_selection import cross_validate\n", "from sklearn.ensemble import RandomForestClassifier\n", "\n", "rf = RandomForestClassifier(n_jobs=-1, random_state=42)\n", "scores = cross_validate(rf, train_input, train_target, return_train_score=True, n_jobs=-1)\n", "\n", "print(np.mean(scores['train_score']), np.mean(scores['test_score']))" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "XYDbzXNLG8fK", "outputId": "c8b93973-9dbd-433b-f41c-dc0b79a6f94b" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[0.23167441 0.50039841 0.26792718]\n" ] } ], "source": [ "rf.fit(train_input, train_target)\n", "print(rf.feature_importances_)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "oMc06S1Fa_A-", "outputId": "4a521cc0-651a-4d3d-be00-68403fdcd2b3" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.8934000384837406\n" ] } ], "source": [ "rf = RandomForestClassifier(oob_score=True, n_jobs=-1, random_state=42)\n", "\n", "rf.fit(train_input, train_target)\n", "print(rf.oob_score_)" ] }, { "cell_type": "markdown", "metadata": { "id": "KdrVoeQZRU14" }, "source": [ "## 엑스트라트리" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "noMLdywdOGrE", "outputId": "eac10009-dfbb-4b2d-ca78-1a2bb721bb3e" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.9974503966084433 0.8887848893166506\n" ] } ], "source": [ "from sklearn.ensemble import ExtraTreesClassifier\n", "\n", "et = ExtraTreesClassifier(n_jobs=-1, random_state=42)\n", "scores = cross_validate(et, train_input, train_target, return_train_score=True, n_jobs=-1)\n", "\n", "print(np.mean(scores['train_score']), np.mean(scores['test_score']))" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "HnB0_mBqfcXL", "outputId": "dea60ed0-c89c-4b28-f36b-f1a632ce6da4" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[0.20183568 0.52242907 0.27573525]\n" ] } ], "source": [ "et.fit(train_input, train_target)\n", "print(et.feature_importances_)" ] }, { "cell_type": "markdown", "metadata": { "id": "csKxnaxeRX8s" }, "source": [ "## 그레이디언트 부스팅" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_IlNEFkaNsoG", "outputId": "17aa4d4b-4f9e-46cc-da26-0fcb9b698e3c" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.8881086892152563 0.8720430147331015\n" ] } ], "source": [ "from sklearn.ensemble import GradientBoostingClassifier\n", "\n", "gb = GradientBoostingClassifier(random_state=42)\n", "scores = cross_validate(gb, train_input, train_target, return_train_score=True, n_jobs=-1)\n", "\n", "print(np.mean(scores['train_score']), np.mean(scores['test_score']))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pNpeS8EWpeEi", "outputId": "4753ea63-ef6d-40c0-8e84-5306c56d3ec5" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.9464595437171814 0.8780082549788999\n" ] } ], "source": [ "gb = GradientBoostingClassifier(n_estimators=500, learning_rate=0.2, random_state=42)\n", "scores = cross_validate(gb, train_input, train_target, return_train_score=True, n_jobs=-1)\n", "\n", "print(np.mean(scores['train_score']), np.mean(scores['test_score']))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "qD6iWVsGqCAE", "outputId": "d621da25-66a8-4aef-ce4f-d85b9ac20228" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[0.15872278 0.68010884 0.16116839]\n" ] } ], "source": [ "gb.fit(train_input, train_target)\n", "print(gb.feature_importances_)" ] }, { "cell_type": "markdown", "metadata": { "id": "BthW_II9RbLa" }, "source": [ "## 히스토그램 기반 부스팅" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "_3Ct_NNWQbdA", "outputId": "ce062886-1cf1-4169-e6e1-2e2d105eeba7" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.9321723946453317 0.8801241948619236\n" ] } ], "source": [ "# 사이킷런 1.0 버전 아래에서는 다음 라인의 주석을 해제하고 실행하세요.\n", "# from sklearn.experimental import enable_hist_gradient_boosting\n", "from sklearn.ensemble import HistGradientBoostingClassifier\n", "\n", "hgb = HistGradientBoostingClassifier(random_state=42)\n", "scores = cross_validate(hgb, train_input, train_target, return_train_score=True, n_jobs=-1)\n", "\n", "print(np.mean(scores['train_score']), np.mean(scores['test_score']))" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "TvlB0GMTS3hn", "outputId": "62c06ffb-979f-4488-ff6b-3f42023d135a" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[0.08876275 0.23438522 0.08027708]\n" ] } ], "source": [ "from sklearn.inspection import permutation_importance\n", "\n", "hgb.fit(train_input, train_target)\n", "result = permutation_importance(hgb, train_input, train_target, n_repeats=10,\n", " random_state=42, n_jobs=-1)\n", "print(result.importances_mean)" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "S8FfxInn-xBQ", "outputId": "fe562fce-1d79-4da9-962b-1d7b91b88899" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "[0.05969231 0.20238462 0.049 ]\n" ] } ], "source": [ "result = permutation_importance(hgb, test_input, test_target, n_repeats=10,\n", " random_state=42, n_jobs=-1)\n", "print(result.importances_mean)" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "pqplZjh0j2nw", "outputId": "a24b9e23-ca52-46ec-b360-647935214597" }, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "0.8723076923076923" ] }, "metadata": {}, "execution_count": 13 } ], "source": [ "hgb.score(test_input, test_target)" ] }, { "cell_type": "markdown", "metadata": { "id": "8fz_FrezBezR" }, "source": [ "#### XGBoost" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "YBYLvOiV6rga", "outputId": "58d75137-1c27-4156-a3d1-16a86b7ef197" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.9555033709953124 0.8799326275264677\n" ] } ], "source": [ "from xgboost import XGBClassifier\n", "\n", "xgb = XGBClassifier(tree_method='hist', random_state=42)\n", "scores = cross_validate(xgb, train_input, train_target, return_train_score=True, n_jobs=-1)\n", "\n", "print(np.mean(scores['train_score']), np.mean(scores['test_score']))" ] }, { "cell_type": "markdown", "metadata": { "id": "zl6nh6DOBd-B" }, "source": [ "#### LightGBM" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "colab": { "base_uri": "https://localhost:8080/" }, "id": "maihlDMP7lmY", "outputId": "83921d16-47d4-4bca-c7ff-4548116da90b" }, "outputs": [ { "output_type": "stream", "name": "stdout", "text": [ "0.935828414851749 0.8801251203079884\n" ] } ], "source": [ "from lightgbm import LGBMClassifier\n", "\n", "lgb = LGBMClassifier(random_state=42)\n", "scores = cross_validate(lgb, train_input, train_target, return_train_score=True, n_jobs=-1)\n", "\n", "print(np.mean(scores['train_score']), np.mean(scores['test_score']))" ] } ], "metadata": { "colab": { "name": "5-3 트리의 앙상블.ipynb", "provenance": [] }, "kernelspec": { "display_name": "default:Python", "language": "python", "name": "conda-env-default-py" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.10" } }, "nbformat": 4, "nbformat_minor": 0 }