{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "L5ALSPQSJp-n"
   },
   "source": [
    "# 확률적 경사 하강법"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "hnCwTs5KJp-t"
   },
   "source": [
    "\n",
    " \n",
    "
\n",
    " 구글 코랩에서 실행하기\n",
    "
"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3x4OwaSIR50l"
   },
   "source": [
    "## SGDClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "id": "j3z-zKXoRmWB"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Download the fish CSV from its URL straight into a DataFrame.\n",
    "fish = pd.read_csv('https://bit.ly/fish_csv_data')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "WAiJVY9nR1fF"
   },
   "outputs": [],
   "source": [
    "# Five measurement columns become the feature matrix; Species is the label.\n",
    "feature_columns = ['Weight', 'Length', 'Diagonal', 'Height', 'Width']\n",
    "fish_input = fish[feature_columns].to_numpy()\n",
    "fish_target = fish['Species'].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "AW6LMW_URpto"
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# A fixed random_state makes the train/test split repeatable across runs.\n",
    "train_input, test_input, train_target, test_target = train_test_split(\n",
    "    fish_input,\n",
    "    fish_target,\n",
    "    random_state=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "1RTAwK_DRutj"
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# The scaler is fitted on the training split only; the same fitted scaler\n",
    "# is then applied to both splits.\n",
    "ss = StandardScaler().fit(train_input)\n",
    "train_scaled = ss.transform(train_input)\n",
    "test_scaled = ss.transform(test_input)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "id": "FSyujXY7sli6"
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import SGDClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "KofoXhbwR9yu",
    "outputId": "eedcd281-e908-42f0-ed16-f92344cc4a38"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.773109243697479\n",
      "0.775\n"
     ]
    },
    {
     "output_type": "stream",
     "name": "stderr",
     "text": [
      "/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_stochastic_gradient.py:702: ConvergenceWarning: Maximum number of iteration reached before convergence. 
Consider increasing max_iter to improve the fit.\n",
      " warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# max_iter=10 caps training at ten passes over the data; the stderr output\n",
    "# of this cell records that this was too few for the model to converge.\n",
    "sc = SGDClassifier(loss='log_loss', max_iter=10, random_state=42)\n",
    "sc.fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "duwA4N3eSUk5",
    "outputId": "a44fd468-ad76-4311-e815-3c9e8569d52e"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.8151260504201681\n",
      "0.85\n"
     ]
    }
   ],
   "source": [
    "# partial_fit trains the already-fitted model for one more epoch rather\n",
    "# than starting again, so every re-run of this cell trains it further.\n",
    "sc.partial_fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "TEtfnUQhzKO2"
   },
   "source": [
    "## 에포크와 과대/과소적합"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "id": "pt7BHZVZ-dWT"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# The first partial_fit call on a fresh model must be given the complete\n",
    "# set of labels, so collect the distinct training labels once here.\n",
    "classes = np.unique(train_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "id": "-65Gz13tVOP7"
   },
   "outputs": [],
   "source": [
    "# The model and both score histories are created in the same cell as the\n",
    "# loop. Re-running this cell therefore restarts the experiment from scratch\n",
    "# instead of training the previous model further and appending a second\n",
    "# 300-point run to the lists.\n",
    "sc = SGDClassifier(loss='log_loss', random_state=42)\n",
    "train_score = []\n",
    "test_score = []\n",
    "\n",
    "for _ in range(300):\n",
    "    # Each call is one more epoch over the training set.\n",
    "    sc.partial_fit(train_scaled, train_target, classes=classes)\n",
    "\n",
    "    # Record accuracy on both splits after every epoch.\n",
    "    train_score.append(sc.score(train_scaled, train_target))\n",
    "    test_score.append(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 449
    },
    "id": "V19SzZJ5ZjSI",
    "outputId": "2d6cfa42-4924-4808-ef2a-6060222f9f69"
   },
   "outputs": [
    {
     "output_type": "display_data",
     "data": {
      "text/plain": [
       "
"
      ],
      "image/png": "\n"
     },
     "metadata": {}
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# Label both curves and show a legend; without it the train and test\n",
    "# lines cannot be told apart in the figure.\n",
    "plt.plot(train_score, label='train')\n",
    "plt.plot(test_score, label='test')\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('accuracy')\n",
    "plt.legend()\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "pdp2Ykst1K_I",
    "outputId": "4b9ce3ab-c497-4541-b4e2-e88efcfabd7d"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.957983193277311\n",
      "0.925\n"
     ]
    }
   ],
   "source": [
    "# tol=None switches off the tolerance-based stopping criterion, so training\n",
    "# always runs for the full max_iter=100 epochs.\n",
    "sc = SGDClassifier(loss='log_loss', max_iter=100, tol=None, random_state=42)\n",
    "sc.fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "OL7-y1kgIP4S",
    "outputId": "e50984d7-4934-4090-9e5b-0ca308d407c4"
   },
   "outputs": [
    {
     "output_type": "stream",
     "name": "stdout",
     "text": [
      "0.9495798319327731\n",
      "0.925\n"
     ]
    }
   ],
   "source": [
    "# Same configuration as the previous cell, with the hinge loss in place of\n",
    "# log loss.\n",
    "sc = SGDClassifier(loss='hinge', max_iter=100, tol=None, random_state=42)\n",
    "sc.fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "name": "4-2 확률적 경사 하강법.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}