{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "L5ALSPQSJp-n"
   },
   "source": [
    "# 확률적 경사 하강법"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "hnCwTs5KJp-t"
   },
   "source": [
    "\n",
    " \n",
    "\n",
    " 구글 코랩에서 실행하기\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "3x4OwaSIR50l"
   },
   "source": [
    "## SGDClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "id": "j3z-zKXoRmWB"
   },
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "\n",
    "# Load the fish dataset from the remote CSV (needs network access).\n",
    "fish = pd.read_csv('https://bit.ly/fish_csv_data')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "id": "WAiJVY9nR1fF"
   },
   "outputs": [],
   "source": [
    "# Five measurement columns become the feature matrix; 'Species' is the label.\n",
    "fish_input = fish[[\n",
    "    'Weight', 'Length', 'Diagonal', 'Height', 'Width',\n",
    "]].to_numpy()\n",
    "fish_target = fish['Species'].to_numpy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "id": "AW6LMW_URpto"
   },
   "outputs": [],
   "source": [
    "from sklearn.model_selection import train_test_split\n",
    "\n",
    "# A fixed random_state keeps the train/test split the same on every run.\n",
    "train_input, test_input, train_target, test_target = train_test_split(\n",
    "    fish_input,\n",
    "    fish_target,\n",
    "    random_state=42,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "id": "1RTAwK_DRutj"
   },
   "outputs": [],
   "source": [
    "from sklearn.preprocessing import StandardScaler\n",
    "\n",
    "# The scaler is fitted on the training split only; that same fitted scaler\n",
    "# then transforms both the training and the test split.\n",
    "ss = StandardScaler().fit(train_input)\n",
    "train_scaled = ss.transform(train_input)\n",
    "test_scaled = ss.transform(test_input)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "id": "FSyujXY7sli6"
   },
   "outputs": [],
   "source": [
    "from sklearn.linear_model import SGDClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "KofoXhbwR9yu",
    "outputId": "1d6a0eac-9a2d-4008-a7fa-044c2e39907d"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.773109243697479\n",
      "0.775\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "/home/studio-lab-user/.conda/envs/default/lib/python3.9/site-packages/sklearn/linear_model/_stochastic_gradient.py:705: ConvergenceWarning: Maximum number of iteration reached before convergence. 
Consider increasing max_iter to improve the fit.\n",
      " warnings.warn(\n"
     ]
    }
   ],
   "source": [
    "# On scikit-learn 1.1.0 or older, replace 'log_loss' with 'log'.\n",
    "# With max_iter=10 the stored run ends with a ConvergenceWarning.\n",
    "sc = SGDClassifier(loss='log_loss', max_iter=10, random_state=42)\n",
    "sc.fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "duwA4N3eSUk5",
    "outputId": "f26d3a96-dca9-46b0-8201-4e12b15d57aa"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.8151260504201681\n",
      "0.85\n"
     ]
    }
   ],
   "source": [
    "# Keep training the already-fitted model with partial_fit instead of refitting.\n",
    "sc.partial_fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "TEtfnUQhzKO2"
   },
   "source": [
    "## 에포크와 과대/과소적합"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "id": "pt7BHZVZ-dWT"
   },
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "\n",
    "# 'log_loss' selects the logistic loss; scikit-learn 1.1 started warning that\n",
    "# the older spelling 'log' is being renamed to 'log_loss'.\n",
    "# On scikit-learn 1.1.0 or older, replace 'log_loss' with 'log'.\n",
    "sc = SGDClassifier(loss='log_loss', random_state=42)\n",
    "\n",
    "train_score = []\n",
    "test_score = []\n",
    "\n",
    "# The full label set is handed to partial_fit explicitly in the loop below.\n",
    "classes = np.unique(train_target)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "id": "-65Gz13tVOP7"
   },
   "outputs": [],
   "source": [
    "# 300 partial_fit calls; train/test accuracy is recorded after each one.\n",
    "for _ in range(300):\n",
    "    sc.partial_fit(train_scaled, train_target, classes=classes)\n",
    "\n",
    "    train_score.append(sc.score(train_scaled, train_target))\n",
    "    test_score.append(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/",
     "height": 279
    },
    "id": "V19SzZJ5ZjSI",
    "outputId": "de31130f-e860-4143-99ec-014654191160"
   },
   "outputs": [
    {
     "data": {
      "image/png": "\n",
      "text/plain": [
       ""
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "import matplotlib.pyplot as plt\n",
    "\n",
    "# First line drawn is the training accuracy, second is the test accuracy.\n",
    "plt.plot(train_score)\n",
    "plt.plot(test_score)\n",
    "plt.xlabel('epoch')\n",
    "plt.ylabel('accuracy')\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "pdp2Ykst1K_I",
    "outputId": "6214bd83-00c8-4a16-9931-ad7746ddff9f"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.957983193277311\n",
      "0.925\n"
     ]
    }
   ],
   "source": [
    "# Fresh logistic-loss model trained in one fit() call (max_iter=100, tol=None).\n",
    "# On scikit-learn 1.1.0 or older, replace 'log_loss' with 'log'.\n",
    "sc = SGDClassifier(loss='log_loss', max_iter=100, tol=None, random_state=42)\n",
    "sc.fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "OL7-y1kgIP4S",
    "outputId": "554597de-bec8-45d1-92e6-a0f5a7ebe519"
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "0.9495798319327731\n",
      "0.925\n"
     ]
    }
   ],
   "source": [
    "# Same training settings, but with the hinge loss for comparison.\n",
    "sc = SGDClassifier(loss='hinge', max_iter=100, tol=None, random_state=42)\n",
    "sc.fit(train_scaled, train_target)\n",
    "\n",
    "print(sc.score(train_scaled, train_target))\n",
    "print(sc.score(test_scaled, test_target))"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "4-2 확률적 경사 하강법.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.3"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}