{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# **학습데이터 RESTful API 서버 만들기**\n",
    "\n",
    "Train a `RandomForestClassifier`, persist it with `pickle`, and load it back. This is the model-persistence step that a RESTful prediction server would build on.\n",
    "\n",
    "References:\n",
    "\n",
    "1. https://honeyteacs.tistory.com/6\n",
    "1. https://scikit-learn.org/stable/modules/model_persistence.html\n",
    "1. https://scikit-learn.org/stable/modules/generated/sklearn.ensemble.RandomForestClassifier.html?highlight=randomforestclassifier#sklearn.ensemble.RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pickle\n",
    "from pathlib import Path\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from sklearn import datasets\n",
    "from sklearn.datasets import make_classification\n",
    "from sklearn.ensemble import RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Configuration: everything a reader might want to tune lives here.\n",
    "MODEL_PATH = Path('data/train.pk')  # where the fitted model is pickled\n",
    "SEED = 0                            # fixed seed so every run gives the same forest\n",
    "# Pin the forest size explicitly: the scikit-learn default changed from 10 to 100\n",
    "# in version 0.22, so relying on it makes results depend on the installed version.\n",
    "N_ESTIMATORS = 10"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Train a model on synthetic data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X, y = make_classification(n_samples=1000, n_features=4,\n",
    "                           n_informative=2, n_redundant=0,\n",
    "                           random_state=SEED, shuffle=False)\n",
    "clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, max_depth=2,\n",
    "                             random_state=SEED)\n",
    "clf.fit(X, y)\n",
    "print(clf.feature_importances_)\n",
    "print(clf.predict([[0, 0, 0, 0]]))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Save and load the model with `pickle`"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Save Model\n",
    "# Create the target directory first so the cell also works on a fresh checkout.\n",
    "MODEL_PATH.parent.mkdir(parents=True, exist_ok=True)\n",
    "with MODEL_PATH.open('wb') as f:\n",
    "    pickle.dump(clf, f)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Load Model\n",
    "# SECURITY: pickle.load can execute arbitrary code. Only load files that this\n",
    "# notebook (or another trusted source) produced.\n",
    "with MODEL_PATH.open('rb') as f:\n",
    "    clf_loaded = pickle.load(f)\n",
    "\n",
    "# The restored model must predict exactly like the one that was saved.\n",
    "assert (clf_loaded.predict(X) == clf.predict(X)).all()\n",
    "print(clf_loaded.predict([[0, 0, 1, 1]]))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In-memory round trip (no file involved): dumps -> loads.\n",
    "s = pickle.dumps(clf)\n",
    "clf2 = pickle.loads(s)\n",
    "# Prediction for the first sample next to its true label.\n",
    "clf2.predict(X[0:1]), y[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3. Train a model on the iris dataset"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "iris = datasets.load_iris()\n",
    "# Map the integer class ids to their species names.\n",
    "mapping = dict(zip(np.unique(iris.target), iris.target_names))\n",
    "mapping"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Separate names keep the synthetic X / y from section 1 intact.\n",
    "X_iris = pd.DataFrame(iris.data, columns=iris.feature_names)\n",
    "y_iris = pd.DataFrame(iris.target).replace(mapping)\n",
    "X_iris.head(3), y_iris.head(3)  # Train, Test DataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "iris_clf = RandomForestClassifier(n_estimators=N_ESTIMATORS, random_state=SEED)\n",
    "# fit() expects a 1-d label array; passing the single-column DataFrame directly\n",
    "# raises a DataConversionWarning, so flatten it with ravel().\n",
    "iris_clf.fit(X_iris, y_iris.values.ravel())"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 4. Model Save & Load (iris)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# In-memory round trip of the iris model.\n",
    "iris_clf2 = pickle.loads(pickle.dumps(iris_clf))\n",
    "# Prediction for the first flower next to its true species label.\n",
    "iris_clf2.predict(X_iris.iloc[:1]), y_iris.iloc[0, 0]"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}