{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "이 노트북의 코드에 대한 설명은 [Pipeline에서 캐싱을 사용하기](https://tensorflow.blog/2017/12/08/pipeline%ec%97%90%ec%84%9c-%ec%ba%90%ec%8b%b1%ec%9d%84-%ec%82%ac%ec%9a%a9%ed%95%98%ea%b8%b0/) 글을 참고하세요."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPython 3.5.6\n",
      "IPython 6.5.0\n",
      "\n",
      "sklearn 0.20.1\n",
      "numpy 1.15.2\n",
      "scipy 1.1.0\n",
      "matplotlib 3.0.0\n"
     ]
    }
   ],
   "source": [
    "# Record the interpreter and library versions used for this run.\n",
    "%load_ext watermark\n",
    "%watermark -v -p sklearn,numpy,scipy,matplotlib"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "from sklearn.datasets import load_boston\n",
    "from sklearn.preprocessing import StandardScaler, PolynomialFeatures\n",
    "from sklearn.linear_model import Ridge\n",
    "from sklearn.model_selection import train_test_split, GridSearchCV\n",
    "from sklearn.pipeline import make_pipeline\n",
    "\n",
    "# Load the Boston housing data and hold out a test split; random_state is\n",
    "# fixed so the split is the same on every run.\n",
    "boston = load_boston()\n",
    "X_train, X_test, y_train, y_test = train_test_split(boston.data, boston.target, random_state=0)\n",
    "\n",
    "# 5 polynomial degrees x 6 ridge penalties = 30 candidate settings.\n",
    "# Keys use the '<step name>__<parameter>' form; make_pipeline names each\n",
    "# step after its lowercased class name.\n",
    "param_grid = {'polynomialfeatures__degree': [1, 2, 3, 4, 5],\n",
    "              'ridge__alpha': [0.001, 0.01, 0.1, 1, 10, 100]}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Baseline: the same scale -> polynomial expansion -> ridge pipeline,\n",
    "# built without a `memory` argument (no caching).\n",
    "pipe = make_pipeline(StandardScaler(), PolynomialFeatures(), Ridge())\n",
    "grid = GridSearchCV(pipe, param_grid=param_grid, cv=5, n_jobs=-1, iid=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8.35 s ± 1.44 s per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "# Time the uncached grid search.\n",
    "%timeit grid.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Helpers for creating and later deleting the temporary cache directory.\n",
    "from tempfile import mkdtemp\n",
    "from shutil import rmtree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Same pipeline and grid as the baseline, but `memory` points at a fresh\n",
    "# temporary directory so the pipeline can cache fitted transformers on disk.\n",
    "cache_dir = mkdtemp()\n",
    "pipe2 = make_pipeline(StandardScaler(), PolynomialFeatures(), Ridge(), memory=cache_dir)\n",
    "grid2 = GridSearchCV(pipe2, param_grid=param_grid, cv=5, n_jobs=-1, iid=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "scrolled": true
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4.89 s ± 981 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)\n"
     ]
    }
   ],
   "source": [
    "# Time the cached grid search.\n",
    "# NOTE(review): %timeit repeats the fit and cache_dir is only removed in the\n",
    "# next cell, so repetitions after the first presumably start with a warm\n",
    "# cache -- keep that in mind when comparing against the baseline timing.\n",
    "%timeit grid2.fit(X_train, y_train)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Delete the on-disk cache. ignore_errors=True keeps this cell safe to\n",
    "# re-run after the directory has already been removed.\n",
    "rmtree(cache_dir, ignore_errors=True)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.6"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}