{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Пробное программирование" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n", "\n", "from sklearn.metrics import accuracy_score\n", "from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split\n", "from sklearn.utils import shuffle \n", "\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Загружаем и \"обрабатываем\" данные. Как видно, все признаки имеют тип float, а тип стекла - int." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "RI float64\n", "Na float64\n", "Mg float64\n", "Al float64\n", "Si float64\n", "K float64\n", "Ca float64\n", "Ba float64\n", "Fe float64\n", "Type_of_glass int64\n", "dtype: object" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read glass.csv and discard the 'Id' column in one non-mutating expression.\n", "df = pd.read_csv('glass.csv').drop(['Id'], axis=1)\n", "\n", "df.dtypes" ] }, { "cell_type": "code", "execution_count": 406, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RINaMgAlSiKCaBaFeType_of_glass
01.5210113.644.491.1071.780.068.750.00.01
11.5176113.893.601.3672.730.487.830.00.01
21.5161813.533.551.5472.990.397.780.00.01
31.5176613.213.691.2972.610.578.220.00.01
41.5174213.273.621.2473.080.558.070.00.01
\n", "
" ], "text/plain": [ " RI Na Mg Al Si K Ca Ba Fe Type_of_glass\n", "0 1.52101 13.64 4.49 1.10 71.78 0.06 8.75 0.0 0.0 1\n", "1 1.51761 13.89 3.60 1.36 72.73 0.48 7.83 0.0 0.0 1\n", "2 1.51618 13.53 3.55 1.54 72.99 0.39 7.78 0.0 0.0 1\n", "3 1.51766 13.21 3.69 1.29 72.61 0.57 8.22 0.0 0.0 1\n", "4 1.51742 13.27 3.62 1.24 73.08 0.55 8.07 0.0 0.0 1" ] }, "execution_count": 406, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 407, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RINaMgAlSiKCaBaFeType_of_glass
2091.5162314.140.02.8872.610.089.181.060.07
2101.5168514.920.01.9973.060.008.401.590.07
2111.5206514.360.02.0273.420.008.441.640.07
2121.5165114.380.01.9473.610.008.481.570.07
2131.5171114.230.02.0873.360.008.621.670.07
\n", "
" ], "text/plain": [ " RI Na Mg Al Si K Ca Ba Fe Type_of_glass\n", "209 1.51623 14.14 0.0 2.88 72.61 0.08 9.18 1.06 0.0 7\n", "210 1.51685 14.92 0.0 1.99 73.06 0.00 8.40 1.59 0.0 7\n", "211 1.52065 14.36 0.0 2.02 73.42 0.00 8.44 1.64 0.0 7\n", "212 1.51651 14.38 0.0 1.94 73.61 0.00 8.48 1.57 0.0 7\n", "213 1.51711 14.23 0.0 2.08 73.36 0.00 8.62 1.67 0.0 7" ] }, "execution_count": 407, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tail()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Т.к. записи упорядочены по типу стекла, перемешаем строки." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
RINaMgAlSiKCaBaFeType_of_glass
1691.5199413.270.001.7673.030.4711.320.000.005
511.5192613.203.331.2872.360.609.140.000.111
1501.5166513.143.451.7672.480.608.380.000.173
151.5176112.813.541.2373.240.588.390.000.001
2001.5150815.150.002.2573.500.008.340.630.007
\n", "
" ], "text/plain": [ " RI Na Mg Al Si K Ca Ba Fe Type_of_glass\n", "169 1.51994 13.27 0.00 1.76 73.03 0.47 11.32 0.00 0.00 5\n", "51 1.51926 13.20 3.33 1.28 72.36 0.60 9.14 0.00 0.11 1\n", "150 1.51665 13.14 3.45 1.76 72.48 0.60 8.38 0.00 0.17 3\n", "15 1.51761 12.81 3.54 1.23 73.24 0.58 8.39 0.00 0.00 1\n", "200 1.51508 15.15 0.00 2.25 73.50 0.00 8.34 0.63 0.00 7" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = shuffle(df) \n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Разделим датасет на обучающую выборку и тест. Оставим на тест 30%." ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "y = df['Type_of_glass']\n", "X = df.copy()\n", "X.drop('Type_of_glass', axis = 1, inplace = True)\n", "\n", "X_train, X_holdout, y_train, y_holdout = train_test_split(X.values, y, test_size = .3)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Решающее дерево" ] }, { "cell_type": "code", "execution_count": 410, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "1.0" ] }, "execution_count": 410, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dec_tree = DecisionTreeClassifier(criterion='entropy')\n", "\n", "dec_tree.fit(X_train, y_train)\n", "\n", "dec_tree.score(X_train, y_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Если строить дерево без ограничения глубины, то оно идеально подстроится под обучающую выборку." ] }, { "cell_type": "code", "execution_count": 411, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6153846153846154" ] }, "execution_count": 411, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dec_tree.score(X_holdout, y_holdout)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Точность на отложенных данных - 61.5%." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Теперь будем использовать кросс-валидацию. 
Оптимизируем максимальную глубину дерева." ] }, { "cell_type": "code", "execution_count": 412, "metadata": { "scrolled": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 10 candidates, totalling 40 fits\n", "Best params: {'max_depth': 7}\n", "Best cross validaton score 0.6577181208053692\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 33 out of 40 | elapsed: 8.2s remaining: 1.7s\n", "[Parallel(n_jobs=4)]: Done 40 out of 40 | elapsed: 8.2s finished\n" ] } ], "source": [ "# Candidate tree depths 1..10, each scored with 4-fold cross-validation.\n", "tree_params = dict(max_depth=range(1, 11))\n", "\n", "cv_best_tree = GridSearchCV(\n", "    estimator=dec_tree,\n", "    param_grid=tree_params,\n", "    cv=4,\n", "    n_jobs=4,\n", "    verbose=True,\n", ").fit(X_train, y_train)\n", "\n", "print(\"Best params:\", cv_best_tree.best_params_)\n", "print(\"Best cross validaton score\", cv_best_tree.best_score_)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Оптимальная глубина - 7." ] }, { "cell_type": "code", "execution_count": 413, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6307692307692307" ] }, "execution_count": 413, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cv_best_tree.score(X_holdout, y_holdout)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Точность на отложенных данных - 63.1%." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Метод ближайших соседей" ] }, { "cell_type": "code", "execution_count": 414, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6778523489932886" ] }, "execution_count": 414, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Baseline k-NN classifier that votes over the 10 nearest neighbours.\n", "knn = KNeighborsClassifier(n_neighbors=10, n_jobs=4)\n", "knn.fit(X_train, y_train)\n", "\n", "knn.score(X_train, y_train)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Метод ближайших соседей на обучающей выборке дает точность 67.8%." 
] }, { "cell_type": "code", "execution_count": 415, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6461538461538462" ] }, "execution_count": 415, "metadata": {}, "output_type": "execute_result" } ], "source": [ "knn.score(X_holdout, y_holdout)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "На отложенных данных - 64.6%." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Теперь будем оптимизировать количество ближайших соседей и их веса (одинаковые либо зависящие от расстояния)." ] }, { "cell_type": "code", "execution_count": 416, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.2s\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Best params: {'n_neighbors': 3, 'weights': 'distance'}\n", "Best cross validaton score 0.6845637583892618\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.4s finished\n" ] } ], "source": [ "weights = ['uniform', 'distance']\n", "knn_params = {'n_neighbors': range(1, 21, 1), 'weights': weights}\n", "\n", "cv_best_knn = GridSearchCV(knn, knn_params, cv = 4, n_jobs = 4, verbose = True)\n", "\n", "cv_best_knn.fit(X_train, y_train)\n", "\n", "print(\"Best params:\", cv_best_knn.best_params_)\n", "print(\"Best cross validaton score\", cv_best_knn.best_score_)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Модель определила, что лучше всего использовать трёх соседей и веса, зависящие от расстояния." 
] }, { "cell_type": "code", "execution_count": 417, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.6923076923076923" ] }, "execution_count": 417, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cv_best_knn.score(X_holdout, y_holdout)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Точность на тестовых данных - 69.2%." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Пробный анализ ошибки (метод ближайших соседей)" ] }, { "cell_type": "code", "execution_count": 453, "metadata": {}, "outputs": [], "source": [ "import matplotlib.pyplot as plt\n", "\n", "from sklearn.metrics import mean_squared_error\n", "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Мы хотим построить графики зависимости значений ошибки и ее стандартного отклонения от объема выборки на обучении и контроле." ] }, { "cell_type": "code", "execution_count": 419, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(214, 10)" ] }, "execution_count": 419, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "В датасете 214 записей." 
] }, { "cell_type": "code", "execution_count": 425, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.6s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.8s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.1s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.3s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.9s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 17.1s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 11.4s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 17.6s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.6s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.9s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.6s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.8s finished\n" ] }, { "name": "stdout", 
"output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:605: Warning: The least populated class in y has only 3 members, which is too few. The minimum number of members in any class cannot be less than n_splits=4.\n", " % (min_groups, self.n_splits)), Warning)\n", "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 11.0s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 17.2s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.7s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.9s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 11.4s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 17.6s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:605: Warning: The least populated class in y has only 3 members, which is too few. 
The minimum number of members in any class cannot be less than n_splits=4.\n", " % (min_groups, self.n_splits)), Warning)\n", "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.0s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.3s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:605: Warning: The least populated class in y has only 3 members, which is too few. The minimum number of members in any class cannot be less than n_splits=4.\n", " % (min_groups, self.n_splits)), Warning)\n", "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.0s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.2s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:605: Warning: The least populated class in y has only 3 members, which is too few. The minimum number of members in any class cannot be less than n_splits=4.\n", " % (min_groups, self.n_splits)), Warning)\n", "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.7s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.9s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:605: Warning: The least populated class in y has only 2 members, which is too few. 
The minimum number of members in any class cannot be less than n_splits=4.\n", " % (min_groups, self.n_splits)), Warning)\n", "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.8s\n", "[Parallel(n_jobs=4)]: Done 153 out of 160 | elapsed: 16.0s remaining: 0.6s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 16.2s finished\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Fitting 4 folds for each of 40 candidates, totalling 160 fits\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\ProgramData\\Anaconda3\\lib\\site-packages\\sklearn\\model_selection\\_split.py:605: Warning: The least populated class in y has only 2 members, which is too few. The minimum number of members in any class cannot be less than n_splits=4.\n", " % (min_groups, self.n_splits)), Warning)\n", "[Parallel(n_jobs=4)]: Done 42 tasks | elapsed: 10.4s\n", "[Parallel(n_jobs=4)]: Done 160 out of 160 | elapsed: 14.4s finished\n" ] } ], "source": [ "# Held-out fractions to evaluate: 14 evenly spaced values from 0.2 to 0.8.\n", "test_size = np.linspace(0.2, 0.8, 14)\n", "\n", "test_acc_error = []\n", "test_squ_error = []\n", "\n", "train_acc_error = []\n", "train_squ_error = []\n", "\n", "for holdout_fraction in test_size:\n", "    # Re-split the data and re-run the k-NN grid search for this held-out fraction.\n", "    X_train, X_holdout, y_train, y_holdout = train_test_split(\n", "        X.values, y, test_size=holdout_fraction\n", "    )\n", "    cv_best_knn.fit(X_train, y_train)\n", "\n", "    train_pred = cv_best_knn.predict(X_train)\n", "    holdout_pred = cv_best_knn.predict(X_holdout)\n", "\n", "    # scikit-learn metrics take (y_true, y_pred), in that order.\n", "    train_acc_error.append(1 - accuracy_score(y_train, train_pred))\n", "    train_squ_error.append(mean_squared_error(y_train, train_pred))\n", "\n", "    test_acc_error.append(1 - accuracy_score(y_holdout, holdout_pred))\n", "    test_squ_error.append(mean_squared_error(y_holdout, holdout_pred))" ] }, { "cell_type": "code", "execution_count": 452, "metadata": {}, "outputs": [], "source": [ "data_acc_err = pd.DataFrame(data=[test_size, test_acc_error, train_acc_error]).transpose()\n", "data_acc_err.columns = 
['объем тестовых данных', 'средняя ошибка на тестовых данных', 'средняя ошибка на обучающей выборке']\n", "\n", "data_squ_err = pd.DataFrame(data=[test_size, test_squ_error, train_squ_error]).transpose()\n", "data_squ_err.columns = ['объем тестовых данных', 'среднеквадратичная ошибка на тестовых данных', 'среднеквадратичная ошибка на обучающей выборке']" ] }, { "cell_type": "code", "execution_count": 454, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 454, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot the test and training error columns of data_acc_err against the test-size column.\n", "plt.plot('объем тестовых данных', 'средняя ошибка на тестовых данных', data=data_acc_err)\n", "plt.plot('объем тестовых данных', 'средняя ошибка на обучающей выборке', data=data_acc_err)\n", "plt.legend()" ] }, { "cell_type": "code", "execution_count": 455, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 455, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Plot the test and training squared-error columns of data_squ_err against the test-size column.\n", "plt.plot('объем тестовых данных', 'среднеквадратичная ошибка на тестовых данных', data=data_squ_err)\n", "plt.plot('объем тестовых данных', 'среднеквадратичная ошибка на обучающей выборке', data=data_squ_err)\n", "plt.legend()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }