{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Импортируем библиотеки" ] }, { "cell_type": "code", "execution_count": 46, "metadata": {}, "outputs": [], "source": [ "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import random\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.model_selection import GridSearchCV, StratifiedKFold\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.pipeline import Pipeline\n", "from sklearn.datasets import make_classification" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Синтетические данные" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Создаём данные" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Т.к. в задании сказано мерить AUC, то речь должна идти о бинарной классификации. Т.к. мы хотим перебирать число признаков как параметр, то создаём данные, в которых 10 признаков, информативными являются только 4" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [], "source": [ "# Binary target (n_classes=2): the AUC-based comparison described above assumes two classes.\n", "# 10 features in total, 4 of them informative; random_state pins the generated sample.\n", "X, y = make_classification(n_samples=2000, n_features=10, n_informative=4, n_classes=2, random_state=42)" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | x | \n", "0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "7 | \n", "8 | \n", "y | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "-1.765310 | \n", "-1.024146 | \n", "-0.080752 | \n", "-1.371603 | \n", "-1.693012 | \n", "1.203602 | \n", "0.485726 | \n", "-0.805957 | \n", "-2.294698 | \n", "-0.872444 | \n", "0.0 | \n", "
1 | \n", "-0.858258 | \n", "0.148367 | \n", "-0.676983 | \n", "-2.565923 | \n", "-2.296268 | \n", "-0.160094 | \n", "1.177521 | \n", "-0.901972 | \n", "-0.488819 | \n", "-2.252863 | \n", "0.0 | \n", "
2 | \n", "-1.070693 | \n", "-1.402868 | \n", "-1.964698 | \n", "-2.615374 | \n", "-1.870387 | \n", "-0.723574 | \n", "-0.190782 | \n", "-3.305596 | \n", "1.029794 | \n", "-2.143583 | \n", "0.0 | \n", "
3 | \n", "0.330069 | \n", "2.002894 | \n", "-1.453344 | \n", "0.084383 | \n", "0.248392 | \n", "-0.676807 | \n", "-1.160034 | \n", "0.004808 | \n", "0.935594 | \n", "-0.138064 | \n", "1.0 | \n", "
4 | \n", "1.427064 | \n", "0.000755 | \n", "0.745855 | \n", "0.588909 | \n", "1.038847 | \n", "-0.383004 | \n", "-1.143316 | \n", "-0.207470 | \n", "1.574944 | \n", "0.663299 | \n", "1.0 | \n", "