{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "[![Open in Binder](https://mybinder.org/badge_logo.svg)](https://mybinder.org/v2/gh/justmarkham/scikit-learn-tips/master?filepath=notebooks%2F18_hyperparameter_search_results.ipynb)\n", "\n", "[![Open in Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/justmarkham/scikit-learn-tips/blob/master/notebooks/18_hyperparameter_search_results.ipynb)\n", "\n", "# 🤖⚡ scikit-learn tip #18 ([video](https://www.youtube.com/watch?v=ct7W4Zs63VE&list=PL5-da3qGB5ID7YYAqireYEew2mWVvgmj6&index=18))\n", "\n", "Hyperparameter search results (from GridSearchCV or RandomizedSearchCV) can be converted into a pandas DataFrame.\n", "\n", "Makes it far easier to explore the results!\n", "\n", "See example 👇" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "df = pd.read_csv('http://bit.ly/kaggletrain')" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "X = df[['Pclass', 'Sex', 'Name']]\n", "y = df['Survived']" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "from sklearn.preprocessing import OneHotEncoder\n", "from sklearn.feature_extraction.text import CountVectorizer\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.compose import make_column_transformer\n", "from sklearn.pipeline import Pipeline" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "ohe = OneHotEncoder()\n", "vect = CountVectorizer()\n", "clf = LogisticRegression(solver='liblinear', random_state=1)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "ct = make_column_transformer((ohe, ['Sex']), (vect, 'Name'), remainder='passthrough')\n", "pipe = Pipeline([('preprocessor', ct), ('model', clf)])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# specify parameter values to search\n", "params = {}\n", "params['model__C'] = [0.1, 1, 10]\n", "params['model__penalty'] = ['l1', 'l2']" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# try all possible combinations of those parameter values\n", "from sklearn.model_selection import GridSearchCV\n", "grid = GridSearchCV(pipe, params, cv=5, scoring='accuracy')\n", "grid.fit(X, y);" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# convert results into a DataFrame\n", "results = pd.DataFrame(grid.cv_results_)[['params', 'mean_test_score', 'rank_test_score']]" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | params | \n", "mean_test_score | \n", "rank_test_score | \n", "
---|---|---|---|
2 | \n", "{'model__C': 1, 'model__penalty': 'l1'} | \n", "0.821512 | \n", "1 | \n", "
4 | \n", "{'model__C': 10, 'model__penalty': 'l1'} | \n", "0.820413 | \n", "2 | \n", "
5 | \n", "{'model__C': 10, 'model__penalty': 'l2'} | \n", "0.817055 | \n", "3 | \n", "
3 | \n", "{'model__C': 1, 'model__penalty': 'l2'} | \n", "0.812573 | \n", "4 | \n", "
1 | \n", "{'model__C': 0.1, 'model__penalty': 'l2'} | \n", "0.791225 | \n", "5 | \n", "
0 | \n", "{'model__C': 0.1, 'model__penalty': 'l1'} | \n", "0.788984 | \n", "6 | \n", "