{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"\n",
"# Hide Numpy warnings from Statsmodels\n",
"import warnings\n",
"warnings.filterwarnings('ignore')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Load data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"This notebook uses a [dataset from the University of Notre Dame](https://www3.nd.edu/~rwilliam/stats3/L04.pdf) to demonstrate **logistic regression**.\n",
"\n",
"The dataset is used here to compare the output of `appelpy` with output produced directly by Stata."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"df = pd.read_stata('https://www3.nd.edu/~rwilliam/statafiles/glm-logit.dta')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" grade | \n",
" gpa | \n",
" tuce | \n",
" psi | \n",
"
\n",
" \n",
" \n",
" \n",
" | 0 | \n",
" 0 | \n",
" 2.06 | \n",
" 22 | \n",
" 1 | \n",
"
\n",
" \n",
" | 1 | \n",
" 1 | \n",
" 2.39 | \n",
" 19 | \n",
" 1 | \n",
"
\n",
" \n",
" | 2 | \n",
" 0 | \n",
" 2.63 | \n",
" 20 | \n",
" 0 | \n",
"
\n",
" \n",
" | 3 | \n",
" 0 | \n",
" 2.92 | \n",
" 12 | \n",
" 0 | \n",
"
\n",
" \n",
" | 4 | \n",
" 0 | \n",
" 2.76 | \n",
" 17 | \n",
" 0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" grade gpa tuce psi\n",
"0 0 2.06 22 1\n",
"1 1 2.39 19 1\n",
"2 0 2.63 20 0\n",
"3 0 2.92 12 0\n",
"4 0 2.76 17 0"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.head()"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Int64Index: 32 entries, 0 to 31\n",
"Data columns (total 4 columns):\n",
"grade 32 non-null int8\n",
"gpa 32 non-null float32\n",
"tuce 32 non-null int8\n",
"psi 32 non-null int8\n",
"dtypes: float32(1), int8(3)\n",
"memory usage: 480.0 bytes\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Inspect data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's do some exploratory data analysis on this dataset."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"from appelpy.eda import statistical_moments"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 15.1 ms, sys: 165 µs, total: 15.2 ms\n",
"Wall time: 13.1 ms\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" mean | \n",
" var | \n",
" skew | \n",
" kurtosis | \n",
"
\n",
" \n",
" \n",
" \n",
" | grade | \n",
" 0.34375 | \n",
" 0.232863 | \n",
" 0.657952 | \n",
" -1.5671 | \n",
"
\n",
" \n",
" | gpa | \n",
" 3.11719 | \n",
" 0.217821 | \n",
" 0.122658 | \n",
" -0.429932 | \n",
"
\n",
" \n",
" | tuce | \n",
" 21.9375 | \n",
" 15.2218 | \n",
" -0.52511 | \n",
" 0.0483051 | \n",
"
\n",
" \n",
" | psi | \n",
" 0.4375 | \n",
" 0.254032 | \n",
" 0.251976 | \n",
" -1.93651 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" mean var skew kurtosis\n",
"grade 0.34375 0.232863 0.657952 -1.5671\n",
"gpa 3.11719 0.217821 0.122658 -0.429932\n",
"tuce 21.9375 15.2218 -0.52511 0.0483051\n",
"psi 0.4375 0.254032 0.251976 -1.93651"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"statistical_moments(df)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"grade 2\n",
"gpa 29\n",
"tuce 14\n",
"psi 2\n",
"dtype: int64"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.nunique()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"`grade` is a binary outcome that we would like to predict in this dataset, so logistic regression is a natural choice for modelling it."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Logistic regression"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's do a regression of `grade` on `gpa`, `tuce`, `psi`."
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"from appelpy.discrete_model import Logit"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"y_list = ['grade']\n",
"X_list = ['gpa', 'tuce', 'psi']"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 103 ms, sys: 6.05 ms, total: 109 ms\n",
"Wall time: 121 ms\n"
]
}
],
"source": [
"%%time\n",
"model1 = Logit(df, y_list, X_list).fit()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Standardized and unstandardized estimates"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The **unstandardized estimates** can be returned in two formats: the **coefficients** themselves and their **odds ratio** equivalents."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"Logit Regression Results\n",
"\n",
" | Dep. Variable: | grade | No. Observations: | 32 | \n",
"
\n",
"\n",
" | Model: | Logit | Df Residuals: | 28 | \n",
"
\n",
"\n",
" | Method: | MLE | Df Model: | 3 | \n",
"
\n",
"\n",
" | Date: | Fri, 03 Jan 2020 | Pseudo R-squ.: | 0.3740 | \n",
"
\n",
"\n",
" | Time: | 21:39:27 | Log-Likelihood: | -12.890 | \n",
"
\n",
"\n",
" | converged: | True | LL-Null: | -20.592 | \n",
"
\n",
"\n",
" | Covariance Type: | nonrobust | LLR p-value: | 0.001502 | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | z | P>|z| | [0.025 | 0.975] | \n",
"
\n",
"\n",
" | const | -13.0213 | 4.931 | -2.641 | 0.008 | -22.687 | -3.356 | \n",
"
\n",
"\n",
" | gpa | 2.8261 | 1.263 | 2.238 | 0.025 | 0.351 | 5.301 | \n",
"
\n",
"\n",
" | tuce | 0.0952 | 0.142 | 0.672 | 0.501 | -0.182 | 0.373 | \n",
"
\n",
"\n",
" | psi | 2.3787 | 1.065 | 2.234 | 0.025 | 0.292 | 4.465 | \n",
"
\n",
"
"
],
"text/plain": [
"\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: grade No. Observations: 32\n",
"Model: Logit Df Residuals: 28\n",
"Method: MLE Df Model: 3\n",
"Date: Fri, 03 Jan 2020 Pseudo R-squ.: 0.3740\n",
"Time: 21:39:27 Log-Likelihood: -12.890\n",
"converged: True LL-Null: -20.592\n",
"Covariance Type: nonrobust LLR p-value: 0.001502\n",
"==============================================================================\n",
" coef std err z P>|z| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"const -13.0213 4.931 -2.641 0.008 -22.687 -3.356\n",
"gpa 2.8261 1.263 2.238 0.025 0.351 5.301\n",
"tuce 0.0952 0.142 0.672 0.501 -0.182 0.373\n",
"psi 2.3787 1.065 2.234 0.025 0.292 4.465\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model1.results_output"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Two regressors, `gpa` and `psi`, are statistically significant at the 5% level."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/plain": [
"['gpa', 'psi']"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model1.significant_regressors(0.05)"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"gpa 16.879721\n",
"tuce 1.099832\n",
"psi 10.790733\n",
"Name: odds_ratios, dtype: float64"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model1.odds_ratios"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `results_output_standardized` object contains the **standardized estimates** of the regressors (and the unstandardized ones).\n",
"\n",
"Standardized coefficients are sometimes called **beta coefficients**.\n",
"\n",
"The output is similar to what would be returned by Stata's _`listcoef` command_.\n",
"\n",
"These are the standardized estimates listed:\n",
"- `coef_stdX`: x-standardized coefficient, i.e. how much does `y` increase with a one-standard deviation increase in `x`.\n",
"- `coef_stdXy`: fully standardized coefficient, i.e. by how many standard deviations does `y` increase with a one=standard deviation increase in `x`.\n",
"- `stdev_X`: standard deviation of regressor"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"Unstandardized and Standardized Estimates | coef | z | P>|z| | coef_stdX | coef_stdXy | stdev_X |
| grade | | | | | | |
\n",
" \n",
" | gpa | \n",
" +2.8261 | \n",
" +2.238 | \n",
" 0.025 | \n",
" +1.3190 | \n",
" +0.4912 | \n",
" 0.4667 | \n",
"
\n",
" \n",
" | tuce | \n",
" +0.0952 | \n",
" +0.672 | \n",
" 0.501 | \n",
" +0.3713 | \n",
" +0.1383 | \n",
" 3.9015 | \n",
"
\n",
" \n",
" | psi | \n",
" +2.3787 | \n",
" +2.234 | \n",
" 0.025 | \n",
" +1.1989 | \n",
" +0.4465 | \n",
" 0.5040 | \n",
"
\n",
"
"
],
"text/plain": [
""
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model1.results_output_standardized"
]
},
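{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compare these estimates with the output of the Stata command `listcoef, std help` below. The `coef`, `coef_stdX`, `coef_stdXy` and `stdev_X` columns correspond to Stata's `b`, `bStdX`, `bStdXY` and `SDofX` columns respectively."
]
},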
{
"cell_type": "markdown",
"metadata": {},
"source": [
"```\n",
"logit (N=32): Unstandardized and standardized estimates\n",
"\n",
" Observed SD: 0.4826\n",
" Latent SD: 2.6851\n",
"\n",
"--------------------------------------------------------------------------------\n",
" | b z P>|z| bStdX bStdY bStdXY SDofX\n",
"-------------+------------------------------------------------------------------\n",
" gpa | 2.8261 2.238 0.025 1.319 1.053 0.491 0.467\n",
" tuce | 0.0952 0.672 0.501 0.371 0.035 0.138 3.902\n",
" 1.psi | 2.3787 2.234 0.025 1.199 0.886 0.447 0.504\n",
" constant | -13.0213 -2.641 0.008 . . . .\n",
"--------------------------------------------------------------------------------\n",
" b = raw coefficient\n",
" z = z-score for test of b=0\n",
" P>|z| = p-value for z-test\n",
" bStdX = x-standardized coefficient\n",
" bStdY = y-standardized coefficient\n",
" bStdXY = fully standardized coefficient\n",
" SDofX = standard deviation of X\n",
" ```"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Model selection"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Some model selection statistics are stored in the `model_selection_stats` attribute."
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{'log_likelihood': -12.88963346533348,\n",
" 'r_squared_pseudo': 0.3740383321251376,\n",
" 'aic': 33.779266930666964,\n",
" 'bic': 39.642210541865865}"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model1.model_selection_stats"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The log-likelihood is also stored in its own attribute `log_likelihood` for ease of reference."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"-12.88963346533348"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"model1.log_likelihood"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Prediction"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The `predict` method can be used to return estimated probabilities for observations."
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"CPU times: user 2.27 ms, sys: 23 µs, total: 2.29 ms\n",
"Wall time: 2.16 ms\n"
]
},
{
"data": {
"text/plain": [
"array([0.06137582, 0.11103084, 0.02447037, 0.02590164, 0.02650405,\n",
" 0.02657799, 0.24177247, 0.03018375, 0.03485825, 0.03858832,\n",
" 0.30721867, 0.05155897, 0.0536264 , 0.05950126, 0.48113296,\n",
" 0.52911713, 0.11112664, 0.58987241, 0.63542067, 0.6607859 ,\n",
" 0.18725992, 0.18999741, 0.19321116, 0.32223953, 0.83829052,\n",
" 0.84170419, 0.8520909 , 0.36098992, 0.90484726, 0.56989301,\n",
" 0.94534027, 0.69351141])"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"%%time\n",
"preds = model1.predict(df[X_list].to_numpy())\n",
"preds"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAp0AAAG5CAYAAADMAI+DAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy8li6FKAAAgAElEQVR4nO3debglZX0n8O8PGmVpBGMzTEQQEaNRiBsaHZPYRM2gqDCOcRlIxBiJk4lLokZ0XMgYM2YmbtkD0aBiVFyijMQxamyNiRu4BHGJDqAsKriwNBIReeePqjaHa997z+3u95zuez+f5zlPnzpVp95f1Vvn9PfWcqpaawEAgJ52m3cBAACsfkInAADdCZ0AAHQndAIA0J3QCQBAd0InAADdCZ2sGlV1QVVtnHcd81RV/6mqLqmqzVV1z85tHVpVrarWbeP7W1Udvsi4E6rq77Y2bVX9eVW9YIn5Pq+q/nJbapqXqtpYVZdODM9kW66qM6rqd5cY/7tV9c2q+nrvWqZRVadW1Zlzavtm2+SOmnaKeV1cVQ+ectqTqurDO6Jd6EHoZJewtS/ehV+wrbW7tdY2LTOf7QpKu4A/SPIbrbX1rbVPzbuYbdVae0Nr7RcWGfeU1tqLkx8Na+P432ut/eos6uxlmm05WTq4b6+qOiTJM5PctbX273u0sUz7P9K387TUNrk9087LPAM8a5fQCTvQThBmb5/kgh0xo51gWXZJq2i9HZLkW621K7Y2chUt57LW0rJCT0Inq8bk3tCqum9VnVtV11TVN6rq5eNkHxr/vWo8BH3/qtqtqp5fVV+pqiuq6nVVtd/EfH95HPetqnrBgnZOraq3VtWZVXVNkpPGtj9SVVdV1deq6o+r6hYT82tV9etV9aWquraqXlxVd6yqfxrrPWty+gXLuNVaq+qWVbU5ye5JPlNV/2+R97eqelpVXTgeNv3fVbXbOO6kqvrHqnpFVX0ryanLrZvRr1TV5eOyPmuirSXXw+hhS9Sy1cOEWw4JV9U+Sd6d5LZjX26uqtsu3INTVfcb1+1VVfWZmjhsPbZz4dgPF1XVCYu0uaWf3zxO+8mquvvE+Iur6jlV9c9JrquqdWMtb6uqK8d5P21i+r3G5fhOVX0uyX0WtDe5je1ewykD/29s+7yqOriqtmzLnxmX/bHj9A+vqk+Py/tPVfVTE/O951j7tVX15iR7LrK8D07y3ol1e0b921GCJ1XVV5P8/TjtI2s4HeCqqtpUVT+5YDmeXVX/XFXXVdWrq+rAqnr3WMP7qurWW2l/q307jr7FuB1eO7Z71MT7Fl3nW2ljv3E+V47b9/OX+SzcbJusql+oqi9W1dVV9adV9cGq+tWJ909O26rqKTV85q+qqj+pqhrH3bGq/r6G75dvVtUbqmr/xepesAy3qaqza/je+HiSOy4Y/6oaTre5ZtxufnZ8/Zgkz0vy2HHdfmZ8/YlV9flx3V5YVb82TR0wtdaah8dO/0hycZIHL3jtpCQf3to0ST6S5JfG5+uT3G98fmiSlmTdxPt+JcmXkxw2Tvv2JK8fx901yeYkP5PkFhkOX39/op1Tx+HjM/wRt1eSeye5X5J1Y3ufT/KMifZakncmuVWSuyX5XpL3j+3vl+RzSZ6wyHpYtNaJeR++xHpsST6Q5Mcy7Mn6lyS/OrE+b0zy1LH2vZZZN1vW5RuT7JPkyCRXTqybadbDUrV8eGvLleSMJL87Pt+Y5NIFy3hqkjPH5wcl+VaSh43985Bx+ICx5muS3Hmc9seT3G2R9balnx+dZI8kz0pyUZI9Jra9Tyc5eFxvuyU5L8kLM2w3hyW5MMl/HKd/aZJ/GJf94CSfnVyO3HxbfnaS85PcOUkluXuS22ytv5PcM8kVSX46wx8gTxjndcuxjq8k+c1xGR49LtPvLrLMN1u3E/39unHd7ZXkJ5JcN67XPZL8dobt5RYTy/HRJAeOfXFFkk+Ode
6ZIbi+aJr2J/rhX8f+3D3J/0zy0XHckut8K/N/XYbP4b7jsv1Lkict8Vk4KeM2mWRDhm3nUeP4p4/rcqnt911J9s+wrV+Z5Jhx3OHj+rtlhu3yQ0leudR338S4NyU5a+yPI5JctqDdE5PcZqzxmUm+nmTPhZ+TiemPzRBcK8kDk3w3yb3m9b3vsfoecy/Aw2Oax/jFuznJVROP72bx0PmhJL+TZMOC+RyaHw2d70/y6xPDdx7/A1k3/gf2xolxeye5ITcPnR9apvZnJPmbieGW5AETw+clec7E8Msm/9NZMK9Fa52Y93Kh85iJ4V9P8v7x+UlJvjptexPr8i4T4/9XklevYD0sVcuOCJ3PyUQoH197T4Ywts+4Hf3nJHst04enZgw34/BuSb6W5Gcntr1fmRj/01tZl89N8lfj8wsXLPvJWTx0fjHJcUv052To/LMkL14wzRczBIifS3J5kpoY909Zeeg8bOK1FyQ5a8F6uSzJxonlOGFi/NuS/NnE8FOTvGOa9if64X0Tw3dNcv0063zB67tn+BzfdeK1X0uyaYnPwkn5t9D5y0k+MjGuklySpUPnz0wMn5XklEWW+/gkn9ratrCVZfh+bv75+73Jdrfynu8kufvCz8kS078jydOXmsbDYyUPh9fZlRzfWtt/yyNDSFnMkzLshflCVX2iqh6+xLS3zbAHaIuvZAhVB47jLtkyorX23Qx7yiZdMjlQVT9RVe+qqq/XcMj99zLsGZn0jYnn129leP021DqtyXq/Ms5za+OmbW+r85tyPSxVy45w+yS/OB7SvKqqrsqw1/rHW2vXJXlskqck+VpVnVNVd1liXpPbwU1JLs3i6+72GQ4NT7b7vPzberttfnTZF3Nwkq2eLrEVt0/yzAXtHjy2d9skl7XW2pTtLmay7pttH+N6uSTDXs0ttnVbX8zklfTfTbJnDedcLrfOJ23IsGd24bY9WffCz8Kkhd8LLcP2sJK61yfJeLrBm6rqsvFzcmZ+9HOyNQdk+Cwuuh1V1bPGw+VXj+tjv6XmXVUPraqPVtW3x+kfNmUtMBWhk1Wptfal1trjk/y7JL+f5K3jeWJtK5NfnuE/rC0OyXBo7RsZ9mbdbsuIqtorw+GqmzW3YPjPknwhyZ1aa7fK8B9fbfvSTF3rtA5e8P7LJ4YXLss07S02v2nWw1K1TGNr/Tnpkgx7OvefeOzTWntpkrTW3tNae0iGQ+tfSHL6EvP6Ya3juX+3y+Lr7pIkFy1od9/W2sPG8V/Ljy77UstwxyXGL5z2JQva3bu19saxzYO2nEs4RbuLmVzOm20f47wPzrC3c3st17cLLbfOJ30zw17Chdv2ZN1Ltb/we6Emh1fo98a2jhw/Jydmuu+LKzN8Fre6HY3nb/52ksckufX4h/rVE/O+2fJV1S0z7In+gyQHjtP/7ZS1wFSETlalqjqxqg4Y97xcNb58U4Yv6psynO+1xRuT/GZV3aGq1mf4T+DNrbUbk7w1ySOq6j/UcBHMqVn+S3jfDOd7bR73nP3XHbVcy9Q6rWdX1a2r6uAM56K9eTvbe0FV7V1Vd0vyxIn5TbMeVlLL1nwjyW3qRy9u2uLMDP33H2u4IGfPGn6K53bjHqbjxj9Gvpfh9I2blmjr3lX1qHGv2jPG93x0kWk/nuTaGi4u2mts+4iq2nLB0FlJnjsu++0yHGZezF8meXFV3akGP1VVW/7w+UZuvi2fnuQpVfXT47T7VNWxVbVvhvOcb0zytKrao6oeleS+S7Q7jbOSHFtVD6qqPTKcN/i9DIftt9dyfbvQcuv8h1prPxhrf0lV7VtVt0/yWxm2l2mck+TIqjp+3B7+W5Jt/VmpfTNse1dX1UEZzuFd1rgMb89wkdPeVXXXDKeNTM73xgzfeeuq6oUZziPf4htJDh3/gEqG82BvOU5/Y1U9NMlO/bNP7HqETlarY5JcUMMV3a9K8rjW2vXj4fGXJPnH8RDc/ZK8JsnrM5wHelGGCxWemi
SttQvG52/KsHdjc4aLIb63RNvPSvJfklybIQSsNEgtZdFaV+CdGc4j/XSG/zxfvZ3tfTDDxSPvT/IHrbUtP4o9zXpYSS0/orX2hQzB+MKxP2+7YPwlSY7LsJf1ygx7w56d4btvtwxB4/Ik385w3uNSfyC8M8Ph+O8k+aUkj2qtfX+Run6Q5OFJ7pFhvX0zQ3jcEqB+J8Oh0IuS/F2GdbyYl2cISH+XIcS/OsOFLcnwR9Brx2V/TGvt3CRPTvLHY51fznB+YVprN2S48OWkcXkfmyG0bLPW2hcz7Jn7o3EZH5HkEWNb22W5vt3K9Mut84WemuEiqAuTfDjJX2fY3qep7ZtJfjHDOczfynBu6blZ+nthMb+T5F4Z9kKek5X1yW9kOEz/9QznOv/VxLj3JPm/GS6Q+kqGz+7kofi3jP9+q6o+2Vq7NsnTMmxr38nw2T17y8RVdUgNV7pvy95xSDKeUA5MZ9zbd1WGQ8YXzbuelaqqlqH2L8+7ll1JVZ2a4YKdE+ddCzufcW/hpRkumvrAvOuBnZU9nbCMqnrEePhqnwznO52f4YpSYI0aT9nYfzwXcsv5youdbgFE6IRpHJfhEOzlSe6U4VC9QwSwtt0/w68KbDmt4PjW2vXzLQl2bg6vAwDQnT2dAAB0t27eBUxj//33b4cffvi8y2AGrrvuuuyzzz7zLoPO9PPaoa/XDn29dpx33nnfbK0dsNL37RKh88ADD8y555477zKYgU2bNmXjxo3zLoPO9PPaoa/XDn29dlTVttzNzOF1AAD6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7tbNu4BpXP/9H+TQU86ZdxkzdfFLj513CQAAO4w9nQAAdCd0AgDQndAJAEB3QicAAN0JnQAAdCd0AgDQndAJAEB3QicAAN0JnQAAdCd0AgDQndAJAEB3QicAAN0JnQAAdCd0AgDQndAJAEB3QicAAN0JnQAAdCd0AgDQndAJAEB3QicAAN0JnQAAdCd0AgDQndAJAEB3QicAAN0JnQAAdCd0AgDQndAJAEB3QicAAN0JnQAAdNctdFbVa6rqiqr67MRrP1ZV762qL43/3rpX+wAA7Dx67uk8I8kxC147Jcn7W2t3SvL+cRgAgFWuW+hsrX0oybcXvHxckteOz1+b5Phe7QMAsPOo1lq/mVcdmuRdrbUjxuGrWmv7j88ryXe2DG/lvScnOTlJNmw44N4vfOXp3ercGR150H7zLmEuNm/enPXr18+7DDrTz2uHvl479PXacfTRR5/XWjtqpe9b16OYabTWWlUtmnhba6clOS1JDjns8Pay8+dW6lxcfMLGeZcwF5s2bcrGjRvnXQad6ee1Q1+vHfqa5cz66vVvVNWPJ8n47xUzbh8AgDmYdeg8O8kTxudPSPLOGbcPAMAc9PzJpDcm+UiSO1fVpVX1pCQvTfKQqvpSkgePwwAArHLdTpRsrT1+kVEP6tUmAAA7J3ckAgCgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgu7mEzqr6zaq6oKo+W1VvrKo951EHAACzMfPQWVUHJXlakqNaa0
ck2T3J42ZdBwAAszOvw+vrkuxVVeuS7J3k8jnVAQDADFRrbfaNVj09yUuSXJ/k71prJ2xlmpOTnJwkGzYccO8XvvL02RY5Z0cetN+8S5iLzZs3Z/369fMug87089qhr9cOfb12HH300ee11o5a6fvW9ShmKVV16yTHJblDkquSvKWqTmytnTk5XWvttCSnJckhhx3eXnb+zEudq4tP2DjvEuZi06ZN2bhx47zLoDP9vHbo67VDX7OceRxef3CSi1prV7bWvp/k7Un+wxzqAABgRuYROr+a5H5VtXdVVZIHJfn8HOoAAGBGZh46W2sfS/LWJJ9Mcv5Yw2mzrgMAgNmZy4mSrbUXJXnRPNoGAGD23JEIAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgu6lCZ1Ud2bsQAABWr2n3dP5pVX28qn69qvbrWhEAAKvOVKGztfazSU5IcnCS86rqr6vqIV0rAwBg1Zj6nM7W2peSPD/Jc5I8MMkfVtUXqupRvYoDAGB1mPaczp+qqlck+XySn0/yiNbaT47PX9GxPgAAVoF1U073R0n+MsnzWmvXb3mxtXZ5VT2/S2UAAKwa04bOY5Nc31r7QZJU1W5J9mytfbe19vpu1QEAsCpMe07n+5LsNTG89/gaAAAsa9rQuWdrbfOWgfH53n1KAgBgtZk2dF5XVffaMlBV905y/RLTAwDAD017Tuczkrylqi5PUkn+fZLHdqsKAIBVZarQ2Vr7RFXdJcmdx5e+2Fr7fr+yAABYTabd05kk90ly6Piee1VVWmuv61IVAACrylShs6pen+SOST6d5Afjyy3JNoXOqto/w+9+HjHO51daax/ZlnkBALDzm3ZP51FJ7tpaazuo3Vcl+b+ttUdX1S3iSngAgFVt2qvXP5vh4qHtVlX7Jfm5JK9OktbaDa21q3bEvAEA2DnVNDsvq+oDSe6R5ONJvrfl9dbaI1fcYNU9kpyW5HNJ7p7kvCRPb61dt2C6k5OcnCQbNhxw7xe+8vSVNrVLO/Kg/eZdwlxs3rw569evn3cZdKaf1w59vXbo67Xj6KOPPq+1dtRK3zdt6Hzg1l5vrX1wxQ1WHZXko0ke0Fr7WFW9Ksk1rbUXLPaeQw47vO32mFettKld2sUvPXbeJczFpk2bsnHjxnmXQWf6ee3Q12uHvl47qmqbQudUh9fHcHlxkj3G559I8smVNja6NMmlrbWPjcNvTXKvJaYHAGAXN1XorKonZwiHfzG+dFCSd2xLg621rye5pKq2/ObngzIcagcAYJWa9ur1/5bkvkk+liSttS9V1b/bjnafmuQN45XrFyZ54nbMCwCAndy0ofN7rbUbqipJUlXrMvy+5jZprX06w88wAQCwBkz7k0kfrKrnJdmrqh6S5C1J/k+/sgAAWE2mDZ2nJLkyyflJfi3J3yZ5fq+iAABYXaY6vN5auynJ6eMDAABWZNp7r1+UrZzD2Vo7bIdXBADAqrOSe69vsWeSX0zyYzu+HAAAVqNpfxz+WxOPy1prr0yyNm+ZAwDAik17eH3yjkG7ZdjzOe1eUgAA1rhpg+PLJp7fmOGWmI/Z4dUAALAqTXv1+tG9CwEAYPWa9vD6by01vrX28h1TDgAAq9FKrl6/T5Kzx+FHJPl4ki/1KAoAgNVl2tB5uyT3aq1dmyRVdWqSc1prJ/YqDACA1WPa22AemOSGieEbxtcAAGBZ0+7pfF2Sj1fV34zDxyd5bZ
+SAABYbaa9ev0lVfXuJD87vvTE1tqn+pUFAMBqMu3h9STZO8k1rbVXJbm0qu7QqSYAAFaZqUJnVb0oyXOSPHd8aY8kZ/YqCgCA1WXaPZ3/Kckjk1yXJK21y5Ps26soAABWl2lD5w2ttZakJUlV7dOvJAAAVptpQ+dZVfUXSfavqicneV+S0/uVBQDAajLt1et/UFUPSXJNkjsneWFr7b1dKwMAYNVYNnRW1e5J3tdaOzqJoAkAwIote3i9tfaDJDdV1X4zqAcAgFVo2jsSbU5yflW9N+MV7EnSWntal6oAAFhVpg2dbx8fAACwYkuGzqo6pLX21daa+6wDALDNljun8x1bnlTV2zrXAgDAKrXc4fWaeH5Yz0K4uUNPOWfeJczFGce47wAArEbL7elsizwHAICpLben8+5VdU2GPZ57jc8zDrfW2q26VgcAwKqwZOhsre0+q0IAAFi9pr33OgAAbDOhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7oROAAC6EzoBAOhO6AQAoDuhEwCA7uYWOqtq96r6VFW9a141AAAwG/Pc0/n0JJ+fY/sAAMzIXEJnVd0uybFJ/nIe7QMAMFvr5tTuK5P8dpJ9F5ugqk5OcnKSbNhwQF545I0zKo152rx5czZt2jTvMuhMP68d+nrt0NcsZ+ahs6oenuSK1tp5VbVxselaa6clOS1JDjns8Pay8+eVj5mlM47ZJxs3bpx3GXS2adMm/bxG6Ou1Q1+znHkcXn9AkkdW1cVJ3pTk56vqzDnUAQDAjMw8dLbWnttau11r7dAkj0vy9621E2ddBwAAs+N3OgEA6G6uJ0q21jYl2TTPGgAA6M+eTgAAuhM6AQDoTugEAKA7oRMAgO6ETgAAuhM6AQDoTugEAKA7oRMAgO6ETgAAuhM6AQDoTugEAKA7oRMAgO6ETgAAuhM6AQDoTugEAKA7oRMAgO6ETgAAuhM6AQDoTugEAKA7oRMAgO6ETgAAuhM6AQDoTugEAKA7oRMAgO6ETgAAuhM6AQDoTugEAKC7dfMuANa6Q085Z94lzNwZx+wz7xLmYi329TOPvDEnrbHlvvilx867hLk4/7Kr9TVLsqcTAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuhE4AALoTOgEA6E7oBACgO6ETAIDuZh46q+rgqvpAVX2uqi6oqqfPugYAAGZr3RzavDHJM1trn6yqfZOcV1Xvba19bg61AAAwAzPf09la+1pr7ZPj82uTfD7JQbOuAwCA2anW2vwarzo0yYeSHNFau2bBuJOTnJwkGzYccO8XvvL0mdfH7N1hv92zfv36eZcxU+dfdvW8S5i5tdjPydrs6wP3Sr5x/byrmK0jD9pv3iXMxRXfvnrN9fVa9bQTjz+vtXbUSt83j8PrSZKqWp/kbUmesTBwJklr7bQkpyXJIYcd3l52/txKZYbOOGafbNy4cd5lzNRJp5wz7xJmbi32c7I2+/qZR96Ytfb9ffEJG+ddwlz80Rveueb6mpWZy9XrVbVHhsD5htba2+dRAwAAszOPq9cryauTfL619vJZtw8AwOzNY0/nA5L8UpKfr6pPj4+HzaEOAABmZOYnX7TWPpykZt0uAADz445EAAB0J3QCANCd0AkAQHdCJwAA3QmdAAB0J3QCANCd0AkAQHdCJwAA3QmdAAB0J3QCANCd0AkAQHdCJwAA3QmdAAB0J3QCANCd0AkAQHdCJwAA3QmdAAB0J3QCANCd0A
kAQHdCJwAA3QmdAAB0J3QCANCd0AkAQHdCJwAA3QmdAAB0J3QCANCd0AkAQHfr5l0ATDr/sqtz0innzLsMOtPPAGuPPZ0AAHQndAIA0J3QCQBAd0InAADdCZ0AAHQndAIA0J3QCQBAd0InAADdCZ0AAHQndAIA0J3QCQBAd0InAADdCZ0AAHQndAIA0J3QCQBAd0InAADdCZ0AAHQndAIA0J3QCQBAd0InAADdCZ0AAHQndAIA0J3QCQBAd0InAADdCZ0AAHQndAIA0J3QCQBAd0InAADdCZ0AAHQ3l9BZVcdU1Rer6stVdco8agAAYHZmHjqravckf5LkoUnumuTxVXXXWdcBAMDszGNP532TfLm1dmFr7YYkb0py3BzqAABgRqq1NtsGqx6d5JjW2q+Ow7+U5Kdba7+xYLqTk5w8Dh6R5LMzLZR52ZDkm/Mugu7089qhr9cOfb123Lm1tu9K37SuRyU7QmvttCSnJUlVndtaO2rOJTED+npt0M9rh75eO/T12lFV527L++ZxeP2yJAdPDN9ufA0AgFVqHqHzE0nuVFV3qKpbJHlckrPnUAcAADMy88PrrbUbq+o3krwnye5JXtNau2CZt53WvzJ2Evp6bdDPa4e+Xjv09dqxTX098wuJAABYe9yRCACA7oROAAC626lC53K3x6yqW1bVm8fxH6uqQ2dfJdtrin7+rar6XFX9c1W9v6puP4862X7T3vK2qv5zVbWq8nMru6hp+rqqHjN+ti+oqr+edY3sGFN8hx9SVR+oqk+N3+MPm0edbJ+qek1VXVFVW/2d9Br84bgd/HNV3Wu5ee40oXPK22M+Kcl3WmuHJ3lFkt+fbZVsryn7+VNJjmqt/VSStyb5X7Otkh1h2lveVtW+SZ6e5GOzrZAdZZq+rqo7JXlukge01u6W5BkzL5TtNuXn+vlJzmqt3TPDL9T86WyrZAc5I8kxS4x/aJI7jY+Tk/zZcjPcaUJnprs95nFJXjs+f2uSB1VVzbBGtt+y/dxa+0Br7bvj4Ecz/JYru55pb3n74gx/QP7rLItjh5qmr5+c5E9aa99JktbaFTOukR1jmr5uSW41Pt8vyeUzrI8dpLX2oSTfXmKS45K8rg0+mmT/qvrxpSJwPnQAAAUGSURBVOa5M4XOg5JcMjF86fjaVqdprd2Y5Ookt5lJdewo0/TzpCcleXfXiuhl2b4eD8cc3Fo7Z5aFscNN87n+iSQ/UVX/WFUfraql9qCw85qmr09NcmJVXZrkb5M8dTalMWMr/f98570NJlTViUmOSvLAedfCjldVuyV5eZKT5lwKs7Euw2G4jRmOXnyoqo5srV0116ro4fFJzmitvayq7p/k9VV1RGvtpnkXxnztTHs6p7k95g+nqap1GXbbf2sm1bGjTHUb1Kp6cJL/nuSRrbXvzag2dqzl+nrfJEck2VRVFye5X5KzXUy0S5rmc31pkrNba99vrV2U5F8yhFB2LdP09ZOSnJUkrbWPJNkzyYaZVMcsrfi25jtT6Jzm9phnJ3nC+PzRSf6++XX7Xc2y/VxV90zyFxkCp/O+dl1L9nVr7erW2obW2qGttUMznL/7yNbaufMpl+0wzff3OzLs5UxVbchwuP3CWRbJDjFNX381yYOSpKp+MkPovHKmVTILZyf55fEq9vslubq19rWl3rDTHF5f7PaYVfU/kpzbWjs7yasz7Kb/coaTWx83v4rZFlP28/9Osj7JW8brxL7aWnvk3Ipmm0zZ16wCU/b1e5L8QlV9LskPkjy7teZI1S5myr5+ZpLTq+o3M1xUdJIdRLueqnpjhj8UN4zn574oyR5J0lr78wzn6z4syZeTfDfJE5edp+0AAIDedqbD6wAArFJCJwAA3QmdAAB0J3QCANCd0AkAQHdCJ7DmVdUPqurTVfXZqnpLVe29wvdvXuH0Z1TVo7fy+lFV9Yfj85Oq6o/H50+pql+eeP22K2kPYGcgdAIk17fW7tFaOyLJDUmeMjly/P
Hj7t+XrbVzW2tP28rrf95ae904eFISoRPY5QidADf3D0kOr6pDq+qLVfW6JJ9NcnBVPb6qzh/3iP7+5Juq6hVVdUFVvb+qDhhfe3JVfaKqPlNVb1uwB/XBVXVuVf1LVT18nH5jVb1rYUFVdWpVPWvcO3pUkjeMe2aPrap3TEz3kKr6mx2/SgC2n9AJMKqqdUkemuT88aU7JfnT1trdknw/ye8n+fkk90hyn6o6fpxunwx3Y7lbkg9muHNHkry9tXaf1trdk3w+wz2ptzg0yX2THJvkz6tqz+Xqa629Ncm5SU5ord0jwx1B7rIl5Ga4I8hrVrzgADMgdAIke1XVpzMEuq9muOVuknyltfbR8fl9kmxqrV3ZWrsxyRuS/Nw47qYkbx6fn5nkZ8bnR1TVP1TV+UlOSHK3iTbPaq3d1Fr7UoZ7kN9lpUWPtxZ8fZITq2r/JPdP8u6VzgdgFnaae68DzNH1457DH6qqJLluG+e35f7CZyQ5vrX2mao6KcN9jBdOs9jwtP4qyf9J8q9J3jIGYoCdjj2dANP5eJIHVtWGqto9yeMzHEpPhu/SLVej/5ckHx6f75vka1W1R4Y9nZN+sap2q6o7JjksyRenrOPacb5Jktba5UkuT/L8DAEUYKdkTyfAFFprX6uqU5J8IEklOae19s5x9HVJ7ltVz09yRZLHjq+/IMnHklw5/rvvxCy/miHI3irJU1pr/zruXV3OGRnOAb0+yf1ba9dnONR/QGvt89uxiABd1XBKEAC7qvH3PD/VWnv1shMDzInQCbALq6rzMuxpfUhr7XvzrgdgMUInAADduZAIAIDuhE4AALoTOgEA6E7oBACgO6ETAIDu/j/mzDz29Z23ugAAAABJRU5ErkJggg==\n",
"text/plain": [
""
]
},
"metadata": {
"needs_background": "light"
},
"output_type": "display_data"
}
],
"source": [
"fig, ax = plt.subplots(figsize=(11,7))\n",
"pd.Series(preds).hist(ax=ax, range=(0,1))\n",
"ax.set_xlim([0, 1])\n",
"ax.set_title('Histogram of probabilities predicted from the original data.')\n",
"ax.set_ylabel('Frequency')\n",
"ax.set_xlabel('Probability')\n",
"plt.show()"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.5"
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": false,
"title_cell": "Table of Contents",
"title_sidebar": "Contents",
"toc_cell": false,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": true
}
},
"nbformat": 4,
"nbformat_minor": 2
}