{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# (Generalized) Linear and Hierarchical Linear Models in PyMC3" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import arviz as az\n", "import matplotlib.pyplot as plt\n", "import numpy as np\n", "from pymc3 import *\n", "import theano\n", "import pandas as pd\n", "from statsmodels.formula.api import glm as glm_sm\n", "import statsmodels.api as sm\n", "from pandas.plotting import scatter_matrix" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "%config InlineBackend.figure_format = 'retina'\n", "az.style.use('arviz-darkgrid')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Linear Regression\n", "\n", "Let's generate some data with known slope and intercept and fit a simple linear GLM." ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "size = 50\n", "true_intercept = 1\n", "true_slope = 2\n", "x = np.linspace(0, 1, size)\n", "y = true_intercept + x*true_slope + np.random.normal(scale=.5, size=size)\n", "data = {'x': x, 'y': y}" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The `glm.linear_component()` function can be used to generate the output variable y_est and coefficients of the specified linear model." ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Auto-assigning NUTS sampler...\n", "Initializing NUTS using jitter+adapt_diag...\n", "Multiprocess sampling (2 chains in 2 jobs)\n", "NUTS: [sigma, x, Intercept]\n" ] }, { "data": { "text/html": [ "\n", "
\n", " | male | \n", "height | \n", "weight | \n", "
---|---|---|---|
0 | \n", "0 | \n", "63.2 | \n", "168.7 | \n", "
1 | \n", "0 | \n", "68.7 | \n", "169.8 | \n", "
2 | \n", "0 | \n", "64.8 | \n", "176.6 | \n", "
3 | \n", "0 | \n", "67.9 | \n", "246.8 | \n", "
4 | \n", "1 | \n", "68.9 | \n", "151.6 | \n", "