{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Internet use and religion in Europe, part two\n",
    "-----------------------------------------\n",
    "\n",
    "This notebook presents a second-pass analysis of the association between Internet use and religion in Europe, using data from the European Social Survey (http://www.europeansocialsurvey.org).\n",
    "\n",
    "Copyright 2015 Allen Downey\n",
    "\n",
    "MIT License: http://opensource.org/licenses/MIT"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function, division\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "import thinkstats2\n",
    "import thinkplot\n",
    "\n",
    "import statsmodels.formula.api as smf\n",
    "\n",
    "from iso_country_codes import COUNTRY\n",
    "\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "The following function selects the columns I need."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def read_cycle(filename):\n",
    "    \"\"\"Reads a file containing ESS data and selects columns.\n",
    "    \n",
    "    filename: string\n",
    "    \n",
    "    returns: DataFrame\n",
    "    \"\"\" \n",
    "    df = pd.read_stata(filename, convert_categoricals=False)\n",
    "\n",
    "    if 'hinctnta' not in df.columns:\n",
    "        df['hinctnta'] = df.hinctnt\n",
    "        \n",
    "    if 'inwyr' not in df.columns:\n",
    "        df['inwyr'] = df.inwyye\n",
    "        \n",
    "    cols = ['cntry', 'inwyr', 'tvtot', 'tvpol', 'rdtot', 'rdpol', \n",
    "            'nwsptot', 'nwsppol', 'netuse', \n",
    "            'rlgblg', 'rlgdgr', 'eduyrs', 'hinctnta', 'yrbrn', \n",
    "            'eisced', 'pspwght', 'pweight']\n",
    "    df = df[cols]\n",
    "    return df"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read data from Cycle 1."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cntry</th>\n",
       "      <th>inwyr</th>\n",
       "      <th>tvtot</th>\n",
       "      <th>tvpol</th>\n",
       "      <th>rdtot</th>\n",
       "      <th>rdpol</th>\n",
       "      <th>nwsptot</th>\n",
       "      <th>nwsppol</th>\n",
       "      <th>netuse</th>\n",
       "      <th>rlgblg</th>\n",
       "      <th>rlgdgr</th>\n",
       "      <th>eduyrs</th>\n",
       "      <th>hinctnta</th>\n",
       "      <th>yrbrn</th>\n",
       "      <th>eisced</th>\n",
       "      <th>pspwght</th>\n",
       "      <th>pweight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AT</td>\n",
       "      <td>2003</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>11</td>\n",
       "      <td>77</td>\n",
       "      <td>1949</td>\n",
       "      <td>0</td>\n",
       "      <td>0.940933</td>\n",
       "      <td>0.271488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AT</td>\n",
       "      <td>2003</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>14</td>\n",
       "      <td>2</td>\n",
       "      <td>1953</td>\n",
       "      <td>0</td>\n",
       "      <td>0.470466</td>\n",
       "      <td>0.271488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AT</td>\n",
       "      <td>2003</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>9</td>\n",
       "      <td>77</td>\n",
       "      <td>1940</td>\n",
       "      <td>0</td>\n",
       "      <td>1.392155</td>\n",
       "      <td>0.271488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AT</td>\n",
       "      <td>2003</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>18</td>\n",
       "      <td>9</td>\n",
       "      <td>1959</td>\n",
       "      <td>0</td>\n",
       "      <td>1.382163</td>\n",
       "      <td>0.271488</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AT</td>\n",
       "      <td>2003</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>15</td>\n",
       "      <td>9</td>\n",
       "      <td>1962</td>\n",
       "      <td>0</td>\n",
       "      <td>1.437766</td>\n",
       "      <td>0.271488</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  cntry  inwyr  tvtot  tvpol  rdtot  rdpol  nwsptot  nwsppol  netuse  rlgblg  \\\n",
       "0    AT   2003      1      1      1      1        1        1       5       1   \n",
       "1    AT   2003      3      2      4      1        2        2       6       2   \n",
       "2    AT   2003      7      3      0     66        0       66       0       1   \n",
       "3    AT   2003      1      1      1      1        2        2       4       1   \n",
       "4    AT   2003      0     66      1      1        0       66       7       1   \n",
       "\n",
       "   rlgdgr  eduyrs  hinctnta  yrbrn  eisced   pspwght   pweight  \n",
       "0       8      11        77   1949       0  0.940933  0.271488  \n",
       "1       5      14         2   1953       0  0.470466  0.271488  \n",
       "2       7       9        77   1940       0  1.392155  0.271488  \n",
       "3       7      18         9   1959       0  1.382163  0.271488  \n",
       "4      10      15         9   1962       0  1.437766  0.271488  "
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df1 = read_cycle('ESS1e06_4.dta')\n",
    "df1.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read data from Cycle 2."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cntry</th>\n",
       "      <th>inwyr</th>\n",
       "      <th>tvtot</th>\n",
       "      <th>tvpol</th>\n",
       "      <th>rdtot</th>\n",
       "      <th>rdpol</th>\n",
       "      <th>nwsptot</th>\n",
       "      <th>nwsppol</th>\n",
       "      <th>netuse</th>\n",
       "      <th>rlgblg</th>\n",
       "      <th>rlgdgr</th>\n",
       "      <th>eduyrs</th>\n",
       "      <th>hinctnta</th>\n",
       "      <th>yrbrn</th>\n",
       "      <th>eisced</th>\n",
       "      <th>pspwght</th>\n",
       "      <th>pweight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AT</td>\n",
       "      <td>2005</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>12</td>\n",
       "      <td>8</td>\n",
       "      <td>1971</td>\n",
       "      <td>0</td>\n",
       "      <td>0.682185</td>\n",
       "      <td>0.302006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AT</td>\n",
       "      <td>2005</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>8</td>\n",
       "      <td>4</td>\n",
       "      <td>1925</td>\n",
       "      <td>0</td>\n",
       "      <td>0.565038</td>\n",
       "      <td>0.302006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AT</td>\n",
       "      <td>2005</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>4</td>\n",
       "      <td>13</td>\n",
       "      <td>6</td>\n",
       "      <td>1977</td>\n",
       "      <td>0</td>\n",
       "      <td>0.341133</td>\n",
       "      <td>0.302006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AT</td>\n",
       "      <td>2005</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>8</td>\n",
       "      <td>9</td>\n",
       "      <td>1989</td>\n",
       "      <td>0</td>\n",
       "      <td>0.804050</td>\n",
       "      <td>0.302006</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AT</td>\n",
       "      <td>2005</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>11</td>\n",
       "      <td>88</td>\n",
       "      <td>1988</td>\n",
       "      <td>0</td>\n",
       "      <td>1.125251</td>\n",
       "      <td>0.302006</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  cntry  inwyr  tvtot  tvpol  rdtot  rdpol  nwsptot  nwsppol  netuse  rlgblg  \\\n",
       "0    AT   2005      3      2      0     66        4        2       7       1   \n",
       "1    AT   2005      7      2      3      1        5        2       0       1   \n",
       "2    AT   2005      6      2      1      0        1        0       6       2   \n",
       "3    AT   2005      3      1      2      1        2        1       7       1   \n",
       "4    AT   2005      2      1      1      1        1        1       4       2   \n",
       "\n",
       "   rlgdgr  eduyrs  hinctnta  yrbrn  eisced   pspwght   pweight  \n",
       "0       7      12         8   1971       0  0.682185  0.302006  \n",
       "1       7       8         4   1925       0  0.565038  0.302006  \n",
       "2       4      13         6   1977       0  0.341133  0.302006  \n",
       "3       5       8         9   1989       0  0.804050  0.302006  \n",
       "4       1      11        88   1988       0  1.125251  0.302006  "
      ]
     },
     "execution_count": 4,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df2 = read_cycle('ESS2e03_4.dta')\n",
    "df2.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read data from Cycle 3."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cntry</th>\n",
       "      <th>inwyr</th>\n",
       "      <th>tvtot</th>\n",
       "      <th>tvpol</th>\n",
       "      <th>rdtot</th>\n",
       "      <th>rdpol</th>\n",
       "      <th>nwsptot</th>\n",
       "      <th>nwsppol</th>\n",
       "      <th>netuse</th>\n",
       "      <th>rlgblg</th>\n",
       "      <th>rlgdgr</th>\n",
       "      <th>eduyrs</th>\n",
       "      <th>hinctnta</th>\n",
       "      <th>yrbrn</th>\n",
       "      <th>eisced</th>\n",
       "      <th>pspwght</th>\n",
       "      <th>pweight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>AT</td>\n",
       "      <td>2007</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>3</td>\n",
       "      <td>11</td>\n",
       "      <td>7</td>\n",
       "      <td>1980</td>\n",
       "      <td>0</td>\n",
       "      <td>0.949578</td>\n",
       "      <td>0.289116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>AT</td>\n",
       "      <td>2007</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>9</td>\n",
       "      <td>20</td>\n",
       "      <td>5</td>\n",
       "      <td>1974</td>\n",
       "      <td>0</td>\n",
       "      <td>1.412180</td>\n",
       "      <td>0.289116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>AT</td>\n",
       "      <td>2007</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>16</td>\n",
       "      <td>77</td>\n",
       "      <td>1954</td>\n",
       "      <td>0</td>\n",
       "      <td>0.723276</td>\n",
       "      <td>0.289116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>AT</td>\n",
       "      <td>2007</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>12</td>\n",
       "      <td>88</td>\n",
       "      <td>1967</td>\n",
       "      <td>0</td>\n",
       "      <td>0.625744</td>\n",
       "      <td>0.289116</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>AT</td>\n",
       "      <td>2007</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>11</td>\n",
       "      <td>88</td>\n",
       "      <td>1971</td>\n",
       "      <td>0</td>\n",
       "      <td>0.417162</td>\n",
       "      <td>0.289116</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  cntry  inwyr  tvtot  tvpol  rdtot  rdpol  nwsptot  nwsppol  netuse  rlgblg  \\\n",
       "0    AT   2007      5      1      7      2        0       66       6       1   \n",
       "1    AT   2007      3      1      2      1        2        1       7       1   \n",
       "2    AT   2007      1      1      7      2        2        1       7       1   \n",
       "3    AT   2007      4      1      7      2        3        2       6       1   \n",
       "4    AT   2007      6      2      6      2        3        1       0       1   \n",
       "\n",
       "   rlgdgr  eduyrs  hinctnta  yrbrn  eisced   pspwght   pweight  \n",
       "0       3      11         7   1980       0  0.949578  0.289116  \n",
       "1       9      20         5   1974       0  1.412180  0.289116  \n",
       "2       6      16        77   1954       0  0.723276  0.289116  \n",
       "3       5      12        88   1967       0  0.625744  0.289116  \n",
       "4       7      11        88   1971       0  0.417162  0.289116  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df3 = read_cycle('ESS3e03_5.dta')\n",
    "df3.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read data from Cycle 4."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cntry</th>\n",
       "      <th>inwyr</th>\n",
       "      <th>tvtot</th>\n",
       "      <th>tvpol</th>\n",
       "      <th>rdtot</th>\n",
       "      <th>rdpol</th>\n",
       "      <th>nwsptot</th>\n",
       "      <th>nwsppol</th>\n",
       "      <th>netuse</th>\n",
       "      <th>rlgblg</th>\n",
       "      <th>rlgdgr</th>\n",
       "      <th>eduyrs</th>\n",
       "      <th>hinctnta</th>\n",
       "      <th>yrbrn</th>\n",
       "      <th>eisced</th>\n",
       "      <th>pspwght</th>\n",
       "      <th>pweight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BE</td>\n",
       "      <td>2008</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>18</td>\n",
       "      <td>4</td>\n",
       "      <td>1972</td>\n",
       "      <td>6</td>\n",
       "      <td>0.823223</td>\n",
       "      <td>0.503773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>BE</td>\n",
       "      <td>2008</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>15</td>\n",
       "      <td>7</td>\n",
       "      <td>1982</td>\n",
       "      <td>6</td>\n",
       "      <td>0.798610</td>\n",
       "      <td>0.503773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>BE</td>\n",
       "      <td>2009</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>18</td>\n",
       "      <td>10</td>\n",
       "      <td>1940</td>\n",
       "      <td>7</td>\n",
       "      <td>0.778020</td>\n",
       "      <td>0.503773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BE</td>\n",
       "      <td>2008</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "      <td>15</td>\n",
       "      <td>7</td>\n",
       "      <td>1931</td>\n",
       "      <td>6</td>\n",
       "      <td>0.777735</td>\n",
       "      <td>0.503773</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>BE</td>\n",
       "      <td>2009</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>3</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>7</td>\n",
       "      <td>1982</td>\n",
       "      <td>4</td>\n",
       "      <td>0.960960</td>\n",
       "      <td>0.503773</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  cntry  inwyr  tvtot  tvpol  rdtot  rdpol  nwsptot  nwsppol  netuse  rlgblg  \\\n",
       "0    BE   2008      7      1      0     66        0       66       0       2   \n",
       "1    BE   2008      7      2      7      3        0       66       0       2   \n",
       "2    BE   2009      2      2      3      3        3        3       7       1   \n",
       "3    BE   2008      7      2      2      2        2        2       0       1   \n",
       "4    BE   2009      0     66      3      0        1        1       7       2   \n",
       "\n",
       "   rlgdgr  eduyrs  hinctnta  yrbrn  eisced   pspwght   pweight  \n",
       "0       1      18         4   1972       6  0.823223  0.503773  \n",
       "1       0      15         7   1982       6  0.798610  0.503773  \n",
       "2       6      18        10   1940       7  0.778020  0.503773  \n",
       "3       6      15         7   1931       6  0.777735  0.503773  \n",
       "4       0      13         7   1982       4  0.960960  0.503773  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df4 = read_cycle('ESS4e04_3.dta')\n",
    "df4.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Read data from Cycle 5."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cntry</th>\n",
       "      <th>inwyr</th>\n",
       "      <th>tvtot</th>\n",
       "      <th>tvpol</th>\n",
       "      <th>rdtot</th>\n",
       "      <th>rdpol</th>\n",
       "      <th>nwsptot</th>\n",
       "      <th>nwsppol</th>\n",
       "      <th>netuse</th>\n",
       "      <th>rlgblg</th>\n",
       "      <th>rlgdgr</th>\n",
       "      <th>eduyrs</th>\n",
       "      <th>hinctnta</th>\n",
       "      <th>yrbrn</th>\n",
       "      <th>eisced</th>\n",
       "      <th>pspwght</th>\n",
       "      <th>pweight</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BE</td>\n",
       "      <td>2010</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>15</td>\n",
       "      <td>88</td>\n",
       "      <td>1988</td>\n",
       "      <td>4</td>\n",
       "      <td>0.792865</td>\n",
       "      <td>0.528619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>BE</td>\n",
       "      <td>2010</td>\n",
       "      <td>4</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>15</td>\n",
       "      <td>5</td>\n",
       "      <td>1967</td>\n",
       "      <td>6</td>\n",
       "      <td>0.871107</td>\n",
       "      <td>0.528619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>BE</td>\n",
       "      <td>2010</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>13</td>\n",
       "      <td>1</td>\n",
       "      <td>1991</td>\n",
       "      <td>3</td>\n",
       "      <td>0.799453</td>\n",
       "      <td>0.528619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BE</td>\n",
       "      <td>2010</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>7</td>\n",
       "      <td>1</td>\n",
       "      <td>5</td>\n",
       "      <td>15</td>\n",
       "      <td>10</td>\n",
       "      <td>1987</td>\n",
       "      <td>6</td>\n",
       "      <td>0.816030</td>\n",
       "      <td>0.528619</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>BE</td>\n",
       "      <td>2010</td>\n",
       "      <td>7</td>\n",
       "      <td>3</td>\n",
       "      <td>7</td>\n",
       "      <td>4</td>\n",
       "      <td>0</td>\n",
       "      <td>66</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>15</td>\n",
       "      <td>6</td>\n",
       "      <td>1952</td>\n",
       "      <td>6</td>\n",
       "      <td>0.764902</td>\n",
       "      <td>0.528619</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "  cntry  inwyr  tvtot  tvpol  rdtot  rdpol  nwsptot  nwsppol  netuse  rlgblg  \\\n",
       "0    BE   2010      5      1      1      0        1        1       2       1   \n",
       "1    BE   2010      4      3      2      2        1        2       6       2   \n",
       "2    BE   2010      4      0      0     66        0       66       7       2   \n",
       "3    BE   2010      2      2      7      2        0       66       7       1   \n",
       "4    BE   2010      7      3      7      4        0       66       0       2   \n",
       "\n",
       "   rlgdgr  eduyrs  hinctnta  yrbrn  eisced   pspwght   pweight  \n",
       "0       5      15        88   1988       4  0.792865  0.528619  \n",
       "1       7      15         5   1967       6  0.871107  0.528619  \n",
       "2       0      13         1   1991       3  0.799453  0.528619  \n",
       "3       5      15        10   1987       6  0.816030  0.528619  \n",
       "4       5      15         6   1952       6  0.764902  0.528619  "
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df5 = read_cycle('ESS5e03_2.dta')\n",
    "df5.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def clean_cycle(df):\n",
    "    \"\"\"Cleans data from one cycle.\n",
    "    \n",
    "    df: DataFrame\n",
    "    \"\"\"\n",
    "    df.tvtot.replace([77, 88, 99], np.nan, inplace=True)\n",
    "    df.rdtot.replace([77, 88, 99], np.nan, inplace=True)\n",
    "    df.nwsptot.replace([77, 88, 99], np.nan, inplace=True)\n",
    "    df.netuse.replace([77, 88, 99], np.nan, inplace=True)\n",
    "    df.tvpol.replace([66, 77, 88, 99], np.nan, inplace=True)\n",
    "    df.rdpol.replace([66, 77, 88, 99], np.nan, inplace=True)\n",
    "    df.nwsppol.replace([66, 77, 88, 99], np.nan, inplace=True)\n",
    "    df.eduyrs.replace([77, 88, 99], np.nan, inplace=True)\n",
    "    df.rlgblg.replace([7, 8, 9], np.nan, inplace=True)\n",
    "    df.rlgdgr.replace([77, 88, 99], np.nan, inplace=True)\n",
    "    df.hinctnta.replace([77, 88, 99], np.nan, inplace=True)\n",
    "    df.yrbrn.replace([7777, 8888, 9999], np.nan, inplace=True)\n",
    "    df.inwyr.replace([9999], np.nan, inplace=True)\n",
    "    \n",
    "    df['hasrelig'] = (df.rlgblg==1).astype(int)\n",
    "    df.loc[df.rlgblg.isnull(), 'hasrelig'] = np.nan\n",
    "    \n",
    "    df['yrbrn60'] = df.yrbrn - 1960\n",
    "    df['inwyr07'] = df.inwyr - 2007 + np.random.uniform(-0.5, 0.5, len(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "cycles = [df1, df2, df3, df4, df5]\n",
    "for cycle in cycles:\n",
    "    clean_cycle(cycle)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "22 countries\n",
      "25 countries\n",
      "23 countries\n",
      "29 countries\n",
      "27 countries\n"
     ]
    }
   ],
   "source": [
    "def resample(df):\n",
    "    \"\"\"Resample data by country.\n",
    "    \n",
    "    df: DataFrame\n",
    "    \n",
    "    returns: map from country code to DataFrame\n",
    "    \"\"\"\n",
    "    res = {}\n",
    "    grouped = df.groupby('cntry')\n",
    "    for name, group in grouped:\n",
    "        sample = group.sample(len(group), weights=group.pspwght, replace=True)\n",
    "        sample.index = range(len(group))\n",
    "        res[name] = sample\n",
    "    return res\n",
    "\n",
    "# each cycle_map is a map from country code to DataFrame\n",
    "cycle_maps = [resample(cycle) for cycle in cycles]\n",
    "for cycle_map in cycle_maps:\n",
    "    print(len(cycle_map), 'countries')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "In a each cycle, a few questions were not asked in some countries."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Cycle 1\n",
      "FR netuse 0\n",
      "FR hinctnta 0\n",
      "DE netuse 0\n",
      "HU hinctnta 0\n",
      "IE hinctnta 0\n",
      "Cycle 2\n",
      "FR netuse 0\n",
      "FI rlgblg 0\n",
      "EE hinctnta 0\n",
      "UA hinctnta 0\n",
      "Cycle 3\n",
      "HU hinctnta 0\n",
      "EE hinctnta 0\n",
      "UA hinctnta 0\n",
      "Cycle 4\n",
      "BG hinctnta 0\n",
      "CY hinctnta 0\n",
      "SK hinctnta 0\n",
      "Cycle 5\n",
      "PT hinctnta 0\n",
      "EE inwyr07 0\n"
     ]
    }
   ],
   "source": [
    "def check_variables(name, group):\n",
    "    \"\"\"Print variables missing from a group.\n",
    "    \n",
    "    name: group name (country code)\n",
    "    group: DataFrame\n",
    "    \"\"\"\n",
    "    varnames = ['cntry', 'tvtot', 'tvpol', 'rdtot', 'rdpol', \n",
    "                'nwsptot', 'nwsppol', 'netuse', 'inwyr07', \n",
    "                'rlgblg', 'rlgdgr', 'eduyrs', 'hinctnta', \n",
    "                'yrbrn', 'pspwght', 'pweight']\n",
    "    for var in varnames:\n",
    "        n = len(group[var].dropna())\n",
    "        if (n < 100):\n",
    "            print(name, var, len(group[var].dropna()))\n",
    "\n",
    "for i, cycle_map in enumerate(cycle_maps):\n",
    "    print('Cycle', i+1)\n",
    "    for name, group in cycle_map.items():\n",
    "        check_variables(name, group)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "I'll remove the cycle/country groups that are missing netuse or rlgblg.\n",
    "\n",
    "The ones that are missing income information undermine the ability to control for income, but it's a pretty weak effect, so I don't think it's a problem.\n",
    "\n",
    "inwtr07 is interview year shifted by 1970, the approximate mean."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "del cycle_maps[0]['FR']\n",
    "del cycle_maps[0]['DE']\n",
    "del cycle_maps[1]['FR']\n",
    "del cycle_maps[1]['FI']\n",
    "\n",
    "ee = cycle_maps[4]['EE']\n",
    "ee.inwyr07 = 3 + np.random.uniform(-0.5, 0.5, len(ee))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Income is reported on a different scale in different cycles, and differs substantially across countries.  So I'm replacing it with rank on a per-country basis: that is, each respondent is mapped to their income rank, from 0 to 1, within their country.\n",
    "\n",
    "I do the same thing with years of education, in part because there are some suspiciously high values.  At the very least, mapping to rank reduces the effect of outliers."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def replace_var_with_rank(name, df, old, new):\n",
    "    \"\"\"Replaces a scale variable with a rank from 0-1.\n",
    "    \n",
    "    Creates a new column.\n",
    "    \n",
    "    name: country code\n",
    "    df: DataFrame\n",
    "    old: old variable name\n",
    "    new: new variable name\n",
    "    \"\"\"\n",
    "    # jitter the data\n",
    "    series = df[old] + np.random.uniform(-0.25, 0.25, len(df))\n",
    "    \n",
    "    # if there's no data, just put in random values\n",
    "    if len(series.dropna()) < 10:\n",
    "        df[new] = np.random.random(len(df))\n",
    "        return\n",
    "    \n",
    "    # map from values to ranks\n",
    "    cdf = thinkstats2.Cdf(series)\n",
    "    df[new] = cdf.Probs(series)\n",
    "    \n",
    "    # make sure NaN maps to NaN\n",
    "    df.loc[df[old].isnull(), new] = np.nan\n",
    "    \n",
    "    \n",
    "def replace_with_ranks(cycle_map):\n",
    "    \"\"\"Replace variables within countries.\n",
    "    \n",
    "    cycle_map: map from country code to DataFrame\n",
    "    \"\"\"\n",
    "    for name, group in cycle_map.items():\n",
    "        replace_var_with_rank(name, group, 'hinctnta', 'hincrank')\n",
    "        replace_var_with_rank(name, group, 'eduyrs', 'edurank')\n",
    "        \n",
    "for cycle_map in cycle_maps:\n",
    "    replace_with_ranks(cycle_map)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "To fill missing values, I am drawing random samples from the available values, on a per-country basis.\n",
    "\n",
    "Since I am not modeling relationships between variables, I am losing some information with this replacement policy; the net effect is to understate the actual strength of all relationships."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def fill_var(df, old, new):\n",
    "    \"\"\"Fills missing values.\n",
    "    \n",
    "    Creates a new column\n",
    "    \n",
    "    df: DataFrame\n",
    "    old: old variable name\n",
    "    new: new variable name\n",
    "    \"\"\"\n",
    "    # find the NaN rows\n",
    "    null = df[df[old].isnull()]\n",
    "    \n",
    "    # sample from the non-NaN rows\n",
    "    fill = df[old].dropna().sample(len(null), replace=True)\n",
    "    fill.index = null.index\n",
    "    \n",
    "    # replace NaNs with the random sample\n",
    "    df[new] = df[old].fillna(fill)\n",
    "    \n",
    "    \n",
    "def fill_all_vars(df):\n",
    "    \"\"\"Fills missing values in the variables we need.\n",
    "    \n",
    "    df: DataFrame\n",
    "    \"\"\"\n",
    "    for old in ['hasrelig', 'rlgdgr', 'yrbrn60', 'edurank', 'hincrank',\n",
    "           'tvtot', 'rdtot', 'nwsptot', 'netuse', 'inwyr07']:\n",
    "        new = old + '_f'\n",
    "        fill_var(df, old, new)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def fill_vars_by_country(cycle_map):\n",
    "    for name, group in cycle_map.items():\n",
    "        fill_all_vars(group)\n",
    "        \n",
    "for cycle_map in cycle_maps:\n",
    "    fill_vars_by_country(cycle_map)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Concatenate the cycles."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "37937\n",
      "43709\n",
      "43000\n",
      "56752\n",
      "52458\n"
     ]
    }
   ],
   "source": [
    "def concat_groups(cycle_map):\n",
    "    \"\"\"Concat all countries in a cycle.\n",
    "    \n",
    "    cycle_map: map from country code to DataFrame\n",
    "    \n",
    "    returns: DataFrame\n",
    "    \"\"\"\n",
    "    return pd.concat(cycle_map.values(), ignore_index=True)\n",
    "\n",
    "dfs = [concat_groups(cycle_map) for cycle_map in cycle_maps]\n",
    "\n",
    "for df in dfs:\n",
    "    print(len(df))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "(233856, 32)\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>cntry</th>\n",
       "      <th>inwyr</th>\n",
       "      <th>tvtot</th>\n",
       "      <th>tvpol</th>\n",
       "      <th>rdtot</th>\n",
       "      <th>rdpol</th>\n",
       "      <th>nwsptot</th>\n",
       "      <th>nwsppol</th>\n",
       "      <th>netuse</th>\n",
       "      <th>rlgblg</th>\n",
       "      <th>...</th>\n",
       "      <th>hasrelig_f</th>\n",
       "      <th>rlgdgr_f</th>\n",
       "      <th>yrbrn60_f</th>\n",
       "      <th>edurank_f</th>\n",
       "      <th>hincrank_f</th>\n",
       "      <th>tvtot_f</th>\n",
       "      <th>rdtot_f</th>\n",
       "      <th>nwsptot_f</th>\n",
       "      <th>netuse_f</th>\n",
       "      <th>inwyr07_f</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>BE</td>\n",
       "      <td>2002</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>7</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>4</td>\n",
       "      <td>-8</td>\n",
       "      <td>0.476496</td>\n",
       "      <td>0.761204</td>\n",
       "      <td>2</td>\n",
       "      <td>7</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>-5.265308</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>BE</td>\n",
       "      <td>2002</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>6</td>\n",
       "      <td>-23</td>\n",
       "      <td>0.444444</td>\n",
       "      <td>0.475585</td>\n",
       "      <td>4</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-5.463721</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>BE</td>\n",
       "      <td>2002</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>-7</td>\n",
       "      <td>0.307158</td>\n",
       "      <td>0.434114</td>\n",
       "      <td>5</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>1</td>\n",
       "      <td>-4.811951</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>BE</td>\n",
       "      <td>2002</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>...</td>\n",
       "      <td>0</td>\n",
       "      <td>2</td>\n",
       "      <td>-3</td>\n",
       "      <td>0.167201</td>\n",
       "      <td>0.460201</td>\n",
       "      <td>2</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>0</td>\n",
       "      <td>-4.888268</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>BE</td>\n",
       "      <td>2002</td>\n",
       "      <td>6</td>\n",
       "      <td>2</td>\n",
       "      <td>5</td>\n",
       "      <td>0</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>...</td>\n",
       "      <td>1</td>\n",
       "      <td>10</td>\n",
       "      <td>-39</td>\n",
       "      <td>0.207799</td>\n",
       "      <td>0.287625</td>\n",
       "      <td>6</td>\n",
       "      <td>5</td>\n",
       "      <td>5</td>\n",
       "      <td>1</td>\n",
       "      <td>-5.321503</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "<p>5 rows × 32 columns</p>\n",
       "</div>"
      ],
      "text/plain": [
       "  cntry  inwyr  tvtot  tvpol  rdtot  rdpol  nwsptot  nwsppol  netuse  rlgblg  \\\n",
       "0    BE   2002      2      0      7      2        0      NaN       1       1   \n",
       "1    BE   2002      4      1      1      1        0      NaN       0       2   \n",
       "2    BE   2002      5      2      2      1        0      NaN       1       2   \n",
       "3    BE   2002      2      1      0    NaN        0      NaN       0       2   \n",
       "4    BE   2002      6      2      5      0        5        1       1       1   \n",
       "\n",
       "     ...      hasrelig_f  rlgdgr_f  yrbrn60_f  edurank_f  hincrank_f  tvtot_f  \\\n",
       "0    ...               1         4         -8   0.476496    0.761204        2   \n",
       "1    ...               0         6        -23   0.444444    0.475585        4   \n",
       "2    ...               0         2         -7   0.307158    0.434114        5   \n",
       "3    ...               0         2         -3   0.167201    0.460201        2   \n",
       "4    ...               1        10        -39   0.207799    0.287625        6   \n",
       "\n",
       "   rdtot_f  nwsptot_f  netuse_f  inwyr07_f  \n",
       "0        7          0         1  -5.265308  \n",
       "1        1          0         0  -5.463721  \n",
       "2        2          0         1  -4.811951  \n",
       "3        0          0         0  -4.888268  \n",
       "4        5          5         1  -5.321503  \n",
       "\n",
       "[5 rows x 32 columns]"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.concat(dfs, ignore_index=True)\n",
    "print(df.shape)\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "TV watching time on average weekday"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     8029\n",
       "1    12609\n",
       "2    32932\n",
       "3    32466\n",
       "4    38880\n",
       "5    29248\n",
       "6    28409\n",
       "7    50587\n",
       "Name: tvtot, dtype: int64"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.tvtot.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Radio listening, total time on average weekday."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    57368\n",
       "1    35800\n",
       "2    36999\n",
       "3    18019\n",
       "4    15176\n",
       "5    10336\n",
       "6     9372\n",
       "7    49540\n",
       "Name: rdtot, dtype: int64"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.rdtot.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Newspaper reading, total time on average weekday."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    68275\n",
       "1    72280\n",
       "2    62358\n",
       "3    17305\n",
       "4     6804\n",
       "5     2729\n",
       "6     1324\n",
       "7     1875\n",
       "Name: nwsptot, dtype: int64"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.nwsptot.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Personal use of Internet, email, www"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0    71528\n",
       "1    36749\n",
       "2     4705\n",
       "3     3751\n",
       "4     8248\n",
       "5     9923\n",
       "6    28414\n",
       "7    69833\n",
       "Name: netuse, dtype: int64"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.netuse.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Belong to a particular religion or denomination"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "1    148495\n",
       "2     83497\n",
       "Name: rlgblg, dtype: int64"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.rlgblg.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "How religious"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     29052\n",
       "1     13327\n",
       "2     16316\n",
       "3     18726\n",
       "4     15487\n",
       "5     40339\n",
       "6     22866\n",
       "7     27172\n",
       "8     23707\n",
       "9     10667\n",
       "10    14027\n",
       "Name: rlgdgr, dtype: int64"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.rlgdgr.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Total household net income, all sources"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    184192.000000\n",
       "mean          0.500507\n",
       "std           0.288454\n",
       "min           0.000014\n",
       "25%           0.250977\n",
       "50%           0.500285\n",
       "75%           0.750301\n",
       "max           1.000000\n",
       "Name: hincrank, dtype: float64"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.hincrank.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Year born"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    232794.000000\n",
       "mean       1961.491916\n",
       "std          18.727516\n",
       "min        1885.000000\n",
       "25%        1947.000000\n",
       "50%        1962.000000\n",
       "75%        1977.000000\n",
       "max        1996.000000\n",
       "Name: yrbrn, dtype: float64"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.yrbrn.describe()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEZCAYAAACJjGL9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHWVJREFUeJzt3Xu4HFWZ7/HvjySAcmcYEcP1QAS8cJFD4HFUNspICGoc\nGIQgd8/AczwZRhFFOIzu0aMjcjw6yIyDigg6Q0SNTvREUA/uARGBOBBuCZKBQC6AXMId5JL3/FFr\nh0pnd+/ee3d1V1X/Ps+zn1RXVVevtXun3lrvWrVKEYGZmVkzG/S6AGZmVm4OFGZm1pIDhZmZteRA\nYWZmLTlQmJlZSw4UZmbWkgOFjYukr0k6t8Dj7yjpKUlKr4ckfaiDx18g6fhOHW8Mn/u/JD0saVW3\nP7sVSSdJurbX5bBymtzrAlj5SFoGvAZ4CXgZuBO4DPh6pBtvIuK/T/D4p0TE1c32iYj7gc3yq9LP\neD5vENg1ItYGhoiYOZ5jTYSkHYEzgB0i4tFuf77ZeLlFYSMJ4D0RsTmwI/AF4Czg4okcVNLwhUkA\namO/utkReLTXQaLo36+kSUUe37rPgcJaioinIuInwNHAiZLeACDp25I+m5a3kfRTSaslPSrpmuH3\nS1om6ROSFgFPS/pXshPmT1Jq6UxJO0taI+kUSfcBv5S0U1qX/xvdTdINkp6Q9GNJW6XPGJC0PF/u\n9LnvkjQDOBs4On3ezWn72lSWMuem9zwk6VJJm6dtw2U7QdJ9KW10TrPfl6QtJF0m6Q/peP8zHf8Q\n4OfA61I5vjXCe2+X9J7c6ymSHpG0d3p9oKTfpN/zLZIOyu17sqQ7JT0p6T8lnZrbNiBpRfoeHqB5\nwJekr0p6XNJiSe/MbXidpPnp+71b0n/LbRuU9ANJ35H0BHBS+v1+VtKvU5mukvQnzX5vVm4OFNaW\niLgJWAG8fXgVr6SCPgYsB7YhS1md3fD2Y4CZwBYRcSxwP1mLZbOI+N+5/d4B7AEcyvotDgEnACcD\n25GlxS5oVeSs2HEl8Hlgbvq8fUco/8nAicAA8F+ATYELG473Z8DrgXcBn5K0R5PP/SpZymwX4KDh\nMkfEL4HDgFWpHKeM8N5LgeNyr2cCKyNikaSpwE+Bz0TEVsCZwA9zJ9+HgMNTK/Bk4MuS9s0da1tg\nK7IgfVqTsh8ALAX+BPg0ME/SlmnbXLLvbTvgL4HPSzo49973Ad+PiC2Af0nrjgFOIvub2DCV2SrI\ngcLGYhWw9QjrXyA7gewcES9HxHW5bQFcEBErI+KPoxx/MCKea7JfAJdFxJ0R8Szwt8AHhju7RyFa\npLqADwJfiohlEfEMWaA7pqE183cR8ceIuBVYBOy93odkKZejgbMj4pmIuA/4EjDcNzJaWf8FOFzS\npun18cB30vJxwIIU+EiBZyFweHq9ICLuTcvXkLVe3p479hrg0xHxYkQ83+Tz/xAR/5C+wyuAu4D3\nSNoBeCtwVkS8EBGLgG+SBcFhv4mI+enznyf7vi6JiKXp9RXAPqPU30rKgcLGYnvgsdzr4RPf+WRX\noj9PaY+zGt63nPaMtl9++/3AFLJWzERtB9zXcOzJZFfhwx7MLT8LbDLCcbZJZWo81tR2ChERq4Dr\ngL9MV/IzeOXqfCfgqJR2Wi1pNVkr57UAkg6T9NuUGlpN1hrJp3oejogXRinCyobX95H9brYDHktB\ntFm9VoxwvPzv7DmylppVkAOFtUXS/sDrgF83bouIpyPizIjYlSwFcUZDWqJxtFKz0UujjWrasWH5\nReAR4Bng1bmyTgL+dAzHXQXs3HDsl8jSOWPxSCpT47FGOok2M5x+OorsKv2BtP5+4DsRsVXuZ7OI\n+KKkjYAfAl8EXpNSUwtYtwXTzoixxoC2E9nvZhWwda6lM1K9PA11jTlQWDPD9y9snjpYLyc7Ud2R\n3572eY+k3VIa6EmyIbVrWhz7IWDXcZTnOEl7Sno18BmynHgAvwc2ljRT0hTgXGCj3HsfBHZukaa6\nHPho6rjelFf6NFrVYb1jRcTLZCmWz0naVNJOwEeB746hnj8C3gKcTjYkedh3gfdKerekSZI2Tp3U\nU8ny/xuSBao1kg4D3j2Gzxz2Gkmnp070o8j6ixZExArgN8DfS9pI0l7AKW3Uq520oFWAA4U18xNJ\nT5JdyZ5Nlms/Obc93xm8G/AL4CmyE8o/RsS/tzj23wPnphTKGbnjNYqG5cuAbwMPkJ0YTweIiCeA\nD5PlzVcAT7Numur76d9HJS0c4XO+RdYXcA1wD1lq6a+blKPVOtL7nknHuZYsdXRJG+8j1eV5YB5Z\nq2Rebv0KYBZwDvAHsu/lY4Ai4imy38UVZKnB2cC/tVne/PbfAtOAh4HPAkdGxOq0fXYq06pUrk/l\n7oNpdo9L4/fnVkdFqcgHF6UhgIeTdZK9uck+F5CNBnkWOCkibi6sQGYVIOlvgWkRccKoO5t1QdEt\nikvIOuRGJGkmsFtETANOBb5WcHnMSk3S1mRpna/3uixmwwoNFBFxLbC6xS7vI+u8IyJuALaUtG2L\n/c1qS9JfkaWUfhYR6w0aMOuVXvdRTGXdXPIKsiGYZn0nIr4REZtGxId7XRazvF4HClh/ZIQ7vMzM\nSqTXk6+tBHbIvd6e9W/6QZKDh5nZOETEhIcp9zpQzAfmAHMlHQg8HhEj3uRU5OisXhscHGRwcLDX\nxShMnetX57qB69dN/7bgLr734zt4/vmX2tp/3mUfGHWf9ma4GV3R0w1fTjYx2jbKZvf8NNkUB0TE\nRRGxIN0ktZRs7PnJzY9mZlYv7QaHjTeezNHvfyOzZu7epZKtq9BAERGz29hnTpFlMDMrq1ZBotfB\nIa/XqScDBgYGel2EQtW5fnWuG7h+RWnWkihTcMgr9M7sTpEUVSinmVk7jj113jpBYuONJ/OvXz+i\n458jqSOd2WUYHmtm1lcag8TR739jD0szOqeezMy6oFm6qYiWRKe5RWFm1gXN+iSqoBqlNDOroFbD\nX6uQchrmQGFmVpBmrYgqpJvynHoyMytIs+GvVeMWhZlZF7Qz5UZZOVCYmXXQWOdsqgKnnszMOqjK\no5uacaAwM+uguvRL5FU7zJmZlUCzdFOV+yXy3KIwM5ugOqab8hwozMwmqI7pprz6hDwzsxKoS7op\nz4HCzGychvsm6s6pJzOzcWrsm6hTv0SeA4WZ2ThV7bkS41XP8Gdm1mVVm+hvLBwozMzGoI5TdIzG\nqSczszGo+z0TI3GgMDMbg7rfMzGSeodBM7MC1fGeiZE4UJiZjaIf+yXynHoyMxtFP/ZL5DlQmJmN\noh/7JfL6JySamY1B3acOHwu3KMzMRtDv6aY8BwozsxH0e7oprz/Do5nZGPRjuinPgcLMLKdfpg4f\nC6eezMxy+mXq8LFwoDAzy+mXqcPHwqHSzKyJOk8dPhYOFGbW9/p9io7RFJp6kjRD0hJJd0s6a4Tt\n20i6UtItkm6XdFKR5TEzG4nvmWitsEAhaRJwITADeAMwW9KeDbvNAW6OiH2AAeBLkvztmFlX+Z6J\n1oo8KU8HlkbEMgBJc4FZwOLcPg8Ae6XlzYFHI8JtPzMrnKfoaF+RgWIqsDz3egVwQMM+3wCulrQK\n2AzwN2RmXeF0U/uK/K1EG/ucA9wSEQOSdgV+IWnviHiqccfBwcG1ywMDAwwMDHSqnGbWh+qYbhoa\nGmJoaKjjx1VEO+fzcRxYOhAYjIgZ6fXZwJqIOC+3zwLgcxFxXXr9/4CzImJhw7GiqHKaWX864oQr\n1i7XNd0kiYjQRI9TZItiITBN0s7AKuBoYHbDPkuAQ4DrJG0L7A7cU2CZzKyPeRjs+BQWKCLiJUlz\ngKuAScDFEbFY0mlp+0XA54FLJC0iG4H1iYh4rKgymVl/c7/E+BT6G4qInwE/a1h3UW75EeC9RZbB\nzGxYHfslusGh1MxqzcNgJ86TAppZrTndNHEOFGZWa043TZzDqpn1DaebxseBwsxqyU+q6xynnsys\nlvykus5xoDCzWvKT6jrHIdbMaqPZUFg/qW5i3KIws9rwUNhiOFCYWW14KGwxHGrNrJY8FLZzHCjM\nrNI8I2zxnHoys0pzv0TxHCjMrNLcL1E8h10zqw33SxTDgcLMKsf9Et3l1JOZVY77JbrLgcLMKsf9\nEt3lEGxmleAn1fWOWxRmVglON/WOA4WZVYLTTb3jcGxmleN0U3c5UJhZqflJdb3n1JOZlZqfVNd7\nDhRmVmp+Ul3vOTSbWen4SXXl4haFmZWOh8KWiwOFmZWOh8KWi0O0mZWah8L2ngOFmZWCZ4QtL6ee\nzKwU3C9RXg4UZlYK7pcoL4drMysd90uUiwOFmfWM+yWqwaknM+sZ90tUQ6GBQtIMSUsk3S3prCb7\nDEi6WdLtkoaKLI+ZlYv7JaqhsNAtaRJwIXAIsBK4SdL8iFic22dL4B+BQyNihaRtiiqPmZWb+yXK\nq8g23nRgaUQsA5A0F5gFLM7tcyzww4hYARARjxRYHjMrAfdLVE+RqaepwPLc6xVpXd40YGtJv5K0\nUNLxBZbHzErA/RLVU+S3E23sMwV4C/Au4NXA9ZJ+GxF3F1guM+sh90tUT5GBYiWwQ+71DmStirzl\nwCMR8RzwnKRrgL2B9QLF4ODg2uWBgQEGBgY6XFwz6zb3S3TW0NAQQ0NDHT+uItq58B/HgaXJwF1k\nrYVVwI3A7IbO7D3IOrwPBTYCbgCOjog7G44VRZXTzLpjpL4JB4piSSIiNNHjFNaiiIiXJM0BrgIm\nARdHxGJJp6XtF0XEEklXArcCa4BvNAYJM6sHP9K0ugprUXSSWxRm1XfECVesXR7ul5g1c/celqj+\nSt+iMDPzI03rwVN4mFlhPBS2HhwozKwwHgpbDw7tZtYVHuFUXQ4UZtZRnqKjfpx6MrOOcr9E/ThQ\nmFlHuV+ifhzmzaww7peoBwcKM5sw90vUm1NPZjZh7peot6aBQtK3c8sndqU0ZlZJ7peot1Yhf+/c\n8keASwsui5nVgPsl6sdtQzMbt+G+Cau3VoFie0kXAAKm5pYBIiJOL7x0ZlZqnjq8P7T6Vj9O9jhT\nAb9r2OY5v81svSDhfol6ahooIuLbXSyHmVWcpw6vr5btREknAacDe6RVdwJfjQh3bJv1Kd8z0X+a\nBoo0JPZvgDOAm8lSUPsC56cnzl3WnSKaWZn4non+0+qGuw8DR0TEryLi8YhYHRFXA0cC/6M7xTOz\nsvE9E/2n1WXAZhFxb+PKiFgmabMCy2RmFeF7JvpDq0Dx/Di3mVnNuF+iv7UKFHtKuq3Jtl2LKIyZ\nlZP7Jfpbq296L2BbYEXD+h2ABworkZmVjvsl+lurQPEV4JMRsSy/UtLmwJeB9xZYLjMrKfdL9J9W\ngWLbiFgv9RQRt0rapcAymVkJuF/ChrUaHrtli20bd7ogZlYu7pewYa0CxUJJpzaulPRXrD/3k5nV\njPslbFiry4OPAD+S9EFeCQz7ARsBf1F0wcysPNwv0d9aTQr4oKS3AgcDbyKbMfan6e5sM6sh90vY\nSFomHCMigKvTj5nVnPslbCSt+ijMrM+4X8JG4ksFMxuR+yVsmAOFmfnZ19aSU09m5mdfW0sOFGbm\nZ19bS75sMLN1+NnX1qjQQCFpBtnkgpOAb0bEeU322x+4HvhARMwrskxmlvE9E9auwlJPkiYBFwIz\ngDcAsyXt2WS/84AryZ7LbWZd4HsmrF1F9lFMB5ZGxLKIeBGYC8waYb+/Bn4APFxgWcysge+ZsHYV\nefkwFViee70COCC/g6SpZMHjncD+ZNOEmFmX+Z4Ja6XIQNHOSX/44UghSTj1ZFYo90vYeBQZKFaS\nPTZ12A6s/1jV/YC5WYxgG+AwSS9GxPzGgw0ODq5dHhgYYGBgoMPFNas/90vU29DQEENDQx0/rrJ5\n/zpP0mTgLuBdwCrgRmB2RCxusv8lwE9GGvUkKYoqp1k/OeKEK9Z5PdwvMWvm7j0qkRVJEhEx4UxN\nYZcSEfGSpDnAVWTDYy+OiMWSTkvbLyrqs81sdO6XsHYV1qLoJLcozMavWb+EA0X9dapF4Sk8zGrO\n/RI2UQ4UZjXn+yVsonxZYdZHnG6y8XCgMKsh3y9hneTUk1kNuV/COsmBwqyG3C9hneRLDLOac7+E\nTZQDhVmN+NnXVgSnnsxqxM++tiI4UJjViJ99bUXw5YZZxTUbCutnX1unuEVhVnEeCmtFc6AwqzgP\nhbWi+bLDrEY8FNaK4EBhVkGeosO6yaknswpyv4R1kwOFWQW5X8K6yZcgZhXhJ9VZr7hFYVYRTjdZ\nrzhQmFWE003WK74cMasgp5usmxwozErMw2CtDJx6Misx90tYGThQmJWY+yWsDHxpYlYR7pewXnGg\nMCshP6nOysSpJ7MS8pPqrEwcKMxKyE+qszLxZYpZSfhJdVZWblGYlYSHwlpZOVCYlYSHwlpZ+XLF\nrIc8I6xVgVsUZj3kdJNVgQOFWQ853WRV4EsXs5JwusnKyoHCrMs8I6xVTeGpJ0kzJC2RdLeks0bY\n/kFJiyTdKuk6SXsVXSazXnK/hFVNoX+dkiYBFwKHACuBmyTNj4jFud3uAd4REU9ImgF8HTiwyHKZ\n9UKzloT7Jazsir6MmQ4sjYhlAJLmArOAtYEiIq7P7X8DsH3BZTLriZHmb/Jd11YFRaeepgLLc69X\npHXNfAhYUGiJzHrE8zdZVRXdooh2d5R0MHAK8GcjbR8cHFy7PDAwwMDAwASLZlY8z99k3TQ0NMTQ\n0FDHj6uIts/lYz+4dCAwGBEz0uuzgTURcV7DfnsB84AZEbF0hONEkeU0K8qxp84bsU/CgcK6QRIR\noYkep+jU00JgmqSdJW0IHA3Mz+8gaUeyIHHcSEHCrMrccW11UGjqKSJekjQHuAqYBFwcEYslnZa2\nXwR8CtgK+JokgBcjYnqR5TIrkudvsropNPXUKU49WZU43WRlUZXUk1nfcbrJ6sa3g5p1gNNNVmdu\nUZh1gKflsDpzoDDrAKebrM58yWM2Tk43Wb9wi8JsnJxusn7hv2qzMfIssNZvHCjMxsizwFq/cerJ\nbIw8C6z1G7cozNrgWWCtn7lFYdYGd1xbP/NfulkTzVoR4JST9RcHCrMmmrUinG6yfuPUk1kTHv5q\nlnGLwizHd1ubrc8tCrMcd1qbrc//A6zvudParDUHCut77rQ2a82BwvqW52wya48DhfUtz9lk1h4H\nCusrbkWYjZ0DhfUV90eYjZ0DhdWeRzWZTYwDhdWeWxFmE+NAYbXkVoRZ5zhQWC25FWHWOQ4UVhtu\nRZgVw4HCasOtCLNiOFBY5fneCLNiOVBYJY2WZnIrwqxzHCisMloFh2FuRZh1ngOFVUarIDEcIGbN\n3L3LpTKrPwcKK7V2RjI5OJgVy4HCSsl9EGbl4UBhpeE+CLNyKjRQSJoBfAWYBHwzIs4bYZ8LgMOA\nZ4GTIuLmIstk5TKW4OAUk1lvFBYoJE0CLgQOAVYCN0maHxGLc/vMBHaLiGmSDgC+BhxYVJnKamho\niIGBgV4XozBDQ0M88ex2owaERlUIEP3w3bl+VmSLYjqwNCKWAUiaC8wCFuf2eR9wKUBE3CBpS0nb\nRsRDBZardOr2x9rYSli86PvsufdRbb23CsEhr27fXSPXz6DYQDEVWJ57vQI4oI19tgf6KlCUVTtp\noU6oWnAw6zdFBopocz+N830tHXHCFZ04TFcsXnQHt95TnfKOlwOCWTUpoiPn5fUPLB0IDEbEjPT6\nbGBNvkNb0j8DQxExN71eAhzUmHqSVEwhzcxqLiIaL8bHrMgWxUJgmqSdgVXA0cDshn3mA3OAuSmw\nPD5S/0QnKmpmZuNTWKCIiJckzQGuIhsee3FELJZ0Wtp+UUQskDRT0lLgGeDkospjZmbjU1jqyczM\n6mGDXnyopG9JekjSbbl10yXdKOlmSTdJ2j+t31jS5ZJulXSnpE/m3rOfpNsk3S3pH3pRl5E0qd/e\nkq5P9ZgvabPctrNTHZZIendufeXrJ+nPJS1M6xdKOjj3nsrXL7d9R0lPS/pYbl3p6jeOv8290rbb\n0/YN0/rS1Q3G/LdZxXPLDpJ+JemO9J2cntZvLekXkn4v6eeStsy9Z+Lnl4jo+g/wdmBf4LbcuiHg\n0LR8GPCrtHwScHlafhVwL7Bjen0jMD0tLwBm9KI+bdbvJuDtaflk4DNp+Q3ALcAUYGdgKa+09OpQ\nv32A16blNwIrcu+pfP1y238AfA/4WJnrN8bvbjKwCHhzer0VsEFZ6zaO+lXx3PJaYJ+0vClwF7An\n8EXgE2n9WcAX0nJHzi89aVFExLXA6obVDwBbpOUtye7mHl6/ibI7vTcBXgCelLQdsFlE3Jj2uwx4\nf6EFb1OT+k1L6wF+CRyZlmeR/bG+GNnNiUuBA+pSv4i4JSIeTOvvBF4laUpd6gcg6f3APWT1G15X\nyvqNsW7vBm6NiNvSe1dHxJqy1g3GXL8qnlsejIhb0vLTZDcwTyV383L6d7i8HTm/9CRQNPFJ4EuS\n7gfOB84BiIirgCfJvtRlwPkR8TjZL2dF7v0r07qyukPSrLR8FLBDWn4d69ZjBVk9GtdXtX55RwK/\ni4gXqcn3J2lT4BPAYMP+Vapfs+/u9UBIulLS7yR9PK2vUt2gSf2qfm5RNqJ0X+AGID+jxUPAtmm5\nI+eXMgWKi4HTI2JH4KPpNZKOI2sWbgfsApwpaZeelXL8TgE+LGkhWZPxhR6Xp9Na1k/SG4EvAKf1\noGyd0Kx+g8CXI+JZ1r95tCqa1W0y8Dbg2PTvX0h6Jx26KbaLRqxflc8t6QLlh8DfRMRT+W2R5ZI6\n+h2VaZrx6RFxSFr+AfDNtPxW4EcR8TLwsKTrgP2AX5NN9zFse15JV5VORNwFHAog6fXA4WnTSta9\n+t6eLNKvpB71Q9L2wDzg+Ii4N62uev1mpk3TgSMlfZEsZbpG0nNk9a1E/Vp8d8uBayLisbRtAfAW\n4LtUpG7Q8rur5LlF0hSyIPGdiPhxWv2QpNdGxIMprfSHtL4j55cytSiWSjooLb8T+H1aXpJeI2kT\nstlll6S895OSDpAk4Hjgx5SUpD9N/24AnEs2Uy5kNx0eI2nDdDUzDbixLvVLoy/+L3BWRFw/vH9E\nPEC16/fPABHxjojYJSJ2IZtS/3MR8U9V+v5a/G1eBbxZ0qskTQYOAu6oUt2g+XdHBc8tqTwXA3dG\nxFdym+YDJ6blE3mlvJ05v/So5/5ysru1XyC7ajkZ+K9kubZbgOuBfdO+G5FdwdwG3MG6o0r2S+uX\nAhf0oi5t1u8U4HSyEQp3AZ9v2P+cVIclpJFfdakf2X/Mp4Gbcz/b1KV+De/7NHBGmb+/cfxtfhC4\nPdXjC2Wu2zj+Nqt4bnkbsCadJ4f/P80AtibrqP898HNgy9x7Jnx+8Q13ZmbWUplST2ZmVkIOFGZm\n1pIDhZmZteRAYWZmLTlQmJlZSw4UZmbWkgOF1Z4y10qakVt3lKSfdfAznu7UsczKxvdRWF9Ic019\nn2wStSnAf5DdfHRvyzeOfKzJEfFSw7qnImKzZu8Z5XiTIptGwqyUHCisb0g6D3iWbErpp4GdgDeR\nBY7BiJifZuS8LO0DMCcirpc0AHwWeAzYIyJ2bzj2U8A3yKbmfhA4JiIekbQP2ZQRrwL+EzglIh6X\nNER2V+3byO4mfh/wW+BgsjmjPhQRvy7g12A2Zk49WT/5O2A22ZQHGwNXR8QBZPP9nC/p1WRTNP95\nROwHHANckHv/vmQzHO/O+jYBboqINwH/TjadB2RB5+MRsTfZdAnD6wOYEhH7R8T/Sa8npfJ8JLef\nWc+VafZYs0JFxLOSvkfWmvgA8F5JZ6bNG5HNsvkgcKGkvYGXySZRG3ZjRNzX5PBryJ5wB9n8QfMk\nbQ5sEa88NOdSsvTXsO+xrnnp3/8gexqZWSk4UFi/WZN+BBwREXfnN0oaBB6IiOPTk8+ez21+ps3P\nECM/D6DxeRWNx/tj+vdl/H/TSsSpJ+tXV5HNKgqApH3T4uZkrQqAE4BJbR5vA7Knp0H2oJ9rI+JJ\nYLWkt6X1x5M9G37tx4692Gbd50Bh/SjIOqanSLpV0u1k/RcA/wScKOkWYHeyNFX+fc08A0yXdBsw\nAHwmrT+RrP9jEbBXbv1ox/MoEysNj3oyM7OW3KIwM7OWHCjMzKwlBwozM2vJgcLMzFpyoDAzs5Yc\nKMzMrCUHCjMza8mBwszMWvr/9X+OumFQm/YAAAAASUVORK5CYII=\n",
      "text/plain": [
       "<matplotlib.figure.Figure at 0x7f2e04476a50>"
      ]
     },
     "metadata": {},
     "output_type": "display_data"
    }
   ],
   "source": [
    "cdf = thinkstats2.Cdf(df.yrbrn)\n",
    "thinkplot.PrePlot(1)\n",
    "thinkplot.Cdf(cdf)\n",
    "thinkplot.Config(xlabel='Year born', ylabel='CDF', \n",
    "                 title='Disrtribution of year born', legend=False)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Shifted to mean near 0"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    232794.000000\n",
       "mean          1.491916\n",
       "std          18.727516\n",
       "min         -75.000000\n",
       "25%         -13.000000\n",
       "50%           2.000000\n",
       "75%          17.000000\n",
       "max          36.000000\n",
       "Name: yrbrn60, dtype: float64"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.yrbrn60.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Number of years of education, converted to ranks."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    230992.000000\n",
       "mean          0.500264\n",
       "std           0.288676\n",
       "min           0.000332\n",
       "25%           0.250251\n",
       "50%           0.500277\n",
       "75%           0.750251\n",
       "max           1.000000\n",
       "Name: edurank, dtype: float64"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.edurank.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Country codes"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "AT     6918\n",
       "BE     8939\n",
       "BG     6064\n",
       "CH     9310\n",
       "CY     3293\n",
       "CZ     8790\n",
       "DE    11568\n",
       "DK     7684\n",
       "EE     6960\n",
       "ES     9729\n",
       "FI     7969\n",
       "FR     5787\n",
       "GB    11117\n",
       "GR     9759\n",
       "HR     3133\n",
       "HU     7806\n",
       "IE    10472\n",
       "IL     7283\n",
       "IS      579\n",
       "IT     1207\n",
       "LT     1677\n",
       "LU     3187\n",
       "LV     1980\n",
       "NL     9741\n",
       "NO     8643\n",
       "PL     8917\n",
       "PT    10302\n",
       "RO     2146\n",
       "RU     7544\n",
       "SE     9201\n",
       "SI     7126\n",
       "SK     6944\n",
       "TR     4272\n",
       "UA     7809\n",
       "Name: cntry, dtype: int64"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.cntry.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0     29052\n",
       "1     13327\n",
       "2     16316\n",
       "3     18726\n",
       "4     15487\n",
       "5     40339\n",
       "6     22866\n",
       "7     27172\n",
       "8     23707\n",
       "9     10667\n",
       "10    14027\n",
       "Name: rlgdgr, dtype: int64"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.rlgdgr.value_counts().sort_index()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "count    233537.000000\n",
       "mean         -0.219482\n",
       "std           2.933184\n",
       "min          -5.499864\n",
       "25%          -2.722377\n",
       "50%          -0.247620\n",
       "75%           2.284778\n",
       "max           5.499294\n",
       "Name: inwyr07, dtype: float64"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.inwyr07.describe()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run the model"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def run_model(df, formula):\n",
    "    model = smf.logit(formula, data=df)    \n",
    "    results = model.fit(disp=False)\n",
    "    return results"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Here's the model with all control variables and all media variables:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Logit Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>     <td>hasrelig</td>     <th>  No. Observations:  </th>   <td>179125</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>               <td>Logit</td>      <th>  Df Residuals:      </th>   <td>179117</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>               <td>MLE</td>       <th>  Df Model:          </th>   <td>     7</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>          <td>Mon, 16 Nov 2015</td> <th>  Pseudo R-squ.:     </th>   <td>0.02992</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>              <td>14:59:20</td>     <th>  Log-Likelihood:    </th> <td>-1.1470e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>converged:</th>           <td>True</td>       <th>  LL-Null:           </th> <td>-1.1824e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th> </th>                      <td> </td>        <th>  LLR p-value:       </th>   <td> 0.000</td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "      <td></td>         <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th> <th>[95.0% Conf. Int.]</th> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th> <td>    1.0394</td> <td>    0.019</td> <td>   54.565</td> <td> 0.000</td> <td>    1.002     1.077</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>yrbrn60</th>   <td>   -0.0080</td> <td>    0.000</td> <td>  -24.684</td> <td> 0.000</td> <td>   -0.009    -0.007</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>edurank</th>   <td>   -0.0096</td> <td>    0.020</td> <td>   -0.490</td> <td> 0.624</td> <td>   -0.048     0.029</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>hincrank</th>  <td>    0.1361</td> <td>    0.019</td> <td>    7.309</td> <td> 0.000</td> <td>    0.100     0.173</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>tvtot</th>     <td>   -0.0154</td> <td>    0.003</td> <td>   -6.059</td> <td> 0.000</td> <td>   -0.020    -0.010</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>rdtot</th>     <td>   -0.0151</td> <td>    0.002</td> <td>   -7.956</td> <td> 0.000</td> <td>   -0.019    -0.011</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>nwsptot</th>   <td>   -0.0412</td> <td>    0.004</td> <td>  -10.213</td> <td> 0.000</td> <td>   -0.049    -0.033</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>netuse</th>    <td>   -0.1124</td> <td>    0.002</td> <td>  -56.961</td> <td> 0.000</td> <td>   -0.116    -0.109</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                           Logit Regression Results                           \n",
       "==============================================================================\n",
       "Dep. Variable:               hasrelig   No. Observations:               179125\n",
       "Model:                          Logit   Df Residuals:                   179117\n",
       "Method:                           MLE   Df Model:                            7\n",
       "Date:                Mon, 16 Nov 2015   Pseudo R-squ.:                 0.02992\n",
       "Time:                        14:59:20   Log-Likelihood:            -1.1470e+05\n",
       "converged:                       True   LL-Null:                   -1.1824e+05\n",
       "                                        LLR p-value:                     0.000\n",
       "==============================================================================\n",
       "                 coef    std err          z      P>|z|      [95.0% Conf. Int.]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept      1.0394      0.019     54.565      0.000         1.002     1.077\n",
       "yrbrn60       -0.0080      0.000    -24.684      0.000        -0.009    -0.007\n",
       "edurank       -0.0096      0.020     -0.490      0.624        -0.048     0.029\n",
       "hincrank       0.1361      0.019      7.309      0.000         0.100     0.173\n",
       "tvtot         -0.0154      0.003     -6.059      0.000        -0.020    -0.010\n",
       "rdtot         -0.0151      0.002     -7.956      0.000        -0.019    -0.011\n",
       "nwsptot       -0.0412      0.004    -10.213      0.000        -0.049    -0.033\n",
       "netuse        -0.1124      0.002    -56.961      0.000        -0.116    -0.109\n",
       "==============================================================================\n",
       "\"\"\""
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "formula = ('hasrelig ~ yrbrn60 + edurank + hincrank +'\n",
    "           'tvtot + rdtot + nwsptot + netuse')\n",
    "res = run_model(df, formula)\n",
    "res.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now using the filled variables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 34,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Logit Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>    <td>hasrelig_f</td>    <th>  No. Observations:  </th>   <td>233856</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>               <td>Logit</td>      <th>  Df Residuals:      </th>   <td>233848</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>               <td>MLE</td>       <th>  Df Model:          </th>   <td>     7</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>          <td>Mon, 16 Nov 2015</td> <th>  Pseudo R-squ.:     </th>   <td>0.02832</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>              <td>14:59:21</td>     <th>  Log-Likelihood:    </th> <td>-1.4846e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>converged:</th>           <td>True</td>       <th>  LL-Null:           </th> <td>-1.5278e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th> </th>                      <td> </td>        <th>  LLR p-value:       </th>   <td> 0.000</td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th> <th>[95.0% Conf. Int.]</th> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th>  <td>    1.1082</td> <td>    0.017</td> <td>   66.598</td> <td> 0.000</td> <td>    1.076     1.141</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>yrbrn60_f</th>  <td>   -0.0079</td> <td>    0.000</td> <td>  -28.692</td> <td> 0.000</td> <td>   -0.008    -0.007</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>edurank_f</th>  <td>   -0.0123</td> <td>    0.017</td> <td>   -0.725</td> <td> 0.469</td> <td>   -0.045     0.021</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>hincrank_f</th> <td>    0.0826</td> <td>    0.016</td> <td>    5.188</td> <td> 0.000</td> <td>    0.051     0.114</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>tvtot_f</th>    <td>   -0.0142</td> <td>    0.002</td> <td>   -6.417</td> <td> 0.000</td> <td>   -0.019    -0.010</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>rdtot_f</th>    <td>   -0.0185</td> <td>    0.002</td> <td>  -11.059</td> <td> 0.000</td> <td>   -0.022    -0.015</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>nwsptot_f</th>  <td>   -0.0425</td> <td>    0.004</td> <td>  -11.886</td> <td> 0.000</td> <td>   -0.050    -0.035</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>netuse_f</th>   <td>   -0.1052</td> <td>    0.002</td> <td>  -60.862</td> <td> 0.000</td> <td>   -0.109    -0.102</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                           Logit Regression Results                           \n",
       "==============================================================================\n",
       "Dep. Variable:             hasrelig_f   No. Observations:               233856\n",
       "Model:                          Logit   Df Residuals:                   233848\n",
       "Method:                           MLE   Df Model:                            7\n",
       "Date:                Mon, 16 Nov 2015   Pseudo R-squ.:                 0.02832\n",
       "Time:                        14:59:21   Log-Likelihood:            -1.4846e+05\n",
       "converged:                       True   LL-Null:                   -1.5278e+05\n",
       "                                        LLR p-value:                     0.000\n",
       "==============================================================================\n",
       "                 coef    std err          z      P>|z|      [95.0% Conf. Int.]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept      1.1082      0.017     66.598      0.000         1.076     1.141\n",
       "yrbrn60_f     -0.0079      0.000    -28.692      0.000        -0.008    -0.007\n",
       "edurank_f     -0.0123      0.017     -0.725      0.469        -0.045     0.021\n",
       "hincrank_f     0.0826      0.016      5.188      0.000         0.051     0.114\n",
       "tvtot_f       -0.0142      0.002     -6.417      0.000        -0.019    -0.010\n",
       "rdtot_f       -0.0185      0.002    -11.059      0.000        -0.022    -0.015\n",
       "nwsptot_f     -0.0425      0.004    -11.886      0.000        -0.050    -0.035\n",
       "netuse_f      -0.1052      0.002    -60.862      0.000        -0.109    -0.102\n",
       "==============================================================================\n",
       "\"\"\""
      ]
     },
     "execution_count": 34,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "formula = ('hasrelig_f ~ yrbrn60_f + edurank_f + hincrank_f +'\n",
    "           'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
    "res = run_model(df, formula)\n",
    "res.summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Now adding inwyr07"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 35,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Logit Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>    <td>hasrelig_f</td>    <th>  No. Observations:  </th>   <td>233856</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>               <td>Logit</td>      <th>  Df Residuals:      </th>   <td>233847</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>               <td>MLE</td>       <th>  Df Model:          </th>   <td>     8</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>          <td>Mon, 16 Nov 2015</td> <th>  Pseudo R-squ.:     </th>   <td>0.03166</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>              <td>14:59:21</td>     <th>  Log-Likelihood:    </th> <td>-1.4795e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>converged:</th>           <td>True</td>       <th>  LL-Null:           </th> <td>-1.5278e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th> </th>                      <td> </td>        <th>  LLR p-value:       </th>   <td> 0.000</td>   \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th> <th>[95.0% Conf. Int.]</th> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th>  <td>    1.1406</td> <td>    0.017</td> <td>   68.130</td> <td> 0.000</td> <td>    1.108     1.173</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>inwyr07_f</th>  <td>    0.0494</td> <td>    0.002</td> <td>   31.850</td> <td> 0.000</td> <td>    0.046     0.052</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>yrbrn60_f</th>  <td>   -0.0085</td> <td>    0.000</td> <td>  -30.617</td> <td> 0.000</td> <td>   -0.009    -0.008</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>edurank_f</th>  <td>    0.0180</td> <td>    0.017</td> <td>    1.058</td> <td> 0.290</td> <td>   -0.015     0.051</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>hincrank_f</th> <td>    0.0949</td> <td>    0.016</td> <td>    5.949</td> <td> 0.000</td> <td>    0.064     0.126</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>tvtot_f</th>    <td>   -0.0201</td> <td>    0.002</td> <td>   -9.026</td> <td> 0.000</td> <td>   -0.024    -0.016</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>rdtot_f</th>    <td>   -0.0148</td> <td>    0.002</td> <td>   -8.817</td> <td> 0.000</td> <td>   -0.018    -0.012</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>nwsptot_f</th>  <td>   -0.0352</td> <td>    0.004</td> <td>   -9.780</td> <td> 0.000</td> <td>   -0.042    -0.028</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>netuse_f</th>   <td>   -0.1151</td> <td>    0.002</td> <td>  -65.319</td> <td> 0.000</td> <td>   -0.119    -0.112</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                           Logit Regression Results                           \n",
       "==============================================================================\n",
       "Dep. Variable:             hasrelig_f   No. Observations:               233856\n",
       "Model:                          Logit   Df Residuals:                   233847\n",
       "Method:                           MLE   Df Model:                            8\n",
       "Date:                Mon, 16 Nov 2015   Pseudo R-squ.:                 0.03166\n",
       "Time:                        14:59:21   Log-Likelihood:            -1.4795e+05\n",
       "converged:                       True   LL-Null:                   -1.5278e+05\n",
       "                                        LLR p-value:                     0.000\n",
       "==============================================================================\n",
       "                 coef    std err          z      P>|z|      [95.0% Conf. Int.]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept      1.1406      0.017     68.130      0.000         1.108     1.173\n",
       "inwyr07_f      0.0494      0.002     31.850      0.000         0.046     0.052\n",
       "yrbrn60_f     -0.0085      0.000    -30.617      0.000        -0.009    -0.008\n",
       "edurank_f      0.0180      0.017      1.058      0.290        -0.015     0.051\n",
       "hincrank_f     0.0949      0.016      5.949      0.000         0.064     0.126\n",
       "tvtot_f       -0.0201      0.002     -9.026      0.000        -0.024    -0.016\n",
       "rdtot_f       -0.0148      0.002     -8.817      0.000        -0.018    -0.012\n",
       "nwsptot_f     -0.0352      0.004     -9.780      0.000        -0.042    -0.028\n",
       "netuse_f      -0.1151      0.002    -65.319      0.000        -0.119    -0.112\n",
       "==============================================================================\n",
       "\"\"\""
      ]
     },
     "execution_count": 35,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "formula = ('hasrelig_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n",
    "           'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
    "res = run_model(df, formula)\n",
    "res.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 36,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(233856, -0.11506110306210458, '**')"
      ]
     },
     "execution_count": 36,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "def extract_res(res, var='netuse_f'):\n",
    "    param = res.params[var]\n",
    "    pvalue = res.pvalues[var]\n",
    "    stars = '**' if pvalue < 0.01 else '*' if pvalue < 0.05 else ''\n",
    "    return res.nobs, param, stars\n",
    "\n",
    "extract_res(res)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Group by country:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 37,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "grouped = df.groupby('cntry')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run a sample country"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 38,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>Logit Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>    <td>hasrelig_f</td>    <th>  No. Observations:  </th>  <td>   579</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>               <td>Logit</td>      <th>  Df Residuals:      </th>  <td>   570</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>               <td>MLE</td>       <th>  Df Model:          </th>  <td>     8</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>          <td>Mon, 16 Nov 2015</td> <th>  Pseudo R-squ.:     </th>  <td>0.06961</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>              <td>14:59:22</td>     <th>  Log-Likelihood:    </th> <td> -372.62</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>converged:</th>           <td>True</td>       <th>  LL-Null:           </th> <td> -400.50</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th> </th>                      <td> </td>        <th>  LLR p-value:       </th> <td>3.143e-09</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>z</th>      <th>P>|z|</th> <th>[95.0% Conf. Int.]</th> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th>  <td>   -1.6075</td> <td>    0.753</td> <td>   -2.136</td> <td> 0.033</td> <td>   -3.083    -0.133</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>inwyr07_f</th>  <td>   -0.0307</td> <td>    0.305</td> <td>   -0.101</td> <td> 0.920</td> <td>   -0.629     0.567</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>yrbrn60_f</th>  <td>   -0.0372</td> <td>    0.006</td> <td>   -6.020</td> <td> 0.000</td> <td>   -0.049    -0.025</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>edurank_f</th>  <td>    0.2802</td> <td>    0.384</td> <td>    0.730</td> <td> 0.465</td> <td>   -0.472     1.033</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>hincrank_f</th> <td>    0.8706</td> <td>    0.326</td> <td>    2.672</td> <td> 0.008</td> <td>    0.232     1.509</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>tvtot_f</th>    <td>    0.0625</td> <td>    0.052</td> <td>    1.206</td> <td> 0.228</td> <td>   -0.039     0.164</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>rdtot_f</th>    <td>   -0.0137</td> <td>    0.036</td> <td>   -0.378</td> <td> 0.706</td> <td>   -0.085     0.057</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>nwsptot_f</th>  <td>   -0.0036</td> <td>    0.107</td> <td>   -0.034</td> <td> 0.973</td> <td>   -0.214     0.207</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>netuse_f</th>   <td>    0.1488</td> <td>    0.044</td> <td>    3.352</td> <td> 0.001</td> <td>    0.062     0.236</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                           Logit Regression Results                           \n",
       "==============================================================================\n",
       "Dep. Variable:             hasrelig_f   No. Observations:                  579\n",
       "Model:                          Logit   Df Residuals:                      570\n",
       "Method:                           MLE   Df Model:                            8\n",
       "Date:                Mon, 16 Nov 2015   Pseudo R-squ.:                 0.06961\n",
       "Time:                        14:59:22   Log-Likelihood:                -372.62\n",
       "converged:                       True   LL-Null:                       -400.50\n",
       "                                        LLR p-value:                 3.143e-09\n",
       "==============================================================================\n",
       "                 coef    std err          z      P>|z|      [95.0% Conf. Int.]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept     -1.6075      0.753     -2.136      0.033        -3.083    -0.133\n",
       "inwyr07_f     -0.0307      0.305     -0.101      0.920        -0.629     0.567\n",
       "yrbrn60_f     -0.0372      0.006     -6.020      0.000        -0.049    -0.025\n",
       "edurank_f      0.2802      0.384      0.730      0.465        -0.472     1.033\n",
       "hincrank_f     0.8706      0.326      2.672      0.008         0.232     1.509\n",
       "tvtot_f        0.0625      0.052      1.206      0.228        -0.039     0.164\n",
       "rdtot_f       -0.0137      0.036     -0.378      0.706        -0.085     0.057\n",
       "nwsptot_f     -0.0036      0.107     -0.034      0.973        -0.214     0.207\n",
       "netuse_f       0.1488      0.044      3.352      0.001         0.062     0.236\n",
       "==============================================================================\n",
       "\"\"\""
      ]
     },
     "execution_count": 38,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "group = grouped.get_group('IS')\n",
    "run_model(group, formula).summary()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run all countries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 39,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def run_logits(grouped, formula, var):\n",
    "    for name, group in grouped:\n",
    "        country = '%14.14s' % COUNTRY[name]\n",
    "        model = smf.logit(formula, data=group)    \n",
    "        results = model.fit(disp=False)\n",
    "        nobs, param, stars = extract_res(results, var=var)\n",
    "        arrow = '<--' if stars and param > 0 else ''\n",
    "        print(country, nobs, '%0.3g'%param, stars, arrow, sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 40,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       AUSTRIA\t6918\t-0.0783\t**\t\n",
      "       BELGIUM\t8939\t-0.0484\t**\t\n",
      "      BULGARIA\t6064\t0.0168\t\t\n",
      "   SWITZERLAND\t9310\t-0.037\t**\t\n",
      "        CYPRUS\t3293\t-0.0223\t\t\n",
      "CZECH REPUBLIC\t8790\t-0.0164\t\t\n",
      "       GERMANY\t11568\t-0.0243\t**\t\n",
      "       DENMARK\t7684\t-0.0374\t**\t\n",
      "       ESTONIA\t6960\t-0.0452\t**\t\n",
      "         SPAIN\t9729\t-0.0657\t**\t\n",
      "       FINLAND\t7969\t-0.0472\t**\t\n",
      "        FRANCE\t5787\t0.000362\t\t\n",
      "UNITED KINGDOM\t11117\t-0.0297\t**\t\n",
      "        GREECE\t9759\t-0.031\t\t\n",
      "       CROATIA\t3133\t-0.0361\t\t\n",
      "       HUNGARY\t7806\t-0.0301\t**\t\n",
      "       IRELAND\t10472\t-0.0582\t**\t\n",
      "        ISRAEL\t7283\t-0.0813\t**\t\n",
      "       ICELAND\t579\t0.149\t**\t<--\n",
      "         ITALY\t1207\t-0.0873\t**\t\n",
      "     LITHUANIA\t1677\t-0.0398\t\t\n",
      "    LUXEMBOURG\t3187\t-0.0581\t**\t\n",
      "        LATVIA\t1980\t-0.0319\t\t\n",
      "   NETHERLANDS\t9741\t-0.0457\t**\t\n",
      "        NORWAY\t8643\t-0.0385\t**\t\n",
      "        POLAND\t8917\t-0.119\t**\t\n",
      "      PORTUGAL\t10302\t-0.102\t**\t\n",
      "       ROMANIA\t2146\t-0.00946\t\t\n",
      "RUSSIAN FEDERA\t7544\t0.0301\t**\t<--\n",
      "        SWEDEN\t9201\t-0.0652\t**\t\n",
      "      SLOVENIA\t7126\t-0.0426\t**\t\n",
      "      SLOVAKIA\t6944\t-0.0615\t**\t\n",
      "        TURKEY\t4272\t-0.0939\t*\t\n",
      "       UKRAINE\t7809\t-0.0601\t**\t\n"
     ]
    }
   ],
   "source": [
    "formula = ('hasrelig_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n",
    "           'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
    "\n",
    "run_logits(grouped, formula, 'netuse_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 41,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       AUSTRIA\t6918\t0.375\t**\t<--\n",
      "       BELGIUM\t8939\t-0.136\t\t\n",
      "      BULGARIA\t6064\t0.0274\t\t\n",
      "   SWITZERLAND\t9310\t-0.143\t\t\n",
      "        CYPRUS\t3293\t0.111\t\t\n",
      "CZECH REPUBLIC\t8790\t-0.199\t*\t\n",
      "       GERMANY\t11568\t0.518\t**\t<--\n",
      "       DENMARK\t7684\t0.342\t**\t<--\n",
      "       ESTONIA\t6960\t-0.241\t*\t\n",
      "         SPAIN\t9729\t0.0597\t\t\n",
      "       FINLAND\t7969\t0.0164\t\t\n",
      "        FRANCE\t5787\t-0.0222\t\t\n",
      "UNITED KINGDOM\t11117\t-0.0577\t\t\n",
      "        GREECE\t9759\t-0.758\t**\t\n",
      "       CROATIA\t3133\t0.12\t\t\n",
      "       HUNGARY\t7806\t-0.119\t\t\n",
      "       IRELAND\t10472\t0.285\t**\t<--\n",
      "        ISRAEL\t7283\t-0.1\t\t\n",
      "       ICELAND\t579\t0.871\t**\t<--\n",
      "         ITALY\t1207\t0.519\t*\t<--\n",
      "     LITHUANIA\t1677\t0.655\t**\t<--\n",
      "    LUXEMBOURG\t3187\t0.302\t*\t<--\n",
      "        LATVIA\t1980\t0.0253\t\t\n",
      "   NETHERLANDS\t9741\t-0.407\t**\t\n",
      "        NORWAY\t8643\t-0.0125\t\t\n",
      "        POLAND\t8917\t-0.156\t\t\n",
      "      PORTUGAL\t10302\t-0.0845\t\t\n",
      "       ROMANIA\t2146\t0.0659\t\t\n",
      "RUSSIAN FEDERA\t7544\t-0.0556\t\t\n",
      "        SWEDEN\t9201\t-0.112\t\t\n",
      "      SLOVENIA\t7126\t-0.313\t**\t\n",
      "      SLOVAKIA\t6944\t0.0315\t\t\n",
      "        TURKEY\t4272\t-0.119\t\t\n",
      "       UKRAINE\t7809\t0.0702\t\t\n"
     ]
    }
   ],
   "source": [
    "run_logits(grouped, formula, 'hincrank_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 42,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "       AUSTRIA\t6918\t-0.682\t**\t\n",
      "       BELGIUM\t8939\t0.142\t\t\n",
      "      BULGARIA\t6064\t-0.247\t\t\n",
      "   SWITZERLAND\t9310\t-0.665\t**\t\n",
      "        CYPRUS\t3293\t-1.19\t*\t\n",
      "CZECH REPUBLIC\t8790\t-0.38\t**\t\n",
      "       GERMANY\t11568\t-0.616\t**\t\n",
      "       DENMARK\t7684\t-0.241\t**\t\n",
      "       ESTONIA\t6960\t0.0551\t\t\n",
      "         SPAIN\t9729\t-0.4\t**\t\n",
      "       FINLAND\t7969\t0.127\t\t\n",
      "        FRANCE\t5787\t0.0997\t\t\n",
      "UNITED KINGDOM\t11117\t0.446\t**\t<--\n",
      "        GREECE\t9759\t-1.34\t**\t\n",
      "       CROATIA\t3133\t-0.693\t**\t\n",
      "       HUNGARY\t7806\t-0.604\t**\t\n",
      "       IRELAND\t10472\t-0.198\t*\t\n",
      "        ISRAEL\t7283\t-0.0924\t\t\n",
      "       ICELAND\t579\t0.28\t\t\n",
      "         ITALY\t1207\t0.192\t\t\n",
      "     LITHUANIA\t1677\t0.13\t\t\n",
      "    LUXEMBOURG\t3187\t-1.01\t**\t\n",
      "        LATVIA\t1980\t0.303\t\t\n",
      "   NETHERLANDS\t9741\t-0.412\t**\t\n",
      "        NORWAY\t8643\t-0.106\t\t\n",
      "        POLAND\t8917\t-1.16\t**\t\n",
      "      PORTUGAL\t10302\t-0.715\t**\t\n",
      "       ROMANIA\t2146\t-1.43\t**\t\n",
      "RUSSIAN FEDERA\t7544\t-0.0916\t\t\n",
      "        SWEDEN\t9201\t0.561\t**\t<--\n",
      "      SLOVENIA\t7126\t-0.413\t**\t\n",
      "      SLOVAKIA\t6944\t-0.298\t**\t\n",
      "        TURKEY\t4272\t-1.69\t**\t\n",
      "       UKRAINE\t7809\t-0.255\t**\t\n"
     ]
    }
   ],
   "source": [
    "run_logits(grouped, formula, 'edurank_f')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Run OLS model with `rlgdgr`: \"Regardless of whether you belong to a particular religion, how religious would you say you are?\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 43,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "data": {
      "text/html": [
       "<table class=\"simpletable\">\n",
       "<caption>OLS Regression Results</caption>\n",
       "<tr>\n",
       "  <th>Dep. Variable:</th>        <td>rlgdgr_f</td>     <th>  R-squared:         </th>  <td>   0.051</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Model:</th>                   <td>OLS</td>       <th>  Adj. R-squared:    </th>  <td>   0.051</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Method:</th>             <td>Least Squares</td>  <th>  F-statistic:       </th>  <td>   1559.</td>  \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Date:</th>             <td>Mon, 16 Nov 2015</td> <th>  Prob (F-statistic):</th>   <td>  0.00</td>   \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Time:</th>                 <td>14:59:26</td>     <th>  Log-Likelihood:    </th> <td>-5.7994e+05</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>No. Observations:</th>      <td>233856</td>      <th>  AIC:               </th>  <td>1.160e+06</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Residuals:</th>          <td>233847</td>      <th>  BIC:               </th>  <td>1.160e+06</td> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Df Model:</th>              <td>     8</td>      <th>                     </th>      <td> </td>     \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Covariance Type:</th>      <td>nonrobust</td>    <th>                     </th>      <td> </td>     \n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "       <td></td>         <th>coef</th>     <th>std err</th>      <th>t</th>      <th>P>|t|</th> <th>[95.0% Conf. Int.]</th> \n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Intercept</th>  <td>    6.0342</td> <td>    0.022</td> <td>  270.724</td> <td> 0.000</td> <td>    5.991     6.078</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>inwyr07_f</th>  <td>    0.0244</td> <td>    0.002</td> <td>   11.646</td> <td> 0.000</td> <td>    0.020     0.029</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>yrbrn60_f</th>  <td>   -0.0175</td> <td>    0.000</td> <td>  -47.085</td> <td> 0.000</td> <td>   -0.018    -0.017</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>edurank_f</th>  <td>   -0.2782</td> <td>    0.023</td> <td>  -12.079</td> <td> 0.000</td> <td>   -0.323    -0.233</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>hincrank_f</th> <td>   -0.1569</td> <td>    0.022</td> <td>   -7.236</td> <td> 0.000</td> <td>   -0.199    -0.114</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>tvtot_f</th>    <td>   -0.0763</td> <td>    0.003</td> <td>  -25.339</td> <td> 0.000</td> <td>   -0.082    -0.070</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>rdtot_f</th>    <td>   -0.0238</td> <td>    0.002</td> <td>  -10.429</td> <td> 0.000</td> <td>   -0.028    -0.019</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>nwsptot_f</th>  <td>   -0.0668</td> <td>    0.005</td> <td>  -13.715</td> <td> 0.000</td> <td>   -0.076    -0.057</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>netuse_f</th>   <td>   -0.1367</td> <td>    0.002</td> <td>  -57.186</td> <td> 0.000</td> <td>   -0.141    -0.132</td>\n",
       "</tr>\n",
       "</table>\n",
       "<table class=\"simpletable\">\n",
       "<tr>\n",
       "  <th>Omnibus:</th>       <td>31835.411</td> <th>  Durbin-Watson:     </th> <td>   1.760</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Prob(Omnibus):</th>  <td> 0.000</td>   <th>  Jarque-Bera (JB):  </th> <td>8476.913</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Skew:</th>           <td>-0.134</td>   <th>  Prob(JB):          </th> <td>    0.00</td>\n",
       "</tr>\n",
       "<tr>\n",
       "  <th>Kurtosis:</th>       <td> 2.107</td>   <th>  Cond. No.          </th> <td>    85.0</td>\n",
       "</tr>\n",
       "</table>"
      ],
      "text/plain": [
       "<class 'statsmodels.iolib.summary.Summary'>\n",
       "\"\"\"\n",
       "                            OLS Regression Results                            \n",
       "==============================================================================\n",
       "Dep. Variable:               rlgdgr_f   R-squared:                       0.051\n",
       "Model:                            OLS   Adj. R-squared:                  0.051\n",
       "Method:                 Least Squares   F-statistic:                     1559.\n",
       "Date:                Mon, 16 Nov 2015   Prob (F-statistic):               0.00\n",
       "Time:                        14:59:26   Log-Likelihood:            -5.7994e+05\n",
       "No. Observations:              233856   AIC:                         1.160e+06\n",
       "Df Residuals:                  233847   BIC:                         1.160e+06\n",
       "Df Model:                           8                                         \n",
       "Covariance Type:            nonrobust                                         \n",
       "==============================================================================\n",
       "                 coef    std err          t      P>|t|      [95.0% Conf. Int.]\n",
       "------------------------------------------------------------------------------\n",
       "Intercept      6.0342      0.022    270.724      0.000         5.991     6.078\n",
       "inwyr07_f      0.0244      0.002     11.646      0.000         0.020     0.029\n",
       "yrbrn60_f     -0.0175      0.000    -47.085      0.000        -0.018    -0.017\n",
       "edurank_f     -0.2782      0.023    -12.079      0.000        -0.323    -0.233\n",
       "hincrank_f    -0.1569      0.022     -7.236      0.000        -0.199    -0.114\n",
       "tvtot_f       -0.0763      0.003    -25.339      0.000        -0.082    -0.070\n",
       "rdtot_f       -0.0238      0.002    -10.429      0.000        -0.028    -0.019\n",
       "nwsptot_f     -0.0668      0.005    -13.715      0.000        -0.076    -0.057\n",
       "netuse_f      -0.1367      0.002    -57.186      0.000        -0.141    -0.132\n",
       "==============================================================================\n",
       "Omnibus:                    31835.411   Durbin-Watson:                   1.760\n",
       "Prob(Omnibus):                  0.000   Jarque-Bera (JB):             8476.913\n",
       "Skew:                          -0.134   Prob(JB):                         0.00\n",
       "Kurtosis:                       2.107   Cond. No.                         85.0\n",
       "==============================================================================\n",
       "\n",
       "Warnings:\n",
       "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
       "\"\"\""
      ]
     },
     "execution_count": 43,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n",
    "           'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
    "model = smf.ols(formula, data=df)    \n",
    "results = model.fit(disp=False)\n",
    "results.summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 44,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def run_ols(grouped, formula, var):\n",
    "    for name, group in grouped:\n",
    "        model = smf.ols(formula, data=group)    \n",
    "        results = model.fit(disp=False)\n",
    "        nobs, param, stars = extract_res(results, var=var)\n",
    "        arrow = '<--' if stars and param > 0 else ''\n",
    "        print(name, len(group), '%0.3g    '%param, stars, arrow, sep='\\t')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 45,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t-0.0822    \t**\t\n",
      "BE\t8939\t-0.108    \t**\t\n",
      "BG\t6064\t0.00266    \t\t\n",
      "CH\t9310\t-0.0905    \t**\t\n",
      "CY\t3293\t-0.0555    \t**\t\n",
      "CZ\t8790\t0.00954    \t\t\n",
      "DE\t11568\t-0.0563    \t**\t\n",
      "DK\t7684\t-0.0366    \t**\t\n",
      "EE\t6960\t-0.0176    \t\t\n",
      "ES\t9729\t-0.0823    \t**\t\n",
      "FI\t7969\t-0.0187    \t\t\n",
      "FR\t5787\t-0.0434    \t**\t\n",
      "GB\t11117\t-0.0301    \t**\t\n",
      "GR\t9759\t-0.0619    \t**\t\n",
      "HR\t3133\t-0.0798    \t**\t\n",
      "HU\t7806\t-0.00543    \t\t\n",
      "IE\t10472\t-0.0801    \t**\t\n",
      "IL\t7283\t-0.31    \t**\t\n",
      "IS\t579\t0.0105    \t\t\n",
      "IT\t1207\t-0.142    \t**\t\n",
      "LT\t1677\t-0.038    \t\t\n",
      "LU\t3187\t-0.145    \t**\t\n",
      "LV\t1980\t-0.0209    \t\t\n",
      "NL\t9741\t-0.0827    \t**\t\n",
      "NO\t8643\t-0.065    \t**\t\n",
      "PL\t8917\t-0.0682    \t**\t\n",
      "PT\t10302\t-0.0538    \t**\t\n",
      "RO\t2146\t-0.0302    \t\t\n",
      "RU\t7544\t0.0284    \t*\t<--\n",
      "SE\t9201\t-0.0942    \t**\t\n",
      "SI\t7126\t-0.0718    \t**\t\n",
      "SK\t6944\t-0.034    \t*\t\n",
      "TR\t4272\t-0.0322    \t\t\n",
      "UA\t7809\t-0.0462    \t**\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'netuse_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 46,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t-0.253    \t*\t\n",
      "BE\t8939\t-0.313    \t*\t\n",
      "BG\t6064\t-0.646    \t**\t\n",
      "CH\t9310\t-0.316    \t**\t\n",
      "CY\t3293\t-0.639    \t**\t\n",
      "CZ\t8790\t-0.438    \t**\t\n",
      "DE\t11568\t-0.624    \t**\t\n",
      "DK\t7684\t-0.374    \t**\t\n",
      "EE\t6960\t0.478    \t**\t<--\n",
      "ES\t9729\t-0.429    \t**\t\n",
      "FI\t7969\t0.175    \t\t\n",
      "FR\t5787\t-0.464    \t**\t\n",
      "GB\t11117\t0.428    \t**\t<--\n",
      "GR\t9759\t-1.68    \t**\t\n",
      "HR\t3133\t-1.22    \t**\t\n",
      "HU\t7806\t-0.938    \t**\t\n",
      "IE\t10472\t-0.442    \t**\t\n",
      "IL\t7283\t-0.904    \t**\t\n",
      "IS\t579\t0.0494    \t\t\n",
      "IT\t1207\t0.0235    \t\t\n",
      "LT\t1677\t-0.00891    \t\t\n",
      "LU\t3187\t-1.95    \t**\t\n",
      "LV\t1980\t0.263    \t\t\n",
      "NL\t9741\t-0.157    \t\t\n",
      "NO\t8643\t0.32    \t**\t<--\n",
      "PL\t8917\t-1.29    \t**\t\n",
      "PT\t10302\t-0.864    \t**\t\n",
      "RO\t2146\t-0.432    \t*\t\n",
      "RU\t7544\t-0.0769    \t\t\n",
      "SE\t9201\t0.336    \t**\t<--\n",
      "SI\t7126\t-0.964    \t**\t\n",
      "SK\t6944\t-0.79    \t**\t\n",
      "TR\t4272\t-1.16    \t**\t\n",
      "UA\t7809\t-0.591    \t**\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'edurank_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 47,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t0.162    \t\t\n",
      "BE\t8939\t-0.359    \t**\t\n",
      "BG\t6064\t-0.175    \t\t\n",
      "CH\t9310\t-0.29    \t**\t\n",
      "CY\t3293\t-0.37    \t**\t\n",
      "CZ\t8790\t-0.41    \t**\t\n",
      "DE\t11568\t0.372    \t**\t<--\n",
      "DK\t7684\t0.128    \t\t\n",
      "EE\t6960\t-0.44    \t**\t\n",
      "ES\t9729\t-0.227    \t*\t\n",
      "FI\t7969\t-0.271    \t*\t\n",
      "FR\t5787\t-0.806    \t**\t\n",
      "GB\t11117\t-0.529    \t**\t\n",
      "GR\t9759\t-0.511    \t**\t\n",
      "HR\t3133\t0.124    \t\t\n",
      "HU\t7806\t-0.078    \t\t\n",
      "IE\t10472\t-0.0518    \t\t\n",
      "IL\t7283\t-0.333    \t*\t\n",
      "IS\t579\t0.0341    \t\t\n",
      "IT\t1207\t0.104    \t\t\n",
      "LT\t1677\t-0.531    \t*\t\n",
      "LU\t3187\t-0.382    \t*\t\n",
      "LV\t1980\t-0.274    \t\t\n",
      "NL\t9741\t-0.853    \t**\t\n",
      "NO\t8643\t-0.192    \t\t\n",
      "PL\t8917\t-0.355    \t**\t\n",
      "PT\t10302\t-0.0868    \t\t\n",
      "RO\t2146\t-0.178    \t\t\n",
      "RU\t7544\t-0.0596    \t\t\n",
      "SE\t9201\t-0.249    \t*\t\n",
      "SI\t7126\t-0.876    \t**\t\n",
      "SK\t6944\t0.0426    \t\t\n",
      "TR\t4272\t-0.201    \t\t\n",
      "UA\t7809\t-0.0463    \t\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'hincrank_f')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Let's see what happens if we add quadratic terms for edurank and hincrank:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 48,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "df['edurank_f2'] = df.edurank_f**2\n",
    "df['hincrank_f2'] = df.hincrank_f**2"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 49,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + edurank_f2 + hincrank_f +'\n",
    "           'tvtot_f + rdtot_f + nwsptot_f + netuse_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 50,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t-0.669    \t\t\n",
      "BE\t8939\t0.186    \t\t\n",
      "BG\t6064\t-2.81    \t**\t\n",
      "CH\t9310\t-0.863    \t*\t\n",
      "CY\t3293\t-1.07    \t\t\n",
      "CZ\t8790\t-2.53    \t**\t\n",
      "DE\t11568\t-2.79    \t**\t\n",
      "DK\t7684\t-0.748    \t\t\n",
      "EE\t6960\t-0.671    \t\t\n",
      "ES\t9729\t-1.55    \t**\t\n",
      "FI\t7969\t0.0851    \t\t\n",
      "FR\t5787\t-1.85    \t**\t\n",
      "GB\t11117\t-2.16    \t**\t\n",
      "GR\t9759\t-1.93    \t**\t\n",
      "HR\t3133\t-1.85    \t**\t\n",
      "HU\t7806\t-3.03    \t**\t\n",
      "IE\t10472\t-0.237    \t\t\n",
      "IL\t7283\t-0.533    \t\t\n",
      "IS\t579\t-1.68    \t\t\n",
      "IT\t1207\t-2.43    \t*\t\n",
      "LT\t1677\t-1.82    \t*\t\n",
      "LU\t3187\t-2.46    \t**\t\n",
      "LV\t1980\t-1.74    \t*\t\n",
      "NL\t9741\t-0.285    \t\t\n",
      "NO\t8643\t-1.05    \t*\t\n",
      "PL\t8917\t-2.44    \t**\t\n",
      "PT\t10302\t-1.2    \t**\t\n",
      "RO\t2146\t-1.35    \t\t\n",
      "RU\t7544\t-1.06    \t*\t\n",
      "SE\t9201\t-0.788    \t\t\n",
      "SI\t7126\t-2.03    \t**\t\n",
      "SK\t6944\t-2.73    \t**\t\n",
      "TR\t4272\t-0.9    \t\t\n",
      "UA\t7809\t-0.66    \t\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'edurank_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 51,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t0.412    \t\t\n",
      "BE\t8939\t-0.5    \t\t\n",
      "BG\t6064\t2.13    \t**\t<--\n",
      "CH\t9310\t0.55    \t\t\n",
      "CY\t3293\t0.427    \t\t\n",
      "CZ\t8790\t2.09    \t**\t<--\n",
      "DE\t11568\t2.15    \t**\t<--\n",
      "DK\t7684\t0.373    \t\t\n",
      "EE\t6960\t1.15    \t*\t<--\n",
      "ES\t9729\t1.09    \t**\t<--\n",
      "FI\t7969\t0.0867    \t\t\n",
      "FR\t5787\t1.36    \t*\t<--\n",
      "GB\t11117\t2.56    \t**\t<--\n",
      "GR\t9759\t0.244    \t\t\n",
      "HR\t3133\t0.62    \t\t\n",
      "HU\t7806\t2.09    \t**\t<--\n",
      "IE\t10472\t-0.204    \t\t\n",
      "IL\t7283\t-0.37    \t\t\n",
      "IS\t579\t1.66    \t\t\n",
      "IT\t1207\t2.36    \t*\t<--\n",
      "LT\t1677\t1.81    \t*\t<--\n",
      "LU\t3187\t0.513    \t\t\n",
      "LV\t1980\t2.02    \t*\t<--\n",
      "NL\t9741\t0.126    \t\t\n",
      "NO\t8643\t1.34    \t**\t<--\n",
      "PL\t8917\t1.14    \t**\t<--\n",
      "PT\t10302\t0.324    \t\t\n",
      "RO\t2146\t0.896    \t\t\n",
      "RU\t7544\t0.979    \t*\t<--\n",
      "SE\t9201\t1.11    \t**\t<--\n",
      "SI\t7126\t1.08    \t*\t<--\n",
      "SK\t6944\t1.94    \t**\t<--\n",
      "TR\t4272\t-0.257    \t\t\n",
      "UA\t7809\t0.0687    \t\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'edurank_f2')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 52,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + edurank_f2 + '\n",
    "           'hincrank_f + hincrank_f2 + '\n",
    "           'tvtot_f + rdtot_f + nwsptot_f + netuse_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 53,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t1.95    \t**\t<--\n",
      "BE\t8939\t-0.782    \t\t\n",
      "BG\t6064\t-0.412    \t\t\n",
      "CH\t9310\t0.417    \t\t\n",
      "CY\t3293\t-1.34    \t**\t\n",
      "CZ\t8790\t-1.44    \t**\t\n",
      "DE\t11568\t0.149    \t\t\n",
      "DK\t7684\t1.5    \t**\t<--\n",
      "EE\t6960\t-1.25    \t**\t\n",
      "ES\t9729\t0.182    \t\t\n",
      "FI\t7969\t0.265    \t\t\n",
      "FR\t5787\t-1.96    \t**\t\n",
      "GB\t11117\t-0.107    \t\t\n",
      "GR\t9759\t-0.63    \t*\t\n",
      "HR\t3133\t1.97    \t**\t<--\n",
      "HU\t7806\t-0.0722    \t\t\n",
      "IE\t10472\t0.416    \t\t\n",
      "IL\t7283\t-0.525    \t\t\n",
      "IS\t579\t-0.719    \t\t\n",
      "IT\t1207\t-0.197    \t\t\n",
      "LT\t1677\t2.34    \t**\t<--\n",
      "LU\t3187\t-1.03    \t\t\n",
      "LV\t1980\t-0.262    \t\t\n",
      "NL\t9741\t-0.0917    \t\t\n",
      "NO\t8643\t-0.55    \t\t\n",
      "PL\t8917\t-0.273    \t\t\n",
      "PT\t10302\t0.284    \t\t\n",
      "RO\t2146\t-0.35    \t\t\n",
      "RU\t7544\t-0.622    \t\t\n",
      "SE\t9201\t-0.787    \t*\t\n",
      "SI\t7126\t0.0244    \t\t\n",
      "SK\t6944\t0.828    \t\t\n",
      "TR\t4272\t-1.16    \t*\t\n",
      "UA\t7809\t0.304    \t\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'hincrank_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 54,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t-0.782    \t\t\n",
      "BE\t8939\t0.232    \t\t\n",
      "BG\t6064\t-2.79    \t**\t\n",
      "CH\t9310\t-0.901    \t*\t\n",
      "CY\t3293\t-0.992    \t\t\n",
      "CZ\t8790\t-2.44    \t**\t\n",
      "DE\t11568\t-2.77    \t**\t\n",
      "DK\t7684\t-0.859    \t*\t\n",
      "EE\t6960\t-0.641    \t\t\n",
      "ES\t9729\t-1.59    \t**\t\n",
      "FI\t7969\t0.0251    \t\t\n",
      "FR\t5787\t-1.8    \t**\t\n",
      "GB\t11117\t-2.19    \t**\t\n",
      "GR\t9759\t-1.92    \t**\t\n",
      "HR\t3133\t-2.26    \t**\t\n",
      "HU\t7806\t-3.03    \t**\t\n",
      "IE\t10472\t-0.267    \t\t\n",
      "IL\t7283\t-0.519    \t\t\n",
      "IS\t579\t-1.57    \t\t\n",
      "IT\t1207\t-2.39    \t*\t\n",
      "LT\t1677\t-2.11    \t*\t\n",
      "LU\t3187\t-2.42    \t**\t\n",
      "LV\t1980\t-1.74    \t*\t\n",
      "NL\t9741\t-0.375    \t\t\n",
      "NO\t8643\t-1.01    \t*\t\n",
      "PL\t8917\t-2.44    \t**\t\n",
      "PT\t10302\t-1.22    \t**\t\n",
      "RO\t2146\t-1.32    \t\t\n",
      "RU\t7544\t-1.05    \t*\t\n",
      "SE\t9201\t-0.742    \t\t\n",
      "SI\t7126\t-2.16    \t**\t\n",
      "SK\t6944\t-2.76    \t**\t\n",
      "TR\t4272\t-0.774    \t\t\n",
      "UA\t7809\t-0.668    \t\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'edurank_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 55,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "AT\t6918\t-0.0812    \t**\t\n",
      "BE\t8939\t-0.107    \t**\t\n",
      "BG\t6064\t-0.00442    \t\t\n",
      "CH\t9310\t-0.0913    \t**\t\n",
      "CY\t3293\t-0.0597    \t**\t\n",
      "CZ\t8790\t0.00383    \t\t\n",
      "DE\t11568\t-0.0567    \t**\t\n",
      "DK\t7684\t-0.0381    \t**\t\n",
      "EE\t6960\t-0.0199    \t\t\n",
      "ES\t9729\t-0.0874    \t**\t\n",
      "FI\t7969\t-0.0187    \t\t\n",
      "FR\t5787\t-0.0433    \t**\t\n",
      "GB\t11117\t-0.0326    \t**\t\n",
      "GR\t9759\t-0.0635    \t**\t\n",
      "HR\t3133\t-0.0762    \t**\t\n",
      "HU\t7806\t-0.0145    \t\t\n",
      "IE\t10472\t-0.0799    \t**\t\n",
      "IL\t7283\t-0.31    \t**\t\n",
      "IS\t579\t0.0128    \t\t\n",
      "IT\t1207\t-0.158    \t**\t\n",
      "LT\t1677\t-0.0403    \t\t\n",
      "LU\t3187\t-0.146    \t**\t\n",
      "LV\t1980\t-0.0254    \t\t\n",
      "NL\t9741\t-0.0843    \t**\t\n",
      "NO\t8643\t-0.0622    \t**\t\n",
      "PL\t8917\t-0.0759    \t**\t\n",
      "PT\t10302\t-0.0556    \t**\t\n",
      "RO\t2146\t-0.0339    \t\t\n",
      "RU\t7544\t0.0223    \t\t\n",
      "SE\t9201\t-0.0921    \t**\t\n",
      "SI\t7126\t-0.0761    \t**\t\n",
      "SK\t6944\t-0.0405    \t**\t\n",
      "TR\t4272\t-0.0324    \t\t\n",
      "UA\t7809\t-0.0466    \t**\t\n"
     ]
    }
   ],
   "source": [
    "run_ols(grouped, formula, 'netuse_f')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 2",
   "language": "python",
   "name": "python2"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 2
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython2",
   "version": "2.7.10"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}