{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Internet use and religion in Europe, part two\n",
"-----------------------------------------\n",
"\n",
"This notebook presents a second-pass analysis of the association between Internet use and religion in Europe, using data from the European Social Survey (http://www.europeansocialsurvey.org).\n",
"\n",
"Copyright 2015 Allen Downey\n",
"\n",
"MIT License: http://opensource.org/licenses/MIT"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"from __future__ import print_function, division\n",
"\n",
"import numpy as np\n",
"import pandas as pd\n",
"\n",
"import thinkstats2\n",
"import thinkplot\n",
"\n",
"import statsmodels.formula.api as smf\n",
"\n",
"from iso_country_codes import COUNTRY\n",
"\n",
"%matplotlib inline"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The following function selects the columns I need."
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def read_cycle(filename):\n",
" \"\"\"Reads a file containing ESS data and selects columns.\n",
" \n",
" filename: string\n",
" \n",
" returns: DataFrame\n",
" \"\"\" \n",
" df = pd.read_stata(filename, convert_categoricals=False)\n",
"\n",
" if 'hinctnta' not in df.columns:\n",
" df['hinctnta'] = df.hinctnt\n",
" \n",
" if 'inwyr' not in df.columns:\n",
" df['inwyr'] = df.inwyye\n",
" \n",
" cols = ['cntry', 'inwyr', 'tvtot', 'tvpol', 'rdtot', 'rdpol', \n",
" 'nwsptot', 'nwsppol', 'netuse', \n",
" 'rlgblg', 'rlgdgr', 'eduyrs', 'hinctnta', 'yrbrn', \n",
" 'eisced', 'pspwght', 'pweight']\n",
" df = df[cols]\n",
" return df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read data from Cycle 1."
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"
\n",
" \n",
" \n",
" | \n",
" cntry | \n",
" inwyr | \n",
" tvtot | \n",
" tvpol | \n",
" rdtot | \n",
" rdpol | \n",
" nwsptot | \n",
" nwsppol | \n",
" netuse | \n",
" rlgblg | \n",
" rlgdgr | \n",
" eduyrs | \n",
" hinctnta | \n",
" yrbrn | \n",
" eisced | \n",
" pspwght | \n",
" pweight | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AT | \n",
" 2003 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 5 | \n",
" 1 | \n",
" 8 | \n",
" 11 | \n",
" 77 | \n",
" 1949 | \n",
" 0 | \n",
" 0.940933 | \n",
" 0.271488 | \n",
"
\n",
" \n",
" 1 | \n",
" AT | \n",
" 2003 | \n",
" 3 | \n",
" 2 | \n",
" 4 | \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 6 | \n",
" 2 | \n",
" 5 | \n",
" 14 | \n",
" 2 | \n",
" 1953 | \n",
" 0 | \n",
" 0.470466 | \n",
" 0.271488 | \n",
"
\n",
" \n",
" 2 | \n",
" AT | \n",
" 2003 | \n",
" 7 | \n",
" 3 | \n",
" 0 | \n",
" 66 | \n",
" 0 | \n",
" 66 | \n",
" 0 | \n",
" 1 | \n",
" 7 | \n",
" 9 | \n",
" 77 | \n",
" 1940 | \n",
" 0 | \n",
" 1.392155 | \n",
" 0.271488 | \n",
"
\n",
" \n",
" 3 | \n",
" AT | \n",
" 2003 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 2 | \n",
" 4 | \n",
" 1 | \n",
" 7 | \n",
" 18 | \n",
" 9 | \n",
" 1959 | \n",
" 0 | \n",
" 1.382163 | \n",
" 0.271488 | \n",
"
\n",
" \n",
" 4 | \n",
" AT | \n",
" 2003 | \n",
" 0 | \n",
" 66 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 66 | \n",
" 7 | \n",
" 1 | \n",
" 10 | \n",
" 15 | \n",
" 9 | \n",
" 1962 | \n",
" 0 | \n",
" 1.437766 | \n",
" 0.271488 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n",
"0 AT 2003 1 1 1 1 1 1 5 1 \n",
"1 AT 2003 3 2 4 1 2 2 6 2 \n",
"2 AT 2003 7 3 0 66 0 66 0 1 \n",
"3 AT 2003 1 1 1 1 2 2 4 1 \n",
"4 AT 2003 0 66 1 1 0 66 7 1 \n",
"\n",
" rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n",
"0 8 11 77 1949 0 0.940933 0.271488 \n",
"1 5 14 2 1953 0 0.470466 0.271488 \n",
"2 7 9 77 1940 0 1.392155 0.271488 \n",
"3 7 18 9 1959 0 1.382163 0.271488 \n",
"4 10 15 9 1962 0 1.437766 0.271488 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df1 = read_cycle('ESS1e06_4.dta')\n",
"df1.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read data from Cycle 2."
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cntry | \n",
" inwyr | \n",
" tvtot | \n",
" tvpol | \n",
" rdtot | \n",
" rdpol | \n",
" nwsptot | \n",
" nwsppol | \n",
" netuse | \n",
" rlgblg | \n",
" rlgdgr | \n",
" eduyrs | \n",
" hinctnta | \n",
" yrbrn | \n",
" eisced | \n",
" pspwght | \n",
" pweight | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AT | \n",
" 2005 | \n",
" 3 | \n",
" 2 | \n",
" 0 | \n",
" 66 | \n",
" 4 | \n",
" 2 | \n",
" 7 | \n",
" 1 | \n",
" 7 | \n",
" 12 | \n",
" 8 | \n",
" 1971 | \n",
" 0 | \n",
" 0.682185 | \n",
" 0.302006 | \n",
"
\n",
" \n",
" 1 | \n",
" AT | \n",
" 2005 | \n",
" 7 | \n",
" 2 | \n",
" 3 | \n",
" 1 | \n",
" 5 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" 7 | \n",
" 8 | \n",
" 4 | \n",
" 1925 | \n",
" 0 | \n",
" 0.565038 | \n",
" 0.302006 | \n",
"
\n",
" \n",
" 2 | \n",
" AT | \n",
" 2005 | \n",
" 6 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 0 | \n",
" 6 | \n",
" 2 | \n",
" 4 | \n",
" 13 | \n",
" 6 | \n",
" 1977 | \n",
" 0 | \n",
" 0.341133 | \n",
" 0.302006 | \n",
"
\n",
" \n",
" 3 | \n",
" AT | \n",
" 2005 | \n",
" 3 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 7 | \n",
" 1 | \n",
" 5 | \n",
" 8 | \n",
" 9 | \n",
" 1989 | \n",
" 0 | \n",
" 0.804050 | \n",
" 0.302006 | \n",
"
\n",
" \n",
" 4 | \n",
" AT | \n",
" 2005 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 4 | \n",
" 2 | \n",
" 1 | \n",
" 11 | \n",
" 88 | \n",
" 1988 | \n",
" 0 | \n",
" 1.125251 | \n",
" 0.302006 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n",
"0 AT 2005 3 2 0 66 4 2 7 1 \n",
"1 AT 2005 7 2 3 1 5 2 0 1 \n",
"2 AT 2005 6 2 1 0 1 0 6 2 \n",
"3 AT 2005 3 1 2 1 2 1 7 1 \n",
"4 AT 2005 2 1 1 1 1 1 4 2 \n",
"\n",
" rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n",
"0 7 12 8 1971 0 0.682185 0.302006 \n",
"1 7 8 4 1925 0 0.565038 0.302006 \n",
"2 4 13 6 1977 0 0.341133 0.302006 \n",
"3 5 8 9 1989 0 0.804050 0.302006 \n",
"4 1 11 88 1988 0 1.125251 0.302006 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df2 = read_cycle('ESS2e03_4.dta')\n",
"df2.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read data from Cycle 3."
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cntry | \n",
" inwyr | \n",
" tvtot | \n",
" tvpol | \n",
" rdtot | \n",
" rdpol | \n",
" nwsptot | \n",
" nwsppol | \n",
" netuse | \n",
" rlgblg | \n",
" rlgdgr | \n",
" eduyrs | \n",
" hinctnta | \n",
" yrbrn | \n",
" eisced | \n",
" pspwght | \n",
" pweight | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" AT | \n",
" 2007 | \n",
" 5 | \n",
" 1 | \n",
" 7 | \n",
" 2 | \n",
" 0 | \n",
" 66 | \n",
" 6 | \n",
" 1 | \n",
" 3 | \n",
" 11 | \n",
" 7 | \n",
" 1980 | \n",
" 0 | \n",
" 0.949578 | \n",
" 0.289116 | \n",
"
\n",
" \n",
" 1 | \n",
" AT | \n",
" 2007 | \n",
" 3 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 7 | \n",
" 1 | \n",
" 9 | \n",
" 20 | \n",
" 5 | \n",
" 1974 | \n",
" 0 | \n",
" 1.412180 | \n",
" 0.289116 | \n",
"
\n",
" \n",
" 2 | \n",
" AT | \n",
" 2007 | \n",
" 1 | \n",
" 1 | \n",
" 7 | \n",
" 2 | \n",
" 2 | \n",
" 1 | \n",
" 7 | \n",
" 1 | \n",
" 6 | \n",
" 16 | \n",
" 77 | \n",
" 1954 | \n",
" 0 | \n",
" 0.723276 | \n",
" 0.289116 | \n",
"
\n",
" \n",
" 3 | \n",
" AT | \n",
" 2007 | \n",
" 4 | \n",
" 1 | \n",
" 7 | \n",
" 2 | \n",
" 3 | \n",
" 2 | \n",
" 6 | \n",
" 1 | \n",
" 5 | \n",
" 12 | \n",
" 88 | \n",
" 1967 | \n",
" 0 | \n",
" 0.625744 | \n",
" 0.289116 | \n",
"
\n",
" \n",
" 4 | \n",
" AT | \n",
" 2007 | \n",
" 6 | \n",
" 2 | \n",
" 6 | \n",
" 2 | \n",
" 3 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 7 | \n",
" 11 | \n",
" 88 | \n",
" 1971 | \n",
" 0 | \n",
" 0.417162 | \n",
" 0.289116 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n",
"0 AT 2007 5 1 7 2 0 66 6 1 \n",
"1 AT 2007 3 1 2 1 2 1 7 1 \n",
"2 AT 2007 1 1 7 2 2 1 7 1 \n",
"3 AT 2007 4 1 7 2 3 2 6 1 \n",
"4 AT 2007 6 2 6 2 3 1 0 1 \n",
"\n",
" rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n",
"0 3 11 7 1980 0 0.949578 0.289116 \n",
"1 9 20 5 1974 0 1.412180 0.289116 \n",
"2 6 16 77 1954 0 0.723276 0.289116 \n",
"3 5 12 88 1967 0 0.625744 0.289116 \n",
"4 7 11 88 1971 0 0.417162 0.289116 "
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df3 = read_cycle('ESS3e03_5.dta')\n",
"df3.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read data from Cycle 4."
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cntry | \n",
" inwyr | \n",
" tvtot | \n",
" tvpol | \n",
" rdtot | \n",
" rdpol | \n",
" nwsptot | \n",
" nwsppol | \n",
" netuse | \n",
" rlgblg | \n",
" rlgdgr | \n",
" eduyrs | \n",
" hinctnta | \n",
" yrbrn | \n",
" eisced | \n",
" pspwght | \n",
" pweight | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" BE | \n",
" 2008 | \n",
" 7 | \n",
" 1 | \n",
" 0 | \n",
" 66 | \n",
" 0 | \n",
" 66 | \n",
" 0 | \n",
" 2 | \n",
" 1 | \n",
" 18 | \n",
" 4 | \n",
" 1972 | \n",
" 6 | \n",
" 0.823223 | \n",
" 0.503773 | \n",
"
\n",
" \n",
" 1 | \n",
" BE | \n",
" 2008 | \n",
" 7 | \n",
" 2 | \n",
" 7 | \n",
" 3 | \n",
" 0 | \n",
" 66 | \n",
" 0 | \n",
" 2 | \n",
" 0 | \n",
" 15 | \n",
" 7 | \n",
" 1982 | \n",
" 6 | \n",
" 0.798610 | \n",
" 0.503773 | \n",
"
\n",
" \n",
" 2 | \n",
" BE | \n",
" 2009 | \n",
" 2 | \n",
" 2 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 7 | \n",
" 1 | \n",
" 6 | \n",
" 18 | \n",
" 10 | \n",
" 1940 | \n",
" 7 | \n",
" 0.778020 | \n",
" 0.503773 | \n",
"
\n",
" \n",
" 3 | \n",
" BE | \n",
" 2008 | \n",
" 7 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" 6 | \n",
" 15 | \n",
" 7 | \n",
" 1931 | \n",
" 6 | \n",
" 0.777735 | \n",
" 0.503773 | \n",
"
\n",
" \n",
" 4 | \n",
" BE | \n",
" 2009 | \n",
" 0 | \n",
" 66 | \n",
" 3 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 7 | \n",
" 2 | \n",
" 0 | \n",
" 13 | \n",
" 7 | \n",
" 1982 | \n",
" 4 | \n",
" 0.960960 | \n",
" 0.503773 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n",
"0 BE 2008 7 1 0 66 0 66 0 2 \n",
"1 BE 2008 7 2 7 3 0 66 0 2 \n",
"2 BE 2009 2 2 3 3 3 3 7 1 \n",
"3 BE 2008 7 2 2 2 2 2 0 1 \n",
"4 BE 2009 0 66 3 0 1 1 7 2 \n",
"\n",
" rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n",
"0 1 18 4 1972 6 0.823223 0.503773 \n",
"1 0 15 7 1982 6 0.798610 0.503773 \n",
"2 6 18 10 1940 7 0.778020 0.503773 \n",
"3 6 15 7 1931 6 0.777735 0.503773 \n",
"4 0 13 7 1982 4 0.960960 0.503773 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df4 = read_cycle('ESS4e04_3.dta')\n",
"df4.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Read data from Cycle 5."
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cntry | \n",
" inwyr | \n",
" tvtot | \n",
" tvpol | \n",
" rdtot | \n",
" rdpol | \n",
" nwsptot | \n",
" nwsppol | \n",
" netuse | \n",
" rlgblg | \n",
" rlgdgr | \n",
" eduyrs | \n",
" hinctnta | \n",
" yrbrn | \n",
" eisced | \n",
" pspwght | \n",
" pweight | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" BE | \n",
" 2010 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 5 | \n",
" 15 | \n",
" 88 | \n",
" 1988 | \n",
" 4 | \n",
" 0.792865 | \n",
" 0.528619 | \n",
"
\n",
" \n",
" 1 | \n",
" BE | \n",
" 2010 | \n",
" 4 | \n",
" 3 | \n",
" 2 | \n",
" 2 | \n",
" 1 | \n",
" 2 | \n",
" 6 | \n",
" 2 | \n",
" 7 | \n",
" 15 | \n",
" 5 | \n",
" 1967 | \n",
" 6 | \n",
" 0.871107 | \n",
" 0.528619 | \n",
"
\n",
" \n",
" 2 | \n",
" BE | \n",
" 2010 | \n",
" 4 | \n",
" 0 | \n",
" 0 | \n",
" 66 | \n",
" 0 | \n",
" 66 | \n",
" 7 | \n",
" 2 | \n",
" 0 | \n",
" 13 | \n",
" 1 | \n",
" 1991 | \n",
" 3 | \n",
" 0.799453 | \n",
" 0.528619 | \n",
"
\n",
" \n",
" 3 | \n",
" BE | \n",
" 2010 | \n",
" 2 | \n",
" 2 | \n",
" 7 | \n",
" 2 | \n",
" 0 | \n",
" 66 | \n",
" 7 | \n",
" 1 | \n",
" 5 | \n",
" 15 | \n",
" 10 | \n",
" 1987 | \n",
" 6 | \n",
" 0.816030 | \n",
" 0.528619 | \n",
"
\n",
" \n",
" 4 | \n",
" BE | \n",
" 2010 | \n",
" 7 | \n",
" 3 | \n",
" 7 | \n",
" 4 | \n",
" 0 | \n",
" 66 | \n",
" 0 | \n",
" 2 | \n",
" 5 | \n",
" 15 | \n",
" 6 | \n",
" 1952 | \n",
" 6 | \n",
" 0.764902 | \n",
" 0.528619 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n",
"0 BE 2010 5 1 1 0 1 1 2 1 \n",
"1 BE 2010 4 3 2 2 1 2 6 2 \n",
"2 BE 2010 4 0 0 66 0 66 7 2 \n",
"3 BE 2010 2 2 7 2 0 66 7 1 \n",
"4 BE 2010 7 3 7 4 0 66 0 2 \n",
"\n",
" rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n",
"0 5 15 88 1988 4 0.792865 0.528619 \n",
"1 7 15 5 1967 6 0.871107 0.528619 \n",
"2 0 13 1 1991 3 0.799453 0.528619 \n",
"3 5 15 10 1987 6 0.816030 0.528619 \n",
"4 5 15 6 1952 6 0.764902 0.528619 "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df5 = read_cycle('ESS5e03_2.dta')\n",
"df5.head()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"def clean_cycle(df):\n",
" \"\"\"Cleans data from one cycle.\n",
" \n",
" df: DataFrame\n",
" \"\"\"\n",
" df.tvtot.replace([77, 88, 99], np.nan, inplace=True)\n",
" df.rdtot.replace([77, 88, 99], np.nan, inplace=True)\n",
" df.nwsptot.replace([77, 88, 99], np.nan, inplace=True)\n",
" df.netuse.replace([77, 88, 99], np.nan, inplace=True)\n",
" df.tvpol.replace([66, 77, 88, 99], np.nan, inplace=True)\n",
" df.rdpol.replace([66, 77, 88, 99], np.nan, inplace=True)\n",
" df.nwsppol.replace([66, 77, 88, 99], np.nan, inplace=True)\n",
" df.eduyrs.replace([77, 88, 99], np.nan, inplace=True)\n",
" df.rlgblg.replace([7, 8, 9], np.nan, inplace=True)\n",
" df.rlgdgr.replace([77, 88, 99], np.nan, inplace=True)\n",
" df.hinctnta.replace([77, 88, 99], np.nan, inplace=True)\n",
" df.yrbrn.replace([7777, 8888, 9999], np.nan, inplace=True)\n",
" df.inwyr.replace([9999], np.nan, inplace=True)\n",
" \n",
" df['hasrelig'] = (df.rlgblg==1).astype(int)\n",
" df.loc[df.rlgblg.isnull(), 'hasrelig'] = np.nan\n",
" \n",
" df['yrbrn60'] = df.yrbrn - 1960\n",
" df['inwyr07'] = df.inwyr - 2007 + np.random.uniform(-0.5, 0.5, len(df))"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"cycles = [df1, df2, df3, df4, df5]\n",
"for cycle in cycles:\n",
" clean_cycle(cycle)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"22 countries\n",
"25 countries\n",
"23 countries\n",
"29 countries\n",
"27 countries\n"
]
}
],
"source": [
"def resample(df):\n",
" \"\"\"Resample data by country.\n",
" \n",
" df: DataFrame\n",
" \n",
" returns: map from country code to DataFrame\n",
" \"\"\"\n",
" res = {}\n",
" grouped = df.groupby('cntry')\n",
" for name, group in grouped:\n",
" sample = group.sample(len(group), weights=group.pspwght, replace=True)\n",
" sample.index = range(len(group))\n",
" res[name] = sample\n",
" return res\n",
"\n",
"# each cycle_map is a map from country code to DataFrame\n",
"cycle_maps = [resample(cycle) for cycle in cycles]\n",
"for cycle_map in cycle_maps:\n",
" print(len(cycle_map), 'countries')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In a each cycle, a few questions were not asked in some countries."
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cycle 1\n",
"FR netuse 0\n",
"FR hinctnta 0\n",
"DE netuse 0\n",
"HU hinctnta 0\n",
"IE hinctnta 0\n",
"Cycle 2\n",
"FR netuse 0\n",
"FI rlgblg 0\n",
"EE hinctnta 0\n",
"UA hinctnta 0\n",
"Cycle 3\n",
"HU hinctnta 0\n",
"EE hinctnta 0\n",
"UA hinctnta 0\n",
"Cycle 4\n",
"BG hinctnta 0\n",
"CY hinctnta 0\n",
"SK hinctnta 0\n",
"Cycle 5\n",
"PT hinctnta 0\n",
"EE inwyr07 0\n"
]
}
],
"source": [
"def check_variables(name, group):\n",
" \"\"\"Print variables missing from a group.\n",
" \n",
" name: group name (country code)\n",
" group: DataFrame\n",
" \"\"\"\n",
" varnames = ['cntry', 'tvtot', 'tvpol', 'rdtot', 'rdpol', \n",
" 'nwsptot', 'nwsppol', 'netuse', 'inwyr07', \n",
" 'rlgblg', 'rlgdgr', 'eduyrs', 'hinctnta', \n",
" 'yrbrn', 'pspwght', 'pweight']\n",
" for var in varnames:\n",
" n = len(group[var].dropna())\n",
" if (n < 100):\n",
" print(name, var, len(group[var].dropna()))\n",
"\n",
"for i, cycle_map in enumerate(cycle_maps):\n",
" print('Cycle', i+1)\n",
" for name, group in cycle_map.items():\n",
" check_variables(name, group)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"I'll remove the cycle/country groups that are missing netuse or rlgblg.\n",
"\n",
"The ones that are missing income information undermine the ability to control for income, but it's a pretty weak effect, so I don't think it's a problem.\n",
"\n",
"inwtr07 is interview year shifted by 1970, the approximate mean."
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"del cycle_maps[0]['FR']\n",
"del cycle_maps[0]['DE']\n",
"del cycle_maps[1]['FR']\n",
"del cycle_maps[1]['FI']\n",
"\n",
"ee = cycle_maps[4]['EE']\n",
"ee.inwyr07 = 3 + np.random.uniform(-0.5, 0.5, len(ee))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Income is reported on a different scale in different cycles, and differs substantially across countries. So I'm replacing it with rank on a per-country basis: that is, each respondent is mapped to their income rank, from 0 to 1, within their country.\n",
"\n",
"I do the same thing with years of education, in part because there are some suspiciously high values. At the very least, mapping to rank reduces the effect of outliers."
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def replace_var_with_rank(name, df, old, new):\n",
" \"\"\"Replaces a scale variable with a rank from 0-1.\n",
" \n",
" Creates a new column.\n",
" \n",
" name: country code\n",
" df: DataFrame\n",
" old: old variable name\n",
" new: new variable name\n",
" \"\"\"\n",
" # jitter the data\n",
" series = df[old] + np.random.uniform(-0.25, 0.25, len(df))\n",
" \n",
" # if there's no data, just put in random values\n",
" if len(series.dropna()) < 10:\n",
" df[new] = np.random.random(len(df))\n",
" return\n",
" \n",
" # map from values to ranks\n",
" cdf = thinkstats2.Cdf(series)\n",
" df[new] = cdf.Probs(series)\n",
" \n",
" # make sure NaN maps to NaN\n",
" df.loc[df[old].isnull(), new] = np.nan\n",
" \n",
" \n",
"def replace_with_ranks(cycle_map):\n",
" \"\"\"Replace variables within countries.\n",
" \n",
" cycle_map: map from country code to DataFrame\n",
" \"\"\"\n",
" for name, group in cycle_map.items():\n",
" replace_var_with_rank(name, group, 'hinctnta', 'hincrank')\n",
" replace_var_with_rank(name, group, 'eduyrs', 'edurank')\n",
" \n",
"for cycle_map in cycle_maps:\n",
" replace_with_ranks(cycle_map)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"To fill missing values, I am drawing random samples from the available values, on a per-country basis.\n",
"\n",
"Since I am not modeling relationships between variables, I am losing some information with this replacement policy; the net effect is to understate the actual strength of all relationships."
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def fill_var(df, old, new):\n",
" \"\"\"Fills missing values.\n",
" \n",
" Creates a new column\n",
" \n",
" df: DataFrame\n",
" old: old variable name\n",
" new: new variable name\n",
" \"\"\"\n",
" # find the NaN rows\n",
" null = df[df[old].isnull()]\n",
" \n",
" # sample from the non-NaN rows\n",
" fill = df[old].dropna().sample(len(null), replace=True)\n",
" fill.index = null.index\n",
" \n",
" # replace NaNs with the random sample\n",
" df[new] = df[old].fillna(fill)\n",
" \n",
" \n",
"def fill_all_vars(df):\n",
" \"\"\"Fills missing values in the variables we need.\n",
" \n",
" df: DataFrame\n",
" \"\"\"\n",
" for old in ['hasrelig', 'rlgdgr', 'yrbrn60', 'edurank', 'hincrank',\n",
" 'tvtot', 'rdtot', 'nwsptot', 'netuse', 'inwyr07']:\n",
" new = old + '_f'\n",
" fill_var(df, old, new)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def fill_vars_by_country(cycle_map):\n",
" for name, group in cycle_map.items():\n",
" fill_all_vars(group)\n",
" \n",
"for cycle_map in cycle_maps:\n",
" fill_vars_by_country(cycle_map)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Concatenate the cycles."
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"37937\n",
"43709\n",
"43000\n",
"56752\n",
"52458\n"
]
}
],
"source": [
"def concat_groups(cycle_map):\n",
" \"\"\"Concat all countries in a cycle.\n",
" \n",
" cycle_map: map from country code to DataFrame\n",
" \n",
" returns: DataFrame\n",
" \"\"\"\n",
" return pd.concat(cycle_map.values(), ignore_index=True)\n",
"\n",
"dfs = [concat_groups(cycle_map) for cycle_map in cycle_maps]\n",
"\n",
"for df in dfs:\n",
" print(len(df))"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"(233856, 32)\n"
]
},
{
"data": {
"text/html": [
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cntry | \n",
" inwyr | \n",
" tvtot | \n",
" tvpol | \n",
" rdtot | \n",
" rdpol | \n",
" nwsptot | \n",
" nwsppol | \n",
" netuse | \n",
" rlgblg | \n",
" ... | \n",
" hasrelig_f | \n",
" rlgdgr_f | \n",
" yrbrn60_f | \n",
" edurank_f | \n",
" hincrank_f | \n",
" tvtot_f | \n",
" rdtot_f | \n",
" nwsptot_f | \n",
" netuse_f | \n",
" inwyr07_f | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" BE | \n",
" 2002 | \n",
" 2 | \n",
" 0 | \n",
" 7 | \n",
" 2 | \n",
" 0 | \n",
" NaN | \n",
" 1 | \n",
" 1 | \n",
" ... | \n",
" 1 | \n",
" 4 | \n",
" -8 | \n",
" 0.476496 | \n",
" 0.761204 | \n",
" 2 | \n",
" 7 | \n",
" 0 | \n",
" 1 | \n",
" -5.265308 | \n",
"
\n",
" \n",
" 1 | \n",
" BE | \n",
" 2002 | \n",
" 4 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 0 | \n",
" NaN | \n",
" 0 | \n",
" 2 | \n",
" ... | \n",
" 0 | \n",
" 6 | \n",
" -23 | \n",
" 0.444444 | \n",
" 0.475585 | \n",
" 4 | \n",
" 1 | \n",
" 0 | \n",
" 0 | \n",
" -5.463721 | \n",
"
\n",
" \n",
" 2 | \n",
" BE | \n",
" 2002 | \n",
" 5 | \n",
" 2 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" NaN | \n",
" 1 | \n",
" 2 | \n",
" ... | \n",
" 0 | \n",
" 2 | \n",
" -7 | \n",
" 0.307158 | \n",
" 0.434114 | \n",
" 5 | \n",
" 2 | \n",
" 0 | \n",
" 1 | \n",
" -4.811951 | \n",
"
\n",
" \n",
" 3 | \n",
" BE | \n",
" 2002 | \n",
" 2 | \n",
" 1 | \n",
" 0 | \n",
" NaN | \n",
" 0 | \n",
" NaN | \n",
" 0 | \n",
" 2 | \n",
" ... | \n",
" 0 | \n",
" 2 | \n",
" -3 | \n",
" 0.167201 | \n",
" 0.460201 | \n",
" 2 | \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" -4.888268 | \n",
"
\n",
" \n",
" 4 | \n",
" BE | \n",
" 2002 | \n",
" 6 | \n",
" 2 | \n",
" 5 | \n",
" 0 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" ... | \n",
" 1 | \n",
" 10 | \n",
" -39 | \n",
" 0.207799 | \n",
" 0.287625 | \n",
" 6 | \n",
" 5 | \n",
" 5 | \n",
" 1 | \n",
" -5.321503 | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 32 columns
\n",
"
"
],
"text/plain": [
" cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n",
"0 BE 2002 2 0 7 2 0 NaN 1 1 \n",
"1 BE 2002 4 1 1 1 0 NaN 0 2 \n",
"2 BE 2002 5 2 2 1 0 NaN 1 2 \n",
"3 BE 2002 2 1 0 NaN 0 NaN 0 2 \n",
"4 BE 2002 6 2 5 0 5 1 1 1 \n",
"\n",
" ... hasrelig_f rlgdgr_f yrbrn60_f edurank_f hincrank_f tvtot_f \\\n",
"0 ... 1 4 -8 0.476496 0.761204 2 \n",
"1 ... 0 6 -23 0.444444 0.475585 4 \n",
"2 ... 0 2 -7 0.307158 0.434114 5 \n",
"3 ... 0 2 -3 0.167201 0.460201 2 \n",
"4 ... 1 10 -39 0.207799 0.287625 6 \n",
"\n",
" rdtot_f nwsptot_f netuse_f inwyr07_f \n",
"0 7 0 1 -5.265308 \n",
"1 1 0 0 -5.463721 \n",
"2 2 0 1 -4.811951 \n",
"3 0 0 0 -4.888268 \n",
"4 5 5 1 -5.321503 \n",
"\n",
"[5 rows x 32 columns]"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df = pd.concat(dfs, ignore_index=True)\n",
"print(df.shape)\n",
"df.head()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"TV watching time on average weekday"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 8029\n",
"1 12609\n",
"2 32932\n",
"3 32466\n",
"4 38880\n",
"5 29248\n",
"6 28409\n",
"7 50587\n",
"Name: tvtot, dtype: int64"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.tvtot.value_counts().sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Radio listening, total time on average weekday."
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 57368\n",
"1 35800\n",
"2 36999\n",
"3 18019\n",
"4 15176\n",
"5 10336\n",
"6 9372\n",
"7 49540\n",
"Name: rdtot, dtype: int64"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.rdtot.value_counts().sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Newspaper reading, total time on average weekday."
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 68275\n",
"1 72280\n",
"2 62358\n",
"3 17305\n",
"4 6804\n",
"5 2729\n",
"6 1324\n",
"7 1875\n",
"Name: nwsptot, dtype: int64"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.nwsptot.value_counts().sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Personal use of Internet, email, www"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 71528\n",
"1 36749\n",
"2 4705\n",
"3 3751\n",
"4 8248\n",
"5 9923\n",
"6 28414\n",
"7 69833\n",
"Name: netuse, dtype: int64"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.netuse.value_counts().sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Belong to a particular religion or denomination"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"1 148495\n",
"2 83497\n",
"Name: rlgblg, dtype: int64"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.rlgblg.value_counts().sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"How religious"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 29052\n",
"1 13327\n",
"2 16316\n",
"3 18726\n",
"4 15487\n",
"5 40339\n",
"6 22866\n",
"7 27172\n",
"8 23707\n",
"9 10667\n",
"10 14027\n",
"Name: rlgdgr, dtype: int64"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.rlgdgr.value_counts().sort_index()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Total household net income, all sources"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 184192.000000\n",
"mean 0.500507\n",
"std 0.288454\n",
"min 0.000014\n",
"25% 0.250977\n",
"50% 0.500285\n",
"75% 0.750301\n",
"max 1.000000\n",
"Name: hincrank, dtype: float64"
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.hincrank.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Year born"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 232794.000000\n",
"mean 1961.491916\n",
"std 18.727516\n",
"min 1885.000000\n",
"25% 1947.000000\n",
"50% 1962.000000\n",
"75% 1977.000000\n",
"max 1996.000000\n",
"Name: yrbrn, dtype: float64"
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.yrbrn.describe()"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEZCAYAAACJjGL9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHWVJREFUeJzt3Xu4HFWZ7/HvjySAcmcYEcP1QAS8cJFD4HFUNspICGoc\nGIQgd8/AczwZRhFFOIzu0aMjcjw6yIyDigg6Q0SNTvREUA/uARGBOBBuCZKBQC6AXMId5JL3/FFr\nh0pnd+/ee3d1V1X/Ps+zn1RXVVevtXun3lrvWrVKEYGZmVkzG/S6AGZmVm4OFGZm1pIDhZmZteRA\nYWZmLTlQmJlZSw4UZmbWkgOFjYukr0k6t8Dj7yjpKUlKr4ckfaiDx18g6fhOHW8Mn/u/JD0saVW3\nP7sVSSdJurbX5bBymtzrAlj5SFoGvAZ4CXgZuBO4DPh6pBtvIuK/T/D4p0TE1c32iYj7gc3yq9LP\neD5vENg1ItYGhoiYOZ5jTYSkHYEzgB0i4tFuf77ZeLlFYSMJ4D0RsTmwI/AF4Czg4okcVNLwhUkA\namO/utkReLTXQaLo36+kSUUe37rPgcJaioinIuInwNHAiZLeACDp25I+m5a3kfRTSaslPSrpmuH3\nS1om6ROSFgFPS/pXshPmT1Jq6UxJO0taI+kUSfcBv5S0U1qX/xvdTdINkp6Q9GNJW6XPGJC0PF/u\n9LnvkjQDOBs4On3ezWn72lSWMuem9zwk6VJJm6dtw2U7QdJ9KW10TrPfl6QtJF0m6Q/peP8zHf8Q\n4OfA61I5vjXCe2+X9J7c6ymSHpG0d3p9oKTfpN/zLZIOyu17sqQ7JT0p6T8lnZrbNiBpRfoeHqB5\nwJekr0p6XNJiSe/MbXidpPnp+71b0n/LbRuU9ANJ35H0BHBS+v1+VtKvU5mukvQnzX5vVm4OFNaW\niLgJWAG8fXgVr6SCPgYsB7YhS1md3fD2Y4CZwBYRcSxwP1mLZbOI+N+5/d4B7AEcyvotDgEnACcD\n25GlxS5oVeSs2HEl8Hlgbvq8fUco/8nAicAA8F+ATYELG473Z8DrgXcBn5K0R5PP/SpZymwX4KDh\nMkfEL4HDgFWpHKeM8N5LgeNyr2cCKyNikaSpwE+Bz0TEVsCZwA9zJ9+HgMNTK/Bk4MuS9s0da1tg\nK7IgfVqTsh8ALAX+BPg0ME/SlmnbXLLvbTvgL4HPSzo49973Ad+PiC2Af0nrjgFOIvub2DCV2SrI\ngcLGYhWw9QjrXyA7gewcES9HxHW5bQFcEBErI+KPoxx/MCKea7JfAJdFxJ0R8Szwt8AHhju7RyFa\npLqADwJfiohlEfEMWaA7pqE183cR8ceIuBVYBOy93odkKZejgbMj4pmIuA/4EjDcNzJaWf8FOFzS\npun18cB30vJxwIIU+EiBZyFweHq9ICLuTcvXkLVe3p479hrg0xHxYkQ83+Tz/xAR/5C+wyuAu4D3\nSNoBeCtwVkS8EBGLgG+SBcFhv4mI+enznyf7vi6JiKXp9RXAPqPU30rKgcLGYnvgsdzr4RPf+WRX\noj9PaY+zGt63nPaMtl9++/3AFLJWzERtB9zXcOzJZFfhwx7MLT8LbDLCcbZJZWo81tR2ChERq4Dr\ngL9MV/IzeOXqfCfgqJR2Wi1pNVkr57UAkg6T9NuUGlpN1hrJp3oejogXRinCyobX95H9brYDHktB\ntFm9VoxwvPzv7DmylppVkAOFtUXS/sDrgF83bouIpyPizIjYlSwFcUZDWqJxtFKz0UujjWrasWH5\nReAR4Bng1bmyTgL+dAzHXQXs3HDsl8jSOWPxSCpT47FGOok2M5x+OorsKv2BtP5+4DsRsVXuZ7OI\n+KKkjYAfAl8EXpNSUwtYtwXTzoixxoC2E9nvZhWwda6lM1K9PA11jTlQWDPD9y9snjpYLyc7Ud2R\n3572eY+k3VIa6EmyIbVrWhz7IWDXcZTnOEl7Sno18BmynHgAvwc2ljRT0hTgXGCj3HsfBHZukaa6\nHPho6rjelFf6NFrVYb1jRcTLZCmWz0naVNJOwEeB746hnj8C3gKcTjYkedh3gfdKerekSZI2Tp3U\nU8ny/xuSBao1kg4D3j2Gzxz2Gkmnp070o8j6ixZExArgN8DfS9pI0l7AKW3Uq520oFWAA4U18xNJ\nT5JdyZ5Nlms/Obc93xm8G/AL4CmyE8o/RsS/tzj23wPnphTKGbnjNYqG5cuAbwMPkJ0YTweIiCeA\nD5PlzVcAT7Numur76d9HJS0c4XO+RdYXcA1wD1lq6a+blKPVOtL7nknHuZYsdXRJG+8j1eV5YB5Z\nq2Rebv0KYBZwDvAHsu/lY4Ai4imy38UVZKnB2cC/tVne/PbfAtOAh4HPAkdGxOq0fXYq06pUrk/l\n7oNpdo9L4/fnVkdFqcgHF6UhgIeTdZK9uck+F5CNBnkWOCkibi6sQGYVIOlvgWkRccKoO5t1QdEt\nikvIOuRGJGkmsFtETANOBb5WcHnMSk3S1mRpna/3uixmwwoNFBFxLbC6xS7vI+u8IyJuALaUtG2L\n/c1qS9JfkaWUfhYR6w0aMOuVXvdRTGXdXPIKsiGYZn0nIr4REZtGxId7XRazvF4HClh/ZIQ7vMzM\nSqTXk6+tBHbIvd6e9W/6QZKDh5nZOETEhIcp9zpQzAfmAHMlHQg8HhEj3uRU5OisXhscHGRwcLDX\nxShMnetX57qB69dN/7bgLr734zt4/vmX2tp/3mUfGHWf9ma4GV3R0w1fTjYx2jbKZvf8NNkUB0TE\nRRGxIN0ktZRs7PnJzY9mZlYv7QaHjTeezNHvfyOzZu7epZKtq9BAERGz29hnTpFlMDMrq1ZBotfB\nIa/XqScDBgYGel2EQtW5fnWuG7h+RWnWkihTcMgr9M7sTpEUVSinmVk7jj113jpBYuONJ/OvXz+i\n458jqSOd2WUYHmtm1lcag8TR739jD0szOqeezMy6oFm6qYiWRKe5RWFm1gXN+iSqoBqlNDOroFbD\nX6uQchrmQGFmVpBmrYgqpJvynHoyMytIs+GvVeMWhZlZF7Qz5UZZOVCYmXXQWOdsqgKnnszMOqjK\no5uacaAwM+uguvRL5FU7zJmZlUCzdFOV+yXy3KIwM5ugOqab8hwozMwmqI7pprz6hDwzsxKoS7op\nz4HCzGychvsm6s6pJzOzcWrsm6hTv0SeA4WZ2ThV7bkS41XP8Gdm1mVVm+hvLBwozMzGoI5TdIzG\nqSczszGo+z0TI3GgMDMbg7rfMzGSeodBM7MC1fGeiZE4UJiZjaIf+yXynHoyMxtFP/ZL5DlQmJmN\noh/7JfL6JySamY1B3acOHwu3KMzMRtDv6aY8BwozsxH0e7oprz/Do5nZGPRjuinPgcLMLKdfpg4f\nC6eezMxy+mXq8LFwoDAzy+mXqcPHwqHSzKyJOk8dPhYOFGbW9/p9io7RFJp6kjRD0hJJd0s6a4Tt\n20i6UtItkm6XdFKR5TEzG4nvmWitsEAhaRJwITADeAMwW9KeDbvNAW6OiH2AAeBLkvztmFlX+Z6J\n1oo8KU8HlkbEMgBJc4FZwOLcPg8Ae6XlzYFHI8JtPzMrnKfoaF+RgWIqsDz3egVwQMM+3wCulrQK\n2AzwN2RmXeF0U/uK/K1EG/ucA9wSEQOSdgV+IWnviHiqccfBwcG1ywMDAwwMDHSqnGbWh+qYbhoa\nGmJoaKjjx1VEO+fzcRxYOhAYjIgZ6fXZwJqIOC+3zwLgcxFxXXr9/4CzImJhw7GiqHKaWX864oQr\n1i7XNd0kiYjQRI9TZItiITBN0s7AKuBoYHbDPkuAQ4DrJG0L7A7cU2CZzKyPeRjs+BQWKCLiJUlz\ngKuAScDFEbFY0mlp+0XA54FLJC0iG4H1iYh4rKgymVl/c7/E+BT6G4qInwE/a1h3UW75EeC9RZbB\nzGxYHfslusGh1MxqzcNgJ86TAppZrTndNHEOFGZWa043TZzDqpn1DaebxseBwsxqyU+q6xynnsys\nlvykus5xoDCzWvKT6jrHIdbMaqPZUFg/qW5i3KIws9rwUNhiOFCYWW14KGwxHGrNrJY8FLZzHCjM\nrNI8I2zxnHoys0pzv0TxHCjMrNLcL1E8h10zqw33SxTDgcLMKsf9Et3l1JOZVY77JbrLgcLMKsf9\nEt3lEGxmleAn1fWOWxRmVglON/WOA4WZVYLTTb3jcGxmleN0U3c5UJhZqflJdb3n1JOZlZqfVNd7\nDhRmVmp+Ul3vOTSbWen4SXXl4haFmZWOh8KWiwOFmZWOh8KWi0O0mZWah8L2ngOFmZWCZ4QtL6ee\nzKwU3C9RXg4UZlYK7pcoL4drMysd90uUiwOFmfWM+yWqwaknM+sZ90tUQ6GBQtIMSUsk3S3prCb7\nDEi6WdLtkoaKLI+ZlYv7JaqhsNAtaRJwIXAIsBK4SdL8iFic22dL4B+BQyNihaRtiiqPmZWb+yXK\nq8g23nRgaUQsA5A0F5gFLM7tcyzww4hYARARjxRYHjMrAfdLVE+RqaepwPLc6xVpXd40YGtJv5K0\nUNLxBZbHzErA/RLVU+S3E23sMwV4C/Au4NXA9ZJ+GxF3F1guM+sh90tUT5GBYiWwQ+71DmStirzl\nwCMR8RzwnKRrgL2B9QLF4ODg2uWBgQEGBgY6XFwz6zb3S3TW0NAQQ0NDHT+uItq58B/HgaXJwF1k\nrYVVwI3A7IbO7D3IOrwPBTYCbgCOjog7G44VRZXTzLpjpL4JB4piSSIiNNHjFNaiiIiXJM0BrgIm\nARdHxGJJp6XtF0XEEklXArcCa4BvNAYJM6sHP9K0ugprUXSSWxRm1XfECVesXR7ul5g1c/celqj+\nSt+iMDPzI03rwVN4mFlhPBS2HhwozKwwHgpbDw7tZtYVHuFUXQ4UZtZRnqKjfpx6MrOOcr9E/ThQ\nmFlHuV+ifhzmzaww7peoBwcKM5sw90vUm1NPZjZh7peot6aBQtK3c8sndqU0ZlZJ7peot1Yhf+/c\n8keASwsui5nVgPsl6sdtQzMbt+G+Cau3VoFie0kXAAKm5pYBIiJOL7x0ZlZqnjq8P7T6Vj9O9jhT\nAb9r2OY5v81svSDhfol6ahooIuLbXSyHmVWcpw6vr5btREknAacDe6RVdwJfjQh3bJv1Kd8z0X+a\nBoo0JPZvgDOAm8lSUPsC56cnzl3WnSKaWZn4non+0+qGuw8DR0TEryLi8YhYHRFXA0cC/6M7xTOz\nsvE9E/2n1WXAZhFxb+PKiFgmabMCy2RmFeF7JvpDq0Dx/Di3mVnNuF+iv7UKFHtKuq3Jtl2LKIyZ\nlZP7Jfpbq296L2BbYEXD+h2ABworkZmVjvsl+lurQPEV4JMRsSy/UtLmwJeB9xZYLjMrKfdL9J9W\ngWLbiFgv9RQRt0rapcAymVkJuF/ChrUaHrtli20bd7ogZlYu7pewYa0CxUJJpzaulPRXrD/3k5nV\njPslbFiry4OPAD+S9EFeCQz7ARsBf1F0wcysPNwv0d9aTQr4oKS3AgcDbyKbMfan6e5sM6sh90vY\nSFomHCMigKvTj5nVnPslbCSt+ijMrM+4X8JG4ksFMxuR+yVsmAOFmfnZ19aSU09m5mdfW0sOFGbm\nZ19bS75sMLN1+NnX1qjQQCFpBtnkgpOAb0bEeU322x+4HvhARMwrskxmlvE9E9auwlJPkiYBFwIz\ngDcAsyXt2WS/84AryZ7LbWZd4HsmrF1F9lFMB5ZGxLKIeBGYC8waYb+/Bn4APFxgWcysge+ZsHYV\nefkwFViee70COCC/g6SpZMHjncD+ZNOEmFmX+Z4Ja6XIQNHOSX/44UghSTj1ZFYo90vYeBQZKFaS\nPTZ12A6s/1jV/YC5WYxgG+AwSS9GxPzGgw0ODq5dHhgYYGBgoMPFNas/90vU29DQEENDQx0/rrJ5\n/zpP0mTgLuBdwCrgRmB2RCxusv8lwE9GGvUkKYoqp1k/OeKEK9Z5PdwvMWvm7j0qkRVJEhEx4UxN\nYZcSEfGSpDnAVWTDYy+OiMWSTkvbLyrqs81sdO6XsHYV1qLoJLcozMavWb+EA0X9dapF4Sk8zGrO\n/RI2UQ4UZjXn+yVsonxZYdZHnG6y8XCgMKsh3y9hneTUk1kNuV/COsmBwqyG3C9hneRLDLOac7+E\nTZQDhVmN+NnXVgSnnsxqxM++tiI4UJjViJ99bUXw5YZZxTUbCutnX1unuEVhVnEeCmtFc6AwqzgP\nhbWi+bLDrEY8FNaK4EBhVkGeosO6yaknswpyv4R1kwOFWQW5X8K6yZcgZhXhJ9VZr7hFYVYRTjdZ\nrzhQmFWE003WK74cMasgp5usmxwozErMw2CtDJx6Misx90tYGThQmJWY+yWsDHxpYlYR7pewXnGg\nMCshP6nOysSpJ7MS8pPqrEwcKMxKyE+qszLxZYpZSfhJdVZWblGYlYSHwlpZOVCYlYSHwlpZ+XLF\nrIc8I6xVgVsUZj3kdJNVgQOFWQ853WRV4EsXs5JwusnKyoHCrMs8I6xVTeGpJ0kzJC2RdLeks0bY\n/kFJiyTdKuk6SXsVXSazXnK/hFVNoX+dkiYBFwKHACuBmyTNj4jFud3uAd4REU9ImgF8HTiwyHKZ\n9UKzloT7Jazsir6MmQ4sjYhlAJLmArOAtYEiIq7P7X8DsH3BZTLriZHmb/Jd11YFRaeepgLLc69X\npHXNfAhYUGiJzHrE8zdZVRXdooh2d5R0MHAK8GcjbR8cHFy7PDAwwMDAwASLZlY8z99k3TQ0NMTQ\n0FDHj6uIts/lYz+4dCAwGBEz0uuzgTURcV7DfnsB84AZEbF0hONEkeU0K8qxp84bsU/CgcK6QRIR\noYkep+jU00JgmqSdJW0IHA3Mz+8gaUeyIHHcSEHCrMrccW11UGjqKSJekjQHuAqYBFwcEYslnZa2\nXwR8CtgK+JokgBcjYnqR5TIrkudvsropNPXUKU49WZU43WRlUZXUk1nfcbrJ6sa3g5p1gNNNVmdu\nUZh1gKflsDpzoDDrAKebrM58yWM2Tk43Wb9wi8JsnJxusn7hv2qzMfIssNZvHCjMxsizwFq/cerJ\nbIw8C6z1G7cozNrgWWCtn7lFYdYGd1xbP/NfulkTzVoR4JST9RcHCrMmmrUinG6yfuPUk1kTHv5q\nlnGLwizHd1ubrc8tCrMcd1qbrc//A6zvudParDUHCut77rQ2a82BwvqW52wya48DhfUtz9lk1h4H\nCusrbkWYjZ0DhfUV90eYjZ0DhdWeRzWZTYwDhdWeWxFmE+NAYbXkVoRZ5zhQWC25FWHWOQ4UVhtu\nRZgVw4HCasOtCLNiOFBY5fneCLNiOVBYJY2WZnIrwqxzHCisMloFh2FuRZh1ngOFVUarIDEcIGbN\n3L3LpTKrPwcKK7V2RjI5OJgVy4HCSsl9EGbl4UBhpeE+CLNyKjRQSJoBfAWYBHwzIs4bYZ8LgMOA\nZ4GTIuLmIstk5TKW4OAUk1lvFBYoJE0CLgQOAVYCN0maHxGLc/vMBHaLiGmSDgC+BhxYVJnKamho\niIGBgV4XozBDQ0M88ex2owaERlUIEP3w3bl+VmSLYjqwNCKWAUiaC8wCFuf2eR9wKUBE3CBpS0nb\nRsRDBZardOr2x9rYSli86PvsufdRbb23CsEhr27fXSPXz6DYQDEVWJ57vQI4oI19tgf6KlCUVTtp\noU6oWnAw6zdFBopocz+N830tHXHCFZ04TFcsXnQHt95TnfKOlwOCWTUpoiPn5fUPLB0IDEbEjPT6\nbGBNvkNb0j8DQxExN71eAhzUmHqSVEwhzcxqLiIaL8bHrMgWxUJgmqSdgVXA0cDshn3mA3OAuSmw\nPD5S/0QnKmpmZuNTWKCIiJckzQGuIhsee3FELJZ0Wtp+UUQskDRT0lLgGeDkospjZmbjU1jqyczM\n6mGDXnyopG9JekjSbbl10yXdKOlmSTdJ2j+t31jS5ZJulXSnpE/m3rOfpNsk3S3pH3pRl5E0qd/e\nkq5P9ZgvabPctrNTHZZIendufeXrJ+nPJS1M6xdKOjj3nsrXL7d9R0lPS/pYbl3p6jeOv8290rbb\n0/YN0/rS1Q3G/LdZxXPLDpJ+JemO9J2cntZvLekXkn4v6eeStsy9Z+Lnl4jo+g/wdmBf4LbcuiHg\n0LR8GPCrtHwScHlafhVwL7Bjen0jMD0tLwBm9KI+bdbvJuDtaflk4DNp+Q3ALcAUYGdgKa+09OpQ\nv32A16blNwIrcu+pfP1y238AfA/4WJnrN8bvbjKwCHhzer0VsEFZ6zaO+lXx3PJaYJ+0vClwF7An\n8EXgE2n9WcAX0nJHzi89aVFExLXA6obVDwBbpOUtye7mHl6/ibI7vTcBXgCelLQdsFlE3Jj2uwx4\nf6EFb1OT+k1L6wF+CRyZlmeR/bG+GNnNiUuBA+pSv4i4JSIeTOvvBF4laUpd6gcg6f3APWT1G15X\nyvqNsW7vBm6NiNvSe1dHxJqy1g3GXL8qnlsejIhb0vLTZDcwTyV383L6d7i8HTm/9CRQNPFJ4EuS\n7gfOB84BiIirgCfJvtRlwPkR8TjZL2dF7v0r07qyukPSrLR8FLBDWn4d69ZjBVk9GtdXtX55RwK/\ni4gXqcn3J2lT4BPAYMP+Vapfs+/u9UBIulLS7yR9PK2vUt2gSf2qfm5RNqJ0X+AGID+jxUPAtmm5\nI+eXMgWKi4HTI2JH4KPpNZKOI2sWbgfsApwpaZeelXL8TgE+LGkhWZPxhR6Xp9Na1k/SG4EvAKf1\noGyd0Kx+g8CXI+JZ1r95tCqa1W0y8Dbg2PTvX0h6Jx26KbaLRqxflc8t6QLlh8DfRMRT+W2R5ZI6\n+h2VaZrx6RFxSFr+AfDNtPxW4EcR8TLwsKTrgP2AX5NN9zFse15JV5VORNwFHAog6fXA4WnTSta9\n+t6eLNKvpB71Q9L2wDzg+Ii4N62uev1mpk3TgSMlfZEsZbpG0nNk9a1E/Vp8d8uBayLisbRtAfAW\n4LtUpG7Q8rur5LlF0hSyIPGdiPhxWv2QpNdGxIMprfSHtL4j55cytSiWSjooLb8T+H1aXpJeI2kT\nstlll6S895OSDpAk4Hjgx5SUpD9N/24AnEs2Uy5kNx0eI2nDdDUzDbixLvVLoy/+L3BWRFw/vH9E\nPEC16/fPABHxjojYJSJ2IZtS/3MR8U9V+v5a/G1eBbxZ0qskTQYOAu6oUt2g+XdHBc8tqTwXA3dG\nxFdym+YDJ6blE3mlvJ05v/So5/5ysru1XyC7ajkZ+K9kubZbgOuBfdO+G5FdwdwG3MG6o0r2S+uX\nAhf0oi5t1u8U4HSyEQp3AZ9v2P+cVIclpJFfdakf2X/Mp4Gbcz/b1KV+De/7NHBGmb+/cfxtfhC4\nPdXjC2Wu2zj+Nqt4bnkbsCadJ4f/P80AtibrqP898HNgy9x7Jnx+8Q13ZmbWUplST2ZmVkIOFGZm\n1pIDhZmZteRAYWZmLTlQmJlZSw4UZmbWkgOF1Z4y10qakVt3lKSfdfAznu7UsczKxvdRWF9Ic019\nn2wStSnAf5DdfHRvyzeOfKzJEfFSw7qnImKzZu8Z5XiTIptGwqyUHCisb0g6D3iWbErpp4GdgDeR\nBY7BiJifZuS8LO0DMCcirpc0AHwWeAzYIyJ2bzj2U8A3yKbmfhA4JiIekbQP2ZQRrwL+EzglIh6X\nNER2V+3byO4mfh/wW+BgsjmjPhQRvy7g12A2Zk49WT/5O2A22ZQHGwNXR8QBZPP9nC/p1WRTNP95\nROwHHANckHv/vmQzHO/O+jYBboqINwH/TjadB2RB5+MRsTfZdAnD6wOYEhH7R8T/Sa8npfJ8JLef\nWc+VafZYs0JFxLOSvkfWmvgA8F5JZ6bNG5HNsvkgcKGkvYGXySZRG3ZjRNzX5PBryJ5wB9n8QfMk\nbQ5sEa88NOdSsvTXsO+xrnnp3/8gexqZWSk4UFi/WZN+BBwREXfnN0oaBB6IiOPTk8+ez21+ps3P\nECM/D6DxeRWNx/tj+vdl/H/TSsSpJ+tXV5HNKgqApH3T4uZkrQqAE4BJbR5vA7Knp0H2oJ9rI+JJ\nYLWkt6X1x5M9G37tx4692Gbd50Bh/SjIOqanSLpV0u1k/RcA/wScKOkWYHeyNFX+fc08A0yXdBsw\nAHwmrT+RrP9jEbBXbv1ox/MoEysNj3oyM7OW3KIwM7OWHCjMzKwlBwozM2vJgcLMzFpyoDAzs5Yc\nKMzMrCUHCjMza8mBwszMWvr/9X+OumFQm/YAAAAASUVORK5CYII=\n",
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"cdf = thinkstats2.Cdf(df.yrbrn)\n",
"thinkplot.PrePlot(1)\n",
"thinkplot.Cdf(cdf)\n",
"thinkplot.Config(xlabel='Year born', ylabel='CDF', \n",
" title='Disrtribution of year born', legend=False)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Shifted to mean near 0"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 232794.000000\n",
"mean 1.491916\n",
"std 18.727516\n",
"min -75.000000\n",
"25% -13.000000\n",
"50% 2.000000\n",
"75% 17.000000\n",
"max 36.000000\n",
"Name: yrbrn60, dtype: float64"
]
},
"execution_count": 27,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.yrbrn60.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Number of years of education, converted to ranks."
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 230992.000000\n",
"mean 0.500264\n",
"std 0.288676\n",
"min 0.000332\n",
"25% 0.250251\n",
"50% 0.500277\n",
"75% 0.750251\n",
"max 1.000000\n",
"Name: edurank, dtype: float64"
]
},
"execution_count": 28,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.edurank.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Country codes"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"AT 6918\n",
"BE 8939\n",
"BG 6064\n",
"CH 9310\n",
"CY 3293\n",
"CZ 8790\n",
"DE 11568\n",
"DK 7684\n",
"EE 6960\n",
"ES 9729\n",
"FI 7969\n",
"FR 5787\n",
"GB 11117\n",
"GR 9759\n",
"HR 3133\n",
"HU 7806\n",
"IE 10472\n",
"IL 7283\n",
"IS 579\n",
"IT 1207\n",
"LT 1677\n",
"LU 3187\n",
"LV 1980\n",
"NL 9741\n",
"NO 8643\n",
"PL 8917\n",
"PT 10302\n",
"RO 2146\n",
"RU 7544\n",
"SE 9201\n",
"SI 7126\n",
"SK 6944\n",
"TR 4272\n",
"UA 7809\n",
"Name: cntry, dtype: int64"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.cntry.value_counts().sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 29052\n",
"1 13327\n",
"2 16316\n",
"3 18726\n",
"4 15487\n",
"5 40339\n",
"6 22866\n",
"7 27172\n",
"8 23707\n",
"9 10667\n",
"10 14027\n",
"Name: rlgdgr, dtype: int64"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.rlgdgr.value_counts().sort_index()"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"count 233537.000000\n",
"mean -0.219482\n",
"std 2.933184\n",
"min -5.499864\n",
"25% -2.722377\n",
"50% -0.247620\n",
"75% 2.284778\n",
"max 5.499294\n",
"Name: inwyr07, dtype: float64"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.inwyr07.describe()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run the model"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def run_model(df, formula):\n",
" model = smf.logit(formula, data=df) \n",
" results = model.fit(disp=False)\n",
" return results"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Here's the model with all control variables and all media variables:"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"Logit Regression Results\n",
"\n",
" Dep. Variable: | hasrelig | No. Observations: | 179125 | \n",
"
\n",
"\n",
" Model: | Logit | Df Residuals: | 179117 | \n",
"
\n",
"\n",
" Method: | MLE | Df Model: | 7 | \n",
"
\n",
"\n",
" Date: | Mon, 16 Nov 2015 | Pseudo R-squ.: | 0.02992 | \n",
"
\n",
"\n",
" Time: | 14:59:20 | Log-Likelihood: | -1.1470e+05 | \n",
"
\n",
"\n",
" converged: | True | LL-Null: | -1.1824e+05 | \n",
"
\n",
"\n",
" | | LLR p-value: | 0.000 | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | z | P>|z| | [95.0% Conf. Int.] | \n",
"
\n",
"\n",
" Intercept | 1.0394 | 0.019 | 54.565 | 0.000 | 1.002 1.077 | \n",
"
\n",
"\n",
" yrbrn60 | -0.0080 | 0.000 | -24.684 | 0.000 | -0.009 -0.007 | \n",
"
\n",
"\n",
" edurank | -0.0096 | 0.020 | -0.490 | 0.624 | -0.048 0.029 | \n",
"
\n",
"\n",
" hincrank | 0.1361 | 0.019 | 7.309 | 0.000 | 0.100 0.173 | \n",
"
\n",
"\n",
" tvtot | -0.0154 | 0.003 | -6.059 | 0.000 | -0.020 -0.010 | \n",
"
\n",
"\n",
" rdtot | -0.0151 | 0.002 | -7.956 | 0.000 | -0.019 -0.011 | \n",
"
\n",
"\n",
" nwsptot | -0.0412 | 0.004 | -10.213 | 0.000 | -0.049 -0.033 | \n",
"
\n",
"\n",
" netuse | -0.1124 | 0.002 | -56.961 | 0.000 | -0.116 -0.109 | \n",
"
\n",
"
"
],
"text/plain": [
"\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: hasrelig No. Observations: 179125\n",
"Model: Logit Df Residuals: 179117\n",
"Method: MLE Df Model: 7\n",
"Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.02992\n",
"Time: 14:59:20 Log-Likelihood: -1.1470e+05\n",
"converged: True LL-Null: -1.1824e+05\n",
" LLR p-value: 0.000\n",
"==============================================================================\n",
" coef std err z P>|z| [95.0% Conf. Int.]\n",
"------------------------------------------------------------------------------\n",
"Intercept 1.0394 0.019 54.565 0.000 1.002 1.077\n",
"yrbrn60 -0.0080 0.000 -24.684 0.000 -0.009 -0.007\n",
"edurank -0.0096 0.020 -0.490 0.624 -0.048 0.029\n",
"hincrank 0.1361 0.019 7.309 0.000 0.100 0.173\n",
"tvtot -0.0154 0.003 -6.059 0.000 -0.020 -0.010\n",
"rdtot -0.0151 0.002 -7.956 0.000 -0.019 -0.011\n",
"nwsptot -0.0412 0.004 -10.213 0.000 -0.049 -0.033\n",
"netuse -0.1124 0.002 -56.961 0.000 -0.116 -0.109\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"formula = ('hasrelig ~ yrbrn60 + edurank + hincrank +'\n",
" 'tvtot + rdtot + nwsptot + netuse')\n",
"res = run_model(df, formula)\n",
"res.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now using the filled variables"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"Logit Regression Results\n",
"\n",
" Dep. Variable: | hasrelig_f | No. Observations: | 233856 | \n",
"
\n",
"\n",
" Model: | Logit | Df Residuals: | 233848 | \n",
"
\n",
"\n",
" Method: | MLE | Df Model: | 7 | \n",
"
\n",
"\n",
" Date: | Mon, 16 Nov 2015 | Pseudo R-squ.: | 0.02832 | \n",
"
\n",
"\n",
" Time: | 14:59:21 | Log-Likelihood: | -1.4846e+05 | \n",
"
\n",
"\n",
" converged: | True | LL-Null: | -1.5278e+05 | \n",
"
\n",
"\n",
" | | LLR p-value: | 0.000 | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | z | P>|z| | [95.0% Conf. Int.] | \n",
"
\n",
"\n",
" Intercept | 1.1082 | 0.017 | 66.598 | 0.000 | 1.076 1.141 | \n",
"
\n",
"\n",
" yrbrn60_f | -0.0079 | 0.000 | -28.692 | 0.000 | -0.008 -0.007 | \n",
"
\n",
"\n",
" edurank_f | -0.0123 | 0.017 | -0.725 | 0.469 | -0.045 0.021 | \n",
"
\n",
"\n",
" hincrank_f | 0.0826 | 0.016 | 5.188 | 0.000 | 0.051 0.114 | \n",
"
\n",
"\n",
" tvtot_f | -0.0142 | 0.002 | -6.417 | 0.000 | -0.019 -0.010 | \n",
"
\n",
"\n",
" rdtot_f | -0.0185 | 0.002 | -11.059 | 0.000 | -0.022 -0.015 | \n",
"
\n",
"\n",
" nwsptot_f | -0.0425 | 0.004 | -11.886 | 0.000 | -0.050 -0.035 | \n",
"
\n",
"\n",
" netuse_f | -0.1052 | 0.002 | -60.862 | 0.000 | -0.109 -0.102 | \n",
"
\n",
"
"
],
"text/plain": [
"\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: hasrelig_f No. Observations: 233856\n",
"Model: Logit Df Residuals: 233848\n",
"Method: MLE Df Model: 7\n",
"Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.02832\n",
"Time: 14:59:21 Log-Likelihood: -1.4846e+05\n",
"converged: True LL-Null: -1.5278e+05\n",
" LLR p-value: 0.000\n",
"==============================================================================\n",
" coef std err z P>|z| [95.0% Conf. Int.]\n",
"------------------------------------------------------------------------------\n",
"Intercept 1.1082 0.017 66.598 0.000 1.076 1.141\n",
"yrbrn60_f -0.0079 0.000 -28.692 0.000 -0.008 -0.007\n",
"edurank_f -0.0123 0.017 -0.725 0.469 -0.045 0.021\n",
"hincrank_f 0.0826 0.016 5.188 0.000 0.051 0.114\n",
"tvtot_f -0.0142 0.002 -6.417 0.000 -0.019 -0.010\n",
"rdtot_f -0.0185 0.002 -11.059 0.000 -0.022 -0.015\n",
"nwsptot_f -0.0425 0.004 -11.886 0.000 -0.050 -0.035\n",
"netuse_f -0.1052 0.002 -60.862 0.000 -0.109 -0.102\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"formula = ('hasrelig_f ~ yrbrn60_f + edurank_f + hincrank_f +'\n",
" 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
"res = run_model(df, formula)\n",
"res.summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Now adding inwyr07"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"Logit Regression Results\n",
"\n",
" Dep. Variable: | hasrelig_f | No. Observations: | 233856 | \n",
"
\n",
"\n",
" Model: | Logit | Df Residuals: | 233847 | \n",
"
\n",
"\n",
" Method: | MLE | Df Model: | 8 | \n",
"
\n",
"\n",
" Date: | Mon, 16 Nov 2015 | Pseudo R-squ.: | 0.03166 | \n",
"
\n",
"\n",
" Time: | 14:59:21 | Log-Likelihood: | -1.4795e+05 | \n",
"
\n",
"\n",
" converged: | True | LL-Null: | -1.5278e+05 | \n",
"
\n",
"\n",
" | | LLR p-value: | 0.000 | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | z | P>|z| | [95.0% Conf. Int.] | \n",
"
\n",
"\n",
" Intercept | 1.1406 | 0.017 | 68.130 | 0.000 | 1.108 1.173 | \n",
"
\n",
"\n",
" inwyr07_f | 0.0494 | 0.002 | 31.850 | 0.000 | 0.046 0.052 | \n",
"
\n",
"\n",
" yrbrn60_f | -0.0085 | 0.000 | -30.617 | 0.000 | -0.009 -0.008 | \n",
"
\n",
"\n",
" edurank_f | 0.0180 | 0.017 | 1.058 | 0.290 | -0.015 0.051 | \n",
"
\n",
"\n",
" hincrank_f | 0.0949 | 0.016 | 5.949 | 0.000 | 0.064 0.126 | \n",
"
\n",
"\n",
" tvtot_f | -0.0201 | 0.002 | -9.026 | 0.000 | -0.024 -0.016 | \n",
"
\n",
"\n",
" rdtot_f | -0.0148 | 0.002 | -8.817 | 0.000 | -0.018 -0.012 | \n",
"
\n",
"\n",
" nwsptot_f | -0.0352 | 0.004 | -9.780 | 0.000 | -0.042 -0.028 | \n",
"
\n",
"\n",
" netuse_f | -0.1151 | 0.002 | -65.319 | 0.000 | -0.119 -0.112 | \n",
"
\n",
"
"
],
"text/plain": [
"\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: hasrelig_f No. Observations: 233856\n",
"Model: Logit Df Residuals: 233847\n",
"Method: MLE Df Model: 8\n",
"Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.03166\n",
"Time: 14:59:21 Log-Likelihood: -1.4795e+05\n",
"converged: True LL-Null: -1.5278e+05\n",
" LLR p-value: 0.000\n",
"==============================================================================\n",
" coef std err z P>|z| [95.0% Conf. Int.]\n",
"------------------------------------------------------------------------------\n",
"Intercept 1.1406 0.017 68.130 0.000 1.108 1.173\n",
"inwyr07_f 0.0494 0.002 31.850 0.000 0.046 0.052\n",
"yrbrn60_f -0.0085 0.000 -30.617 0.000 -0.009 -0.008\n",
"edurank_f 0.0180 0.017 1.058 0.290 -0.015 0.051\n",
"hincrank_f 0.0949 0.016 5.949 0.000 0.064 0.126\n",
"tvtot_f -0.0201 0.002 -9.026 0.000 -0.024 -0.016\n",
"rdtot_f -0.0148 0.002 -8.817 0.000 -0.018 -0.012\n",
"nwsptot_f -0.0352 0.004 -9.780 0.000 -0.042 -0.028\n",
"netuse_f -0.1151 0.002 -65.319 0.000 -0.119 -0.112\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"formula = ('hasrelig_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n",
" 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
"res = run_model(df, formula)\n",
"res.summary()"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"(233856, -0.11506110306210458, '**')"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def extract_res(res, var='netuse_f'):\n",
" param = res.params[var]\n",
" pvalue = res.pvalues[var]\n",
" stars = '**' if pvalue < 0.01 else '*' if pvalue < 0.05 else ''\n",
" return res.nobs, param, stars\n",
"\n",
"extract_res(res)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Group by country:"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"grouped = df.groupby('cntry')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run a sample country"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"Logit Regression Results\n",
"\n",
" Dep. Variable: | hasrelig_f | No. Observations: | 579 | \n",
"
\n",
"\n",
" Model: | Logit | Df Residuals: | 570 | \n",
"
\n",
"\n",
" Method: | MLE | Df Model: | 8 | \n",
"
\n",
"\n",
" Date: | Mon, 16 Nov 2015 | Pseudo R-squ.: | 0.06961 | \n",
"
\n",
"\n",
" Time: | 14:59:22 | Log-Likelihood: | -372.62 | \n",
"
\n",
"\n",
" converged: | True | LL-Null: | -400.50 | \n",
"
\n",
"\n",
" | | LLR p-value: | 3.143e-09 | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | z | P>|z| | [95.0% Conf. Int.] | \n",
"
\n",
"\n",
" Intercept | -1.6075 | 0.753 | -2.136 | 0.033 | -3.083 -0.133 | \n",
"
\n",
"\n",
" inwyr07_f | -0.0307 | 0.305 | -0.101 | 0.920 | -0.629 0.567 | \n",
"
\n",
"\n",
" yrbrn60_f | -0.0372 | 0.006 | -6.020 | 0.000 | -0.049 -0.025 | \n",
"
\n",
"\n",
" edurank_f | 0.2802 | 0.384 | 0.730 | 0.465 | -0.472 1.033 | \n",
"
\n",
"\n",
" hincrank_f | 0.8706 | 0.326 | 2.672 | 0.008 | 0.232 1.509 | \n",
"
\n",
"\n",
" tvtot_f | 0.0625 | 0.052 | 1.206 | 0.228 | -0.039 0.164 | \n",
"
\n",
"\n",
" rdtot_f | -0.0137 | 0.036 | -0.378 | 0.706 | -0.085 0.057 | \n",
"
\n",
"\n",
" nwsptot_f | -0.0036 | 0.107 | -0.034 | 0.973 | -0.214 0.207 | \n",
"
\n",
"\n",
" netuse_f | 0.1488 | 0.044 | 3.352 | 0.001 | 0.062 0.236 | \n",
"
\n",
"
"
],
"text/plain": [
"\n",
"\"\"\"\n",
" Logit Regression Results \n",
"==============================================================================\n",
"Dep. Variable: hasrelig_f No. Observations: 579\n",
"Model: Logit Df Residuals: 570\n",
"Method: MLE Df Model: 8\n",
"Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.06961\n",
"Time: 14:59:22 Log-Likelihood: -372.62\n",
"converged: True LL-Null: -400.50\n",
" LLR p-value: 3.143e-09\n",
"==============================================================================\n",
" coef std err z P>|z| [95.0% Conf. Int.]\n",
"------------------------------------------------------------------------------\n",
"Intercept -1.6075 0.753 -2.136 0.033 -3.083 -0.133\n",
"inwyr07_f -0.0307 0.305 -0.101 0.920 -0.629 0.567\n",
"yrbrn60_f -0.0372 0.006 -6.020 0.000 -0.049 -0.025\n",
"edurank_f 0.2802 0.384 0.730 0.465 -0.472 1.033\n",
"hincrank_f 0.8706 0.326 2.672 0.008 0.232 1.509\n",
"tvtot_f 0.0625 0.052 1.206 0.228 -0.039 0.164\n",
"rdtot_f -0.0137 0.036 -0.378 0.706 -0.085 0.057\n",
"nwsptot_f -0.0036 0.107 -0.034 0.973 -0.214 0.207\n",
"netuse_f 0.1488 0.044 3.352 0.001 0.062 0.236\n",
"==============================================================================\n",
"\"\"\""
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"group = grouped.get_group('IS')\n",
"run_model(group, formula).summary()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run all countries"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def run_logits(grouped, formula, var):\n",
" for name, group in grouped:\n",
" country = '%14.14s' % COUNTRY[name]\n",
" model = smf.logit(formula, data=group) \n",
" results = model.fit(disp=False)\n",
" nobs, param, stars = extract_res(results, var=var)\n",
" arrow = '<--' if stars and param > 0 else ''\n",
" print(country, nobs, '%0.3g'%param, stars, arrow, sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" AUSTRIA\t6918\t-0.0783\t**\t\n",
" BELGIUM\t8939\t-0.0484\t**\t\n",
" BULGARIA\t6064\t0.0168\t\t\n",
" SWITZERLAND\t9310\t-0.037\t**\t\n",
" CYPRUS\t3293\t-0.0223\t\t\n",
"CZECH REPUBLIC\t8790\t-0.0164\t\t\n",
" GERMANY\t11568\t-0.0243\t**\t\n",
" DENMARK\t7684\t-0.0374\t**\t\n",
" ESTONIA\t6960\t-0.0452\t**\t\n",
" SPAIN\t9729\t-0.0657\t**\t\n",
" FINLAND\t7969\t-0.0472\t**\t\n",
" FRANCE\t5787\t0.000362\t\t\n",
"UNITED KINGDOM\t11117\t-0.0297\t**\t\n",
" GREECE\t9759\t-0.031\t\t\n",
" CROATIA\t3133\t-0.0361\t\t\n",
" HUNGARY\t7806\t-0.0301\t**\t\n",
" IRELAND\t10472\t-0.0582\t**\t\n",
" ISRAEL\t7283\t-0.0813\t**\t\n",
" ICELAND\t579\t0.149\t**\t<--\n",
" ITALY\t1207\t-0.0873\t**\t\n",
" LITHUANIA\t1677\t-0.0398\t\t\n",
" LUXEMBOURG\t3187\t-0.0581\t**\t\n",
" LATVIA\t1980\t-0.0319\t\t\n",
" NETHERLANDS\t9741\t-0.0457\t**\t\n",
" NORWAY\t8643\t-0.0385\t**\t\n",
" POLAND\t8917\t-0.119\t**\t\n",
" PORTUGAL\t10302\t-0.102\t**\t\n",
" ROMANIA\t2146\t-0.00946\t\t\n",
"RUSSIAN FEDERA\t7544\t0.0301\t**\t<--\n",
" SWEDEN\t9201\t-0.0652\t**\t\n",
" SLOVENIA\t7126\t-0.0426\t**\t\n",
" SLOVAKIA\t6944\t-0.0615\t**\t\n",
" TURKEY\t4272\t-0.0939\t*\t\n",
" UKRAINE\t7809\t-0.0601\t**\t\n"
]
}
],
"source": [
"formula = ('hasrelig_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n",
" 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
"\n",
"run_logits(grouped, formula, 'netuse_f')"
]
},
{
"cell_type": "code",
"execution_count": 41,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" AUSTRIA\t6918\t0.375\t**\t<--\n",
" BELGIUM\t8939\t-0.136\t\t\n",
" BULGARIA\t6064\t0.0274\t\t\n",
" SWITZERLAND\t9310\t-0.143\t\t\n",
" CYPRUS\t3293\t0.111\t\t\n",
"CZECH REPUBLIC\t8790\t-0.199\t*\t\n",
" GERMANY\t11568\t0.518\t**\t<--\n",
" DENMARK\t7684\t0.342\t**\t<--\n",
" ESTONIA\t6960\t-0.241\t*\t\n",
" SPAIN\t9729\t0.0597\t\t\n",
" FINLAND\t7969\t0.0164\t\t\n",
" FRANCE\t5787\t-0.0222\t\t\n",
"UNITED KINGDOM\t11117\t-0.0577\t\t\n",
" GREECE\t9759\t-0.758\t**\t\n",
" CROATIA\t3133\t0.12\t\t\n",
" HUNGARY\t7806\t-0.119\t\t\n",
" IRELAND\t10472\t0.285\t**\t<--\n",
" ISRAEL\t7283\t-0.1\t\t\n",
" ICELAND\t579\t0.871\t**\t<--\n",
" ITALY\t1207\t0.519\t*\t<--\n",
" LITHUANIA\t1677\t0.655\t**\t<--\n",
" LUXEMBOURG\t3187\t0.302\t*\t<--\n",
" LATVIA\t1980\t0.0253\t\t\n",
" NETHERLANDS\t9741\t-0.407\t**\t\n",
" NORWAY\t8643\t-0.0125\t\t\n",
" POLAND\t8917\t-0.156\t\t\n",
" PORTUGAL\t10302\t-0.0845\t\t\n",
" ROMANIA\t2146\t0.0659\t\t\n",
"RUSSIAN FEDERA\t7544\t-0.0556\t\t\n",
" SWEDEN\t9201\t-0.112\t\t\n",
" SLOVENIA\t7126\t-0.313\t**\t\n",
" SLOVAKIA\t6944\t0.0315\t\t\n",
" TURKEY\t4272\t-0.119\t\t\n",
" UKRAINE\t7809\t0.0702\t\t\n"
]
}
],
"source": [
"run_logits(grouped, formula, 'hincrank_f')"
]
},
{
"cell_type": "code",
"execution_count": 42,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" AUSTRIA\t6918\t-0.682\t**\t\n",
" BELGIUM\t8939\t0.142\t\t\n",
" BULGARIA\t6064\t-0.247\t\t\n",
" SWITZERLAND\t9310\t-0.665\t**\t\n",
" CYPRUS\t3293\t-1.19\t*\t\n",
"CZECH REPUBLIC\t8790\t-0.38\t**\t\n",
" GERMANY\t11568\t-0.616\t**\t\n",
" DENMARK\t7684\t-0.241\t**\t\n",
" ESTONIA\t6960\t0.0551\t\t\n",
" SPAIN\t9729\t-0.4\t**\t\n",
" FINLAND\t7969\t0.127\t\t\n",
" FRANCE\t5787\t0.0997\t\t\n",
"UNITED KINGDOM\t11117\t0.446\t**\t<--\n",
" GREECE\t9759\t-1.34\t**\t\n",
" CROATIA\t3133\t-0.693\t**\t\n",
" HUNGARY\t7806\t-0.604\t**\t\n",
" IRELAND\t10472\t-0.198\t*\t\n",
" ISRAEL\t7283\t-0.0924\t\t\n",
" ICELAND\t579\t0.28\t\t\n",
" ITALY\t1207\t0.192\t\t\n",
" LITHUANIA\t1677\t0.13\t\t\n",
" LUXEMBOURG\t3187\t-1.01\t**\t\n",
" LATVIA\t1980\t0.303\t\t\n",
" NETHERLANDS\t9741\t-0.412\t**\t\n",
" NORWAY\t8643\t-0.106\t\t\n",
" POLAND\t8917\t-1.16\t**\t\n",
" PORTUGAL\t10302\t-0.715\t**\t\n",
" ROMANIA\t2146\t-1.43\t**\t\n",
"RUSSIAN FEDERA\t7544\t-0.0916\t\t\n",
" SWEDEN\t9201\t0.561\t**\t<--\n",
" SLOVENIA\t7126\t-0.413\t**\t\n",
" SLOVAKIA\t6944\t-0.298\t**\t\n",
" TURKEY\t4272\t-1.69\t**\t\n",
" UKRAINE\t7809\t-0.255\t**\t\n"
]
}
],
"source": [
"run_logits(grouped, formula, 'edurank_f')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Run OLS model with `rlgdgr`: \"Regardless of whether you belong to a particular religion, how religious would you say you are?\""
]
},
{
"cell_type": "code",
"execution_count": 43,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"OLS Regression Results\n",
"\n",
" Dep. Variable: | rlgdgr_f | R-squared: | 0.051 | \n",
"
\n",
"\n",
" Model: | OLS | Adj. R-squared: | 0.051 | \n",
"
\n",
"\n",
" Method: | Least Squares | F-statistic: | 1559. | \n",
"
\n",
"\n",
" Date: | Mon, 16 Nov 2015 | Prob (F-statistic): | 0.00 | \n",
"
\n",
"\n",
" Time: | 14:59:26 | Log-Likelihood: | -5.7994e+05 | \n",
"
\n",
"\n",
" No. Observations: | 233856 | AIC: | 1.160e+06 | \n",
"
\n",
"\n",
" Df Residuals: | 233847 | BIC: | 1.160e+06 | \n",
"
\n",
"\n",
" Df Model: | 8 | | | \n",
"
\n",
"\n",
" Covariance Type: | nonrobust | | | \n",
"
\n",
"
\n",
"\n",
"\n",
" | coef | std err | t | P>|t| | [95.0% Conf. Int.] | \n",
"
\n",
"\n",
" Intercept | 6.0342 | 0.022 | 270.724 | 0.000 | 5.991 6.078 | \n",
"
\n",
"\n",
" inwyr07_f | 0.0244 | 0.002 | 11.646 | 0.000 | 0.020 0.029 | \n",
"
\n",
"\n",
" yrbrn60_f | -0.0175 | 0.000 | -47.085 | 0.000 | -0.018 -0.017 | \n",
"
\n",
"\n",
" edurank_f | -0.2782 | 0.023 | -12.079 | 0.000 | -0.323 -0.233 | \n",
"
\n",
"\n",
" hincrank_f | -0.1569 | 0.022 | -7.236 | 0.000 | -0.199 -0.114 | \n",
"
\n",
"\n",
" tvtot_f | -0.0763 | 0.003 | -25.339 | 0.000 | -0.082 -0.070 | \n",
"
\n",
"\n",
" rdtot_f | -0.0238 | 0.002 | -10.429 | 0.000 | -0.028 -0.019 | \n",
"
\n",
"\n",
" nwsptot_f | -0.0668 | 0.005 | -13.715 | 0.000 | -0.076 -0.057 | \n",
"
\n",
"\n",
" netuse_f | -0.1367 | 0.002 | -57.186 | 0.000 | -0.141 -0.132 | \n",
"
\n",
"
\n",
"\n",
"\n",
" Omnibus: | 31835.411 | Durbin-Watson: | 1.760 | \n",
"
\n",
"\n",
" Prob(Omnibus): | 0.000 | Jarque-Bera (JB): | 8476.913 | \n",
"
\n",
"\n",
" Skew: | -0.134 | Prob(JB): | 0.00 | \n",
"
\n",
"\n",
" Kurtosis: | 2.107 | Cond. No. | 85.0 | \n",
"
\n",
"
"
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: rlgdgr_f R-squared: 0.051\n",
"Model: OLS Adj. R-squared: 0.051\n",
"Method: Least Squares F-statistic: 1559.\n",
"Date: Mon, 16 Nov 2015 Prob (F-statistic): 0.00\n",
"Time: 14:59:26 Log-Likelihood: -5.7994e+05\n",
"No. Observations: 233856 AIC: 1.160e+06\n",
"Df Residuals: 233847 BIC: 1.160e+06\n",
"Df Model: 8 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [95.0% Conf. Int.]\n",
"------------------------------------------------------------------------------\n",
"Intercept 6.0342 0.022 270.724 0.000 5.991 6.078\n",
"inwyr07_f 0.0244 0.002 11.646 0.000 0.020 0.029\n",
"yrbrn60_f -0.0175 0.000 -47.085 0.000 -0.018 -0.017\n",
"edurank_f -0.2782 0.023 -12.079 0.000 -0.323 -0.233\n",
"hincrank_f -0.1569 0.022 -7.236 0.000 -0.199 -0.114\n",
"tvtot_f -0.0763 0.003 -25.339 0.000 -0.082 -0.070\n",
"rdtot_f -0.0238 0.002 -10.429 0.000 -0.028 -0.019\n",
"nwsptot_f -0.0668 0.005 -13.715 0.000 -0.076 -0.057\n",
"netuse_f -0.1367 0.002 -57.186 0.000 -0.141 -0.132\n",
"==============================================================================\n",
"Omnibus: 31835.411 Durbin-Watson: 1.760\n",
"Prob(Omnibus): 0.000 Jarque-Bera (JB): 8476.913\n",
"Skew: -0.134 Prob(JB): 0.00\n",
"Kurtosis: 2.107 Cond. No. 85.0\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 43,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n",
" 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n",
"model = smf.ols(formula, data=df) \n",
"results = model.fit(disp=False)\n",
"results.summary()"
]
},
{
"cell_type": "code",
"execution_count": 44,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"def run_ols(grouped, formula, var):\n",
" for name, group in grouped:\n",
" model = smf.ols(formula, data=group) \n",
" results = model.fit(disp=False)\n",
" nobs, param, stars = extract_res(results, var=var)\n",
" arrow = '<--' if stars and param > 0 else ''\n",
" print(name, len(group), '%0.3g '%param, stars, arrow, sep='\\t')"
]
},
{
"cell_type": "code",
"execution_count": 45,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t-0.0822 \t**\t\n",
"BE\t8939\t-0.108 \t**\t\n",
"BG\t6064\t0.00266 \t\t\n",
"CH\t9310\t-0.0905 \t**\t\n",
"CY\t3293\t-0.0555 \t**\t\n",
"CZ\t8790\t0.00954 \t\t\n",
"DE\t11568\t-0.0563 \t**\t\n",
"DK\t7684\t-0.0366 \t**\t\n",
"EE\t6960\t-0.0176 \t\t\n",
"ES\t9729\t-0.0823 \t**\t\n",
"FI\t7969\t-0.0187 \t\t\n",
"FR\t5787\t-0.0434 \t**\t\n",
"GB\t11117\t-0.0301 \t**\t\n",
"GR\t9759\t-0.0619 \t**\t\n",
"HR\t3133\t-0.0798 \t**\t\n",
"HU\t7806\t-0.00543 \t\t\n",
"IE\t10472\t-0.0801 \t**\t\n",
"IL\t7283\t-0.31 \t**\t\n",
"IS\t579\t0.0105 \t\t\n",
"IT\t1207\t-0.142 \t**\t\n",
"LT\t1677\t-0.038 \t\t\n",
"LU\t3187\t-0.145 \t**\t\n",
"LV\t1980\t-0.0209 \t\t\n",
"NL\t9741\t-0.0827 \t**\t\n",
"NO\t8643\t-0.065 \t**\t\n",
"PL\t8917\t-0.0682 \t**\t\n",
"PT\t10302\t-0.0538 \t**\t\n",
"RO\t2146\t-0.0302 \t\t\n",
"RU\t7544\t0.0284 \t*\t<--\n",
"SE\t9201\t-0.0942 \t**\t\n",
"SI\t7126\t-0.0718 \t**\t\n",
"SK\t6944\t-0.034 \t*\t\n",
"TR\t4272\t-0.0322 \t\t\n",
"UA\t7809\t-0.0462 \t**\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'netuse_f')"
]
},
{
"cell_type": "code",
"execution_count": 46,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t-0.253 \t*\t\n",
"BE\t8939\t-0.313 \t*\t\n",
"BG\t6064\t-0.646 \t**\t\n",
"CH\t9310\t-0.316 \t**\t\n",
"CY\t3293\t-0.639 \t**\t\n",
"CZ\t8790\t-0.438 \t**\t\n",
"DE\t11568\t-0.624 \t**\t\n",
"DK\t7684\t-0.374 \t**\t\n",
"EE\t6960\t0.478 \t**\t<--\n",
"ES\t9729\t-0.429 \t**\t\n",
"FI\t7969\t0.175 \t\t\n",
"FR\t5787\t-0.464 \t**\t\n",
"GB\t11117\t0.428 \t**\t<--\n",
"GR\t9759\t-1.68 \t**\t\n",
"HR\t3133\t-1.22 \t**\t\n",
"HU\t7806\t-0.938 \t**\t\n",
"IE\t10472\t-0.442 \t**\t\n",
"IL\t7283\t-0.904 \t**\t\n",
"IS\t579\t0.0494 \t\t\n",
"IT\t1207\t0.0235 \t\t\n",
"LT\t1677\t-0.00891 \t\t\n",
"LU\t3187\t-1.95 \t**\t\n",
"LV\t1980\t0.263 \t\t\n",
"NL\t9741\t-0.157 \t\t\n",
"NO\t8643\t0.32 \t**\t<--\n",
"PL\t8917\t-1.29 \t**\t\n",
"PT\t10302\t-0.864 \t**\t\n",
"RO\t2146\t-0.432 \t*\t\n",
"RU\t7544\t-0.0769 \t\t\n",
"SE\t9201\t0.336 \t**\t<--\n",
"SI\t7126\t-0.964 \t**\t\n",
"SK\t6944\t-0.79 \t**\t\n",
"TR\t4272\t-1.16 \t**\t\n",
"UA\t7809\t-0.591 \t**\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'edurank_f')"
]
},
{
"cell_type": "code",
"execution_count": 47,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t0.162 \t\t\n",
"BE\t8939\t-0.359 \t**\t\n",
"BG\t6064\t-0.175 \t\t\n",
"CH\t9310\t-0.29 \t**\t\n",
"CY\t3293\t-0.37 \t**\t\n",
"CZ\t8790\t-0.41 \t**\t\n",
"DE\t11568\t0.372 \t**\t<--\n",
"DK\t7684\t0.128 \t\t\n",
"EE\t6960\t-0.44 \t**\t\n",
"ES\t9729\t-0.227 \t*\t\n",
"FI\t7969\t-0.271 \t*\t\n",
"FR\t5787\t-0.806 \t**\t\n",
"GB\t11117\t-0.529 \t**\t\n",
"GR\t9759\t-0.511 \t**\t\n",
"HR\t3133\t0.124 \t\t\n",
"HU\t7806\t-0.078 \t\t\n",
"IE\t10472\t-0.0518 \t\t\n",
"IL\t7283\t-0.333 \t*\t\n",
"IS\t579\t0.0341 \t\t\n",
"IT\t1207\t0.104 \t\t\n",
"LT\t1677\t-0.531 \t*\t\n",
"LU\t3187\t-0.382 \t*\t\n",
"LV\t1980\t-0.274 \t\t\n",
"NL\t9741\t-0.853 \t**\t\n",
"NO\t8643\t-0.192 \t\t\n",
"PL\t8917\t-0.355 \t**\t\n",
"PT\t10302\t-0.0868 \t\t\n",
"RO\t2146\t-0.178 \t\t\n",
"RU\t7544\t-0.0596 \t\t\n",
"SE\t9201\t-0.249 \t*\t\n",
"SI\t7126\t-0.876 \t**\t\n",
"SK\t6944\t0.0426 \t\t\n",
"TR\t4272\t-0.201 \t\t\n",
"UA\t7809\t-0.0463 \t\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'hincrank_f')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Let's see what happens if we add quadratic terms for edurank and hincrank:"
]
},
{
"cell_type": "code",
"execution_count": 48,
"metadata": {
"collapsed": false
},
"outputs": [],
"source": [
"df['edurank_f2'] = df.edurank_f**2\n",
"df['hincrank_f2'] = df.hincrank_f**2"
]
},
{
"cell_type": "code",
"execution_count": 49,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + edurank_f2 + hincrank_f +'\n",
" 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t-0.669 \t\t\n",
"BE\t8939\t0.186 \t\t\n",
"BG\t6064\t-2.81 \t**\t\n",
"CH\t9310\t-0.863 \t*\t\n",
"CY\t3293\t-1.07 \t\t\n",
"CZ\t8790\t-2.53 \t**\t\n",
"DE\t11568\t-2.79 \t**\t\n",
"DK\t7684\t-0.748 \t\t\n",
"EE\t6960\t-0.671 \t\t\n",
"ES\t9729\t-1.55 \t**\t\n",
"FI\t7969\t0.0851 \t\t\n",
"FR\t5787\t-1.85 \t**\t\n",
"GB\t11117\t-2.16 \t**\t\n",
"GR\t9759\t-1.93 \t**\t\n",
"HR\t3133\t-1.85 \t**\t\n",
"HU\t7806\t-3.03 \t**\t\n",
"IE\t10472\t-0.237 \t\t\n",
"IL\t7283\t-0.533 \t\t\n",
"IS\t579\t-1.68 \t\t\n",
"IT\t1207\t-2.43 \t*\t\n",
"LT\t1677\t-1.82 \t*\t\n",
"LU\t3187\t-2.46 \t**\t\n",
"LV\t1980\t-1.74 \t*\t\n",
"NL\t9741\t-0.285 \t\t\n",
"NO\t8643\t-1.05 \t*\t\n",
"PL\t8917\t-2.44 \t**\t\n",
"PT\t10302\t-1.2 \t**\t\n",
"RO\t2146\t-1.35 \t\t\n",
"RU\t7544\t-1.06 \t*\t\n",
"SE\t9201\t-0.788 \t\t\n",
"SI\t7126\t-2.03 \t**\t\n",
"SK\t6944\t-2.73 \t**\t\n",
"TR\t4272\t-0.9 \t\t\n",
"UA\t7809\t-0.66 \t\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'edurank_f')"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t0.412 \t\t\n",
"BE\t8939\t-0.5 \t\t\n",
"BG\t6064\t2.13 \t**\t<--\n",
"CH\t9310\t0.55 \t\t\n",
"CY\t3293\t0.427 \t\t\n",
"CZ\t8790\t2.09 \t**\t<--\n",
"DE\t11568\t2.15 \t**\t<--\n",
"DK\t7684\t0.373 \t\t\n",
"EE\t6960\t1.15 \t*\t<--\n",
"ES\t9729\t1.09 \t**\t<--\n",
"FI\t7969\t0.0867 \t\t\n",
"FR\t5787\t1.36 \t*\t<--\n",
"GB\t11117\t2.56 \t**\t<--\n",
"GR\t9759\t0.244 \t\t\n",
"HR\t3133\t0.62 \t\t\n",
"HU\t7806\t2.09 \t**\t<--\n",
"IE\t10472\t-0.204 \t\t\n",
"IL\t7283\t-0.37 \t\t\n",
"IS\t579\t1.66 \t\t\n",
"IT\t1207\t2.36 \t*\t<--\n",
"LT\t1677\t1.81 \t*\t<--\n",
"LU\t3187\t0.513 \t\t\n",
"LV\t1980\t2.02 \t*\t<--\n",
"NL\t9741\t0.126 \t\t\n",
"NO\t8643\t1.34 \t**\t<--\n",
"PL\t8917\t1.14 \t**\t<--\n",
"PT\t10302\t0.324 \t\t\n",
"RO\t2146\t0.896 \t\t\n",
"RU\t7544\t0.979 \t*\t<--\n",
"SE\t9201\t1.11 \t**\t<--\n",
"SI\t7126\t1.08 \t*\t<--\n",
"SK\t6944\t1.94 \t**\t<--\n",
"TR\t4272\t-0.257 \t\t\n",
"UA\t7809\t0.0687 \t\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'edurank_f2')"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + edurank_f2 + '\n",
" 'hincrank_f + hincrank_f2 + '\n",
" 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')"
]
},
{
"cell_type": "code",
"execution_count": 53,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t1.95 \t**\t<--\n",
"BE\t8939\t-0.782 \t\t\n",
"BG\t6064\t-0.412 \t\t\n",
"CH\t9310\t0.417 \t\t\n",
"CY\t3293\t-1.34 \t**\t\n",
"CZ\t8790\t-1.44 \t**\t\n",
"DE\t11568\t0.149 \t\t\n",
"DK\t7684\t1.5 \t**\t<--\n",
"EE\t6960\t-1.25 \t**\t\n",
"ES\t9729\t0.182 \t\t\n",
"FI\t7969\t0.265 \t\t\n",
"FR\t5787\t-1.96 \t**\t\n",
"GB\t11117\t-0.107 \t\t\n",
"GR\t9759\t-0.63 \t*\t\n",
"HR\t3133\t1.97 \t**\t<--\n",
"HU\t7806\t-0.0722 \t\t\n",
"IE\t10472\t0.416 \t\t\n",
"IL\t7283\t-0.525 \t\t\n",
"IS\t579\t-0.719 \t\t\n",
"IT\t1207\t-0.197 \t\t\n",
"LT\t1677\t2.34 \t**\t<--\n",
"LU\t3187\t-1.03 \t\t\n",
"LV\t1980\t-0.262 \t\t\n",
"NL\t9741\t-0.0917 \t\t\n",
"NO\t8643\t-0.55 \t\t\n",
"PL\t8917\t-0.273 \t\t\n",
"PT\t10302\t0.284 \t\t\n",
"RO\t2146\t-0.35 \t\t\n",
"RU\t7544\t-0.622 \t\t\n",
"SE\t9201\t-0.787 \t*\t\n",
"SI\t7126\t0.0244 \t\t\n",
"SK\t6944\t0.828 \t\t\n",
"TR\t4272\t-1.16 \t*\t\n",
"UA\t7809\t0.304 \t\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'hincrank_f')"
]
},
{
"cell_type": "code",
"execution_count": 54,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t-0.782 \t\t\n",
"BE\t8939\t0.232 \t\t\n",
"BG\t6064\t-2.79 \t**\t\n",
"CH\t9310\t-0.901 \t*\t\n",
"CY\t3293\t-0.992 \t\t\n",
"CZ\t8790\t-2.44 \t**\t\n",
"DE\t11568\t-2.77 \t**\t\n",
"DK\t7684\t-0.859 \t*\t\n",
"EE\t6960\t-0.641 \t\t\n",
"ES\t9729\t-1.59 \t**\t\n",
"FI\t7969\t0.0251 \t\t\n",
"FR\t5787\t-1.8 \t**\t\n",
"GB\t11117\t-2.19 \t**\t\n",
"GR\t9759\t-1.92 \t**\t\n",
"HR\t3133\t-2.26 \t**\t\n",
"HU\t7806\t-3.03 \t**\t\n",
"IE\t10472\t-0.267 \t\t\n",
"IL\t7283\t-0.519 \t\t\n",
"IS\t579\t-1.57 \t\t\n",
"IT\t1207\t-2.39 \t*\t\n",
"LT\t1677\t-2.11 \t*\t\n",
"LU\t3187\t-2.42 \t**\t\n",
"LV\t1980\t-1.74 \t*\t\n",
"NL\t9741\t-0.375 \t\t\n",
"NO\t8643\t-1.01 \t*\t\n",
"PL\t8917\t-2.44 \t**\t\n",
"PT\t10302\t-1.22 \t**\t\n",
"RO\t2146\t-1.32 \t\t\n",
"RU\t7544\t-1.05 \t*\t\n",
"SE\t9201\t-0.742 \t\t\n",
"SI\t7126\t-2.16 \t**\t\n",
"SK\t6944\t-2.76 \t**\t\n",
"TR\t4272\t-0.774 \t\t\n",
"UA\t7809\t-0.668 \t\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'edurank_f')"
]
},
{
"cell_type": "code",
"execution_count": 55,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"AT\t6918\t-0.0812 \t**\t\n",
"BE\t8939\t-0.107 \t**\t\n",
"BG\t6064\t-0.00442 \t\t\n",
"CH\t9310\t-0.0913 \t**\t\n",
"CY\t3293\t-0.0597 \t**\t\n",
"CZ\t8790\t0.00383 \t\t\n",
"DE\t11568\t-0.0567 \t**\t\n",
"DK\t7684\t-0.0381 \t**\t\n",
"EE\t6960\t-0.0199 \t\t\n",
"ES\t9729\t-0.0874 \t**\t\n",
"FI\t7969\t-0.0187 \t\t\n",
"FR\t5787\t-0.0433 \t**\t\n",
"GB\t11117\t-0.0326 \t**\t\n",
"GR\t9759\t-0.0635 \t**\t\n",
"HR\t3133\t-0.0762 \t**\t\n",
"HU\t7806\t-0.0145 \t\t\n",
"IE\t10472\t-0.0799 \t**\t\n",
"IL\t7283\t-0.31 \t**\t\n",
"IS\t579\t0.0128 \t\t\n",
"IT\t1207\t-0.158 \t**\t\n",
"LT\t1677\t-0.0403 \t\t\n",
"LU\t3187\t-0.146 \t**\t\n",
"LV\t1980\t-0.0254 \t\t\n",
"NL\t9741\t-0.0843 \t**\t\n",
"NO\t8643\t-0.0622 \t**\t\n",
"PL\t8917\t-0.0759 \t**\t\n",
"PT\t10302\t-0.0556 \t**\t\n",
"RO\t2146\t-0.0339 \t\t\n",
"RU\t7544\t0.0223 \t\t\n",
"SE\t9201\t-0.0921 \t**\t\n",
"SI\t7126\t-0.0761 \t**\t\n",
"SK\t6944\t-0.0405 \t**\t\n",
"TR\t4272\t-0.0324 \t\t\n",
"UA\t7809\t-0.0466 \t**\t\n"
]
}
],
"source": [
"run_ols(grouped, formula, 'netuse_f')"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 2",
"language": "python",
"name": "python2"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 2
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython2",
"version": "2.7.10"
}
},
"nbformat": 4,
"nbformat_minor": 0
}