{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Internet use and religion in Europe, part two\n", "-----------------------------------------\n", "\n", "This notebook presents a second-pass analysis of the association between Internet use and religion in Europe, using data from the European Social Survey (http://www.europeansocialsurvey.org).\n", "\n", "Copyright 2015 Allen Downey\n", "\n", "MIT License: http://opensource.org/licenses/MIT" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "from __future__ import print_function, division\n", "\n", "import numpy as np\n", "import pandas as pd\n", "\n", "import thinkstats2\n", "import thinkplot\n", "\n", "import statsmodels.formula.api as smf\n", "\n", "from iso_country_codes import COUNTRY\n", "\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The following function selects the columns I need." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def read_cycle(filename):\n", " \"\"\"Reads a file containing ESS data and selects columns.\n", " \n", " filename: string\n", " \n", " returns: DataFrame\n", " \"\"\" \n", " df = pd.read_stata(filename, convert_categoricals=False)\n", "\n", " if 'hinctnta' not in df.columns:\n", " df['hinctnta'] = df.hinctnt\n", " \n", " if 'inwyr' not in df.columns:\n", " df['inwyr'] = df.inwyye\n", " \n", " cols = ['cntry', 'inwyr', 'tvtot', 'tvpol', 'rdtot', 'rdpol', \n", " 'nwsptot', 'nwsppol', 'netuse', \n", " 'rlgblg', 'rlgdgr', 'eduyrs', 'hinctnta', 'yrbrn', \n", " 'eisced', 'pspwght', 'pweight']\n", " df = df[cols]\n", " return df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read data from Cycle 1." ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cntryinwyrtvtottvpolrdtotrdpolnwsptotnwsppolnetuserlgblgrlgdgreduyrshinctntayrbrneiscedpspwghtpweight
0AT20031111115181177194900.9409330.271488
1AT2003324122625142195300.4704660.271488
2AT200373066066017977194001.3921550.271488
3AT2003111122417189195901.3821630.271488
4AT2003066110667110159196201.4377660.271488
\n", "
" ], "text/plain": [ " cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n", "0 AT 2003 1 1 1 1 1 1 5 1 \n", "1 AT 2003 3 2 4 1 2 2 6 2 \n", "2 AT 2003 7 3 0 66 0 66 0 1 \n", "3 AT 2003 1 1 1 1 2 2 4 1 \n", "4 AT 2003 0 66 1 1 0 66 7 1 \n", "\n", " rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n", "0 8 11 77 1949 0 0.940933 0.271488 \n", "1 5 14 2 1953 0 0.470466 0.271488 \n", "2 7 9 77 1940 0 1.392155 0.271488 \n", "3 7 18 9 1959 0 1.382163 0.271488 \n", "4 10 15 9 1962 0 1.437766 0.271488 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df1 = read_cycle('ESS1e06_4.dta')\n", "df1.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read data from Cycle 2." ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cntryinwyrtvtottvpolrdtotrdpolnwsptotnwsppolnetuserlgblgrlgdgreduyrshinctntayrbrneiscedpspwghtpweight
0AT20053206642717128197100.6821850.302006
1AT200572315201784192500.5650380.302006
2AT2005621010624136197700.3411330.302006
3AT200531212171589198900.8040500.302006
4AT20052111114211188198801.1252510.302006
\n", "
" ], "text/plain": [ " cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n", "0 AT 2005 3 2 0 66 4 2 7 1 \n", "1 AT 2005 7 2 3 1 5 2 0 1 \n", "2 AT 2005 6 2 1 0 1 0 6 2 \n", "3 AT 2005 3 1 2 1 2 1 7 1 \n", "4 AT 2005 2 1 1 1 1 1 4 2 \n", "\n", " rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n", "0 7 12 8 1971 0 0.682185 0.302006 \n", "1 7 8 4 1925 0 0.565038 0.302006 \n", "2 4 13 6 1977 0 0.341133 0.302006 \n", "3 5 8 9 1989 0 0.804050 0.302006 \n", "4 1 11 88 1988 0 1.125251 0.302006 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df2 = read_cycle('ESS2e03_4.dta')\n", "df2.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read data from Cycle 3." ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cntryinwyrtvtottvpolrdtotrdpolnwsptotnwsppolnetuserlgblgrlgdgreduyrshinctntayrbrneiscedpspwghtpweight
0AT20075172066613117198000.9495780.289116
1AT2007312121719205197401.4121800.289116
2AT20071172217161677195400.7232760.289116
3AT20074172326151288196700.6257440.289116
4AT20076262310171188197100.4171620.289116
\n", "
" ], "text/plain": [ " cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n", "0 AT 2007 5 1 7 2 0 66 6 1 \n", "1 AT 2007 3 1 2 1 2 1 7 1 \n", "2 AT 2007 1 1 7 2 2 1 7 1 \n", "3 AT 2007 4 1 7 2 3 2 6 1 \n", "4 AT 2007 6 2 6 2 3 1 0 1 \n", "\n", " rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n", "0 3 11 7 1980 0 0.949578 0.289116 \n", "1 9 20 5 1974 0 1.412180 0.289116 \n", "2 6 16 77 1954 0 0.723276 0.289116 \n", "3 5 12 88 1967 0 0.625744 0.289116 \n", "4 7 11 88 1971 0 0.417162 0.289116 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df3 = read_cycle('ESS3e03_5.dta')\n", "df3.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read data from Cycle 4." ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cntryinwyrtvtottvpolrdtotrdpolnwsptotnwsppolnetuserlgblgrlgdgreduyrshinctntayrbrneiscedpspwghtpweight
0BE200871066066021184197260.8232230.503773
1BE20087273066020157198260.7986100.503773
2BE20092233337161810194070.7780200.503773
3BE2008722222016157193160.7777350.503773
4BE20090663011720137198240.9609600.503773
\n", "
" ], "text/plain": [ " cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n", "0 BE 2008 7 1 0 66 0 66 0 2 \n", "1 BE 2008 7 2 7 3 0 66 0 2 \n", "2 BE 2009 2 2 3 3 3 3 7 1 \n", "3 BE 2008 7 2 2 2 2 2 0 1 \n", "4 BE 2009 0 66 3 0 1 1 7 2 \n", "\n", " rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n", "0 1 18 4 1972 6 0.823223 0.503773 \n", "1 0 15 7 1982 6 0.798610 0.503773 \n", "2 6 18 10 1940 7 0.778020 0.503773 \n", "3 6 15 7 1931 6 0.777735 0.503773 \n", "4 0 13 7 1982 4 0.960960 0.503773 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df4 = read_cycle('ESS4e04_3.dta')\n", "df4.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read data from Cycle 5." ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cntryinwyrtvtottvpolrdtotrdpolnwsptotnwsppolnetuserlgblgrlgdgreduyrshinctntayrbrneiscedpspwghtpweight
0BE20105110112151588198840.7928650.528619
1BE2010432212627155196760.8711070.528619
2BE201040066066720131199130.7994530.528619
3BE201022720667151510198760.8160300.528619
4BE20107374066025156195260.7649020.528619
\n", "
" ], "text/plain": [ " cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n", "0 BE 2010 5 1 1 0 1 1 2 1 \n", "1 BE 2010 4 3 2 2 1 2 6 2 \n", "2 BE 2010 4 0 0 66 0 66 7 2 \n", "3 BE 2010 2 2 7 2 0 66 7 1 \n", "4 BE 2010 7 3 7 4 0 66 0 2 \n", "\n", " rlgdgr eduyrs hinctnta yrbrn eisced pspwght pweight \n", "0 5 15 88 1988 4 0.792865 0.528619 \n", "1 7 15 5 1967 6 0.871107 0.528619 \n", "2 0 13 1 1991 3 0.799453 0.528619 \n", "3 5 15 10 1987 6 0.816030 0.528619 \n", "4 5 15 6 1952 6 0.764902 0.528619 " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df5 = read_cycle('ESS5e03_2.dta')\n", "df5.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": true }, "outputs": [], "source": [ "def clean_cycle(df):\n", " \"\"\"Cleans data from one cycle.\n", " \n", " df: DataFrame\n", " \"\"\"\n", " df.tvtot.replace([77, 88, 99], np.nan, inplace=True)\n", " df.rdtot.replace([77, 88, 99], np.nan, inplace=True)\n", " df.nwsptot.replace([77, 88, 99], np.nan, inplace=True)\n", " df.netuse.replace([77, 88, 99], np.nan, inplace=True)\n", " df.tvpol.replace([66, 77, 88, 99], np.nan, inplace=True)\n", " df.rdpol.replace([66, 77, 88, 99], np.nan, inplace=True)\n", " df.nwsppol.replace([66, 77, 88, 99], np.nan, inplace=True)\n", " df.eduyrs.replace([77, 88, 99], np.nan, inplace=True)\n", " df.rlgblg.replace([7, 8, 9], np.nan, inplace=True)\n", " df.rlgdgr.replace([77, 88, 99], np.nan, inplace=True)\n", " df.hinctnta.replace([77, 88, 99], np.nan, inplace=True)\n", " df.yrbrn.replace([7777, 8888, 9999], np.nan, inplace=True)\n", " df.inwyr.replace([9999], np.nan, inplace=True)\n", " \n", " df['hasrelig'] = (df.rlgblg==1).astype(int)\n", " df.loc[df.rlgblg.isnull(), 'hasrelig'] = np.nan\n", " \n", " df['yrbrn60'] = df.yrbrn - 1960\n", " df['inwyr07'] = df.inwyr - 2007 + np.random.uniform(-0.5, 0.5, len(df))" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [], "source": [ "cycles = [df1, df2, df3, df4, df5]\n", "for cycle in cycles:\n", " clean_cycle(cycle)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "22 countries\n", "25 countries\n", "23 countries\n", "29 countries\n", "27 countries\n" ] } ], "source": [ "def resample(df):\n", " \"\"\"Resample data by country.\n", " \n", " df: DataFrame\n", " \n", " returns: map from country code to DataFrame\n", " \"\"\"\n", " res = {}\n", " grouped = df.groupby('cntry')\n", " for name, group in grouped:\n", " sample = group.sample(len(group), weights=group.pspwght, replace=True)\n", " sample.index = range(len(group))\n", " res[name] = sample\n", " return res\n", "\n", "# each cycle_map is a map from country code to DataFrame\n", "cycle_maps = [resample(cycle) for cycle in cycles]\n", "for cycle_map in cycle_maps:\n", " print(len(cycle_map), 'countries')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "In a each cycle, a few questions were not asked in some countries." ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cycle 1\n", "FR netuse 0\n", "FR hinctnta 0\n", "DE netuse 0\n", "HU hinctnta 0\n", "IE hinctnta 0\n", "Cycle 2\n", "FR netuse 0\n", "FI rlgblg 0\n", "EE hinctnta 0\n", "UA hinctnta 0\n", "Cycle 3\n", "HU hinctnta 0\n", "EE hinctnta 0\n", "UA hinctnta 0\n", "Cycle 4\n", "BG hinctnta 0\n", "CY hinctnta 0\n", "SK hinctnta 0\n", "Cycle 5\n", "PT hinctnta 0\n", "EE inwyr07 0\n" ] } ], "source": [ "def check_variables(name, group):\n", " \"\"\"Print variables missing from a group.\n", " \n", " name: group name (country code)\n", " group: DataFrame\n", " \"\"\"\n", " varnames = ['cntry', 'tvtot', 'tvpol', 'rdtot', 'rdpol', \n", " 'nwsptot', 'nwsppol', 'netuse', 'inwyr07', \n", " 'rlgblg', 'rlgdgr', 'eduyrs', 'hinctnta', \n", " 'yrbrn', 'pspwght', 'pweight']\n", " for var in varnames:\n", " n = len(group[var].dropna())\n", " if (n < 100):\n", " print(name, var, len(group[var].dropna()))\n", "\n", "for i, cycle_map in enumerate(cycle_maps):\n", " print('Cycle', i+1)\n", " for name, group in cycle_map.items():\n", " check_variables(name, group)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I'll remove the cycle/country groups that are missing netuse or rlgblg.\n", "\n", "The ones that are missing income information undermine the ability to control for income, but it's a pretty weak effect, so I don't think it's a problem.\n", "\n", "inwtr07 is interview year shifted by 1970, the approximate mean." ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "del cycle_maps[0]['FR']\n", "del cycle_maps[0]['DE']\n", "del cycle_maps[1]['FR']\n", "del cycle_maps[1]['FI']\n", "\n", "ee = cycle_maps[4]['EE']\n", "ee.inwyr07 = 3 + np.random.uniform(-0.5, 0.5, len(ee))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Income is reported on a different scale in different cycles, and differs substantially across countries. So I'm replacing it with rank on a per-country basis: that is, each respondent is mapped to their income rank, from 0 to 1, within their country.\n", "\n", "I do the same thing with years of education, in part because there are some suspiciously high values. At the very least, mapping to rank reduces the effect of outliers." ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def replace_var_with_rank(name, df, old, new):\n", " \"\"\"Replaces a scale variable with a rank from 0-1.\n", " \n", " Creates a new column.\n", " \n", " name: country code\n", " df: DataFrame\n", " old: old variable name\n", " new: new variable name\n", " \"\"\"\n", " # jitter the data\n", " series = df[old] + np.random.uniform(-0.25, 0.25, len(df))\n", " \n", " # if there's no data, just put in random values\n", " if len(series.dropna()) < 10:\n", " df[new] = np.random.random(len(df))\n", " return\n", " \n", " # map from values to ranks\n", " cdf = thinkstats2.Cdf(series)\n", " df[new] = cdf.Probs(series)\n", " \n", " # make sure NaN maps to NaN\n", " df.loc[df[old].isnull(), new] = np.nan\n", " \n", " \n", "def replace_with_ranks(cycle_map):\n", " \"\"\"Replace variables within countries.\n", " \n", " cycle_map: map from country code to DataFrame\n", " \"\"\"\n", " for name, group in cycle_map.items():\n", " replace_var_with_rank(name, group, 'hinctnta', 'hincrank')\n", " replace_var_with_rank(name, group, 'eduyrs', 'edurank')\n", " \n", "for cycle_map in cycle_maps:\n", " replace_with_ranks(cycle_map)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To fill missing values, I am drawing random samples from the available values, on a per-country basis.\n", "\n", "Since I am not modeling relationships between variables, I am losing some information with this replacement policy; the net effect is to understate the actual strength of all relationships." ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def fill_var(df, old, new):\n", " \"\"\"Fills missing values.\n", " \n", " Creates a new column\n", " \n", " df: DataFrame\n", " old: old variable name\n", " new: new variable name\n", " \"\"\"\n", " # find the NaN rows\n", " null = df[df[old].isnull()]\n", " \n", " # sample from the non-NaN rows\n", " fill = df[old].dropna().sample(len(null), replace=True)\n", " fill.index = null.index\n", " \n", " # replace NaNs with the random sample\n", " df[new] = df[old].fillna(fill)\n", " \n", " \n", "def fill_all_vars(df):\n", " \"\"\"Fills missing values in the variables we need.\n", " \n", " df: DataFrame\n", " \"\"\"\n", " for old in ['hasrelig', 'rlgdgr', 'yrbrn60', 'edurank', 'hincrank',\n", " 'tvtot', 'rdtot', 'nwsptot', 'netuse', 'inwyr07']:\n", " new = old + '_f'\n", " fill_var(df, old, new)" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def fill_vars_by_country(cycle_map):\n", " for name, group in cycle_map.items():\n", " fill_all_vars(group)\n", " \n", "for cycle_map in cycle_maps:\n", " fill_vars_by_country(cycle_map)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Concatenate the cycles." ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "37937\n", "43709\n", "43000\n", "56752\n", "52458\n" ] } ], "source": [ "def concat_groups(cycle_map):\n", " \"\"\"Concat all countries in a cycle.\n", " \n", " cycle_map: map from country code to DataFrame\n", " \n", " returns: DataFrame\n", " \"\"\"\n", " return pd.concat(cycle_map.values(), ignore_index=True)\n", "\n", "dfs = [concat_groups(cycle_map) for cycle_map in cycle_maps]\n", "\n", "for df in dfs:\n", " print(len(df))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(233856, 32)\n" ] }, { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cntryinwyrtvtottvpolrdtotrdpolnwsptotnwsppolnetuserlgblg...hasrelig_frlgdgr_fyrbrn60_fedurank_fhincrank_ftvtot_frdtot_fnwsptot_fnetuse_finwyr07_f
0BE200220720NaN11...14-80.4764960.7612042701-5.265308
1BE200241110NaN02...06-230.4444440.4755854100-5.463721
2BE200252210NaN12...02-70.3071580.4341145201-4.811951
3BE2002210NaN0NaN02...02-30.1672010.4602012000-4.888268
4BE200262505111...110-390.2077990.2876256551-5.321503
\n", "

5 rows × 32 columns

\n", "
" ], "text/plain": [ " cntry inwyr tvtot tvpol rdtot rdpol nwsptot nwsppol netuse rlgblg \\\n", "0 BE 2002 2 0 7 2 0 NaN 1 1 \n", "1 BE 2002 4 1 1 1 0 NaN 0 2 \n", "2 BE 2002 5 2 2 1 0 NaN 1 2 \n", "3 BE 2002 2 1 0 NaN 0 NaN 0 2 \n", "4 BE 2002 6 2 5 0 5 1 1 1 \n", "\n", " ... hasrelig_f rlgdgr_f yrbrn60_f edurank_f hincrank_f tvtot_f \\\n", "0 ... 1 4 -8 0.476496 0.761204 2 \n", "1 ... 0 6 -23 0.444444 0.475585 4 \n", "2 ... 0 2 -7 0.307158 0.434114 5 \n", "3 ... 0 2 -3 0.167201 0.460201 2 \n", "4 ... 1 10 -39 0.207799 0.287625 6 \n", "\n", " rdtot_f nwsptot_f netuse_f inwyr07_f \n", "0 7 0 1 -5.265308 \n", "1 1 0 0 -5.463721 \n", "2 2 0 1 -4.811951 \n", "3 0 0 0 -4.888268 \n", "4 5 5 1 -5.321503 \n", "\n", "[5 rows x 32 columns]" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = pd.concat(dfs, ignore_index=True)\n", "print(df.shape)\n", "df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "TV watching time on average weekday" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 8029\n", "1 12609\n", "2 32932\n", "3 32466\n", "4 38880\n", "5 29248\n", "6 28409\n", "7 50587\n", "Name: tvtot, dtype: int64" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.tvtot.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Radio listening, total time on average weekday." ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 57368\n", "1 35800\n", "2 36999\n", "3 18019\n", "4 15176\n", "5 10336\n", "6 9372\n", "7 49540\n", "Name: rdtot, dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.rdtot.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Newspaper reading, total time on average weekday." ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 68275\n", "1 72280\n", "2 62358\n", "3 17305\n", "4 6804\n", "5 2729\n", "6 1324\n", "7 1875\n", "Name: nwsptot, dtype: int64" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.nwsptot.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Personal use of Internet, email, www" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 71528\n", "1 36749\n", "2 4705\n", "3 3751\n", "4 8248\n", "5 9923\n", "6 28414\n", "7 69833\n", "Name: netuse, dtype: int64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.netuse.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Belong to a particular religion or denomination" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 148495\n", "2 83497\n", "Name: rlgblg, dtype: int64" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.rlgblg.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "How religious" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 29052\n", "1 13327\n", "2 16316\n", "3 18726\n", "4 15487\n", "5 40339\n", "6 22866\n", "7 27172\n", "8 23707\n", "9 10667\n", "10 14027\n", "Name: rlgdgr, dtype: int64" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.rlgdgr.value_counts().sort_index()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Total household net income, all sources" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 184192.000000\n", "mean 0.500507\n", "std 0.288454\n", "min 0.000014\n", "25% 0.250977\n", "50% 0.500285\n", "75% 0.750301\n", "max 1.000000\n", "Name: hincrank, dtype: float64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.hincrank.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Year born" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 232794.000000\n", "mean 1961.491916\n", "std 18.727516\n", "min 1885.000000\n", "25% 1947.000000\n", "50% 1962.000000\n", "75% 1977.000000\n", "max 1996.000000\n", "Name: yrbrn, dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.yrbrn.describe()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYoAAAEZCAYAAACJjGL9AAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAHWVJREFUeJzt3Xu4HFWZ7/HvjySAcmcYEcP1QAS8cJFD4HFUNspICGoc\nGIQgd8/AczwZRhFFOIzu0aMjcjw6yIyDigg6Q0SNTvREUA/uARGBOBBuCZKBQC6AXMId5JL3/FFr\nh0pnd+/ee3d1V1X/Ps+zn1RXVVevtXun3lrvWrVKEYGZmVkzG/S6AGZmVm4OFGZm1pIDhZmZteRA\nYWZmLTlQmJlZSw4UZmbWkgOFjYukr0k6t8Dj7yjpKUlKr4ckfaiDx18g6fhOHW8Mn/u/JD0saVW3\nP7sVSSdJurbX5bBymtzrAlj5SFoGvAZ4CXgZuBO4DPh6pBtvIuK/T/D4p0TE1c32iYj7gc3yq9LP\neD5vENg1ItYGhoiYOZ5jTYSkHYEzgB0i4tFuf77ZeLlFYSMJ4D0RsTmwI/AF4Czg4okcVNLwhUkA\namO/utkReLTXQaLo36+kSUUe37rPgcJaioinIuInwNHAiZLeACDp25I+m5a3kfRTSaslPSrpmuH3\nS1om6ROSFgFPS/pXshPmT1Jq6UxJO0taI+kUSfcBv5S0U1qX/xvdTdINkp6Q9GNJW6XPGJC0PF/u\n9LnvkjQDOBs4On3ezWn72lSWMuem9zwk6VJJm6dtw2U7QdJ9KW10TrPfl6QtJF0m6Q/peP8zHf8Q\n4OfA61I5vjXCe2+X9J7c6ymSHpG0d3p9oKTfpN/zLZIOyu17sqQ7JT0p6T8lnZrbNiBpRfoeHqB5\nwJekr0p6XNJiSe/MbXidpPnp+71b0n/LbRuU9ANJ35H0BHBS+v1+VtKvU5mukvQnzX5vVm4OFNaW\niLgJWAG8fXgVr6SCPgYsB7YhS1md3fD2Y4CZwBYRcSxwP1mLZbOI+N+5/d4B7AEcyvotDgEnACcD\n25GlxS5oVeSs2HEl8Hlgbvq8fUco/8nAicAA8F+ATYELG473Z8DrgXcBn5K0R5PP/SpZymwX4KDh\nMkfEL4HDgFWpHKeM8N5LgeNyr2cCKyNikaSpwE+Bz0TEVsCZwA9zJ9+HgMNTK/Bk4MuS9s0da1tg\nK7IgfVqTsh8ALAX+BPg0ME/SlmnbXLLvbTvgL4HPSzo49973Ad+PiC2Af0nrjgFOIvub2DCV2SrI\ngcLGYhWw9QjrXyA7gewcES9HxHW5bQFcEBErI+KPoxx/MCKea7JfAJdFxJ0R8Szwt8AHhju7RyFa\npLqADwJfiohlEfEMWaA7pqE183cR8ceIuBVYBOy93odkKZejgbMj4pmIuA/4EjDcNzJaWf8FOFzS\npun18cB30vJxwIIU+EiBZyFweHq9ICLuTcvXkLVe3p479hrg0xHxYkQ83+Tz/xAR/5C+wyuAu4D3\nSNoBeCtwVkS8EBGLgG+SBcFhv4mI+enznyf7vi6JiKXp9RXAPqPU30rKgcLGYnvgsdzr4RPf+WRX\noj9PaY+zGt63nPaMtl9++/3AFLJWzERtB9zXcOzJZFfhwx7MLT8LbDLCcbZJZWo81tR2ChERq4Dr\ngL9MV/IzeOXqfCfgqJR2Wi1pNVkr57UAkg6T9NuUGlpN1hrJp3oejogXRinCyobX95H9brYDHktB\ntFm9VoxwvPzv7DmylppVkAOFtUXS/sDrgF83bouIpyPizIjYlSwFcUZDWqJxtFKz0UujjWrasWH5\nReAR4Bng1bmyTgL+dAzHXQXs3HDsl8jSOWPxSCpT47FGOok2M5x+OorsKv2BtP5+4DsRsVXuZ7OI\n+KKkjYAfAl8EXpNSUwtYtwXTzoixxoC2E9nvZhWwda6lM1K9PA11jTlQWDPD9y9snjpYLyc7Ud2R\n3572eY+k3VIa6EmyIbVrWhz7IWDXcZTnOEl7Sno18BmynHgAvwc2ljRT0hTgXGCj3HsfBHZukaa6\nHPho6rjelFf6NFrVYb1jRcTLZCmWz0naVNJOwEeB746hnj8C3gKcTjYkedh3gfdKerekSZI2Tp3U\nU8ny/xuSBao1kg4D3j2Gzxz2Gkmnp070o8j6ixZExArgN8DfS9pI0l7AKW3Uq520oFWAA4U18xNJ\nT5JdyZ5Nlms/Obc93xm8G/AL4CmyE8o/RsS/tzj23wPnphTKGbnjNYqG5cuAbwMPkJ0YTweIiCeA\nD5PlzVcAT7Numur76d9HJS0c4XO+RdYXcA1wD1lq6a+blKPVOtL7nknHuZYsdXRJG+8j1eV5YB5Z\nq2Rebv0KYBZwDvAHsu/lY4Ai4imy38UVZKnB2cC/tVne/PbfAtOAh4HPAkdGxOq0fXYq06pUrk/l\n7oNpdo9L4/fnVkdFqcgHF6UhgIeTdZK9uck+F5CNBnkWOCkibi6sQGYVIOlvgWkRccKoO5t1QdEt\nikvIOuRGJGkmsFtETANOBb5WcHnMSk3S1mRpna/3uixmwwoNFBFxLbC6xS7vI+u8IyJuALaUtG2L\n/c1qS9JfkaWUfhYR6w0aMOuVXvdRTGXdXPIKsiGYZn0nIr4REZtGxId7XRazvF4HClh/ZIQ7vMzM\nSqTXk6+tBHbIvd6e9W/6QZKDh5nZOETEhIcp9zpQzAfmAHMlHQg8HhEj3uRU5OisXhscHGRwcLDX\nxShMnetX57qB69dN/7bgLr734zt4/vmX2tp/3mUfGHWf9ma4GV3R0w1fTjYx2jbKZvf8NNkUB0TE\nRRGxIN0ktZRs7PnJzY9mZlYv7QaHjTeezNHvfyOzZu7epZKtq9BAERGz29hnTpFlMDMrq1ZBotfB\nIa/XqScDBgYGel2EQtW5fnWuG7h+RWnWkihTcMgr9M7sTpEUVSinmVk7jj113jpBYuONJ/OvXz+i\n458jqSOd2WUYHmtm1lcag8TR739jD0szOqeezMy6oFm6qYiWRKe5RWFm1gXN+iSqoBqlNDOroFbD\nX6uQchrmQGFmVpBmrYgqpJvynHoyMytIs+GvVeMWhZlZF7Qz5UZZOVCYmXXQWOdsqgKnnszMOqjK\no5uacaAwM+uguvRL5FU7zJmZlUCzdFOV+yXy3KIwM5ugOqab8hwozMwmqI7pprz6hDwzsxKoS7op\nz4HCzGychvsm6s6pJzOzcWrsm6hTv0SeA4WZ2ThV7bkS41XP8Gdm1mVVm+hvLBwozMzGoI5TdIzG\nqSczszGo+z0TI3GgMDMbg7rfMzGSeodBM7MC1fGeiZE4UJiZjaIf+yXynHoyMxtFP/ZL5DlQmJmN\noh/7JfL6JySamY1B3acOHwu3KMzMRtDv6aY8BwozsxH0e7oprz/Do5nZGPRjuinPgcLMLKdfpg4f\nC6eezMxy+mXq8LFwoDAzy+mXqcPHwqHSzKyJOk8dPhYOFGbW9/p9io7RFJp6kjRD0hJJd0s6a4Tt\n20i6UtItkm6XdFKR5TEzG4nvmWitsEAhaRJwITADeAMwW9KeDbvNAW6OiH2AAeBLkvztmFlX+Z6J\n1oo8KU8HlkbEMgBJc4FZwOLcPg8Ae6XlzYFHI8JtPzMrnKfoaF+RgWIqsDz3egVwQMM+3wCulrQK\n2AzwN2RmXeF0U/uK/K1EG/ucA9wSEQOSdgV+IWnviHiqccfBwcG1ywMDAwwMDHSqnGbWh+qYbhoa\nGmJoaKjjx1VEO+fzcRxYOhAYjIgZ6fXZwJqIOC+3zwLgcxFxXXr9/4CzImJhw7GiqHKaWX864oQr\n1i7XNd0kiYjQRI9TZItiITBN0s7AKuBoYHbDPkuAQ4DrJG0L7A7cU2CZzKyPeRjs+BQWKCLiJUlz\ngKuAScDFEbFY0mlp+0XA54FLJC0iG4H1iYh4rKgymVl/c7/E+BT6G4qInwE/a1h3UW75EeC9RZbB\nzGxYHfslusGh1MxqzcNgJ86TAppZrTndNHEOFGZWa043TZzDqpn1DaebxseBwsxqyU+q6xynnsys\nlvykus5xoDCzWvKT6jrHIdbMaqPZUFg/qW5i3KIws9rwUNhiOFCYWW14KGwxHGrNrJY8FLZzHCjM\nrNI8I2zxnHoys0pzv0TxHCjMrNLcL1E8h10zqw33SxTDgcLMKsf9Et3l1JOZVY77JbrLgcLMKsf9\nEt3lEGxmleAn1fWOWxRmVglON/WOA4WZVYLTTb3jcGxmleN0U3c5UJhZqflJdb3n1JOZlZqfVNd7\nDhRmVmp+Ul3vOTSbWen4SXXl4haFmZWOh8KWiwOFmZWOh8KWi0O0mZWah8L2ngOFmZWCZ4QtL6ee\nzKwU3C9RXg4UZlYK7pcoL4drMysd90uUiwOFmfWM+yWqwaknM+sZ90tUQ6GBQtIMSUsk3S3prCb7\nDEi6WdLtkoaKLI+ZlYv7JaqhsNAtaRJwIXAIsBK4SdL8iFic22dL4B+BQyNihaRtiiqPmZWb+yXK\nq8g23nRgaUQsA5A0F5gFLM7tcyzww4hYARARjxRYHjMrAfdLVE+RqaepwPLc6xVpXd40YGtJv5K0\nUNLxBZbHzErA/RLVU+S3E23sMwV4C/Au4NXA9ZJ+GxF3F1guM+sh90tUT5GBYiWwQ+71DmStirzl\nwCMR8RzwnKRrgL2B9QLF4ODg2uWBgQEGBgY6XFwz6zb3S3TW0NAQQ0NDHT+uItq58B/HgaXJwF1k\nrYVVwI3A7IbO7D3IOrwPBTYCbgCOjog7G44VRZXTzLpjpL4JB4piSSIiNNHjFNaiiIiXJM0BrgIm\nARdHxGJJp6XtF0XEEklXArcCa4BvNAYJM6sHP9K0ugprUXSSWxRm1XfECVesXR7ul5g1c/celqj+\nSt+iMDPzI03rwVN4mFlhPBS2HhwozKwwHgpbDw7tZtYVHuFUXQ4UZtZRnqKjfpx6MrOOcr9E/ThQ\nmFlHuV+ifhzmzaww7peoBwcKM5sw90vUm1NPZjZh7peot6aBQtK3c8sndqU0ZlZJ7peot1Yhf+/c\n8keASwsui5nVgPsl6sdtQzMbt+G+Cau3VoFie0kXAAKm5pYBIiJOL7x0ZlZqnjq8P7T6Vj9O9jhT\nAb9r2OY5v81svSDhfol6ahooIuLbXSyHmVWcpw6vr5btREknAacDe6RVdwJfjQh3bJv1Kd8z0X+a\nBoo0JPZvgDOAm8lSUPsC56cnzl3WnSKaWZn4non+0+qGuw8DR0TEryLi8YhYHRFXA0cC/6M7xTOz\nsvE9E/2n1WXAZhFxb+PKiFgmabMCy2RmFeF7JvpDq0Dx/Di3mVnNuF+iv7UKFHtKuq3Jtl2LKIyZ\nlZP7Jfpbq296L2BbYEXD+h2ABworkZmVjvsl+lurQPEV4JMRsSy/UtLmwJeB9xZYLjMrKfdL9J9W\ngWLbiFgv9RQRt0rapcAymVkJuF/ChrUaHrtli20bd7ogZlYu7pewYa0CxUJJpzaulPRXrD/3k5nV\njPslbFiry4OPAD+S9EFeCQz7ARsBf1F0wcysPNwv0d9aTQr4oKS3AgcDbyKbMfan6e5sM6sh90vY\nSFomHCMigKvTj5nVnPslbCSt+ijMrM+4X8JG4ksFMxuR+yVsmAOFmfnZ19aSU09m5mdfW0sOFGbm\nZ19bS75sMLN1+NnX1qjQQCFpBtnkgpOAb0bEeU322x+4HvhARMwrskxmlvE9E9auwlJPkiYBFwIz\ngDcAsyXt2WS/84AryZ7LbWZd4HsmrF1F9lFMB5ZGxLKIeBGYC8waYb+/Bn4APFxgWcysge+ZsHYV\nefkwFViee70COCC/g6SpZMHjncD+ZNOEmFmX+Z4Ja6XIQNHOSX/44UghSTj1ZFYo90vYeBQZKFaS\nPTZ12A6s/1jV/YC5WYxgG+AwSS9GxPzGgw0ODq5dHhgYYGBgoMPFNas/90vU29DQEENDQx0/rrJ5\n/zpP0mTgLuBdwCrgRmB2RCxusv8lwE9GGvUkKYoqp1k/OeKEK9Z5PdwvMWvm7j0qkRVJEhEx4UxN\nYZcSEfGSpDnAVWTDYy+OiMWSTkvbLyrqs81sdO6XsHYV1qLoJLcozMavWb+EA0X9dapF4Sk8zGrO\n/RI2UQ4UZjXn+yVsonxZYdZHnG6y8XCgMKsh3y9hneTUk1kNuV/COsmBwqyG3C9hneRLDLOac7+E\nTZQDhVmN+NnXVgSnnsxqxM++tiI4UJjViJ99bUXw5YZZxTUbCutnX1unuEVhVnEeCmtFc6AwqzgP\nhbWi+bLDrEY8FNaK4EBhVkGeosO6yaknswpyv4R1kwOFWQW5X8K6yZcgZhXhJ9VZr7hFYVYRTjdZ\nrzhQmFWE003WK74cMasgp5usmxwozErMw2CtDJx6Misx90tYGThQmJWY+yWsDHxpYlYR7pewXnGg\nMCshP6nOysSpJ7MS8pPqrEwcKMxKyE+qszLxZYpZSfhJdVZWblGYlYSHwlpZOVCYlYSHwlpZ+XLF\nrIc8I6xVgVsUZj3kdJNVgQOFWQ853WRV4EsXs5JwusnKyoHCrMs8I6xVTeGpJ0kzJC2RdLeks0bY\n/kFJiyTdKuk6SXsVXSazXnK/hFVNoX+dkiYBFwKHACuBmyTNj4jFud3uAd4REU9ImgF8HTiwyHKZ\n9UKzloT7Jazsir6MmQ4sjYhlAJLmArOAtYEiIq7P7X8DsH3BZTLriZHmb/Jd11YFRaeepgLLc69X\npHXNfAhYUGiJzHrE8zdZVRXdooh2d5R0MHAK8GcjbR8cHFy7PDAwwMDAwASLZlY8z99k3TQ0NMTQ\n0FDHj6uIts/lYz+4dCAwGBEz0uuzgTURcV7DfnsB84AZEbF0hONEkeU0K8qxp84bsU/CgcK6QRIR\noYkep+jU00JgmqSdJW0IHA3Mz+8gaUeyIHHcSEHCrMrccW11UGjqKSJekjQHuAqYBFwcEYslnZa2\nXwR8CtgK+JokgBcjYnqR5TIrkudvsropNPXUKU49WZU43WRlUZXUk1nfcbrJ6sa3g5p1gNNNVmdu\nUZh1gKflsDpzoDDrAKebrM58yWM2Tk43Wb9wi8JsnJxusn7hv2qzMfIssNZvHCjMxsizwFq/cerJ\nbIw8C6z1G7cozNrgWWCtn7lFYdYGd1xbP/NfulkTzVoR4JST9RcHCrMmmrUinG6yfuPUk1kTHv5q\nlnGLwizHd1ubrc8tCrMcd1qbrc//A6zvudParDUHCut77rQ2a82BwvqW52wya48DhfUtz9lk1h4H\nCusrbkWYjZ0DhfUV90eYjZ0DhdWeRzWZTYwDhdWeWxFmE+NAYbXkVoRZ5zhQWC25FWHWOQ4UVhtu\nRZgVw4HCasOtCLNiOFBY5fneCLNiOVBYJY2WZnIrwqxzHCisMloFh2FuRZh1ngOFVUarIDEcIGbN\n3L3LpTKrPwcKK7V2RjI5OJgVy4HCSsl9EGbl4UBhpeE+CLNyKjRQSJoBfAWYBHwzIs4bYZ8LgMOA\nZ4GTIuLmIstk5TKW4OAUk1lvFBYoJE0CLgQOAVYCN0maHxGLc/vMBHaLiGmSDgC+BhxYVJnKamho\niIGBgV4XozBDQ0M88ex2owaERlUIEP3w3bl+VmSLYjqwNCKWAUiaC8wCFuf2eR9wKUBE3CBpS0nb\nRsRDBZardOr2x9rYSli86PvsufdRbb23CsEhr27fXSPXz6DYQDEVWJ57vQI4oI19tgf6KlCUVTtp\noU6oWnAw6zdFBopocz+N830tHXHCFZ04TFcsXnQHt95TnfKOlwOCWTUpoiPn5fUPLB0IDEbEjPT6\nbGBNvkNb0j8DQxExN71eAhzUmHqSVEwhzcxqLiIaL8bHrMgWxUJgmqSdgVXA0cDshn3mA3OAuSmw\nPD5S/0QnKmpmZuNTWKCIiJckzQGuIhsee3FELJZ0Wtp+UUQskDRT0lLgGeDkospjZmbjU1jqyczM\n6mGDXnyopG9JekjSbbl10yXdKOlmSTdJ2j+t31jS5ZJulXSnpE/m3rOfpNsk3S3pH3pRl5E0qd/e\nkq5P9ZgvabPctrNTHZZIendufeXrJ+nPJS1M6xdKOjj3nsrXL7d9R0lPS/pYbl3p6jeOv8290rbb\n0/YN0/rS1Q3G/LdZxXPLDpJ+JemO9J2cntZvLekXkn4v6eeStsy9Z+Lnl4jo+g/wdmBf4LbcuiHg\n0LR8GPCrtHwScHlafhVwL7Bjen0jMD0tLwBm9KI+bdbvJuDtaflk4DNp+Q3ALcAUYGdgKa+09OpQ\nv32A16blNwIrcu+pfP1y238AfA/4WJnrN8bvbjKwCHhzer0VsEFZ6zaO+lXx3PJaYJ+0vClwF7An\n8EXgE2n9WcAX0nJHzi89aVFExLXA6obVDwBbpOUtye7mHl6/ibI7vTcBXgCelLQdsFlE3Jj2uwx4\nf6EFb1OT+k1L6wF+CRyZlmeR/bG+GNnNiUuBA+pSv4i4JSIeTOvvBF4laUpd6gcg6f3APWT1G15X\nyvqNsW7vBm6NiNvSe1dHxJqy1g3GXL8qnlsejIhb0vLTZDcwTyV383L6d7i8HTm/9CRQNPFJ4EuS\n7gfOB84BiIirgCfJvtRlwPkR8TjZL2dF7v0r07qyukPSrLR8FLBDWn4d69ZjBVk9GtdXtX55RwK/\ni4gXqcn3J2lT4BPAYMP+Vapfs+/u9UBIulLS7yR9PK2vUt2gSf2qfm5RNqJ0X+AGID+jxUPAtmm5\nI+eXMgWKi4HTI2JH4KPpNZKOI2sWbgfsApwpaZeelXL8TgE+LGkhWZPxhR6Xp9Na1k/SG4EvAKf1\noGyd0Kx+g8CXI+JZ1r95tCqa1W0y8Dbg2PTvX0h6Jx26KbaLRqxflc8t6QLlh8DfRMRT+W2R5ZI6\n+h2VaZrx6RFxSFr+AfDNtPxW4EcR8TLwsKTrgP2AX5NN9zFse15JV5VORNwFHAog6fXA4WnTSta9\n+t6eLNKvpB71Q9L2wDzg+Ii4N62uev1mpk3TgSMlfZEsZbpG0nNk9a1E/Vp8d8uBayLisbRtAfAW\n4LtUpG7Q8rur5LlF0hSyIPGdiPhxWv2QpNdGxIMprfSHtL4j55cytSiWSjooLb8T+H1aXpJeI2kT\nstlll6S895OSDpAk4Hjgx5SUpD9N/24AnEs2Uy5kNx0eI2nDdDUzDbixLvVLoy/+L3BWRFw/vH9E\nPEC16/fPABHxjojYJSJ2IZtS/3MR8U9V+v5a/G1eBbxZ0qskTQYOAu6oUt2g+XdHBc8tqTwXA3dG\nxFdym+YDJ6blE3mlvJ05v/So5/5ysru1XyC7ajkZ+K9kubZbgOuBfdO+G5FdwdwG3MG6o0r2S+uX\nAhf0oi5t1u8U4HSyEQp3AZ9v2P+cVIclpJFfdakf2X/Mp4Gbcz/b1KV+De/7NHBGmb+/cfxtfhC4\nPdXjC2Wu2zj+Nqt4bnkbsCadJ4f/P80AtibrqP898HNgy9x7Jnx+8Q13ZmbWUplST2ZmVkIOFGZm\n1pIDhZmZteRAYWZmLTlQmJlZSw4UZmbWkgOF1Z4y10qakVt3lKSfdfAznu7UsczKxvdRWF9Ic019\nn2wStSnAf5DdfHRvyzeOfKzJEfFSw7qnImKzZu8Z5XiTIptGwqyUHCisb0g6D3iWbErpp4GdgDeR\nBY7BiJifZuS8LO0DMCcirpc0AHwWeAzYIyJ2bzj2U8A3yKbmfhA4JiIekbQP2ZQRrwL+EzglIh6X\nNER2V+3byO4mfh/wW+BgsjmjPhQRvy7g12A2Zk49WT/5O2A22ZQHGwNXR8QBZPP9nC/p1WRTNP95\nROwHHANckHv/vmQzHO/O+jYBboqINwH/TjadB2RB5+MRsTfZdAnD6wOYEhH7R8T/Sa8npfJ8JLef\nWc+VafZYs0JFxLOSvkfWmvgA8F5JZ6bNG5HNsvkgcKGkvYGXySZRG3ZjRNzX5PBryJ5wB9n8QfMk\nbQ5sEa88NOdSsvTXsO+xrnnp3/8gexqZWSk4UFi/WZN+BBwREXfnN0oaBB6IiOPTk8+ez21+ps3P\nECM/D6DxeRWNx/tj+vdl/H/TSsSpJ+tXV5HNKgqApH3T4uZkrQqAE4BJbR5vA7Knp0H2oJ9rI+JJ\nYLWkt6X1x5M9G37tx4692Gbd50Bh/SjIOqanSLpV0u1k/RcA/wScKOkWYHeyNFX+fc08A0yXdBsw\nAHwmrT+RrP9jEbBXbv1ox/MoEysNj3oyM7OW3KIwM7OWHCjMzKwlBwozM2vJgcLMzFpyoDAzs5Yc\nKMzMrCUHCjMza8mBwszMWvr/9X+OumFQm/YAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "cdf = thinkstats2.Cdf(df.yrbrn)\n", "thinkplot.PrePlot(1)\n", "thinkplot.Cdf(cdf)\n", "thinkplot.Config(xlabel='Year born', ylabel='CDF', \n", " title='Disrtribution of year born', legend=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Shifted to mean near 0" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 232794.000000\n", "mean 1.491916\n", "std 18.727516\n", "min -75.000000\n", "25% -13.000000\n", "50% 2.000000\n", "75% 17.000000\n", "max 36.000000\n", "Name: yrbrn60, dtype: float64" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.yrbrn60.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Number of years of education, converted to ranks." ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 230992.000000\n", "mean 0.500264\n", "std 0.288676\n", "min 0.000332\n", "25% 0.250251\n", "50% 0.500277\n", "75% 0.750251\n", "max 1.000000\n", "Name: edurank, dtype: float64" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.edurank.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Country codes" ] }, { "cell_type": "code", "execution_count": 29, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "AT 6918\n", "BE 8939\n", "BG 6064\n", "CH 9310\n", "CY 3293\n", "CZ 8790\n", "DE 11568\n", "DK 7684\n", "EE 6960\n", "ES 9729\n", "FI 7969\n", "FR 5787\n", "GB 11117\n", "GR 9759\n", "HR 3133\n", "HU 7806\n", "IE 10472\n", "IL 7283\n", "IS 579\n", "IT 1207\n", "LT 1677\n", "LU 3187\n", "LV 1980\n", "NL 9741\n", "NO 8643\n", "PL 8917\n", "PT 10302\n", "RO 2146\n", "RU 7544\n", "SE 9201\n", "SI 7126\n", "SK 6944\n", "TR 4272\n", "UA 7809\n", "Name: cntry, dtype: int64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.cntry.value_counts().sort_index()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 29052\n", "1 13327\n", "2 16316\n", "3 18726\n", "4 15487\n", "5 40339\n", "6 22866\n", "7 27172\n", "8 23707\n", "9 10667\n", "10 14027\n", "Name: rlgdgr, dtype: int64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.rlgdgr.value_counts().sort_index()" ] }, { "cell_type": "code", "execution_count": 31, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 233537.000000\n", "mean -0.219482\n", "std 2.933184\n", "min -5.499864\n", "25% -2.722377\n", "50% -0.247620\n", "75% 2.284778\n", "max 5.499294\n", "Name: inwyr07, dtype: float64" ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.inwyr07.describe()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run the model" ] }, { "cell_type": "code", "execution_count": 32, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def run_model(df, formula):\n", " model = smf.logit(formula, data=df) \n", " results = model.fit(disp=False)\n", " return results" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Here's the model with all control variables and all media variables:" ] }, { "cell_type": "code", "execution_count": 33, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Logit Regression Results
Dep. Variable: hasrelig No. Observations: 179125
Model: Logit Df Residuals: 179117
Method: MLE Df Model: 7
Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.02992
Time: 14:59:20 Log-Likelihood: -1.1470e+05
converged: True LL-Null: -1.1824e+05
LLR p-value: 0.000
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err z P>|z| [95.0% Conf. Int.]
Intercept 1.0394 0.019 54.565 0.000 1.002 1.077
yrbrn60 -0.0080 0.000 -24.684 0.000 -0.009 -0.007
edurank -0.0096 0.020 -0.490 0.624 -0.048 0.029
hincrank 0.1361 0.019 7.309 0.000 0.100 0.173
tvtot -0.0154 0.003 -6.059 0.000 -0.020 -0.010
rdtot -0.0151 0.002 -7.956 0.000 -0.019 -0.011
nwsptot -0.0412 0.004 -10.213 0.000 -0.049 -0.033
netuse -0.1124 0.002 -56.961 0.000 -0.116 -0.109
" ], "text/plain": [ "\n", "\"\"\"\n", " Logit Regression Results \n", "==============================================================================\n", "Dep. Variable: hasrelig No. Observations: 179125\n", "Model: Logit Df Residuals: 179117\n", "Method: MLE Df Model: 7\n", "Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.02992\n", "Time: 14:59:20 Log-Likelihood: -1.1470e+05\n", "converged: True LL-Null: -1.1824e+05\n", " LLR p-value: 0.000\n", "==============================================================================\n", " coef std err z P>|z| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------\n", "Intercept 1.0394 0.019 54.565 0.000 1.002 1.077\n", "yrbrn60 -0.0080 0.000 -24.684 0.000 -0.009 -0.007\n", "edurank -0.0096 0.020 -0.490 0.624 -0.048 0.029\n", "hincrank 0.1361 0.019 7.309 0.000 0.100 0.173\n", "tvtot -0.0154 0.003 -6.059 0.000 -0.020 -0.010\n", "rdtot -0.0151 0.002 -7.956 0.000 -0.019 -0.011\n", "nwsptot -0.0412 0.004 -10.213 0.000 -0.049 -0.033\n", "netuse -0.1124 0.002 -56.961 0.000 -0.116 -0.109\n", "==============================================================================\n", "\"\"\"" ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "formula = ('hasrelig ~ yrbrn60 + edurank + hincrank +'\n", " 'tvtot + rdtot + nwsptot + netuse')\n", "res = run_model(df, formula)\n", "res.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now using the filled variables" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Logit Regression Results
Dep. Variable: hasrelig_f No. Observations: 233856
Model: Logit Df Residuals: 233848
Method: MLE Df Model: 7
Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.02832
Time: 14:59:21 Log-Likelihood: -1.4846e+05
converged: True LL-Null: -1.5278e+05
LLR p-value: 0.000
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err z P>|z| [95.0% Conf. Int.]
Intercept 1.1082 0.017 66.598 0.000 1.076 1.141
yrbrn60_f -0.0079 0.000 -28.692 0.000 -0.008 -0.007
edurank_f -0.0123 0.017 -0.725 0.469 -0.045 0.021
hincrank_f 0.0826 0.016 5.188 0.000 0.051 0.114
tvtot_f -0.0142 0.002 -6.417 0.000 -0.019 -0.010
rdtot_f -0.0185 0.002 -11.059 0.000 -0.022 -0.015
nwsptot_f -0.0425 0.004 -11.886 0.000 -0.050 -0.035
netuse_f -0.1052 0.002 -60.862 0.000 -0.109 -0.102
" ], "text/plain": [ "\n", "\"\"\"\n", " Logit Regression Results \n", "==============================================================================\n", "Dep. Variable: hasrelig_f No. Observations: 233856\n", "Model: Logit Df Residuals: 233848\n", "Method: MLE Df Model: 7\n", "Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.02832\n", "Time: 14:59:21 Log-Likelihood: -1.4846e+05\n", "converged: True LL-Null: -1.5278e+05\n", " LLR p-value: 0.000\n", "==============================================================================\n", " coef std err z P>|z| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------\n", "Intercept 1.1082 0.017 66.598 0.000 1.076 1.141\n", "yrbrn60_f -0.0079 0.000 -28.692 0.000 -0.008 -0.007\n", "edurank_f -0.0123 0.017 -0.725 0.469 -0.045 0.021\n", "hincrank_f 0.0826 0.016 5.188 0.000 0.051 0.114\n", "tvtot_f -0.0142 0.002 -6.417 0.000 -0.019 -0.010\n", "rdtot_f -0.0185 0.002 -11.059 0.000 -0.022 -0.015\n", "nwsptot_f -0.0425 0.004 -11.886 0.000 -0.050 -0.035\n", "netuse_f -0.1052 0.002 -60.862 0.000 -0.109 -0.102\n", "==============================================================================\n", "\"\"\"" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "formula = ('hasrelig_f ~ yrbrn60_f + edurank_f + hincrank_f +'\n", " 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n", "res = run_model(df, formula)\n", "res.summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Now adding inwyr07" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Logit Regression Results
Dep. Variable: hasrelig_f No. Observations: 233856
Model: Logit Df Residuals: 233847
Method: MLE Df Model: 8
Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.03166
Time: 14:59:21 Log-Likelihood: -1.4795e+05
converged: True LL-Null: -1.5278e+05
LLR p-value: 0.000
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err z P>|z| [95.0% Conf. Int.]
Intercept 1.1406 0.017 68.130 0.000 1.108 1.173
inwyr07_f 0.0494 0.002 31.850 0.000 0.046 0.052
yrbrn60_f -0.0085 0.000 -30.617 0.000 -0.009 -0.008
edurank_f 0.0180 0.017 1.058 0.290 -0.015 0.051
hincrank_f 0.0949 0.016 5.949 0.000 0.064 0.126
tvtot_f -0.0201 0.002 -9.026 0.000 -0.024 -0.016
rdtot_f -0.0148 0.002 -8.817 0.000 -0.018 -0.012
nwsptot_f -0.0352 0.004 -9.780 0.000 -0.042 -0.028
netuse_f -0.1151 0.002 -65.319 0.000 -0.119 -0.112
" ], "text/plain": [ "\n", "\"\"\"\n", " Logit Regression Results \n", "==============================================================================\n", "Dep. Variable: hasrelig_f No. Observations: 233856\n", "Model: Logit Df Residuals: 233847\n", "Method: MLE Df Model: 8\n", "Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.03166\n", "Time: 14:59:21 Log-Likelihood: -1.4795e+05\n", "converged: True LL-Null: -1.5278e+05\n", " LLR p-value: 0.000\n", "==============================================================================\n", " coef std err z P>|z| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------\n", "Intercept 1.1406 0.017 68.130 0.000 1.108 1.173\n", "inwyr07_f 0.0494 0.002 31.850 0.000 0.046 0.052\n", "yrbrn60_f -0.0085 0.000 -30.617 0.000 -0.009 -0.008\n", "edurank_f 0.0180 0.017 1.058 0.290 -0.015 0.051\n", "hincrank_f 0.0949 0.016 5.949 0.000 0.064 0.126\n", "tvtot_f -0.0201 0.002 -9.026 0.000 -0.024 -0.016\n", "rdtot_f -0.0148 0.002 -8.817 0.000 -0.018 -0.012\n", "nwsptot_f -0.0352 0.004 -9.780 0.000 -0.042 -0.028\n", "netuse_f -0.1151 0.002 -65.319 0.000 -0.119 -0.112\n", "==============================================================================\n", "\"\"\"" ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "formula = ('hasrelig_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n", " 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n", "res = run_model(df, formula)\n", "res.summary()" ] }, { "cell_type": "code", "execution_count": 36, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(233856, -0.11506110306210458, '**')" ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "def extract_res(res, var='netuse_f'):\n", " param = res.params[var]\n", " pvalue = res.pvalues[var]\n", " stars = '**' if pvalue < 0.01 else '*' if pvalue < 0.05 else ''\n", " return res.nobs, param, stars\n", "\n", "extract_res(res)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Group by country:" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false }, "outputs": [], "source": [ "grouped = df.groupby('cntry')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run a sample country" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Logit Regression Results
Dep. Variable: hasrelig_f No. Observations: 579
Model: Logit Df Residuals: 570
Method: MLE Df Model: 8
Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.06961
Time: 14:59:22 Log-Likelihood: -372.62
converged: True LL-Null: -400.50
LLR p-value: 3.143e-09
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err z P>|z| [95.0% Conf. Int.]
Intercept -1.6075 0.753 -2.136 0.033 -3.083 -0.133
inwyr07_f -0.0307 0.305 -0.101 0.920 -0.629 0.567
yrbrn60_f -0.0372 0.006 -6.020 0.000 -0.049 -0.025
edurank_f 0.2802 0.384 0.730 0.465 -0.472 1.033
hincrank_f 0.8706 0.326 2.672 0.008 0.232 1.509
tvtot_f 0.0625 0.052 1.206 0.228 -0.039 0.164
rdtot_f -0.0137 0.036 -0.378 0.706 -0.085 0.057
nwsptot_f -0.0036 0.107 -0.034 0.973 -0.214 0.207
netuse_f 0.1488 0.044 3.352 0.001 0.062 0.236
" ], "text/plain": [ "\n", "\"\"\"\n", " Logit Regression Results \n", "==============================================================================\n", "Dep. Variable: hasrelig_f No. Observations: 579\n", "Model: Logit Df Residuals: 570\n", "Method: MLE Df Model: 8\n", "Date: Mon, 16 Nov 2015 Pseudo R-squ.: 0.06961\n", "Time: 14:59:22 Log-Likelihood: -372.62\n", "converged: True LL-Null: -400.50\n", " LLR p-value: 3.143e-09\n", "==============================================================================\n", " coef std err z P>|z| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------\n", "Intercept -1.6075 0.753 -2.136 0.033 -3.083 -0.133\n", "inwyr07_f -0.0307 0.305 -0.101 0.920 -0.629 0.567\n", "yrbrn60_f -0.0372 0.006 -6.020 0.000 -0.049 -0.025\n", "edurank_f 0.2802 0.384 0.730 0.465 -0.472 1.033\n", "hincrank_f 0.8706 0.326 2.672 0.008 0.232 1.509\n", "tvtot_f 0.0625 0.052 1.206 0.228 -0.039 0.164\n", "rdtot_f -0.0137 0.036 -0.378 0.706 -0.085 0.057\n", "nwsptot_f -0.0036 0.107 -0.034 0.973 -0.214 0.207\n", "netuse_f 0.1488 0.044 3.352 0.001 0.062 0.236\n", "==============================================================================\n", "\"\"\"" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "group = grouped.get_group('IS')\n", "run_model(group, formula).summary()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run all countries" ] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def run_logits(grouped, formula, var):\n", " for name, group in grouped:\n", " country = '%14.14s' % COUNTRY[name]\n", " model = smf.logit(formula, data=group) \n", " results = model.fit(disp=False)\n", " nobs, param, stars = extract_res(results, var=var)\n", " arrow = '<--' if stars and param > 0 else ''\n", " print(country, nobs, '%0.3g'%param, stars, arrow, sep='\\t')" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " AUSTRIA\t6918\t-0.0783\t**\t\n", " BELGIUM\t8939\t-0.0484\t**\t\n", " BULGARIA\t6064\t0.0168\t\t\n", " SWITZERLAND\t9310\t-0.037\t**\t\n", " CYPRUS\t3293\t-0.0223\t\t\n", "CZECH REPUBLIC\t8790\t-0.0164\t\t\n", " GERMANY\t11568\t-0.0243\t**\t\n", " DENMARK\t7684\t-0.0374\t**\t\n", " ESTONIA\t6960\t-0.0452\t**\t\n", " SPAIN\t9729\t-0.0657\t**\t\n", " FINLAND\t7969\t-0.0472\t**\t\n", " FRANCE\t5787\t0.000362\t\t\n", "UNITED KINGDOM\t11117\t-0.0297\t**\t\n", " GREECE\t9759\t-0.031\t\t\n", " CROATIA\t3133\t-0.0361\t\t\n", " HUNGARY\t7806\t-0.0301\t**\t\n", " IRELAND\t10472\t-0.0582\t**\t\n", " ISRAEL\t7283\t-0.0813\t**\t\n", " ICELAND\t579\t0.149\t**\t<--\n", " ITALY\t1207\t-0.0873\t**\t\n", " LITHUANIA\t1677\t-0.0398\t\t\n", " LUXEMBOURG\t3187\t-0.0581\t**\t\n", " LATVIA\t1980\t-0.0319\t\t\n", " NETHERLANDS\t9741\t-0.0457\t**\t\n", " NORWAY\t8643\t-0.0385\t**\t\n", " POLAND\t8917\t-0.119\t**\t\n", " PORTUGAL\t10302\t-0.102\t**\t\n", " ROMANIA\t2146\t-0.00946\t\t\n", "RUSSIAN FEDERA\t7544\t0.0301\t**\t<--\n", " SWEDEN\t9201\t-0.0652\t**\t\n", " SLOVENIA\t7126\t-0.0426\t**\t\n", " SLOVAKIA\t6944\t-0.0615\t**\t\n", " TURKEY\t4272\t-0.0939\t*\t\n", " UKRAINE\t7809\t-0.0601\t**\t\n" ] } ], "source": [ "formula = ('hasrelig_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n", " 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n", "\n", "run_logits(grouped, formula, 'netuse_f')" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " AUSTRIA\t6918\t0.375\t**\t<--\n", " BELGIUM\t8939\t-0.136\t\t\n", " BULGARIA\t6064\t0.0274\t\t\n", " SWITZERLAND\t9310\t-0.143\t\t\n", " CYPRUS\t3293\t0.111\t\t\n", "CZECH REPUBLIC\t8790\t-0.199\t*\t\n", " GERMANY\t11568\t0.518\t**\t<--\n", " DENMARK\t7684\t0.342\t**\t<--\n", " ESTONIA\t6960\t-0.241\t*\t\n", " SPAIN\t9729\t0.0597\t\t\n", " FINLAND\t7969\t0.0164\t\t\n", " FRANCE\t5787\t-0.0222\t\t\n", "UNITED KINGDOM\t11117\t-0.0577\t\t\n", " GREECE\t9759\t-0.758\t**\t\n", " CROATIA\t3133\t0.12\t\t\n", " HUNGARY\t7806\t-0.119\t\t\n", " IRELAND\t10472\t0.285\t**\t<--\n", " ISRAEL\t7283\t-0.1\t\t\n", " ICELAND\t579\t0.871\t**\t<--\n", " ITALY\t1207\t0.519\t*\t<--\n", " LITHUANIA\t1677\t0.655\t**\t<--\n", " LUXEMBOURG\t3187\t0.302\t*\t<--\n", " LATVIA\t1980\t0.0253\t\t\n", " NETHERLANDS\t9741\t-0.407\t**\t\n", " NORWAY\t8643\t-0.0125\t\t\n", " POLAND\t8917\t-0.156\t\t\n", " PORTUGAL\t10302\t-0.0845\t\t\n", " ROMANIA\t2146\t0.0659\t\t\n", "RUSSIAN FEDERA\t7544\t-0.0556\t\t\n", " SWEDEN\t9201\t-0.112\t\t\n", " SLOVENIA\t7126\t-0.313\t**\t\n", " SLOVAKIA\t6944\t0.0315\t\t\n", " TURKEY\t4272\t-0.119\t\t\n", " UKRAINE\t7809\t0.0702\t\t\n" ] } ], "source": [ "run_logits(grouped, formula, 'hincrank_f')" ] }, { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " AUSTRIA\t6918\t-0.682\t**\t\n", " BELGIUM\t8939\t0.142\t\t\n", " BULGARIA\t6064\t-0.247\t\t\n", " SWITZERLAND\t9310\t-0.665\t**\t\n", " CYPRUS\t3293\t-1.19\t*\t\n", "CZECH REPUBLIC\t8790\t-0.38\t**\t\n", " GERMANY\t11568\t-0.616\t**\t\n", " DENMARK\t7684\t-0.241\t**\t\n", " ESTONIA\t6960\t0.0551\t\t\n", " SPAIN\t9729\t-0.4\t**\t\n", " FINLAND\t7969\t0.127\t\t\n", " FRANCE\t5787\t0.0997\t\t\n", "UNITED KINGDOM\t11117\t0.446\t**\t<--\n", " GREECE\t9759\t-1.34\t**\t\n", " CROATIA\t3133\t-0.693\t**\t\n", " HUNGARY\t7806\t-0.604\t**\t\n", " IRELAND\t10472\t-0.198\t*\t\n", " ISRAEL\t7283\t-0.0924\t\t\n", " ICELAND\t579\t0.28\t\t\n", " ITALY\t1207\t0.192\t\t\n", " LITHUANIA\t1677\t0.13\t\t\n", " LUXEMBOURG\t3187\t-1.01\t**\t\n", " LATVIA\t1980\t0.303\t\t\n", " NETHERLANDS\t9741\t-0.412\t**\t\n", " NORWAY\t8643\t-0.106\t\t\n", " POLAND\t8917\t-1.16\t**\t\n", " PORTUGAL\t10302\t-0.715\t**\t\n", " ROMANIA\t2146\t-1.43\t**\t\n", "RUSSIAN FEDERA\t7544\t-0.0916\t\t\n", " SWEDEN\t9201\t0.561\t**\t<--\n", " SLOVENIA\t7126\t-0.413\t**\t\n", " SLOVAKIA\t6944\t-0.298\t**\t\n", " TURKEY\t4272\t-1.69\t**\t\n", " UKRAINE\t7809\t-0.255\t**\t\n" ] } ], "source": [ "run_logits(grouped, formula, 'edurank_f')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Run OLS model with `rlgdgr`: \"Regardless of whether you belong to a particular religion, how religious would you say you are?\"" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
OLS Regression Results
Dep. Variable: rlgdgr_f R-squared: 0.051
Model: OLS Adj. R-squared: 0.051
Method: Least Squares F-statistic: 1559.
Date: Mon, 16 Nov 2015 Prob (F-statistic): 0.00
Time: 14:59:26 Log-Likelihood: -5.7994e+05
No. Observations: 233856 AIC: 1.160e+06
Df Residuals: 233847 BIC: 1.160e+06
Df Model: 8
Covariance Type: nonrobust
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
coef std err t P>|t| [95.0% Conf. Int.]
Intercept 6.0342 0.022 270.724 0.000 5.991 6.078
inwyr07_f 0.0244 0.002 11.646 0.000 0.020 0.029
yrbrn60_f -0.0175 0.000 -47.085 0.000 -0.018 -0.017
edurank_f -0.2782 0.023 -12.079 0.000 -0.323 -0.233
hincrank_f -0.1569 0.022 -7.236 0.000 -0.199 -0.114
tvtot_f -0.0763 0.003 -25.339 0.000 -0.082 -0.070
rdtot_f -0.0238 0.002 -10.429 0.000 -0.028 -0.019
nwsptot_f -0.0668 0.005 -13.715 0.000 -0.076 -0.057
netuse_f -0.1367 0.002 -57.186 0.000 -0.141 -0.132
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Omnibus: 31835.411 Durbin-Watson: 1.760
Prob(Omnibus): 0.000 Jarque-Bera (JB): 8476.913
Skew: -0.134 Prob(JB): 0.00
Kurtosis: 2.107 Cond. No. 85.0
" ], "text/plain": [ "\n", "\"\"\"\n", " OLS Regression Results \n", "==============================================================================\n", "Dep. Variable: rlgdgr_f R-squared: 0.051\n", "Model: OLS Adj. R-squared: 0.051\n", "Method: Least Squares F-statistic: 1559.\n", "Date: Mon, 16 Nov 2015 Prob (F-statistic): 0.00\n", "Time: 14:59:26 Log-Likelihood: -5.7994e+05\n", "No. Observations: 233856 AIC: 1.160e+06\n", "Df Residuals: 233847 BIC: 1.160e+06\n", "Df Model: 8 \n", "Covariance Type: nonrobust \n", "==============================================================================\n", " coef std err t P>|t| [95.0% Conf. Int.]\n", "------------------------------------------------------------------------------\n", "Intercept 6.0342 0.022 270.724 0.000 5.991 6.078\n", "inwyr07_f 0.0244 0.002 11.646 0.000 0.020 0.029\n", "yrbrn60_f -0.0175 0.000 -47.085 0.000 -0.018 -0.017\n", "edurank_f -0.2782 0.023 -12.079 0.000 -0.323 -0.233\n", "hincrank_f -0.1569 0.022 -7.236 0.000 -0.199 -0.114\n", "tvtot_f -0.0763 0.003 -25.339 0.000 -0.082 -0.070\n", "rdtot_f -0.0238 0.002 -10.429 0.000 -0.028 -0.019\n", "nwsptot_f -0.0668 0.005 -13.715 0.000 -0.076 -0.057\n", "netuse_f -0.1367 0.002 -57.186 0.000 -0.141 -0.132\n", "==============================================================================\n", "Omnibus: 31835.411 Durbin-Watson: 1.760\n", "Prob(Omnibus): 0.000 Jarque-Bera (JB): 8476.913\n", "Skew: -0.134 Prob(JB): 0.00\n", "Kurtosis: 2.107 Cond. No. 85.0\n", "==============================================================================\n", "\n", "Warnings:\n", "[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n", "\"\"\"" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + hincrank_f +'\n", " 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')\n", "model = smf.ols(formula, data=df) \n", "results = model.fit(disp=False)\n", "results.summary()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [], "source": [ "def run_ols(grouped, formula, var):\n", " for name, group in grouped:\n", " model = smf.ols(formula, data=group) \n", " results = model.fit(disp=False)\n", " nobs, param, stars = extract_res(results, var=var)\n", " arrow = '<--' if stars and param > 0 else ''\n", " print(name, len(group), '%0.3g '%param, stars, arrow, sep='\\t')" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t-0.0822 \t**\t\n", "BE\t8939\t-0.108 \t**\t\n", "BG\t6064\t0.00266 \t\t\n", "CH\t9310\t-0.0905 \t**\t\n", "CY\t3293\t-0.0555 \t**\t\n", "CZ\t8790\t0.00954 \t\t\n", "DE\t11568\t-0.0563 \t**\t\n", "DK\t7684\t-0.0366 \t**\t\n", "EE\t6960\t-0.0176 \t\t\n", "ES\t9729\t-0.0823 \t**\t\n", "FI\t7969\t-0.0187 \t\t\n", "FR\t5787\t-0.0434 \t**\t\n", "GB\t11117\t-0.0301 \t**\t\n", "GR\t9759\t-0.0619 \t**\t\n", "HR\t3133\t-0.0798 \t**\t\n", "HU\t7806\t-0.00543 \t\t\n", "IE\t10472\t-0.0801 \t**\t\n", "IL\t7283\t-0.31 \t**\t\n", "IS\t579\t0.0105 \t\t\n", "IT\t1207\t-0.142 \t**\t\n", "LT\t1677\t-0.038 \t\t\n", "LU\t3187\t-0.145 \t**\t\n", "LV\t1980\t-0.0209 \t\t\n", "NL\t9741\t-0.0827 \t**\t\n", "NO\t8643\t-0.065 \t**\t\n", "PL\t8917\t-0.0682 \t**\t\n", "PT\t10302\t-0.0538 \t**\t\n", "RO\t2146\t-0.0302 \t\t\n", "RU\t7544\t0.0284 \t*\t<--\n", "SE\t9201\t-0.0942 \t**\t\n", "SI\t7126\t-0.0718 \t**\t\n", "SK\t6944\t-0.034 \t*\t\n", "TR\t4272\t-0.0322 \t\t\n", "UA\t7809\t-0.0462 \t**\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'netuse_f')" ] }, { "cell_type": "code", "execution_count": 46, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t-0.253 \t*\t\n", "BE\t8939\t-0.313 \t*\t\n", "BG\t6064\t-0.646 \t**\t\n", "CH\t9310\t-0.316 \t**\t\n", "CY\t3293\t-0.639 \t**\t\n", "CZ\t8790\t-0.438 \t**\t\n", "DE\t11568\t-0.624 \t**\t\n", "DK\t7684\t-0.374 \t**\t\n", "EE\t6960\t0.478 \t**\t<--\n", "ES\t9729\t-0.429 \t**\t\n", "FI\t7969\t0.175 \t\t\n", "FR\t5787\t-0.464 \t**\t\n", "GB\t11117\t0.428 \t**\t<--\n", "GR\t9759\t-1.68 \t**\t\n", "HR\t3133\t-1.22 \t**\t\n", "HU\t7806\t-0.938 \t**\t\n", "IE\t10472\t-0.442 \t**\t\n", "IL\t7283\t-0.904 \t**\t\n", "IS\t579\t0.0494 \t\t\n", "IT\t1207\t0.0235 \t\t\n", "LT\t1677\t-0.00891 \t\t\n", "LU\t3187\t-1.95 \t**\t\n", "LV\t1980\t0.263 \t\t\n", "NL\t9741\t-0.157 \t\t\n", "NO\t8643\t0.32 \t**\t<--\n", "PL\t8917\t-1.29 \t**\t\n", "PT\t10302\t-0.864 \t**\t\n", "RO\t2146\t-0.432 \t*\t\n", "RU\t7544\t-0.0769 \t\t\n", "SE\t9201\t0.336 \t**\t<--\n", "SI\t7126\t-0.964 \t**\t\n", "SK\t6944\t-0.79 \t**\t\n", "TR\t4272\t-1.16 \t**\t\n", "UA\t7809\t-0.591 \t**\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'edurank_f')" ] }, { "cell_type": "code", "execution_count": 47, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t0.162 \t\t\n", "BE\t8939\t-0.359 \t**\t\n", "BG\t6064\t-0.175 \t\t\n", "CH\t9310\t-0.29 \t**\t\n", "CY\t3293\t-0.37 \t**\t\n", "CZ\t8790\t-0.41 \t**\t\n", "DE\t11568\t0.372 \t**\t<--\n", "DK\t7684\t0.128 \t\t\n", "EE\t6960\t-0.44 \t**\t\n", "ES\t9729\t-0.227 \t*\t\n", "FI\t7969\t-0.271 \t*\t\n", "FR\t5787\t-0.806 \t**\t\n", "GB\t11117\t-0.529 \t**\t\n", "GR\t9759\t-0.511 \t**\t\n", "HR\t3133\t0.124 \t\t\n", "HU\t7806\t-0.078 \t\t\n", "IE\t10472\t-0.0518 \t\t\n", "IL\t7283\t-0.333 \t*\t\n", "IS\t579\t0.0341 \t\t\n", "IT\t1207\t0.104 \t\t\n", "LT\t1677\t-0.531 \t*\t\n", "LU\t3187\t-0.382 \t*\t\n", "LV\t1980\t-0.274 \t\t\n", "NL\t9741\t-0.853 \t**\t\n", "NO\t8643\t-0.192 \t\t\n", "PL\t8917\t-0.355 \t**\t\n", "PT\t10302\t-0.0868 \t\t\n", "RO\t2146\t-0.178 \t\t\n", "RU\t7544\t-0.0596 \t\t\n", "SE\t9201\t-0.249 \t*\t\n", "SI\t7126\t-0.876 \t**\t\n", "SK\t6944\t0.0426 \t\t\n", "TR\t4272\t-0.201 \t\t\n", "UA\t7809\t-0.0463 \t\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'hincrank_f')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Let's see what happens if we add quadratic terms for edurank and hincrank:" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df['edurank_f2'] = df.edurank_f**2\n", "df['hincrank_f2'] = df.hincrank_f**2" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [ "formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + edurank_f2 + hincrank_f +'\n", " 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')" ] }, { "cell_type": "code", "execution_count": 50, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t-0.669 \t\t\n", "BE\t8939\t0.186 \t\t\n", "BG\t6064\t-2.81 \t**\t\n", "CH\t9310\t-0.863 \t*\t\n", "CY\t3293\t-1.07 \t\t\n", "CZ\t8790\t-2.53 \t**\t\n", "DE\t11568\t-2.79 \t**\t\n", "DK\t7684\t-0.748 \t\t\n", "EE\t6960\t-0.671 \t\t\n", "ES\t9729\t-1.55 \t**\t\n", "FI\t7969\t0.0851 \t\t\n", "FR\t5787\t-1.85 \t**\t\n", "GB\t11117\t-2.16 \t**\t\n", "GR\t9759\t-1.93 \t**\t\n", "HR\t3133\t-1.85 \t**\t\n", "HU\t7806\t-3.03 \t**\t\n", "IE\t10472\t-0.237 \t\t\n", "IL\t7283\t-0.533 \t\t\n", "IS\t579\t-1.68 \t\t\n", "IT\t1207\t-2.43 \t*\t\n", "LT\t1677\t-1.82 \t*\t\n", "LU\t3187\t-2.46 \t**\t\n", "LV\t1980\t-1.74 \t*\t\n", "NL\t9741\t-0.285 \t\t\n", "NO\t8643\t-1.05 \t*\t\n", "PL\t8917\t-2.44 \t**\t\n", "PT\t10302\t-1.2 \t**\t\n", "RO\t2146\t-1.35 \t\t\n", "RU\t7544\t-1.06 \t*\t\n", "SE\t9201\t-0.788 \t\t\n", "SI\t7126\t-2.03 \t**\t\n", "SK\t6944\t-2.73 \t**\t\n", "TR\t4272\t-0.9 \t\t\n", "UA\t7809\t-0.66 \t\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'edurank_f')" ] }, { "cell_type": "code", "execution_count": 51, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t0.412 \t\t\n", "BE\t8939\t-0.5 \t\t\n", "BG\t6064\t2.13 \t**\t<--\n", "CH\t9310\t0.55 \t\t\n", "CY\t3293\t0.427 \t\t\n", "CZ\t8790\t2.09 \t**\t<--\n", "DE\t11568\t2.15 \t**\t<--\n", "DK\t7684\t0.373 \t\t\n", "EE\t6960\t1.15 \t*\t<--\n", "ES\t9729\t1.09 \t**\t<--\n", "FI\t7969\t0.0867 \t\t\n", "FR\t5787\t1.36 \t*\t<--\n", "GB\t11117\t2.56 \t**\t<--\n", "GR\t9759\t0.244 \t\t\n", "HR\t3133\t0.62 \t\t\n", "HU\t7806\t2.09 \t**\t<--\n", "IE\t10472\t-0.204 \t\t\n", "IL\t7283\t-0.37 \t\t\n", "IS\t579\t1.66 \t\t\n", "IT\t1207\t2.36 \t*\t<--\n", "LT\t1677\t1.81 \t*\t<--\n", "LU\t3187\t0.513 \t\t\n", "LV\t1980\t2.02 \t*\t<--\n", "NL\t9741\t0.126 \t\t\n", "NO\t8643\t1.34 \t**\t<--\n", "PL\t8917\t1.14 \t**\t<--\n", "PT\t10302\t0.324 \t\t\n", "RO\t2146\t0.896 \t\t\n", "RU\t7544\t0.979 \t*\t<--\n", "SE\t9201\t1.11 \t**\t<--\n", "SI\t7126\t1.08 \t*\t<--\n", "SK\t6944\t1.94 \t**\t<--\n", "TR\t4272\t-0.257 \t\t\n", "UA\t7809\t0.0687 \t\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'edurank_f2')" ] }, { "cell_type": "code", "execution_count": 52, "metadata": { "collapsed": true }, "outputs": [], "source": [ "formula = ('rlgdgr_f ~ inwyr07_f + yrbrn60_f + edurank_f + edurank_f2 + '\n", " 'hincrank_f + hincrank_f2 + '\n", " 'tvtot_f + rdtot_f + nwsptot_f + netuse_f')" ] }, { "cell_type": "code", "execution_count": 53, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t1.95 \t**\t<--\n", "BE\t8939\t-0.782 \t\t\n", "BG\t6064\t-0.412 \t\t\n", "CH\t9310\t0.417 \t\t\n", "CY\t3293\t-1.34 \t**\t\n", "CZ\t8790\t-1.44 \t**\t\n", "DE\t11568\t0.149 \t\t\n", "DK\t7684\t1.5 \t**\t<--\n", "EE\t6960\t-1.25 \t**\t\n", "ES\t9729\t0.182 \t\t\n", "FI\t7969\t0.265 \t\t\n", "FR\t5787\t-1.96 \t**\t\n", "GB\t11117\t-0.107 \t\t\n", "GR\t9759\t-0.63 \t*\t\n", "HR\t3133\t1.97 \t**\t<--\n", "HU\t7806\t-0.0722 \t\t\n", "IE\t10472\t0.416 \t\t\n", "IL\t7283\t-0.525 \t\t\n", "IS\t579\t-0.719 \t\t\n", "IT\t1207\t-0.197 \t\t\n", "LT\t1677\t2.34 \t**\t<--\n", "LU\t3187\t-1.03 \t\t\n", "LV\t1980\t-0.262 \t\t\n", "NL\t9741\t-0.0917 \t\t\n", "NO\t8643\t-0.55 \t\t\n", "PL\t8917\t-0.273 \t\t\n", "PT\t10302\t0.284 \t\t\n", "RO\t2146\t-0.35 \t\t\n", "RU\t7544\t-0.622 \t\t\n", "SE\t9201\t-0.787 \t*\t\n", "SI\t7126\t0.0244 \t\t\n", "SK\t6944\t0.828 \t\t\n", "TR\t4272\t-1.16 \t*\t\n", "UA\t7809\t0.304 \t\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'hincrank_f')" ] }, { "cell_type": "code", "execution_count": 54, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t-0.782 \t\t\n", "BE\t8939\t0.232 \t\t\n", "BG\t6064\t-2.79 \t**\t\n", "CH\t9310\t-0.901 \t*\t\n", "CY\t3293\t-0.992 \t\t\n", "CZ\t8790\t-2.44 \t**\t\n", "DE\t11568\t-2.77 \t**\t\n", "DK\t7684\t-0.859 \t*\t\n", "EE\t6960\t-0.641 \t\t\n", "ES\t9729\t-1.59 \t**\t\n", "FI\t7969\t0.0251 \t\t\n", "FR\t5787\t-1.8 \t**\t\n", "GB\t11117\t-2.19 \t**\t\n", "GR\t9759\t-1.92 \t**\t\n", "HR\t3133\t-2.26 \t**\t\n", "HU\t7806\t-3.03 \t**\t\n", "IE\t10472\t-0.267 \t\t\n", "IL\t7283\t-0.519 \t\t\n", "IS\t579\t-1.57 \t\t\n", "IT\t1207\t-2.39 \t*\t\n", "LT\t1677\t-2.11 \t*\t\n", "LU\t3187\t-2.42 \t**\t\n", "LV\t1980\t-1.74 \t*\t\n", "NL\t9741\t-0.375 \t\t\n", "NO\t8643\t-1.01 \t*\t\n", "PL\t8917\t-2.44 \t**\t\n", "PT\t10302\t-1.22 \t**\t\n", "RO\t2146\t-1.32 \t\t\n", "RU\t7544\t-1.05 \t*\t\n", "SE\t9201\t-0.742 \t\t\n", "SI\t7126\t-2.16 \t**\t\n", "SK\t6944\t-2.76 \t**\t\n", "TR\t4272\t-0.774 \t\t\n", "UA\t7809\t-0.668 \t\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'edurank_f')" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AT\t6918\t-0.0812 \t**\t\n", "BE\t8939\t-0.107 \t**\t\n", "BG\t6064\t-0.00442 \t\t\n", "CH\t9310\t-0.0913 \t**\t\n", "CY\t3293\t-0.0597 \t**\t\n", "CZ\t8790\t0.00383 \t\t\n", "DE\t11568\t-0.0567 \t**\t\n", "DK\t7684\t-0.0381 \t**\t\n", "EE\t6960\t-0.0199 \t\t\n", "ES\t9729\t-0.0874 \t**\t\n", "FI\t7969\t-0.0187 \t\t\n", "FR\t5787\t-0.0433 \t**\t\n", "GB\t11117\t-0.0326 \t**\t\n", "GR\t9759\t-0.0635 \t**\t\n", "HR\t3133\t-0.0762 \t**\t\n", "HU\t7806\t-0.0145 \t\t\n", "IE\t10472\t-0.0799 \t**\t\n", "IL\t7283\t-0.31 \t**\t\n", "IS\t579\t0.0128 \t\t\n", "IT\t1207\t-0.158 \t**\t\n", "LT\t1677\t-0.0403 \t\t\n", "LU\t3187\t-0.146 \t**\t\n", "LV\t1980\t-0.0254 \t\t\n", "NL\t9741\t-0.0843 \t**\t\n", "NO\t8643\t-0.0622 \t**\t\n", "PL\t8917\t-0.0759 \t**\t\n", "PT\t10302\t-0.0556 \t**\t\n", "RO\t2146\t-0.0339 \t\t\n", "RU\t7544\t0.0223 \t\t\n", "SE\t9201\t-0.0921 \t**\t\n", "SI\t7126\t-0.0761 \t**\t\n", "SK\t6944\t-0.0405 \t**\t\n", "TR\t4272\t-0.0324 \t\t\n", "UA\t7809\t-0.0466 \t**\t\n" ] } ], "source": [ "run_ols(grouped, formula, 'netuse_f')" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }