{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import numpy as np\n", "import statsmodels.api as sm\n", "import statsmodels.formula.api as smf\n", "import pandas\n" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Loading required package: lattice\n", "Loading required package: Matrix\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%load_ext rpy2.ipython\n", "%R library(lme4)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Linear mixed model fit by maximum likelihood ['lmerMod']\n", "Formula: logdtf ~ (1 | subspecies) + (1 | inventoryID) \n", " Data: data \n", "\n", " AIC BIC logLik deviance \n", "-118.0267 -106.4653 63.0134 -126.0267 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev.\n", " inventoryID (Intercept) 0.033431 0.1828 \n", " subspecies (Intercept) 0.094382 0.3072 \n", " Residual 0.009644 0.0982 \n", "Number of obs: 133, groups: inventoryID, 36; subspecies, 9\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 3.9756 0.1092 36.42\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R data <- read.csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', sep='\\t')\n", "%R data <- subset(data, !is.na(dtf))\n", "%R data$logdtf <- log(data$dtf)\n", "%R fit <- lmer(logdtf ~ (1|subspecies)+(1|inventoryID), data=data, REML=F)\n", "%R print(summary(fit))\n" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: MixedLM Dependent Variable: logdtf
No. Observations: 133 Method: ML
No. Groups: 9 Scale: 0.0096
Min. group size: 4 Likelihood: 63.0134
Max. group size: 32 Converged: Yes
Mean group size: 14.8
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
Intercept 3.976 0.109 36.415 0.000 3.762 4.190
inventoryID RE 0.033 0.114
subspecies RE 0.094 0.554
" ], "text/plain": [ "\n", "\"\"\"\n", " Mixed Linear Model Regression Results\n", "========================================================\n", "Model: MixedLM Dependent Variable: logdtf \n", "No. Observations: 133 Method: ML \n", "No. Groups: 9 Scale: 0.0096 \n", "Min. group size: 4 Likelihood: 63.0134\n", "Max. group size: 32 Converged: Yes \n", "Mean group size: 14.8 \n", "--------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "--------------------------------------------------------\n", "Intercept 3.976 0.109 36.415 0.000 3.762 4.190\n", "inventoryID RE 0.033 0.114 \n", "subspecies RE 0.094 0.554 \n", "========================================================\n", "\n", "\"\"\"" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pandas.read_csv('http://www.stat.wisc.edu/~ane/st572/data/floweringTime.txt', delimiter='\\t', index_col=0)\n", "data = data.dropna()\n", "data['logdtf'] = np.log(data['dtf'])\n", "vcf = {\"subspecies\" : \"0 + C(subspecies)\", \"inventoryID\" : \"0 + C(inventoryID)\"}\n", "\n", "model = sm.MixedLM.from_formula('logdtf ~ 1', groups=\"subspecies\", vc_formula=vcf, data=data)\n", "result = model.fit(reml=False)\n", "result.summary()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Linear mixed model fit by maximum likelihood ['lmerMod']\n", "Formula: logdtf ~ (1 | subspecies) \n", " Data: data \n", "\n", " AIC BIC logLik deviance \n", "-23.3853 -14.7142 14.6926 -29.3853 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev.\n", " subspecies (Intercept) 0.10092 0.3177 \n", " Residual 0.03709 0.1926 \n", "Number of obs: 133, groups: subspecies, 9\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 3.959 0.108 36.66\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R fit.noinventory <- update(fit, .~.- (1|inventoryID))\n", "%R print(summary(fit.noinventory))\n" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: MixedLM Dependent Variable: logdtf
No. Observations: 133 Method: ML
No. Groups: 9 Scale: 0.0371
Min. group size: 4 Likelihood: 14.6926
Max. group size: 32 Converged: Yes
Mean group size: 14.8
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
Intercept 3.959 0.108 36.656 0.000 3.747 4.171
subspecies RE 0.101 0.270
" ], "text/plain": [ "\n", "\"\"\"\n", " Mixed Linear Model Regression Results\n", "=======================================================\n", "Model: MixedLM Dependent Variable: logdtf \n", "No. Observations: 133 Method: ML \n", "No. Groups: 9 Scale: 0.0371 \n", "Min. group size: 4 Likelihood: 14.6926\n", "Max. group size: 32 Converged: Yes \n", "Mean group size: 14.8 \n", "-------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "-------------------------------------------------------\n", "Intercept 3.959 0.108 36.656 0.000 3.747 4.171\n", "subspecies RE 0.101 0.270 \n", "=======================================================\n", "\n", "\"\"\"" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "vcf = {\"subspecies\" : \"1 + C(subspecies)\"}\n", "model_noinventory = sm.MixedLM.from_formula('logdtf ~ 1', groups=\"subspecies\", vc_formula=vcf, data=data)\n", "result_noinventory = model_noinventory.fit(reml=False)\n", "result_noinventory.summary()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Data: data\n", "Models:\n", "fit.noinventory: logdtf ~ (1 | subspecies)\n", "fit: logdtf ~ (1 | subspecies) + (1 | inventoryID)\n", " Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)\n", "fit.noinventory 3 -23.385 -14.714 14.693 -29.385 \n", "fit 4 -118.027 -106.465 63.013 -126.027 96.641 1 < 2.2e-16\n", " \n", "fit.noinventory \n", "fit ***\n", "---\n", "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R print(anova(fit, fit.noinventory))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "(96.641440367875362, 8.3090825023326631e-23, 1)" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.compare_lr_test(result_noinventory)\n" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Linear mixed model fit by REML ['lmerMod']\n", "Formula: score ~ phase * time * language + (1 | pair) + (1 | child) \n", " Data: data \n", "\n", "REML criterion at convergence: 1172.664 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev.\n", " child (Intercept) 26.58 5.155 \n", " pair (Intercept) 0.00 0.000 \n", " Residual 92.59 9.622 \n", "Number of obs: 160, groups: child, 40; pair, 20\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 24.000 2.441 9.832\n", "phasepre-switch 6.500 3.043 2.136\n", "timeend 34.250 3.043 11.256\n", "languagemonolingual -1.000 3.452 -0.290\n", "phasepre-switch:timeend -5.500 4.303 -1.278\n", "phasepre-switch:languagemonolingual -5.750 4.303 -1.336\n", "timeend:languagemonolingual -17.500 4.303 -4.067\n", "phasepre-switch:timeend:languagemonolingual 20.250 6.086 3.327\n", "\n", "Correlation of Fixed Effects:\n", " (Intr) phspr- timend lnggmn phspr-swtch:t phspr-swtch:l tmnd:l\n", "phspr-swtch -0.623 \n", "timeend -0.623 0.500 \n", "langgmnlngl -0.707 0.441 0.441 \n", "phspr-swtch:t 0.441 -0.707 -0.707 -0.312 \n", "phspr-swtch:l 0.441 -0.707 -0.354 -0.623 0.500 \n", "tmnd:lnggmn 0.441 -0.354 -0.707 -0.623 0.500 0.500 \n", "phspr-swt:: -0.312 0.500 0.500 0.441 -0.707 -0.707 -0.707\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R data <- read.table('http://www.stat.wisc.edu/~ane/st572/data/bilingual.txt', header=T)\n", "%R fit = lmer(score ~ phase * time * language + (1|pair) + (1|child), data=data)\n", "%R print(summary(fit))" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Mixed Linear Model Regression Results\n", "====================================================================================================\n", "Model: MixedLM Dependent Variable: score \n", "No. Observations: 160 Method: ML \n", "No. Groups: 20 Scale: 87.9611 \n", "Min. group size: 8 Likelihood: -600.4745\n", "Max. group size: 8 Converged: Yes \n", "Mean group size: 8.0 \n", "----------------------------------------------------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "----------------------------------------------------------------------------------------------------\n", "Intercept 24.000 2.379 10.087 0.000 19.337 28.663\n", "phase[T.pre-switch] 6.500 2.966 2.192 0.028 0.687 12.313\n", "time[T.end] 34.250 2.966 11.548 0.000 28.437 40.063\n", "language[T.monolingual] -1.000 3.365 -0.297 0.766 -7.595 5.595\n", "phase[T.pre-switch]:time[T.end] -5.500 4.194 -1.311 0.190 -13.721 2.721\n", "phase[T.pre-switch]:language[T.monolingual] -5.750 4.194 -1.371 0.170 -13.971 2.471\n", "time[T.end]:language[T.monolingual] -17.500 4.194 -4.172 0.000 -25.721 -9.279\n", "phase[T.pre-switch]:time[T.end]:language[T.monolingual] 20.250 5.932 3.414 0.001 8.624 31.876\n", "child RE 25.250 1.639 \n", "pair RE 0.000 1.133 \n", "====================================================================================================\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1906: ConvergenceWarning: The MLE may be on the boundary of the parameter space.\n", " warnings.warn(msg, ConvergenceWarning)\n" ] } ], "source": [ "data = pandas.read_table('bilingual.txt', skipinitialspace=True)\n", "vcf = {\"pair\" : \"0 + C(pair)\", \"child\" : \"0 + C(child)\"}\n", "model = sm.MixedLM.from_formula('score ~ phase*time*language', groups='pair', vc_formula=vcf, data=data)\n", "result = model.fit(reml=False)\n", "print (result.summary())" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Linear mixed model fit by REML ['lmerMod']\n", "Formula: score ~ phase + time + language + (1 | pair) + (1 | child) + phase:time + phase:language + time:language \n", " Data: data \n", "\n", "REML criterion at convergence: 1188.769 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev.\n", " child (Intercept) 24.55 4.955 \n", " pair (Intercept) 0.00 0.000 \n", " Residual 100.70 10.035 \n", "Number of obs: 160, groups: child, 40; pair, 20\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 26.531 2.373 11.178\n", "phasepre-switch 1.437 2.748 0.523\n", "timeend 29.187 2.748 10.621\n", "languagemonolingual -6.063 3.163 -1.916\n", "phasepre-switch:timeend 4.625 3.173 1.457\n", "phasepre-switch:languagemonolingual 4.375 3.173 1.379\n", "timeend:languagemonolingual -7.375 3.173 -2.324\n", "\n", "Correlation of Fixed Effects:\n", " (Intr) phspr- timend lnggmn phspr-swtch:t phspr-swtch:l\n", "phspr-swtch -0.579 \n", "timeend -0.579 0.333 \n", "langgmnlngl -0.666 0.290 0.290 \n", "phspr-swtch:t 0.334 -0.577 -0.577 0.000 \n", "phspr-swtch:l 0.334 -0.577 0.000 -0.502 0.000 \n", "tmnd:lnggmn 0.334 0.000 -0.577 -0.502 0.000 0.000 \n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "#%R fitno3way = lmer(score ~ (1|pair) + (1|child), data=data)\n", "%R fitno3way <- update(fit, .~. - phase:time:language)\n", "%R print(summary(fitno3way))\n" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " Mixed Linear Model Regression Results\n", "=======================================================================================\n", "Model: MixedLM Dependent Variable: score \n", "No. Observations: 160 Method: ML \n", "No. Groups: 20 Scale: 96.5032 \n", "Min. group size: 8 Likelihood: -606.0360\n", "Max. group size: 8 Converged: Yes \n", "Mean group size: 8.0 \n", "---------------------------------------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "---------------------------------------------------------------------------------------\n", "Intercept 26.531 2.319 11.441 0.000 21.986 31.076\n", "phase[T.pre-switch] 1.438 2.690 0.534 0.593 -3.835 6.710\n", "time[T.end] 29.188 2.690 10.849 0.000 23.915 34.460\n", "language[T.monolingual] -6.062 3.090 -1.962 0.050 -12.119 -0.006\n", "phase[T.pre-switch]:time[T.end] 4.625 3.106 1.489 0.137 -1.464 10.714\n", "phase[T.pre-switch]:language[T.monolingual] 4.375 3.106 1.408 0.159 -1.714 10.464\n", "time[T.end]:language[T.monolingual] -7.375 3.106 -2.374 0.018 -13.464 -1.286\n", "child RE 23.115 1.565 \n", "pair RE 0.000 1.081 \n", "=======================================================================================\n", "\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/regression/mixed_linear_model.py:1906: ConvergenceWarning: The MLE may be on the boundary of the parameter space.\n", " warnings.warn(msg, ConvergenceWarning)\n" ] } ], "source": [ "modelno3way = sm.MixedLM.from_formula('score ~ phase*time*language - phase:language:time', groups='pair', vc_formula=vcf, data=data)\n", "resultno3way = modelno3way.fit(reml=False)\n", "print (resultno3way.summary())" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(11.122879559027069, 0.00085269302140603945, 1)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result.compare_lr_test(resultno3way)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Data: data\n", "Models:\n", "fitno3way: score ~ phase + time + language + (1 | pair) + (1 | child) + \n", "fitno3way: phase:time + phase:language + time:language\n", "fit: score ~ phase * time * language + (1 | pair) + (1 | child)\n", " Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) \n", "fitno3way 10 1232.1 1262.8 -606.04 1212.1 \n", "fit 11 1223.0 1256.8 -600.47 1201.0 11.123 1 0.0008527 ***\n", "---\n", "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R print(anova(fit, fitno3way))\n" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "\n", "[IntVe..., IntVe..., IntVe..., ..., IntVe..., IntVe..., IntVe...]\n", " sex: \n", " \n", "[ 1, 0, 1, ..., 0, 0, 1]\n", " minority: \n", " \n", "[ 1, 1, 1, ..., 0, 0, 0]\n", " mathkind: \n", " \n", "[ 448, 460, 511, ..., 485, 473, 453]\n", " ...\n", " sex: \n", " \n", "[ 160, 160, 160, ..., 96, 239, 239]\n", " minority: \n", " \n", "[ 1, 1, 1, ..., 107, 107, 107]\n", " mathkind: \n", " \n", "[ 1, 2, 3, ..., 1188, 1189, 1190]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "%R data <- read.csv('http://www-personal.umich.edu/~bwest/classroom.csv')\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Linear mixed model fit by maximum likelihood ['lmerMod']\n", "Formula: mathgain ~ 1 + (1 | schoolid) + (1 | classid) \n", " Data: data \n", "\n", " AIC BIC logLik deviance \n", "11779.331 11799.658 -5885.666 11771.331 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev.\n", " classid (Intercept) 99.14 9.957 \n", " schoolid (Intercept) 75.38 8.682 \n", " Residual 1028.30 32.067 \n", "Number of obs: 1190, groups: classid, 312; schoolid, 107\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 57.429 1.436 40\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R model <- lmer(mathgain ~ 1 + (1|schoolid) + (1|classid), REML=F, data=data)\n", "%R print(summary(model))" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: MixedLM Dependent Variable: mathgain
No. Observations: 1190 Method: ML
No. Groups: 107 Scale: 1050.5873
Min. group size: 2 Likelihood: -5887.0811
Max. group size: 31 Converged: Yes
Mean group size: 11.1
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
Intercept 57.296 1.459 39.263 0.000 54.436 60.156
classid RE 87.878 1.451
schoolid RE 11.238 1.763
" ], "text/plain": [ "\n", "\"\"\"\n", " Mixed Linear Model Regression Results\n", "========================================================\n", "Model: MixedLM Dependent Variable: mathgain \n", "No. Observations: 1190 Method: ML \n", "No. Groups: 107 Scale: 1050.5873 \n", "Min. group size: 2 Likelihood: -5887.0811\n", "Max. group size: 31 Converged: Yes \n", "Mean group size: 11.1 \n", "--------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "--------------------------------------------------------\n", "Intercept 57.296 1.459 39.263 0.000 54.436 60.156\n", "classid RE 87.878 1.451 \n", "schoolid RE 11.238 1.763 \n", "========================================================\n", "\n", "\"\"\"" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pandas.read_csv('http://www-personal.umich.edu/~bwest/classroom.csv')\n", "vcf = {\"schoolid\" : \"1 + C(schoolid)\", \"classid\" : \"1 + C(classid)\"}\n", "\n", "model = sm.MixedLM.from_formula('mathgain ~ 1', groups=\"schoolid\", vc_formula=vcf, data=data)\n", "result = model.fit(reml=False)\n", "result.summary()" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Linear mixed model fit by maximum likelihood ['lmerMod']\n", "Formula: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) + (1 | classid) \n", " Data: data \n", "\n", " AIC BIC logLik deviance \n", "11406.963 11447.617 -5695.481 11390.963 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev.\n", " classid (Intercept) 83.20 9.122 \n", " schoolid (Intercept) 72.67 8.524 \n", " Residual 732.19 27.059 \n", "Number of obs: 1190, groups: classid, 312; schoolid, 107\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 282.71600 10.82832 26.109\n", "mathkind -0.46965 0.02222 -21.139\n", "sex -1.24898 1.65469 -0.755\n", "minority -8.25670 2.33081 -3.542\n", "ses 5.34235 1.23840 4.314\n", "\n", "Correlation of Fixed Effects:\n", " (Intr) mthknd sex minrty\n", "mathkind -0.978 \n", "sex -0.044 -0.032 \n", "minority -0.307 0.164 -0.018 \n", "ses 0.140 -0.169 0.019 0.164\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R model2 <- lmer(mathgain ~ mathkind + sex + minority + ses +(1|schoolid) + (1|classid), data=data, REML = F)\n", "%R print(summary(model2))" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: MixedLM Dependent Variable: mathgain
No. Observations: 1190 Method: ML
No. Groups: 107 Scale: 755.8745
Min. group size: 2 Likelihood: -5698.6892
Max. group size: 31 Converged: Yes
Mean group size: 11.1
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
Intercept 281.779 10.911 25.826 0.000 260.394 303.163
mathkind -0.469 0.022 -20.951 0.000 -0.513 -0.425
sex -1.259 1.665 -0.756 0.450 -4.523 2.005
minority -7.625 2.358 -3.234 0.001 -12.246 -3.003
ses 5.449 1.246 4.373 0.000 3.007 7.891
classid RE 66.430 1.234
schoolid RE 26.071 1.540
" ], "text/plain": [ "\n", "\"\"\"\n", " Mixed Linear Model Regression Results\n", "==========================================================\n", "Model: MixedLM Dependent Variable: mathgain \n", "No. Observations: 1190 Method: ML \n", "No. Groups: 107 Scale: 755.8745 \n", "Min. group size: 2 Likelihood: -5698.6892\n", "Max. group size: 31 Converged: Yes \n", "Mean group size: 11.1 \n", "----------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "----------------------------------------------------------\n", "Intercept 281.779 10.911 25.826 0.000 260.394 303.163\n", "mathkind -0.469 0.022 -20.951 0.000 -0.513 -0.425\n", "sex -1.259 1.665 -0.756 0.450 -4.523 2.005\n", "minority -7.625 2.358 -3.234 0.001 -12.246 -3.003\n", "ses 5.449 1.246 4.373 0.000 3.007 7.891\n", "classid RE 66.430 1.234 \n", "schoolid RE 26.071 1.540 \n", "==========================================================\n", "\n", "\"\"\"" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data = pandas.read_csv('http://www-personal.umich.edu/~bwest/classroom.csv')\n", "vcf = {\"schoolid\" : \"1 + C(schoolid)\", \"classid\" : \"1 + C(classid)\"}\n", "\n", "model2 = sm.MixedLM.from_formula('mathgain ~ mathkind + sex + minority + ses', groups=\"schoolid\", vc_formula=vcf, data=data)\n", "result2 = model2.fit(reml=False)\n", "result2.summary()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Data: data\n", "Models:\n", "model: mathgain ~ 1 + (1 | schoolid) + (1 | classid)\n", "model2: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) + \n", "model2: (1 | classid)\n", " Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq) \n", "model 4 11779 11800 -5885.7 11771 \n", "model2 8 11407 11448 -5695.5 11391 380.37 4 < 2.2e-16 ***\n", "---\n", "Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R print(anova(model,model2))" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(376.78373760784962, 2.8827851248469664e-80, 4)" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result2.compare_lr_test(result)" ] }, { "cell_type": "code", "execution_count": 22, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Linear mixed model fit by maximum likelihood ['lmerMod']\n", "Formula: mathgain ~ minority + (mathkind | schoolid) + (sex | schoolid) + (ses | schoolid) \n", " Data: data \n", "\n", " AIC BIC logLik deviance \n", "11539.117 11600.097 -5757.558 11515.117 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev. Corr \n", " schoolid (Intercept) 4.328e+04 2.080e+02 \n", " mathkind 1.898e-01 4.356e-01 -1.00\n", " schoolid.1 (Intercept) 1.109e+02 1.053e+01 \n", " sex 1.561e+01 3.951e+00 -0.64\n", " schoolid.2 (Intercept) 1.980e-20 1.407e-10 \n", " ses 4.670e+00 2.161e+00 -1.00\n", " Residual 7.565e+02 2.750e+01 \n", "Number of obs: 1190, groups: schoolid, 107\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 57.830 2.124 27.222\n", "minority -6.463 2.342 -2.759\n", "\n", "Correlation of Fixed Effects:\n", " (Intr)\n", "minority -0.775\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R model3 <- lmer(mathgain ~ minority +(mathkind|schoolid) + (sex|schoolid) + (ses|schoolid) , data=data, REML = F)\n", "%R print(summary(model3))" ] }, { "cell_type": "code", "execution_count": 23, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Data: data\n", "Models:\n", "model2: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) + \n", "model2: (1 | classid)\n", "model3: mathgain ~ minority + (mathkind | schoolid) + (sex | schoolid) + \n", "model3: (ses | schoolid)\n", " Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)\n", "model2 8 11407 11448 -5695.5 11391 \n", "model3 12 11539 11600 -5757.6 11515 0 4 1\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R print(anova(model2, model3))" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(NM) 20: f = 11481.7 at 0.4575 1.08188\n", "(NM) 40: f = 11394.1 at 0.327462 0.403996\n", "(NM) 60: f = 11392.2 at 0.286625 0.311154\n", "(NM) 80: f = 11391.5 at 0.335163 0.313464\n", "(NM) 100: f = 11391.5 at 0.33588 0.314426\n", "(NM) 120: f = 11391.5 at 0.335987 0.31454\n" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "text/plain": [ "Linear mixed model fit by maximum likelihood ['lmerMod']\n", "Formula: mathgain ~ mathkind + minority + ses + (1 | schoolid) + (1 | classid) \n", " Data: data \n", "\n", " AIC BIC logLik deviance \n", "11405.532 11441.104 -5695.766 11391.532 \n", "\n", "Random effects:\n", " Groups Name Variance Std.Dev.\n", " classid (Intercept) 82.73 9.096 \n", " schoolid (Intercept) 72.51 8.515 \n", " Residual 732.93 27.073 \n", "Number of obs: 1190, groups: classid, 312; schoolid, 107\n", "\n", "Fixed effects:\n", " Estimate Std. Error t value\n", "(Intercept) 282.34364 10.82030 26.094\n", "mathkind -0.47015 0.02221 -21.168\n", "minority -8.28499 2.33040 -3.555\n", "ses 5.36063 1.23850 4.328\n", "\n", "Correlation of Fixed Effects:\n", " (Intr) mthknd minrty\n", "mathkind -0.981 \n", "minority -0.308 0.164 \n", "ses 0.141 -0.168 0.164\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R model3 <- lmer(mathgain ~ mathkind + minority + ses +(1|schoolid) + (1|classid), data=data, REML = F, verbose=T)\n", "%R print(summary(model3))" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Model: MixedLM Dependent Variable: mathgain
No. Observations: 1190 Method: ML
No. Groups: 107 Scale: 756.7407
Min. group size: 2 Likelihood: -5698.9749
Max. group size: 31 Converged: Yes
Mean group size: 11.1
\n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "\n", " \n", "\n", "
Coef. Std.Err. z P>|z| [0.025 0.975]
Intercept 281.406 10.902 25.812 0.000 260.039 302.774
mathkind -0.469 0.022 -20.979 0.000 -0.513 -0.426
minority -7.654 2.357 -3.247 0.001 -12.274 -3.034
ses 5.470 1.246 4.389 0.000 3.027 7.912
classid RE 65.463 1.228
schoolid RE 26.867 1.536
" ], "text/plain": [ "\n", "\"\"\"\n", " Mixed Linear Model Regression Results\n", "==========================================================\n", "Model: MixedLM Dependent Variable: mathgain \n", "No. Observations: 1190 Method: ML \n", "No. Groups: 107 Scale: 756.7407 \n", "Min. group size: 2 Likelihood: -5698.9749\n", "Max. group size: 31 Converged: Yes \n", "Mean group size: 11.1 \n", "----------------------------------------------------------\n", " Coef. Std.Err. z P>|z| [0.025 0.975]\n", "----------------------------------------------------------\n", "Intercept 281.406 10.902 25.812 0.000 260.039 302.774\n", "mathkind -0.469 0.022 -20.979 0.000 -0.513 -0.426\n", "minority -7.654 2.357 -3.247 0.001 -12.274 -3.034\n", "ses 5.470 1.246 4.389 0.000 3.027 7.912\n", "classid RE 65.463 1.228 \n", "schoolid RE 26.867 1.536 \n", "==========================================================\n", "\n", "\"\"\"" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model3 = sm.MixedLM.from_formula('mathgain ~ mathkind + minority + ses*ses', groups=\"schoolid\", vc_formula=vcf, data=data)\n", "result3 = model3.fit(reml=False)\n", "result3.summary()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "Data: data\n", "Models:\n", "model3: mathgain ~ mathkind + minority + ses + (1 | schoolid) + (1 | \n", "model3: classid)\n", "model2: mathgain ~ mathkind + sex + minority + ses + (1 | schoolid) + \n", "model2: (1 | classid)\n", " Df AIC BIC logLik deviance Chisq Chi Df Pr(>Chisq)\n", "model3 7 11406 11441 -5695.8 11392 \n", "model2 8 11407 11448 -5695.5 11391 0.5691 1 0.4506\n" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "%R print(anova(model2,model3))" ] }, { "cell_type": "code", "execution_count": 27, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(0.57129857088511926, 0.44974335987194591, 1)" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result2.compare_lr_test(result3)" ] }, { "cell_type": "markdown", "metadata": { "collapsed": true }, "source": [ "Raise warnings for REML:\n" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/usr/local/lib/python2.7/dist-packages/statsmodels-0.7.0-py2.7-linux-x86_64.egg/statsmodels/base/model.py:1414: InvalidTestWarning: Likelihood Ratio test is likely invalid with .fit(REML=True), proceeding anyway\n", " InvalidTestWarning)\n" ] }, { "data": { "text/plain": [ "(-1.7970366599784029, 1.0, 1)" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model4 = sm.MixedLM.from_formula('mathgain ~ mathkind + minority + ses*ses', groups=\"schoolid\", vc_formula=vcf, data=data)\n", "\n", "#REML[Should raise warning]\n", "result4 = model4.fit()\n", "result2.compare_lr_test(result4)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.6" } }, "nbformat": 4, "nbformat_minor": 0 }