{
"metadata": {
"name": "anova_with_multiple_factors"
},
"nbformat": 3,
"nbformat_minor": 0,
"worksheets": [
{
"cells": [
{
"cell_type": "code",
"collapsed": false,
"input": [
"import numpy as np\n",
"import pandas as pd\n",
"from statsmodels.formula.api import ols\n",
"from statsmodels.stats.anova import anova_lm"
],
"language": "python",
"metadata": {},
"outputs": [],
"prompt_number": 1
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Read the tab-separated movie file from its URL, then replace the header\n",
"# labels with short names that can be referenced directly in model formulas.\n",
"data_url = 'http://www.rossmanchance.com/iscam2/data/movies03RT.txt'\n",
"column_names = ['X', 'score', 'rating', 'genre', 'box_office', 'running_time']\n",
"\n",
"movies = pd.read_csv(data_url, sep='\\t')\n",
"movies.columns = column_names\n",
"movies.head()"
],
"language": "python",
"metadata": {},
"outputs": [
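{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>X</th>\n",
"      <th>score</th>\n",
"      <th>rating</th>\n",
"      <th>genre</th>\n",
"      <th>box_office</th>\n",
"      <th>running_time</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>0</th>\n",
"      <td>2 Fast 2 Furious</td>\n",
"      <td>48.9</td>\n",
"      <td>PG-13</td>\n",
"      <td>action/adventure</td>\n",
"      <td>127.146</td>\n",
"      <td>107</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>1</th>\n",
"      <td>28 Days Later</td>\n",
"      <td>78.2</td>\n",
"      <td>R</td>\n",
"      <td>horror</td>\n",
"      <td>45.065</td>\n",
"      <td>113</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>2</th>\n",
"      <td>A Guy Thing</td>\n",
"      <td>39.5</td>\n",
"      <td>PG-13</td>\n",
"      <td>rom comedy</td>\n",
"      <td>15.545</td>\n",
"      <td>101</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>3</th>\n",
"      <td>A Man Apart</td>\n",
"      <td>42.9</td>\n",
"      <td>R</td>\n",
"      <td>action/adventure</td>\n",
"      <td>26.248</td>\n",
"      <td>110</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>4</th>\n",
"      <td>A Mighty Wind</td>\n",
"      <td>79.9</td>\n",
"      <td>PG-13</td>\n",
"      <td>comedy</td>\n",
"      <td>17.781</td>\n",
"      <td>91</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],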
"output_type": "pyout",
"prompt_number": 17,
"text": [
" X score rating genre box_office running_time\n",
"0 2 Fast 2 Furious 48.9 PG-13 action/adventure 127.146 107\n",
"1 28 Days Later 78.2 R horror 45.065 113\n",
"2 A Guy Thing 39.5 PG-13 rom comedy 15.545 101\n",
"3 A Man Apart 42.9 R action/adventure 26.248 110\n",
"4 A Mighty Wind 79.9 PG-13 comedy 17.781 91"
]
}
],
"prompt_number": 17
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# One-way ANOVA: model score with rating as the only factor.\n",
"lm = ols('score ~ rating', data=movies).fit()\n",
"\n",
"# anova_lm is called with its defaults; the resulting table has one row per\n",
"# model term plus a Residual row.\n",
"aovObject = anova_lm(lm)\n",
"aovObject"
],
"language": "python",
"metadata": {},
"outputs": [
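{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>df</th>\n",
"      <th>sum_sq</th>\n",
"      <th>mean_sq</th>\n",
"      <th>F</th>\n",
"      <th>PR(&gt;F)</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>rating</th>\n",
"      <td>3</td>\n",
"      <td>570.123813</td>\n",
"      <td>190.041271</td>\n",
"      <td>0.918184</td>\n",
"      <td>0.433975</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>Residual</th>\n",
"      <td>136</td>\n",
"      <td>28148.635044</td>\n",
"      <td>206.975258</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],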
"output_type": "pyout",
"prompt_number": 24,
"text": [
" df sum_sq mean_sq F PR(>F)\n",
"rating 3 570.123813 190.041271 0.918184 0.433975\n",
"Residual 136 28148.635044 206.975258 NaN NaN"
]
}
],
"prompt_number": 24
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Fitted coefficients of the rating-only model: an Intercept plus one\n",
"# rating[T.<level>] term for each rating level that is not absorbed into it.\n",
"lm.params"
],
"language": "python",
"metadata": {},
"outputs": [
{
"output_type": "pyout",
"prompt_number": 25,
"text": [
"Intercept 67.650000\n",
"rating[T.PG] -12.592857\n",
"rating[T.PG-13] -11.814615\n",
"rating[T.R] -12.020000"
]
}
],
"prompt_number": 25
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Two-factor model: rating is listed first in the formula, genre second.\n",
"lm2 = ols('score ~ rating + genre', data=movies).fit()\n",
"\n",
"# Table rows appear in the same order as the formula terms.\n",
"aovObject2 = anova_lm(lm2)\n",
"aovObject2"
],
"language": "python",
"metadata": {},
"outputs": [
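{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>df</th>\n",
"      <th>sum_sq</th>\n",
"      <th>mean_sq</th>\n",
"      <th>F</th>\n",
"      <th>PR(&gt;F)</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>rating</th>\n",
"      <td>3</td>\n",
"      <td>570.123813</td>\n",
"      <td>190.041271</td>\n",
"      <td>0.973214</td>\n",
"      <td>0.407720</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>genre</th>\n",
"      <td>12</td>\n",
"      <td>3934.928021</td>\n",
"      <td>327.910668</td>\n",
"      <td>1.679252</td>\n",
"      <td>0.079134</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>Residual</th>\n",
"      <td>124</td>\n",
"      <td>24213.707023</td>\n",
"      <td>195.271831</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],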
"output_type": "pyout",
"prompt_number": 28,
"text": [
" df sum_sq mean_sq F PR(>F)\n",
"rating 3 570.123813 190.041271 0.973214 0.407720\n",
"genre 12 3934.928021 327.910668 1.679252 0.079134\n",
"Residual 124 24213.707023 195.271831 NaN NaN"
]
}
],
"prompt_number": 28
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Same two factors as the previous model, with the term order reversed\n",
"# (genre first, then rating) so the two ANOVA tables can be compared.\n",
"lm3 = ols('score ~ genre + rating', data=movies).fit()\n",
"\n",
"aovObject3 = anova_lm(lm3)\n",
"aovObject3"
],
"language": "python",
"metadata": {},
"outputs": [
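{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>df</th>\n",
"      <th>sum_sq</th>\n",
"      <th>mean_sq</th>\n",
"      <th>F</th>\n",
"      <th>PR(&gt;F)</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>genre</th>\n",
"      <td>12</td>\n",
"      <td>4221.505277</td>\n",
"      <td>351.792106</td>\n",
"      <td>1.801551</td>\n",
"      <td>0.054737</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>rating</th>\n",
"      <td>3</td>\n",
"      <td>283.546557</td>\n",
"      <td>94.515519</td>\n",
"      <td>0.484020</td>\n",
"      <td>0.693992</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>Residual</th>\n",
"      <td>124</td>\n",
"      <td>24213.707023</td>\n",
"      <td>195.271831</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],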
"output_type": "pyout",
"prompt_number": 29,
"text": [
" df sum_sq mean_sq F PR(>F)\n",
"genre 12 4221.505277 351.792106 1.801551 0.054737\n",
"rating 3 283.546557 94.515519 0.484020 0.693992\n",
"Residual 124 24213.707023 195.271831 NaN NaN"
]
}
],
"prompt_number": 29
},
{
"cell_type": "code",
"collapsed": false,
"input": [
"# Extend the genre + rating model with box_office as a third term,\n",
"# entered last in the formula.\n",
"lm4 = ols('score ~ genre + rating + box_office', data=movies).fit()\n",
"\n",
"aovObject4 = anova_lm(lm4)\n",
"aovObject4"
],
"language": "python",
"metadata": {},
"outputs": [
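{
"html": [
"<div style=\"max-height:1000px;max-width:1500px;overflow:auto;\">\n",
"<table border=\"1\" class=\"dataframe\">\n",
"  <thead>\n",
"    <tr style=\"text-align: right;\">\n",
"      <th></th>\n",
"      <th>df</th>\n",
"      <th>sum_sq</th>\n",
"      <th>mean_sq</th>\n",
"      <th>F</th>\n",
"      <th>PR(&gt;F)</th>\n",
"    </tr>\n",
"  </thead>\n",
"  <tbody>\n",
"    <tr>\n",
"      <th>genre</th>\n",
"      <td>12</td>\n",
"      <td>4221.505277</td>\n",
"      <td>351.792106</td>\n",
"      <td>2.186135</td>\n",
"      <td>0.016198</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>rating</th>\n",
"      <td>3</td>\n",
"      <td>283.546557</td>\n",
"      <td>94.515519</td>\n",
"      <td>0.587346</td>\n",
"      <td>0.624421</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>box_office</th>\n",
"      <td>1</td>\n",
"      <td>4420.588612</td>\n",
"      <td>4420.588612</td>\n",
"      <td>27.470780</td>\n",
"      <td>0.000001</td>\n",
"    </tr>\n",
"    <tr>\n",
"      <th>Residual</th>\n",
"      <td>123</td>\n",
"      <td>19793.118411</td>\n",
"      <td>160.919662</td>\n",
"      <td>NaN</td>\n",
"      <td>NaN</td>\n",
"    </tr>\n",
"  </tbody>\n",
"</table>\n",
"</div>"
],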
"output_type": "pyout",
"prompt_number": 30,
"text": [
" df sum_sq mean_sq F PR(>F)\n",
"genre 12 4221.505277 351.792106 2.186135 0.016198\n",
"rating 3 283.546557 94.515519 0.587346 0.624421\n",
"box_office 1 4420.588612 4420.588612 27.470780 0.000001\n",
"Residual 123 19793.118411 160.919662 NaN NaN"
]
}
],
"prompt_number": 30
},
{
"cell_type": "code",
"collapsed": false,
"input": [],
"language": "python",
"metadata": {},
"outputs": []
}
],
"metadata": {}
}
]
}