{ "metadata": { "name": "anova_with_multiple_factors" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "code", "collapsed": false, "input": [ "# Location of the tab-delimited movies data set.\n", "MOVIES_URL = 'http://www.rossmanchance.com/iscam2/data/movies03RT.txt'\n", "# Replacement header: short names that the model formulas can reference directly.\n", "MOVIE_COLUMNS = ['X', 'score', 'rating', 'genre', 'box_office', 'running_time']\n", "\n", "movies = pd.read_csv(MOVIES_URL, sep='\\t')\n", "movies.columns = MOVIE_COLUMNS\n", "movies.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Xscoreratinggenrebox_officerunning_time
0 2 Fast 2 Furious 48.9 PG-13 action/adventure 127.146 107
1 28 Days Later 78.2 R horror 45.065 113
2 A Guy Thing 39.5 PG-13 rom comedy 15.545 101
3 A Man Apart 42.9 R action/adventure 26.248 110
4 A Mighty Wind 79.9 PG-13 comedy 17.781 91
\n", "
" ], "output_type": "pyout", "prompt_number": 17, "text": [ " X score rating genre box_office running_time\n", "0 2 Fast 2 Furious 48.9 PG-13 action/adventure 127.146 107\n", "1 28 Days Later 78.2 R horror 45.065 113\n", "2 A Guy Thing 39.5 PG-13 rom comedy 15.545 101\n", "3 A Man Apart 42.9 R action/adventure 26.248 110\n", "4 A Mighty Wind 79.9 PG-13 comedy 17.781 91" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "from statsmodels.formula.api import ols\n", "from statsmodels.stats.anova import anova_lm\n", "\n", "# One-factor model: score explained by rating alone.\n", "lm = ols(formula='score ~ rating', data=movies).fit()\n", "\n", "# typ=1 is anova_lm's default (sequential sums of squares); spelled out for the reader.\n", "aovObject = anova_lm(lm, typ=1)\n", "aovObject" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dfsum_sqmean_sqFPR(>F)
rating 3 570.123813 190.041271 0.918184 0.433975
Residual 136 28148.635044 206.975258 NaN NaN
\n", "
" ], "output_type": "pyout", "prompt_number": 24, "text": [ " df sum_sq mean_sq F PR(>F)\n", "rating 3 570.123813 190.041271 0.918184 0.433975\n", "Residual 136 28148.635044 206.975258 NaN NaN" ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "# Fitted coefficients: an intercept plus one rating[T.<level>] term per non-baseline rating.\n", "lm.params" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 25, "text": [ "Intercept 67.650000\n", "rating[T.PG] -12.592857\n", "rating[T.PG-13] -11.814615\n", "rating[T.R] -12.020000" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "# Two-factor additive model with rating entered before genre.\n", "lm2 = ols(formula='score ~ rating + genre', data=movies).fit()\n", "\n", "# typ=1 is anova_lm's default (sequential sums of squares); spelled out for the reader.\n", "aovObject2 = anova_lm(lm2, typ=1)\n", "aovObject2" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dfsum_sqmean_sqFPR(>F)
rating 3 570.123813 190.041271 0.973214 0.407720
genre 12 3934.928021 327.910668 1.679252 0.079134
Residual 124 24213.707023 195.271831 NaN NaN
\n", "
" ], "output_type": "pyout", "prompt_number": 28, "text": [ " df sum_sq mean_sq F PR(>F)\n", "rating 3 570.123813 190.041271 0.973214 0.407720\n", "genre 12 3934.928021 327.910668 1.679252 0.079134\n", "Residual 124 24213.707023 195.271831 NaN NaN" ] } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "# Same two factors with genre entered first: under sequential sums of squares\n", "# the per-term rows depend on the order in which the terms are listed.\n", "lm3 = ols(formula='score ~ genre + rating', data=movies).fit()\n", "\n", "# typ=1 is anova_lm's default (sequential sums of squares); spelled out for the reader.\n", "aovObject3 = anova_lm(lm3, typ=1)\n", "aovObject3" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dfsum_sqmean_sqFPR(>F)
genre 12 4221.505277 351.792106 1.801551 0.054737
rating 3 283.546557 94.515519 0.484020 0.693992
Residual 124 24213.707023 195.271831 NaN NaN
\n", "
" ], "output_type": "pyout", "prompt_number": 29, "text": [ " df sum_sq mean_sq F PR(>F)\n", "genre 12 4221.505277 351.792106 1.801551 0.054737\n", "rating 3 283.546557 94.515519 0.484020 0.693992\n", "Residual 124 24213.707023 195.271831 NaN NaN" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [ "# Add box_office as a third term, entered after genre and rating.\n", "lm4 = ols(formula='score ~ genre + rating + box_office', data=movies).fit()\n", "\n", "# typ=1 is anova_lm's default (sequential sums of squares); spelled out for the reader.\n", "aovObject4 = anova_lm(lm4, typ=1)\n", "aovObject4" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
dfsum_sqmean_sqFPR(>F)
genre 12 4221.505277 351.792106 2.186135 0.016198
rating 3 283.546557 94.515519 0.587346 0.624421
box_office 1 4420.588612 4420.588612 27.470780 0.000001
Residual 123 19793.118411 160.919662 NaN NaN
\n", "
" ], "output_type": "pyout", "prompt_number": 30, "text": [ " df sum_sq mean_sq F PR(>F)\n", "genre 12 4221.505277 351.792106 2.186135 0.016198\n", "rating 3 283.546557 94.515519 0.587346 0.624421\n", "box_office 1 4420.588612 4420.588612 27.470780 0.000001\n", "Residual 123 19793.118411 160.919662 NaN NaN" ] } ], "prompt_number": 30 } ], "metadata": {} } ] }