{ "metadata": { "name": "summarizing_data" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "heading", "level": 2, "metadata": {}, "source": [ "Summarizing data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "import numpy as np" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 1 }, { "cell_type": "markdown", "metadata": {}, "source": [ "
Earthquake data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "fileUrl = 'http://earthquake.usgs.gov/earthquakes/catalogs/eqs7day-M1.txt'\n", "\n", "eData = pd.read_csv(fileUrl)\n", "\n", "dateDownloaded = !date\n", "dateDownloaded" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 2, "text": [ "['Mon Mar 18 21:45:48 CET 2013']" ] } ], "prompt_number": 2 }, { "cell_type": "code", "collapsed": false, "input": [ "# looking at data\n", "# for large data, only a summary is shown\n", "eData" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 3, "text": [ "\n", "Int64Index: 1126 entries, 0 to 1125\n", "Data columns:\n", "Src 1126 non-null values\n", "Eqid 1126 non-null values\n", "Version 1126 non-null values\n", "Datetime 1126 non-null values\n", "Lat 1126 non-null values\n", "Lon 1126 non-null values\n", "Magnitude 1126 non-null values\n", "Depth 1126 non-null values\n", "NST 1126 non-null values\n", "Region 1126 non-null values\n", "dtypes: float64(4), int64(1), object(5)" ] } ], "prompt_number": 3 }, { "cell_type": "code", "collapsed": false, "input": [ "eData.head()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
SrcEqidVersionDatetimeLatLonMagnitudeDepthNSTRegion
0 ci 15309081 0 Monday, March 18, 2013 20:39:21 UTC 34.2713-117.2475 1.8 30.1 14 Southern California
1 ci 15309073 0 Monday, March 18, 2013 20:38:22 UTC 33.5075-116.4293 1.0 6.6 32 Southern California
2 ci 15309057 0 Monday, March 18, 2013 20:28:36 UTC 33.5195-116.4300 1.0 11.3 40 Southern California
3 nn 00406332 1 Monday, March 18, 2013 20:22:00 UTC 38.2462-118.6417 1.0 7.0 10 Nevada
4 ci 15309049 0 Monday, March 18, 2013 20:21:41 UTC 34.4252-117.0105 2.1 1.1 11 Southern California
\n", "
" ], "output_type": "pyout", "prompt_number": 4, "text": [ " Src Eqid Version Datetime Lat Lon \\\n", "0 ci 15309081 0 Monday, March 18, 2013 20:39:21 UTC 34.2713 -117.2475 \n", "1 ci 15309073 0 Monday, March 18, 2013 20:38:22 UTC 33.5075 -116.4293 \n", "2 ci 15309057 0 Monday, March 18, 2013 20:28:36 UTC 33.5195 -116.4300 \n", "3 nn 00406332 1 Monday, March 18, 2013 20:22:00 UTC 38.2462 -118.6417 \n", "4 ci 15309049 0 Monday, March 18, 2013 20:21:41 UTC 34.4252 -117.0105 \n", "\n", " Magnitude Depth NST Region \n", "0 1.8 30.1 14 Southern California \n", "1 1.0 6.6 32 Southern California \n", "2 1.0 11.3 40 Southern California \n", "3 1.0 7.0 10 Nevada \n", "4 2.1 1.1 11 Southern California " ] } ], "prompt_number": 4 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's dim()\n", "eData.shape" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 5, "text": [ "(1126, 10)" ] } ], "prompt_number": 5 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's names()\n", "eData.columns" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 6, "text": [ "Index([Src, Eqid, Version, Datetime, Lat, Lon, Magnitude, Depth, NST, Region], dtype=object)" ] } ], "prompt_number": 6 }, { "cell_type": "code", "collapsed": false, "input": [ "# computing quantiles, equivalent to R's quantile()\n", "p = [0, 0.25, 0.5, 0.75, 1]\n", "[eData['Lat'].quantile(q=i) for i in p]" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 7, "text": [ "[-55.428199999999997,\n", " 33.512300000000003,\n", " 38.027299999999997,\n", " 53.807400000000001,\n", " 68.162499999999994]" ] } ], "prompt_number": 7 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to (but not as detailed as) R's summary()\n", "eData.describe()" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LatLonMagnitudeDepthNST
count 1126.000000 1126.000000 1126.000000 1126.000000 1126.000000
mean 40.467150 -115.068258 1.841741 23.956217 34.748668
std 16.661746 58.041865 0.964461 48.891918 29.083693
min -55.428200 -179.967400 1.000000 0.000000 0.000000
25% 33.512300 -146.874675 1.200000 4.700000 15.000000
50% 38.027300 -119.670800 1.500000 10.400000 26.000000
75% 53.807400 -116.459800 2.100000 17.675000 44.000000
max 68.162500 167.418600 5.600000 607.300000 254.000000
\n", "
" ], "output_type": "pyout", "prompt_number": 8, "text": [ " Lat Lon Magnitude Depth NST\n", "count 1126.000000 1126.000000 1126.000000 1126.000000 1126.000000\n", "mean 40.467150 -115.068258 1.841741 23.956217 34.748668\n", "std 16.661746 58.041865 0.964461 48.891918 29.083693\n", "min -55.428200 -179.967400 1.000000 0.000000 0.000000\n", "25% 33.512300 -146.874675 1.200000 4.700000 15.000000\n", "50% 38.027300 -119.670800 1.500000 10.400000 26.000000\n", "75% 53.807400 -116.459800 2.100000 17.675000 44.000000\n", "max 68.162500 167.418600 5.600000 607.300000 254.000000" ] } ], "prompt_number": 8 }, { "cell_type": "code", "collapsed": false, "input": [ "# R's class() is equivalent to type()\n", "type(eData)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 9, "text": [ "pandas.core.frame.DataFrame" ] } ], "prompt_number": 9 }, { "cell_type": "code", "collapsed": false, "input": [ "# get the data types of all columns\n", "# similar to the method used in the video,\n", "# we just apply the function type() to the values in the first row\n", "# the zip() function is just for nice printing\n", "zip(eData.columns, [type(x) for x in eData.ix[0,:]])" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 10, "text": [ "[('Src', str),\n", " ('Eqid', str),\n", " ('Version', str),\n", " ('Datetime', str),\n", " ('Lat', numpy.float64),\n", " ('Lon', numpy.float64),\n", " ('Magnitude', numpy.float64),\n", " ('Depth', numpy.float64),\n", " ('NST', numpy.int64),\n", " ('Region', str)]" ] } ], "prompt_number": 10 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's unique() command\n", "eData['Src'].unique()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 11, "text": [ "array(['ci', 'nn', 'nc', 'us', 'ak', 'hv', 'uw', 'uu', 'mb', 'pr', 'ld',\n", " 'nm'], dtype=object)" ] } ], "prompt_number": 11 }, { "cell_type": "code", 
"collapsed": false, "input": [ "# equivalent to R's length() command\n", "len(eData['Src'].unique())" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 12, "text": [ "12" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "# for this particular case, value_counts() is equivalent to R's table()\n", "eData['Src'].value_counts() \n", "\n", "# or alternatively: pd.crosstab(eData['Src'], [])" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 13, "text": [ "ci 375\n", "ak 306\n", "nc 212\n", "us 88\n", "nn 67\n", "uw 22\n", "hv 18\n", "uu 16\n", "pr 14\n", "ld 4\n", "nm 3\n", "mb 1" ] } ], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's table() to compute frequency table\n", "pd.crosstab(eData['Src'], eData['Version'])" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Version0123456789ABDN
Src
ak 0 77 214 15 0 0 0 0 0 0 0 0 0 0
ci 170 60 49 72 21 2 0 0 0 1 0 0 0 0
hv 0 10 4 1 2 1 0 0 0 0 0 0 0 0
ld 0 4 0 0 0 0 0 0 0 0 0 0 0 0
mb 0 0 1 0 0 0 0 0 0 0 0 0 0 0
nc 91 55 28 25 6 5 0 1 0 0 1 0 0 0
nm 0 0 0 0 0 0 0 0 0 0 3 0 0 0
nn 0 67 0 0 0 0 0 0 0 0 0 0 0 0
pr 14 0 0 0 0 0 0 0 0 0 0 0 0 0
us 0 0 0 2 10 18 26 16 10 2 1 1 1 1
uu 0 0 5 2 9 0 0 0 0 0 0 0 0 0
uw 0 17 5 0 0 0 0 0 0 0 0 0 0 0
\n", "
" ], "output_type": "pyout", "prompt_number": 14, "text": [ "Version 0 1 2 3 4 5 6 7 8 9 A B D N\n", "Src \n", "ak 0 77 214 15 0 0 0 0 0 0 0 0 0 0\n", "ci 170 60 49 72 21 2 0 0 0 1 0 0 0 0\n", "hv 0 10 4 1 2 1 0 0 0 0 0 0 0 0\n", "ld 0 4 0 0 0 0 0 0 0 0 0 0 0 0\n", "mb 0 0 1 0 0 0 0 0 0 0 0 0 0 0\n", "nc 91 55 28 25 6 5 0 1 0 0 1 0 0 0\n", "nm 0 0 0 0 0 0 0 0 0 0 3 0 0 0\n", "nn 0 67 0 0 0 0 0 0 0 0 0 0 0 0\n", "pr 14 0 0 0 0 0 0 0 0 0 0 0 0 0\n", "us 0 0 0 2 10 18 26 16 10 2 1 1 1 1\n", "uu 0 0 5 2 9 0 0 0 0 0 0 0 0 0\n", "uw 0 17 5 0 0 0 0 0 0 0 0 0 0 0" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "eData.ix[0:9,'Lat']" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 15, "text": [ "0 34.2713\n", "1 33.5075\n", "2 33.5195\n", "3 38.2462\n", "4 34.4252\n", "5 33.8267\n", "6 32.6598\n", "7 33.8420\n", "8 33.9153\n", "9 38.0265\n", "Name: Lat" ] } ], "prompt_number": 15 }, { "cell_type": "code", "collapsed": false, "input": [ "eData.ix[0:9,'Lat'] > 40" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 16, "text": [ "0 False\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", "5 False\n", "6 False\n", "7 False\n", "8 False\n", "9 False\n", "Name: Lat" ] } ], "prompt_number": 16 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's any()\n", "(eData.ix[0:9,'Lat'] > 40).any()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 17, "text": [ "False" ] } ], "prompt_number": 17 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's all()\n", "(eData.ix[0:9,'Lat'] > 40).all()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 18, "text": [ "False" ] } ], "prompt_number": 18 }, { "cell_type": "code", "collapsed": false, "input": [ "# looking at subsets; very similar to R's & operator\n", 
"eData[(eData['Lat'] > 0) & (eData['Lon'] > 0)][['Lat', 'Lon']][:10]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LatLon
85 36.2654 69.7945
86 36.5114 70.0785
102 36.4774 140.4687
132 2.4794 128.6353
133 34.0983 135.4945
183 44.3796 148.9563
221 36.5178 70.8805
246 9.6966 125.3708
253 36.1766 141.4287
295 35.7555 5.6368
\n", "
" ], "output_type": "pyout", "prompt_number": 19, "text": [ " Lat Lon\n", "85 36.2654 69.7945\n", "86 36.5114 70.0785\n", "102 36.4774 140.4687\n", "132 2.4794 128.6353\n", "133 34.0983 135.4945\n", "183 44.3796 148.9563\n", "221 36.5178 70.8805\n", "246 9.6966 125.3708\n", "253 36.1766 141.4287\n", "295 35.7555 5.6368" ] } ], "prompt_number": 19 }, { "cell_type": "code", "collapsed": false, "input": [ "# looking at subsets; very similar to R's | operator\n", "eData[(eData['Lat'] > 0) | (eData['Lon'] > 0)][['Lat', 'Lon']][-10:]" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
LatLon
1116 33.5147-116.4460
1117 63.2469-150.4575
1118 33.5015-116.4617
1119 63.4993-146.4718
1120 49.3783-120.4828
1121 33.5075-116.4638
1122 33.5103-116.4387
1123 38.7893-122.7495
1124 33.5040-116.4415
1125 33.5047-116.4570
\n", "
" ], "output_type": "pyout", "prompt_number": 20, "text": [ " Lat Lon\n", "1116 33.5147 -116.4460\n", "1117 63.2469 -150.4575\n", "1118 33.5015 -116.4617\n", "1119 63.4993 -146.4718\n", "1120 49.3783 -120.4828\n", "1121 33.5075 -116.4638\n", "1122 33.5103 -116.4387\n", "1123 38.7893 -122.7495\n", "1124 33.5040 -116.4415\n", "1125 33.5047 -116.4570" ] } ], "prompt_number": 20 }, { "cell_type": "markdown", "metadata": {}, "source": [ "
Peer review experiment data" ] }, { "cell_type": "code", "collapsed": false, "input": [ "fileUrl1 = 'https://dl.dropbox.com/u/7710864/data/reviews-apr29.csv'\n", "fileUrl2 = 'https://dl.dropbox.com/u/7710864/data/solutions-apr29.csv'\n", "\n", "reviews = pd.read_csv(fileUrl1)\n", "solutions = pd.read_csv(fileUrl2)" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 21 }, { "cell_type": "code", "collapsed": false, "input": [ "reviews.head(2)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idsolution_idreviewer_idstartstoptime_leftaccept
0 1 3 27 1304095698 1304095758 1754 1
1 2 4 22 1304095188 1304095206 2306 1
\n", "
" ], "output_type": "pyout", "prompt_number": 22, "text": [ " id solution_id reviewer_id start stop time_left accept\n", "0 1 3 27 1304095698 1304095758 1754 1\n", "1 2 4 22 1304095188 1304095206 2306 1" ] } ], "prompt_number": 22 }, { "cell_type": "code", "collapsed": false, "input": [ "solutions.head(2)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idproblem_idsubject_idstartstoptime_leftanswer
0 1 156 29 1304095119 1304095169 2343 B
1 2 269 25 1304095119 1304095183 2329 C
\n", "
" ], "output_type": "pyout", "prompt_number": 23, "text": [ " id problem_id subject_id start stop time_left answer\n", "0 1 156 29 1304095119 1304095169 2343 B\n", "1 2 269 25 1304095119 1304095183 2329 C" ] } ], "prompt_number": 23 }, { "cell_type": "code", "collapsed": false, "input": [ "# find if there are missing values; equivalent to R's is.na()\n", "reviews.ix[0:9,'time_left'].isnull()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 24, "text": [ "0 False\n", "1 False\n", "2 False\n", "3 False\n", "4 False\n", "5 False\n", "6 False\n", "7 True\n", "8 False\n", "9 False\n", "Name: time_left" ] } ], "prompt_number": 24 }, { "cell_type": "code", "collapsed": false, "input": [ "reviews['time_left'].isnull().sum()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 25, "text": [ "84" ] } ], "prompt_number": 25 }, { "cell_type": "code", "collapsed": false, "input": [ "reviews['time_left'].isnull().value_counts()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 26, "text": [ "False 115\n", "True 84" ] } ], "prompt_number": 26 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's colSums()\n", "# notice we need to specifically include missing values with the option skipna=False\n", "# this is contrary to R, where the default is to *include* NA\n", "reviews.sum(skipna=False) " ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 27, "text": [ "id 19900\n", "solution_id 19929\n", "reviewer_id 5064\n", "start NaN\n", "stop NaN\n", "time_left NaN\n", "accept NaN" ] } ], "prompt_number": 27 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's colMeans()\n", "# same remark as above: here missing values are by default excluded\n", "reviews.mean()" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", 
"prompt_number": 28, "text": [ "id 1.000000e+02\n", "solution_id 1.001457e+02\n", "reviewer_id 2.544724e+01\n", "start 1.304096e+09\n", "stop 1.304096e+09\n", "time_left 1.114287e+03\n", "accept 6.434783e-01" ] } ], "prompt_number": 28 }, { "cell_type": "code", "collapsed": false, "input": [ "# equivalent to R's rowMeans()\n", "reviews.mean(axis=1)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "pyout", "prompt_number": 29, "text": [ "0 3.725990e+08\n", "1 3.725990e+08\n", "2 3.725990e+08\n", "3 3.725990e+08\n", "4 3.725990e+08\n", "5 3.725990e+08\n", "6 3.725990e+08\n", "7 1.300000e+01\n", "8 3.725990e+08\n", "9 3.725990e+08\n", "10 3.725990e+08\n", "11 3.725990e+08\n", "12 3.725990e+08\n", "13 3.725990e+08\n", "14 3.725990e+08\n", "...\n", "184 1.326667e+02\n", "185 1.333333e+02\n", "186 1.343333e+02\n", "187 1.340000e+02\n", "188 3.725993e+08\n", "189 1.356667e+02\n", "190 1.370000e+02\n", "191 3.725993e+08\n", "192 1.390000e+02\n", "193 1.383333e+02\n", "194 1.366667e+02\n", "195 1.410000e+02\n", "196 1.396667e+02\n", "197 1.420000e+02\n", "198 1.393333e+02\n", "Length: 199" ] } ], "prompt_number": 29 }, { "cell_type": "code", "collapsed": false, "input": [], "language": "python", "metadata": {}, "outputs": [] } ], "metadata": {} } ] }