{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# One million is a lot\n", "\n", "This notebook presents analysis of data from the first million page views on my blog, Probably Overthinking It.\n", "\n", "Copyright 2015 Allen Downey\n", "\n", "MIT License: http://opensource.org/licenses/MIT" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", "\n", "def read_table(filename, index=5):\n", "    \"\"\"Reads an HTML file and returns one of the tables it contains.\n", "\n", "    filename: path of the HTML file to parse\n", "    index: position of the wanted table in the list returned by\n", "           pd.read_html (default 5, the table this notebook uses)\n", "\n", "    returns: DataFrame\n", "    \"\"\"\n", "    # the with-block closes the file even if parsing raises\n", "    with open(filename) as fp:\n", "        tables = pd.read_html(fp)\n", "    return tables[index]" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(100, 8)" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table1 = read_table('blogger1.html')\n", "table1.shape" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(20, 8)" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table2 = read_table('blogger2.html')\n", "table2.shape" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(120, 9)" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table = pd.concat([table1, table2], ignore_index=True)\n", "table.shape" ] }, { "cell_type": "code", "execution_count": 19, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import string\n", "chars = string.ascii_letters + ' '\n", "\n", "def convert(s):\n", "    \"\"\"Strips trailing ASCII letters and spaces from s and returns the rest as an int.\n", "\n", "    Example: convert('12 abc') returns 12.\n", "    \"\"\"\n", "    return int(s.rstrip(chars))\n", "\n", "def clean(s):\n", "    \"\"\"Returns s truncated just before the first occurrence of 'Edit'.\n", "\n", "    If 'Edit' does not occur, s is returned unchanged; find() returns -1\n", "    in that case, and slicing with s[:-1] would silently drop the last\n", "    character.\n", "    \"\"\"\n", "    i = s.find('Edit')\n", "    return s if i == -1 else s[:i]" ] }, { "cell_type": "code", "execution_count": 20, "metadata": { "collapsed": false }, "outputs": [ {
"data": { "text/plain": [ "0 One million is a lot\n", "1 When will I win the Great Bear Run?\n", "2 Bayes meets Fourier\n", "3 First babies are more likely to be late\n", "4 Bayesian analysis of gluten sensitivity\n", "5 Bayes theorem in real life\n", "6 The Inspection Paradox is Everywhere\n", "7 Orange is the new stat\n", "8 Will Millennials Ever Get Married?\n", "9 Bayesian Billiards\n", "10 The Sleeping Beauty Problem\n", "11 Hypothesis testing is only mostly useless\n", "12 Two hour marathon by 2041 -- probably\n", "13 Bayesian survival analysis for \"Game of Thrones\"\n", "14 Statistical inference is only mostly wrong\n", "15 Upcoming talk on survival analysis in Python\n", "16 Bayesian analysis of match rates on Tinder\n", "17 Godless freshmen: now more Nones than Catholics\n", "18 Bayesian predictions for Super Bowl XLIX\n", "19 Statistics tutorials at PyCon 2015\n", "20 The Rock Hyrax Problem\n", "21 The World Cup Problem Part 2: Germany v. Argen...\n", "22 The World Cup Problem: Germany v. Brazil\n", "23 On efficient algorithms for finding the goddam...\n", "24 Two hour marathon in 2041\n", "25 Bayesian election forecasting\n", "26 Regression with Python, pandas and StatsModels\n", "27 New study: vaccines prevent disease and death\n", "28 An exercise in hypothesis testing\n", "29 More likely to be killed by a terrorist\n", " ... 
\n", "90 Girl Named Florida solutions\n", "91 The red-haired girl named Florida\n", "92 Somebody bet on the Bayes\n", "93 All your Bayes are belong to us!\n", "94 My favorite Bayes's Theorem problems\n", "95 The Blinky Monty Problem\n", "96 Repeated tests: how bad can it be?\n", "97 The Jimmy Nut Company problem\n", "98 Upcoming webcast: Only One Test\n", "99 News flash: OJ did it.\n", "100 Postcard from NKS Summer Camp\n", "101 A hierarchical Bayesian model of pond scum\n", "102 More hypotheses, less trivia\n", "103 There is only one test!\n", "104 Statistics Workshop\n", "105 Think Stats will be published by O'Reilly in June\n", "106 Two Hour Marathon in 2045\n", "107 Bayesianness is next to Godliness\n", "108 Survival analysis\n", "109 Freshman hordes more godless than ever!\n", "110 Predicting marathon times\n", "111 BQ is unfair to women\n", "112 Moving the goalposts\n", "113 The BQ Effect\n", "114 Are first babies more likely to be late?\n", "115 Yet another reason SAT scores are non-predictive\n", "116 Are you popular? 
Hint: no.\n", "117 Obesity epidemic cured!\n", "118 Observer effect in relay races\n", "119 Proofiness and elections\n", "Name: title, dtype: object" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table['title'] = table[1].apply(clean)\n", "table.title" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 1\n", "2 7\n", "3 2\n", "4 9\n", "Name: plusses, dtype: float64" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table['plusses'] = table[4].fillna(0)\n", "table.plusses.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 1\n", "2 1\n", "3 3\n", "4 1\n", "Name: comments, dtype: int64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table['comments'] = table[5].apply(convert)\n", "table.comments.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 0\n", "1 723\n", "2 2363\n", "3 944\n", "4 3110\n", "5 2514\n", "6 30484\n", "7 2131\n", "8 589\n", "9 1273\n", "10 2348\n", "11 1816\n", "12 2891\n", "13 32406\n", "14 4666\n", "15 1242\n", "16 7602\n", "17 1491\n", "18 2254\n", "19 1193\n", "20 648\n", "21 1789\n", "22 3040\n", "23 819\n", "24 3090\n", "25 1621\n", "26 6456\n", "27 1834\n", "28 1057\n", "29 1536\n", " ... 
\n", "90 9454\n", "91 1153\n", "92 2332\n", "93 48836\n", "94 34384\n", "95 3367\n", "96 3797\n", "97 1929\n", "98 885\n", "99 0\n", "100 0\n", "101 2162\n", "102 1520\n", "103 4246\n", "104 203\n", "105 1445\n", "106 1745\n", "107 1083\n", "108 2849\n", "109 1379\n", "110 3847\n", "111 815\n", "112 513\n", "113 3066\n", "114 130722\n", "115 17876\n", "116 1468\n", "117 289\n", "118 725\n", "119 396\n", "Name: views, dtype: int64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table['views'] = table[6].apply(convert)\n", "table.views" ] }, { "cell_type": "code", "execution_count": 26, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0 2015-11-01\n", "1 2015-10-26\n", "2 2015-10-23\n", "3 2015-09-23\n", "4 2015-09-01\n", "Name: date, dtype: datetime64[ns]" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table['date'] = pd.to_datetime(table[7])\n", "table.date.head()" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "(115, 13)" ] }, "execution_count": 34, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table = table[table.views > 0]\n", "table.shape" ] }, { "cell_type": "code", "execution_count": 38, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "115 When will I win the Great Bear Run?\n", "114 Bayes meets Fourier\n", "113 First babies are more likely to be late\n", "112 Bayesian analysis of gluten sensitivity\n", "111 Bayes theorem in real life\n", "110 The Inspection Paradox is Everywhere\n", "109 Orange is the new stat\n", "108 Will Millennials Ever Get Married?\n", "107 Bayesian Billiards\n", "106 The Sleeping Beauty Problem\n", "105 Hypothesis testing is only mostly useless\n", "104 Two hour marathon by 2041 -- probably\n", "103 Bayesian survival analysis for \"Game of Thrones\"\n", "102 Statistical inference is only 
mostly wrong\n", "101 Upcoming talk on survival analysis in Python\n", "100 Bayesian analysis of match rates on Tinder\n", "99 Godless freshmen: now more Nones than Catholics\n", "98 Bayesian predictions for Super Bowl XLIX\n", "97 Statistics tutorials at PyCon 2015\n", "96 The Rock Hyrax Problem\n", "95 The World Cup Problem Part 2: Germany v. Argen...\n", "94 The World Cup Problem: Germany v. Brazil\n", "93 On efficient algorithms for finding the goddam...\n", "92 Two hour marathon in 2041\n", "91 Bayesian election forecasting\n", "90 Regression with Python, pandas and StatsModels\n", "89 New study: vaccines prevent disease and death\n", "88 An exercise in hypothesis testing\n", "87 More likely to be killed by a terrorist\n", "86 Bayesian solution to the Lincoln index problem\n", " ... \n", "30 Estimating the age of renal tumors\n", "29 Comment on \"Racism and Meritocracy\"\n", "28 Girl Named Florida solutions\n", "27 The red-haired girl named Florida\n", "26 Somebody bet on the Bayes\n", "25 All your Bayes are belong to us!\n", "24 My favorite Bayes's Theorem problems\n", "23 The Blinky Monty Problem\n", "22 Repeated tests: how bad can it be?\n", "21 The Jimmy Nut Company problem\n", "20 Upcoming webcast: Only One Test\n", "19 A hierarchical Bayesian model of pond scum\n", "18 More hypotheses, less trivia\n", "17 There is only one test!\n", "16 Statistics Workshop\n", "15 Think Stats will be published by O'Reilly in June\n", "14 Two Hour Marathon in 2045\n", "13 Bayesianness is next to Godliness\n", "12 Survival analysis\n", "11 Freshman hordes more godless than ever!\n", "10 Predicting marathon times\n", "9 BQ is unfair to women\n", "8 Moving the goalposts\n", "7 The BQ Effect\n", "6 Are first babies more likely to be late?\n", "5 Yet another reason SAT scores are non-predictive\n", "4 Are you popular? 
Hint: no.\n", "3 Obesity epidemic cured!\n", "2 Observer effect in relay races\n", "1 Proofiness and elections\n", "Name: title, dtype: object" ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# number the rows from len(table) down to 1; the row count is taken from\n", "# the table itself so a change in the data or the filter cannot cause a\n", "# length mismatch\n", "table.index = range(len(table), 0, -1)\n", "table.title" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 39, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "1 NaT\n", "2 6 days\n", "3 7 days\n", "4 7 days\n", "5 9 days\n", "Name: date, dtype: timedelta64[ns]" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dates = table.date.sort_values()\n", "diffs = dates.diff()\n", "diffs.head()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "count 114\n", "mean 15 days 09:41:03.157894\n", "std 20 days 04:36:55.930513\n", "min 1 days 00:00:00\n", "25% 5 days 00:00:00\n", "50% 10 days 00:00:00\n", "75% 17 days 18:00:00\n", "max 180 days 00:00:00\n", "Name: date, dtype: object" ] }, "execution_count": 40, "metadata": {}, "output_type": "execute_result" } ], "source": [ "diffs.dropna().describe()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titleviewsdate
6Are first babies more likely to be late?1307222011-02-07
25All your Bayes are belong to us!488362011-10-27
24My favorite Bayes's Theorem problems343842011-10-20
103Bayesian survival analysis for \"Game of Thrones\"324062015-03-25
110The Inspection Paradox is Everywhere304842015-08-18
41Bayesian statistics made simple238922012-03-14
5Yet another reason SAT scores are non-predictive178762011-02-02
72Are your data normal? Hint: no.161522013-08-07
36Freshman hordes even more godless!108262012-01-29
34Think Complexity106702012-01-23
54Secularization in America: part six97732012-07-10
28Girl Named Florida solutions94542011-11-10
55Secularization in America: part seven77052012-07-11
100Bayesian analysis of match rates on Tinder76022015-02-10
90Regression with Python, pandas and StatsModels64562014-09-14
57Are first babies more likely to be late, revis...57762013-01-08
78Correlation is evidence of causation49112014-02-20
102Statistical inference is only mostly wrong46662015-03-02
17There is only one test!42462011-05-31
65The Price is Right Problem40622013-04-22
\n", "
" ], "text/plain": [ " title views date\n", "6 Are first babies more likely to be late? 130722 2011-02-07\n", "25 All your Bayes are belong to us! 48836 2011-10-27\n", "24 My favorite Bayes's Theorem problems 34384 2011-10-20\n", "103 Bayesian survival analysis for \"Game of Thrones\" 32406 2015-03-25\n", "110 The Inspection Paradox is Everywhere 30484 2015-08-18\n", "41 Bayesian statistics made simple 23892 2012-03-14\n", "5 Yet another reason SAT scores are non-predictive 17876 2011-02-02\n", "72 Are your data normal? Hint: no. 16152 2013-08-07\n", "36 Freshman hordes even more godless! 10826 2012-01-29\n", "34 Think Complexity 10670 2012-01-23\n", "54 Secularization in America: part six 9773 2012-07-10\n", "28 Girl Named Florida solutions 9454 2011-11-10\n", "55 Secularization in America: part seven 7705 2012-07-11\n", "100 Bayesian analysis of match rates on Tinder 7602 2015-02-10\n", "90 Regression with Python, pandas and StatsModels 6456 2014-09-14\n", "57 Are first babies more likely to be late, revis... 5776 2013-01-08\n", "78 Correlation is evidence of causation 4911 2014-02-20\n", "102 Statistical inference is only mostly wrong 4666 2015-03-02\n", "17 There is only one test! 4246 2011-05-31\n", "65 The Price is Right Problem 4062 2013-04-22" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table.sort_values(by=['views'], ascending=False)[['title', 'views', 'date']].head(20)" ] }, { "cell_type": "code", "execution_count": 56, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titleviewsdate
16Statistics Workshop2032011-05-17
3Obesity epidemic cured!2892011-01-17
1Proofiness and elections3962011-01-04
45Fog warning system: part two5042012-04-20
8Moving the goalposts5132011-02-24
108Will Millennials Ever Get Married?5892015-07-13
96The Rock Hyrax Problem6482014-12-04
62Belly Button Biodiversity: Part Four6752013-03-22
46Fog warning system: part three7042012-04-25
115When will I win the Great Bear Run?7232015-10-26
2Observer effect in relay races7252011-01-10
60Belly Button Biodiversity: Part Two7832013-02-08
9BQ is unfair to women8152011-03-02
93On efficient algorithms for finding the goddam...8192014-10-04
70Belly Button Biodiversity: The End Game8392013-05-30
20Upcoming webcast: Only One Test8852011-08-16
50Secularization in America: part three9272012-06-22
61Belly Button Biodiversity: Part Three9322013-02-18
113First babies are more likely to be late9442015-09-23
32Frank is a scoundrel, probably9472012-01-05
\n", "
" ], "text/plain": [ " title views date\n", "16 Statistics Workshop 203 2011-05-17\n", "3 Obesity epidemic cured! 289 2011-01-17\n", "1 Proofiness and elections 396 2011-01-04\n", "45 Fog warning system: part two 504 2012-04-20\n", "8 Moving the goalposts 513 2011-02-24\n", "108 Will Millennials Ever Get Married? 589 2015-07-13\n", "96 The Rock Hyrax Problem 648 2014-12-04\n", "62 Belly Button Biodiversity: Part Four 675 2013-03-22\n", "46 Fog warning system: part three 704 2012-04-25\n", "115 When will I win the Great Bear Run? 723 2015-10-26\n", "2 Observer effect in relay races 725 2011-01-10\n", "60 Belly Button Biodiversity: Part Two 783 2013-02-08\n", "9 BQ is unfair to women 815 2011-03-02\n", "93 On efficient algorithms for finding the goddam... 819 2014-10-04\n", "70 Belly Button Biodiversity: The End Game 839 2013-05-30\n", "20 Upcoming webcast: Only One Test 885 2011-08-16\n", "50 Secularization in America: part three 927 2012-06-22\n", "61 Belly Button Biodiversity: Part Three 932 2013-02-18\n", "113 First babies are more likely to be late 944 2015-09-23\n", "32 Frank is a scoundrel, probably 947 2012-01-05" ] }, "execution_count": 56, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table.sort_values(by=['views'], ascending=True)[['title', 'views', 'date']].head(20)" ] }, { "cell_type": "code", "execution_count": 55, "metadata": { "collapsed": false }, "outputs": [ { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAAY4AAAEWCAYAAABxMXBSAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAFztJREFUeJzt3X+0pHVdwPH3R1a9ZZFHTU5yyPXnIhuQqdCxjGv+WtYf\nd12FBRQ4pmzWUcuTRzxlcqljUpo/CZWQVTzCLtbKiqLGKQfWOgUoIIJskIfiR5Gn7IfVJsSnP+a5\nu7OzM3PnufeZmeeZeb/OuYeZZ555nu/9Mjuf+/1+vj8iM5EkaVgPm3QBJEnNYuCQJJVi4JAklWLg\nkCSVYuCQJJVi4JAklWLgkCSVYuCQJJWyZtIF6CciHgVcAPwv0MrMSydcJEkS9W5xbAYuz8ytwCsm\nXRhJUttYA0dEXBwR90fELV3HN0TE7RFxR0ScXRw+HLi7ePx/4yynJKm/cbc4tgEbOg9ExCHA+cXx\no4BTI+IZwD3AEcVpdW4ZSdJMGesXcmbuBr7Xdfg44M7MvCszHwC2AwvATuBVEXEB8PlxllOS1F8d\nkuOdXVLQbmkcn5n/DfzSoDdGhEv7StIKZGas9L116AJa1Zd/Zo7055Wn7+DIY17NK0/fUfqnzPuW\nO3fQ671eO/KYV3PqWX/KFV+8fd/vcs455xzwuy33fBQ/q7nHsO8d5rx+55Q5bn0u//qw9blc/Y6j\nLldznzLvq7o+V/LZXK06tDjuZX8ug+LxPRMqy0F2XnIyrdbjmZ+fL/3eYd6366o97LjiVh532FED\nzxv0eq/XHnfYUezd+yCf2n4zn9p+MwDf/afkb7fuZMum9SxsXLei32m1VnPPYd87zHn9zilzvPvY\nrNbnoNeHrc9h6nccVnrPMu+ruj4n8dmMKqJPqRtGrAWuzMyji+drgD3AC4D7gOuAUzPz20NcK885\n5xzm5+cn8iGrk6UAtHfvg0O/Z25uzb4gArC4uMji4uKISjh7rM/qWJfVaLVatFotzj33XHIVXVVj\nDRwRcRlwAvBY4J+Bd2Xmtog4EfggcAjwicx8z5DXy3EHvqYoE0iWAsiP/fA/znwArlKr1bI+K2Jd\nVisimhM4qmbgKGeYYNLdCpE0fQwcDS7/JA3bIjGQSNNn5gOHOY7VMYBIs6OROY6q2eKo1jBB5MxT\njjV4SA038y2OJpe/7pYLJLZCpGYycDS4/E2x66o9++aC9GIrRGqW1QaOOkwAXJXFxUVzHCO2FBT6\ntT6WgorBQ6q3pRzHatni0Ir0aoXY8pCaYbUtjjqsVaUGWti4jjNPOfaAY5/afjO7rtozoRJJGhdb\nHFqVXi0Pk+ZSvZkcb3D5p0W/5LkBRKonk+MmxyeuX/J8aXXeznMkTY7JcWxx1FG/uR8mzqX6sKuq\nweWfZt3dV3Nza7j0ws0TLJGkJQaOBpd/2vUKHuY8pMlzOK5qa2HjOubm9qfRlnIeDtmVms3AoZHa\nsmn9AcEDnO8hNV3jA8fi4mIlowQ0Ggsb13HphZsPmiy444pbJ1QiaXa1Wq1KtuA1x6GxMech1YM5\nDjVGr5yHLQ+peQwcGqvunMfevQ+a75Aaxq4qTcRpW3ceMEnQbitpfOyqUiNt2bT+gOd2W0nN0fi1\nqtRMvda3GrTXuaT6aHyLw+G4zbU0VLeT+Q5pdByOizmOadGZ73BNK2n0zHGo8TrzHY6ykurPwKGJ\n657f4ZIkUr0ZOFQL3aOsDB5SfRk4VAsLG9cdtJ6VwUOqJ5PjqpVe+5c7OVCqlhs5Nbj86q1X8AC3\nn5Wq4qgqTZ2lbqvufTycWS7Vgy0O1Vp362PnJSdPsDTSdJj5Foczx6dbd9eUyXJp5Zw5ji2OWeHM\ncqlaM9/i0PRzZrlULwYO1V73zHKT5NJkGTjUCN2tDkmTY+BQI5gkl+rDwKHGsLtKqgcDhxrD7iqp\nHgwcaozu7qrTtu60y0qaAOdxqFE653T04oKI0vKcx6GZsmXT+
oPWsOq0d++D5j+kEbPFoUbaddUe\ndlxxa9/Wh2taSf25rHqDy69qbT7j8n2PDRxSfzPfVeUih+rFpLl0MBc5xBaHDuRiiNJwZr7FIS1x\nnoc0HgYOTQ2XJZHGw8ChqeKyJNLoGTg0Vbq7q5xdLlXPwKGp0r13hxMCpeo5qkpTp9/kQJcjkdqc\nANjg8mu0eq1r5TBdyeG4Ul+91rUy7yGtni0OzYTlVtUFu7I0O2xxSENYblVdMJEuDcsWh2bKcqvq\nggskavqZHG9w+VUfnSvrLrHrStPKriqpAr26sey6knqzxSExXBfWElsiajq7qhpcftXXMHubOx9E\nTTW1XVUR8aSIuCgiPjvpsmj2DLO3uTSrat/iiIjPZuZJfV6zxaGxcntaTYPatzgi4uKIuD8ibuk6\nviEibo+IOyLi7FGXQ5JUjcEzoqqxDfgIcMnSgYg4BDgfeCFwL3B9RHweeDbwM8B7M/O+MZRNWrFe\nQ3jB5Lmm38hbHJm5G/he1+HjgDsz867MfADYDixk5qcz862ZeV9EPCYiPgb8tC0S1cVys8/BYbya\nfuNocfRyOHB3x/N7gOM7T8jMfwXeuNyFFhcX9z2en59nfn6+kgJKvWzZtH6oYbsmz1UnrVaLVqtV\n2fXGkhyPiLXAlZl5dPH8VcCGzDyreP5a4PjMfHPJ65ocV610dl+decqxdleplmqfHO/jXuCIjudH\n0G51SI3mnueaBZMKHDcAT4uItRHxCGAL8PkJlUWqTPee59I0Gsdw3MuAvwKeHhF3R8TrMvNB4E3A\nV4DbgB2Z+e2VXH9xcbHSvjtpNbq7pjafcbkbR6k2Wq3WAXnhlar9BMBBzHGojtyyVnXX1ByHNLX6\nbVkrTYvGBw67qlQ3CxvXcemFmw9aksTuKk2aXVXYVaX66+y2srtKdWFXlVRjjrLSNDJwSCPkBEBN\no0ktOSLNpH4LI/bjgomqo8a3OEyOq+6GWRixHxdMVJVMjmNyXM1QZj/zftw0SlVyz/EGl18axN0G\nNSqrDRzmOKQGWAoi5jxUB43PcUjTqlduxJyH6qDxgcPkuKZVr6VLwPkgWjmT45jj0Gwx56GqOHNc\nkjRWJselBlpuIqFJdI2SLQ6pIcpMJDSJrlEycEgN0S9Z3o9JdI1K47uqFhcXmZ+fZ35+ftJFkUZq\nYeO6obqeyq6HpdnRarUqGYXqqCppyjj6SstxVJUkaaz6dlVFxO9l5m8Wj1+UmVePr1iSqjCo28qR\nV1qpQS2OEzse/8GoCyKpGsMm0B15pZWyq0qaMmVGXznySivRNzkeEfcA7wcCeGvHY4DMzPePpYQD\nmByXVsYE+mwb5bLqFwE/2uNxrTgcV5KG43BcbHFIK2WLY7aNdDhuRPxiROyMiNuKnz+JiOev9GaS\npObrGzgi4qXAJ4ArgdOA1wBXAZ8oXpM0BTafcTmnbd3Jrqv2TLooaohBLY63A5syc1tm3pSZN2bm\nxcAm4OzxFE/SKHSPunJorsoYFDgOy8ybuw9m5jeBx4+uSJJGrdeQXYfmaliDRlX99wpfk1RznQsm\nuiiiyhoUOJ4SEVf2ee3JoyiMJKn+BgWOBSDZP+mv0/tGUxxJUt0NChy3AT+emQdkzCJiPfDdkZaq\nBCcAStJwRj4BMCJ2ABdk5jVdx38BeGNmnrbqu6+SEwCl1XMy4OwZ5QTAp3YHDYDMvBY4dqU3lCQ1\n26DAMWhtqodXXRBJUjMMChx39pohHhEbgb8bXZEkTYozyDWMQcnxXwe+EBEnAV+nPbrqWcBzgZeN\noWySxmBubs2+yX9LM8jdFVCDDGpxJPB64FpgLfBE4BrgdYBTTKUp0T2L3BnkWs6gUVVfBN6Rmbd0\nHT8GeHdmvnwM5RvIUVVSdRxdNTtGOarqsO6gAfvWqnrSSm8oSWq2QYHj0QNem6u6IJKkZhgUOG6I\niK3dByPiLNrJcknSDFpuV
NXnIuI17A8UzwIeCbxy1AWTJNVT38CRmf8UEc8Fng/8FO1RVl/IzL8Y\nV+EkSfUzqMVBMWTpL4qfWnKRQ0kazsgXOWwCh+NK1XE47uwY5XBcSZIOYuCQJJUyMMchaTYNuw/5\n3Nwatmxa79pWM8YWhySAA9arGtbSooiaLQYOScDBix0Oy0URZ4+jqiStiKOwmstRVZKksTJwSJJK\nMXBIkkoxcEiSSjFwSJJKMXBIkkoxcEiSSjFwSJJKMXBIkkoxcEiSSqnt6rgRsQC8FDgU+ERmXj3h\nIkmSqHHgyMxdwK6IeDTwPsDAIUk1MPKuqoi4OCLuj4hbuo5viIjbI+KOiDh7wCXeCZw/2lJKkoY1\njhzHNmBD54GIOIR2MNgAHAWcGhHPiIjTI+IDEfGEaPt94EuZedMYyilJGsLIu6oyc3dErO06fBxw\nZ2beBRAR24GFzDwP+HRx7C3AC4BDI+KpmfnxUZdVkrS8SeU4Dgfu7nh+D3B85wmZ+WHgw8tdaHFx\ncd/j+fl55ufnKymgJE2LVqtFq9Wq7HqTChyV7b7UGTgkSQfr/qP63HPPXdX1JjWP417giI7nR9Bu\ndUiSam5SLY4bgKcVuY/7gC3AqSu50OLiol1U0oR1biM7SXNza9iyaT0LG9dNuii1VFWX1cj3HI+I\ny4ATgMcC/wy8KzO3RcSJwAeBQ2hP8HvPCq7tnuPShJy2dSd79z446WIcZG5uDZdeuHnSxai11e45\nPvLAMUoGDmlydl21hx1X3FrL4LHzkpMnXYRaM3A0uPySqtPZXWbgGGy1gaPxixwuLi5WOsxMkqZV\nq9WqZCSqLQ5JU8EWx/BmvsUhSRovA4ckqZTGBw5zHJI0HHMcmOOQtJ85juGZ45AkjZWBQ5JUioFD\nklRK4wOHyXFJGo7JcUyOS9rP5PjwTI5LksbKwCFJKsXAIUkqxcAhSSql8YHDUVWSNBxHVeGoKkn7\nOapqeI6qkiSNlYFDklSKgUOSVIqBQ5JUioFDklRK4wOHw3ElaTgOx8XhuJL2czju8ByOK0kaKwOH\nJKkUA4ckqRQDhySpFAOHJKkUA4ckqRQDhySplMYHDicAStJwnACIEwAl7ecEwOE5AVCSNFYGDklS\nKQYOSVIpBg5JUikGDklSKQYOSVIpBg5JUikGDklSKQYOSVIpBg5JUikGDklSKQYOSVIpjQ8cro4r\nScNxdVxcHVfSfq6OOzxXx5UkjZWBQ5JUioFDklSKgUOSVIqBQ5JUioFDklSKgUOSVIqBQ5JUioFD\nklSKgUOSVIqBQ5JUioFDklSKgUOSVIqBQ5JUioFDklRKbQNHRBwZER+NiMsj4vWTLo8kqa22gSMz\nb8/MXwFOAV4y6fLMAndSrJb1WR3rsl5GHjgi4uKIuD8ibuk6viEibo+IOyLi7D7vfTnwRWD7qMsp\n/3FWzfqsjnVZL+NocWwDNnQeiIhDgPOL40cBp0bEMyLi9Ij4QEQ8ASAzr8zME4Ezx1DOvlb6oS3z\nvuXOHfR6r9eGOTaJf4yrueew7x3mvH7nlDlufS7/+rD1OexneNT8tz6ckQeOzNwNfK/r8HHAnZl5\nV2Y+QLtFsZCZn87Mt2bmfRFxQkR8KCI+Dnx11OUcxA9TderwRTfoHANH+fMMHLP3bz0ys9IL9rxJ\nxFrgysw8unj+auAlmXlW8fy1wPGZ+eaS1x194SVpCmVmrPS9a6osSAmVfOGv5heXJK3MpEZV3Qsc\n0fH8COCeCZVFklTCpALHDcDTImJtRDwC2AJ8fkJlkSSVMI7huJcBfwU8PSLujojXZeaDwJuArwC3\nATsy89ujLoskafXGkhyXJE2P2s4clyTV01QFjohYiIgLI2J7RLxo0uVpOtcLq1ZEPCoiro+Il066\nLE0XEfMRsbv4fJ4w6fI0XbS9OyI+HBFnLHf+VAWOzNyVmVuBN9JOuGsVXC+scm8Hdky6EFP
iIeA/\ngUfiiMwqbAIOB37AEPVZ+8CxwrWu3kl7SRN1KVufrhfWX5m6LFrAtwHfnURZm6DkZ3N3Zm4E3gGc\nO/bCNkDJ+nw68JeZ+TbgV5a7du0DB+XWuoqI+H3gS5l50/iL2ghD1yfUZ72wmipTlycAPwucBpwV\nEU5ePdjQ9Zn7R/X8G+1Whw5W5vN5D+26hHZrbqBJzRwfWmbuLpYs6bRvrSuAiNgOLAAvBF4AHBoR\nT83Mj4+xqI1Qpj4j4vHAZmCOCa8XVkdl6jIz31k8PxP4bjqc8SAlP5tH0u4+fTTwkTEWszFKfnd+\nCPhIRDwPaC137doHjj4OB+7ueH4P+9e68kNUXr/6vAa4ZjJFaqyedbn0JDM/NfYSNVu/z+Z5wOcm\nU6RG61ef/wO8YdiLNKGrqhf/WquW9Vkd67Ja1me1KqnPpgYO17qqlvVZHeuyWtZntSqpz6YGDte6\nqpb1WR3rslrWZ7Uqqc/aBw7XuqqW9Vkd67Ja1me1RlmfrlUlSSql9i0OSVK9GDgkSaUYOCRJpRg4\nJEmlGDgkSaUYOCRJpRg4JEmlGDg0URHxUES8r+P52yLinIqu/cmIeFUV11rmPidFxG0R8eejvtdK\nRMSzIuJDky6HpoeBQ5P2A+CVEfHY4nmVM1JXfK2IKLNy9OuBN2TmC1Z6v1HKzK9n5q9NuhyaHgYO\nTdoDwIXAW7tf6G4xRMT3i//OR8Q1EXFFRPxdRJwXEadHxHUR8c2IeHLHZV4Y7X2+90Sx13dEHBIR\n7y3OvzkitnZcd3dE7AJu7VGeU4vr3xIR5xXH3gX8HHBxRPxB1/nzEXFtRHyh2HHto0sbOEXEBUW5\nvhURix3v2RgR346IG6K9//OVxfFHFTu6/U1EfCMiXtGjfJdFxMbu+ivKMfA6EfHFiDi6eHxjRPx2\n8fh3IuINEfETxe9yY/H7/3zf/6OaegYO1cEFwGsi4tCu490ths7nxwC/DDwDOB14SmYeB1wEvLk4\nJ4AnZuZzgJcCH4uIR9JuIfxbcf5xtHfkW1u855nAWzJzXeeNI+IJwHnA84GfBp4TEQuZ+Tu0F447\nLTPf3uN3ew7ttYGOAp5Ce2MsgN8qynUscEJEHB0Rc8DHgA2Z+WzgcR2/828Bf56ZxwO/CLw3In64\n6147gJOL8j6iOO8LXef0u861wPOK/wcPAM8tzv952nuynAp8OTOfSbvu3WFzhhk4NHGZ+Z/AJcBb\nSrzt+sy8PzN/ANxJe9E2gG8Ba5cuDVxe3ONO4DvAkcCLgTMi4kbgr4HHAE8t3nNdZv59j/s9B/hq\nZv5LZv4f8BngFzpe77cV7HWZeVdmPgRcRvuLGGBLRHwd+AawnnZgORL4Tsf9L+u47ouBdxRl/irt\n7VI7l8cG+DLw/CJonAhck5n/23VOv+vsLn6fn6O9x/yPRMQPAU/KzDuA64HXFfmnYzLz+31+X82A\npu4AqOnzQdpfots6jj1I8cdNRDwMeETHa51fiA91PH+IwZ/rpb/g35SZV3e+EBHzwH8NeF9ncAgO\nbAH1y6d0Hg8gi9bNbwDPzsx/j4httLfn7b5GdzDaXHyJ975R5t6IaNHeUvVk2oGnl4OuExEPB55N\nO7heTbu1s5V2a2ppG9LnAS8DPhkR78/MT/cri6abLQ7VQmZ+j3br4PXs/wK9C3hW8fgVwMNLXjaA\nk6LtKcCTgdtpt05+dSkBHhFP79Ht0+162l1Kj42IQ4BTGG5b3eOivffBw2h/me8GDqUdoP4jIg6j\n3TpIYA/w5Ih4YvHeLeyvi6/Q0SKLiGf2ud8O4JeA59FugXTreZ3MfID2hj4n0V6KezfwNtpdWETE\nT9LeK/0i2t2B/e6vGWDg0KR1/pX9h7T/0l3yx7S/rG8Cfhb4fp/3dV8vOx7/A3AdcBXwy0XX1kW0\n9yL4RkTcAnyUdiul870HXjTzH4F30O7euQm4ITOvHOJ
3ux44v7jfdzLzc5n5TeBG2kHsM8DXinvs\nBX4V+HJE3AD8R/ED8LvAw4vk/LeAc/vc889odzldXey90F0ng65zLXB/0b31NeAJtAMIwDxwU0R8\ng3YAdHjvDHM/DmlEiq6v38jMl5d4z6My87+Kx38E/G1m+iWtWrHFIY1O3xbMAGcVQ15vpd2l9fHq\niyWtji0OSVIptjgkSaUYOCRJpRg4JEmlGDgkSaUYOCRJpfw/dn8rYQE+M9kAAAAASUVORK5CYII=\n", "text/plain": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import thinkstats2\n", "import thinkplot\n", "\n", "cdf = thinkstats2.Cdf(table.views)\n", "\n", "thinkplot.PrePlot(1)\n", "thinkplot.Cdf(cdf, complement=True)\n", "thinkplot.Config(xlabel ='Number of page views', xscale='log', \n", " ylabel='CCDF', yscale='log', \n", " legend=False)" ] }, { "cell_type": "code", "execution_count": 45, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titlecommentsdate
25All your Bayes are belong to us!562011-10-27
106The Sleeping Beauty Problem532015-06-12
28Girl Named Florida solutions252011-11-10
110The Inspection Paradox is Everywhere232015-08-18
54Secularization in America: part six142012-07-10
\n", "
" ], "text/plain": [ " title comments date\n", "25 All your Bayes are belong to us! 56 2011-10-27\n", "106 The Sleeping Beauty Problem 53 2015-06-12\n", "28 Girl Named Florida solutions 25 2011-11-10\n", "110 The Inspection Paradox is Everywhere 23 2015-08-18\n", "54 Secularization in America: part six 14 2012-07-10" ] }, "execution_count": 45, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table.sort_values(by=['comments'], ascending=False)[['title', 'comments', 'date']].head(5)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
titleplussesdate
110The Inspection Paradox is Everywhere9092015-08-18
25All your Bayes are belong to us!592011-10-27
103Bayesian survival analysis for \"Game of Thrones\"542015-03-25
67Software engineering practices for graduate st...342013-05-06
102Statistical inference is only mostly wrong312015-03-02
\n", "
" ], "text/plain": [ " title plusses date\n", "110 The Inspection Paradox is Everywhere 909 2015-08-18\n", "25 All your Bayes are belong to us! 59 2011-10-27\n", "103 Bayesian survival analysis for \"Game of Thrones\" 54 2015-03-25\n", "67 Software engineering practices for graduate st... 34 2013-05-06\n", "102 Statistical inference is only mostly wrong 31 2015-03-02" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "table.sort_values(by=['plusses'], ascending=False)[['title', 'plusses', 'date']].head(5)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.10" } }, "nbformat": 4, "nbformat_minor": 0 }