{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# English Wikipedia Page Views by Topics in September & August 2019\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"https://phabricator.wikimedia.org/T234839\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"In this analysis, we use Adam's topic dataset of articles with \"best\" topic prediction for pages accessed in September 2019. (see [example](https://dr0ptp4kt.github.io/topics-7.html) of first 10K non-randomized rows for an HTML view). \n",
"\n",
"The outcome topics are from the \"predicted\" field, which is the post-enrichment best guess for the articles.\n"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"The raw code for this notebook is by default hidden for easier reading.\n",
"
\n"
],
"text/plain": [
""
]
},
"execution_count": 1,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import HTML\n",
"\n",
"HTML('''\n",
"The raw code for this notebook is by default hidden for easier reading.\n",
"\n",
"''')"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"You can find the source for `wmfdata` at https://github.com/neilpquinn/wmfdata\n"
]
}
],
"source": [
"import requests\n",
"import pandas as pd\n",
"import json\n",
"import matplotlib.pyplot as plt\n",
"import gzip\n",
"from wmfdata import hive\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"##read topic prediction file\n",
"topic = pd.read_csv('topic_prediction.tsv.gz', sep='\\t',compression='gzip', header=0)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [],
"source": [
"pageview_query = '''\n",
" SELECT \n",
" CONCAT(year,\"-\",month,\"-01\") AS date,\n",
" page_id, \n",
" SUM(view_count) AS pageviews\n",
" FROM \n",
" wmf.pageview_hourly\n",
" WHERE year = \"{year}\"\n",
" AND month = \"{month}\" \n",
" AND project = \"{wiki}\"\n",
" AND namespace_id = 0\n",
" AND agent_type = \"user\"\n",
" AND NOT (\n",
" country_code IN (\"PK\", \"IR\", \"AF\") \n",
" AND user_agent_map[\"browser_family\"] = \"IE\" AND user_agent_map[\"browser_major\"] = 7\n",
" )\n",
" GROUP BY CONCAT(year,\"-\",month,\"-01\"), page_id\n",
"'''"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## September 2019 Topic Analysis "
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [],
"source": [
"enwiki_pv_sept_all = hive.run([\n",
" \"SET mapreduce.map.memory.mb=4096\", \n",
" pageview_query.format(\n",
" year = 2019,\n",
" month = 9,\n",
" wiki = \"en.wikipedia\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# Share of total September pageviews for each page.\n",
"enwiki_pv_sept_all['proportion'] = enwiki_pv_sept_all['pageviews'] / enwiki_pv_sept_all['pageviews'].sum()\n",
"# Sort most-viewed first and rebind the SAME name (the original assigned the sorted\n",
"# frame to a misspelled variable, so later positional slices saw unsorted data).\n",
"enwiki_pv_sept_all = enwiki_pv_sept_all.sort_values(by='pageviews', ascending=False)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" date | \n",
" page_id | \n",
" pageviews | \n",
" proportion | \n",
"
\n",
" \n",
" \n",
" \n",
" 4156974 | \n",
" 2019-9-01 | \n",
" NaN | \n",
" 48039 | \n",
" 0.000007 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" date page_id pageviews proportion\n",
"4156974 2019-9-01 NaN 48039 0.000007"
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Rows without a page_id. The boolean mask is built from the same frame it filters;\n",
"# the original used a different frame that is only created further down the notebook.\n",
"enwiki_pv_sept_all[enwiki_pv_sept_all.page_id.isnull()]"
]
},
{
"cell_type": "code",
"execution_count": 70,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total page views: 7198970305\n"
]
}
],
"source": [
"# Report the September total across all pages.\n",
"print('Total page views in September: {}'.format(enwiki_pv_sept_all['pageviews'].sum()))"
]
},
{
"cell_type": "code",
"execution_count": 71,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of unqiue pages: 8043636\n"
]
}
],
"source": [
"# One row per page_id in the query result, so the row count is the number of pages.\n",
"print('Number of unique pages in September: ' + str(len(enwiki_pv_sept_all)))"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 1M pages account for 91.8% of total page views.\n"
]
}
],
"source": [
"# Pick the 1M largest shares explicitly so the figure does not depend on the frame\n",
"# having been sorted by an earlier cell.\n",
"top_1m_share = enwiki_pv_sept_all['proportion'].nlargest(1000000).sum()\n",
"print('Top 1M pages account for ' + str(round(top_1m_share * 100, 2)) + '% of total page views in September.')"
]
},
{
"cell_type": "code",
"execution_count": 50,
"metadata": {},
"outputs": [],
"source": [
"pageview_title_query = '''\n",
"WITH v AS (\n",
" SELECT page_id, SUM(view_count) AS pageviews\n",
" FROM wmf.pageview_hourly\n",
" WHERE year = \"{year}\"\n",
" AND month = \"{month}\" \n",
" AND project = \"{wiki}\"\n",
" AND namespace_id = 0\n",
" AND agent_type = \"user\"\n",
" AND NOT (\n",
" country_code IN (\"PK\", \"IR\", \"AF\") AND user_agent_map[\"browser_family\"] = \"IE\" AND user_agent_map[\"browser_major\"] = 7\n",
" )\n",
" GROUP BY page_id\n",
" LIMIT 10000000\n",
"), p AS (\n",
" SELECT page_id, page_title, page_latest\n",
" FROM wmf_raw.mediawiki_page\n",
" WHERE wiki_db = \"enwiki\"\n",
" AND snapshot = \"{snapshot}\"\n",
" AND page_id IS NOT NULL\n",
" AND page_namespace = 0\n",
" AND NOT page_is_redirect\n",
")\n",
"\n",
"SELECT v.page_id, p.page_title, v.pageviews\n",
"FROM v LEFT JOIN p ON v.page_id=p.page_id\n",
"'''"
]
},
{
"cell_type": "code",
"execution_count": 51,
"metadata": {},
"outputs": [],
"source": [
"enwiki_pv_sept = hive.run([\n",
" \"SET mapreduce.map.memory.mb=4096\", \n",
" pageview_title_query.format(\n",
" year = 2019,\n",
" month = 9,\n",
" wiki = \"en.wikipedia\",\n",
" snapshot = \"2019-09\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# Add each page's share of total September pageviews, then order most-viewed first.\n",
"enwiki_pv_sept = (\n",
"    enwiki_pv_sept\n",
"    .assign(proportion=lambda df: df['pageviews'] / df['pageviews'].sum())\n",
"    .sort_values(by='pageviews', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 56,
"metadata": {},
"outputs": [],
"source": [
"# Attach the topic prediction to every viewed page; pages without a prediction are kept.\n",
"enwiki_pv_topic_sept = pd.merge(enwiki_pv_sept, topic, how='left', on='page_id')"
]
},
{
"cell_type": "code",
"execution_count": 57,
"metadata": {},
"outputs": [],
"source": [
"# Label pages that have no prediction as 'Unknown' and recompute each row's share\n",
"# of the total pageviews in the merged frame.\n",
"enwiki_pv_topic_sept = enwiki_pv_topic_sept.assign(\n",
"    predicted=lambda df: df['predicted'].fillna(value='Unknown'),\n",
"    proportion=lambda df: df['pageviews'] / df['pageviews'].sum(),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Top 50 articles read in September 2019 on English Wikipedia"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The table below shows the top 50 articles viewed on English Wikipedia in September 2019, with their proportions of total pageviews and their best predicted topics."
]
},
{
"cell_type": "code",
"execution_count": 59,
"metadata": {},
"outputs": [],
"source": [
"# Top 50 articles by September pageviews, with share of total and predicted topic.\n",
"enwiki_page_sept_summary = (\n",
"    enwiki_pv_topic_sept[['page_title', 'pageviews', 'proportion', 'predicted']]\n",
"    .sort_values(by='pageviews', ascending=False)\n",
"    .head(50)\n",
"    .reset_index(drop=True)\n",
")\n"
]
},
{
"cell_type": "code",
"execution_count": 60,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 50 articles account for 7.84% of total page views in September.\n"
]
}
],
"source": [
"# Combined share of the 50 most-viewed articles, as a percentage.\n",
"print('Top 50 articles account for {}% of total page views in September.'.format(\n",
"    round(enwiki_page_sept_summary['proportion'].sum() * 100, 2)))"
]
},
{
"cell_type": "code",
"execution_count": 61,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" page_title | \n",
" pageviews | \n",
" proportion | \n",
" predicted | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Main_Page | \n",
" 473316359 | \n",
" 0.065746 | \n",
" Internet culture | \n",
"
\n",
" \n",
" 1 | \n",
" Wikipedia | \n",
" 7376833 | \n",
" 0.001025 | \n",
" Language and literature | \n",
"
\n",
" \n",
" 2 | \n",
" List_of_Queen_of_the_South_episodes | \n",
" 6426243 | \n",
" 0.000893 | \n",
" Broadcasting | \n",
"
\n",
" \n",
" 3 | \n",
" It_Chapter_Two | \n",
" 3777663 | \n",
" 0.000525 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 4 | \n",
" Deaths_in_2019 | \n",
" 3225335 | \n",
" 0.000448 | \n",
" Time | \n",
"
\n",
" \n",
" 5 | \n",
" Greta_Thunberg | \n",
" 3146905 | \n",
" 0.000437 | \n",
" History and society | \n",
"
\n",
" \n",
" 6 | \n",
" Saaho | \n",
" 2927278 | \n",
" 0.000407 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 7 | \n",
" Joker_(2019_film) | \n",
" 2864258 | \n",
" 0.000398 | \n",
" Visual arts | \n",
"
\n",
" \n",
" 8 | \n",
" September_11_attacks | \n",
" 2435866 | \n",
" 0.000338 | \n",
" Politics and government | \n",
"
\n",
" \n",
" 9 | \n",
" Antonio_Brown | \n",
" 2359216 | \n",
" 0.000328 | \n",
" Sports | \n",
"
\n",
" \n",
" 10 | \n",
" Algorithms_for_calculating_variance | \n",
" 2222385 | \n",
" 0.000309 | \n",
" Mathematics | \n",
"
\n",
" \n",
" 11 | \n",
" Chandrayaan-2 | \n",
" 2164700 | \n",
" 0.000301 | \n",
" Space | \n",
"
\n",
" \n",
" 12 | \n",
" Hustlers_(2019_film) | \n",
" 1909821 | \n",
" 0.000265 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 13 | \n",
" The_Bahamas | \n",
" 1724124 | \n",
" 0.000239 | \n",
" The_Bahamas | \n",
"
\n",
" \n",
" 14 | \n",
" Eli_Cohen | \n",
" 1718906 | \n",
" 0.000239 | \n",
" Military and warfare | \n",
"
\n",
" \n",
" 15 | \n",
" Storm_Area_51,_They_Can't_Stop_All_of_Us | \n",
" 1716245 | \n",
" 0.000238 | \n",
" Internet culture | \n",
"
\n",
" \n",
" 16 | \n",
" Ad_Astra_(film) | \n",
" 1713237 | \n",
" 0.000238 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 17 | \n",
" 6ix9ine | \n",
" 1599551 | \n",
" 0.000222 | \n",
" Internet culture | \n",
"
\n",
" \n",
" 18 | \n",
" Bianca_Andreescu | \n",
" 1594039 | \n",
" 0.000221 | \n",
" Sports | \n",
"
\n",
" \n",
" 19 | \n",
" Ric_Ocasek | \n",
" 1538688 | \n",
" 0.000214 | \n",
" Performing arts | \n",
"
\n",
" \n",
" 20 | \n",
" 2019_FIBA_Basketball_World_Cup | \n",
" 1536804 | \n",
" 0.000213 | \n",
" Sports | \n",
"
\n",
" \n",
" 21 | \n",
" Unbelievable_(miniseries) | \n",
" 1530294 | \n",
" 0.000213 | \n",
" Broadcasting | \n",
"
\n",
" \n",
" 22 | \n",
" Line_shaft | \n",
" 1526395 | \n",
" 0.000212 | \n",
" Technology | \n",
"
\n",
" \n",
" 23 | \n",
" Billie_Eilish | \n",
" 1496020 | \n",
" 0.000208 | \n",
" Performing arts | \n",
"
\n",
" \n",
" 24 | \n",
" Mindhunter_(TV_series) | \n",
" 1479612 | \n",
" 0.000206 | \n",
" Broadcasting | \n",
"
\n",
" \n",
" 25 | \n",
" Solar_System | \n",
" 1455847 | \n",
" 0.000202 | \n",
" Space | \n",
"
\n",
" \n",
" 26 | \n",
" Freddie_Mercury | \n",
" 1449146 | \n",
" 0.000201 | \n",
" Performing arts | \n",
"
\n",
" \n",
" 27 | \n",
" Rafael_Nadal | \n",
" 1426186 | \n",
" 0.000198 | \n",
" Sports | \n",
"
\n",
" \n",
" 28 | \n",
" It_(2017_film) | \n",
" 1382100 | \n",
" 0.000192 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 29 | \n",
" United_States | \n",
" 1370054 | \n",
" 0.000190 | \n",
" United_States | \n",
"
\n",
" \n",
" 30 | \n",
" List_of_Bollywood_films_of_2019 | \n",
" 1359921 | \n",
" 0.000189 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 31 | \n",
" John_Bercow | \n",
" 1342478 | \n",
" 0.000186 | \n",
" Politics and government | \n",
"
\n",
" \n",
" 32 | \n",
" Hurricane_Dorian | \n",
" 1302209 | \n",
" 0.000181 | \n",
" Politics and government | \n",
"
\n",
" \n",
" 33 | \n",
" Peaky_Blinders_(TV_series) | \n",
" 1286720 | \n",
" 0.000179 | \n",
" Broadcasting | \n",
"
\n",
" \n",
" 34 | \n",
" Elton_John | \n",
" 1253440 | \n",
" 0.000174 | \n",
" History and society | \n",
"
\n",
" \n",
" 35 | \n",
" Wayne_Williams | \n",
" 1238887 | \n",
" 0.000172 | \n",
" History and society | \n",
"
\n",
" \n",
" 36 | \n",
" Once_Upon_a_Time_in_Hollywood | \n",
" 1229706 | \n",
" 0.000171 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 37 | \n",
" Donald_Trump | \n",
" 1217332 | \n",
" 0.000169 | \n",
" Politics and government | \n",
"
\n",
" \n",
" 38 | \n",
" Joaquin_Phoenix | \n",
" 1206288 | \n",
" 0.000168 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 39 | \n",
" Moving_average | \n",
" 1186636 | \n",
" 0.000165 | \n",
" Mathematics | \n",
"
\n",
" \n",
" 40 | \n",
" Apple_Network_Server | \n",
" 1157855 | \n",
" 0.000161 | \n",
" Technology | \n",
"
\n",
" \n",
" 41 | \n",
" Eddie_Money | \n",
" 1151955 | \n",
" 0.000160 | \n",
" Performing arts | \n",
"
\n",
" \n",
" 42 | \n",
" Sylvester_Stallone | \n",
" 1141540 | \n",
" 0.000159 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 43 | \n",
" Judy_Garland | \n",
" 1128921 | \n",
" 0.000157 | \n",
" History and society | \n",
"
\n",
" \n",
" 44 | \n",
" Elizabeth_II | \n",
" 1117237 | \n",
" 0.000155 | \n",
" Language and literature | \n",
"
\n",
" \n",
" 45 | \n",
" Atlanta_murders_of_1979–1981 | \n",
" 1116224 | \n",
" 0.000155 | \n",
" History and society | \n",
"
\n",
" \n",
" 46 | \n",
" YouTube | \n",
" 1114222 | \n",
" 0.000155 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 47 | \n",
" Charles_Manson | \n",
" 1099406 | \n",
" 0.000153 | \n",
" History and society | \n",
"
\n",
" \n",
" 48 | \n",
" Clash_of_Champions_(2019) | \n",
" 1097502 | \n",
" 0.000152 | \n",
" Entertainment | \n",
"
\n",
" \n",
" 49 | \n",
" Clint_Eastwood | \n",
" 1074402 | \n",
" 0.000149 | \n",
" Entertainment | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" page_title pageviews proportion \\\n",
"0 Main_Page 473316359 0.065746 \n",
"1 Wikipedia 7376833 0.001025 \n",
"2 List_of_Queen_of_the_South_episodes 6426243 0.000893 \n",
"3 It_Chapter_Two 3777663 0.000525 \n",
"4 Deaths_in_2019 3225335 0.000448 \n",
"5 Greta_Thunberg 3146905 0.000437 \n",
"6 Saaho 2927278 0.000407 \n",
"7 Joker_(2019_film) 2864258 0.000398 \n",
"8 September_11_attacks 2435866 0.000338 \n",
"9 Antonio_Brown 2359216 0.000328 \n",
"10 Algorithms_for_calculating_variance 2222385 0.000309 \n",
"11 Chandrayaan-2 2164700 0.000301 \n",
"12 Hustlers_(2019_film) 1909821 0.000265 \n",
"13 The_Bahamas 1724124 0.000239 \n",
"14 Eli_Cohen 1718906 0.000239 \n",
"15 Storm_Area_51,_They_Can't_Stop_All_of_Us 1716245 0.000238 \n",
"16 Ad_Astra_(film) 1713237 0.000238 \n",
"17 6ix9ine 1599551 0.000222 \n",
"18 Bianca_Andreescu 1594039 0.000221 \n",
"19 Ric_Ocasek 1538688 0.000214 \n",
"20 2019_FIBA_Basketball_World_Cup 1536804 0.000213 \n",
"21 Unbelievable_(miniseries) 1530294 0.000213 \n",
"22 Line_shaft 1526395 0.000212 \n",
"23 Billie_Eilish 1496020 0.000208 \n",
"24 Mindhunter_(TV_series) 1479612 0.000206 \n",
"25 Solar_System 1455847 0.000202 \n",
"26 Freddie_Mercury 1449146 0.000201 \n",
"27 Rafael_Nadal 1426186 0.000198 \n",
"28 It_(2017_film) 1382100 0.000192 \n",
"29 United_States 1370054 0.000190 \n",
"30 List_of_Bollywood_films_of_2019 1359921 0.000189 \n",
"31 John_Bercow 1342478 0.000186 \n",
"32 Hurricane_Dorian 1302209 0.000181 \n",
"33 Peaky_Blinders_(TV_series) 1286720 0.000179 \n",
"34 Elton_John 1253440 0.000174 \n",
"35 Wayne_Williams 1238887 0.000172 \n",
"36 Once_Upon_a_Time_in_Hollywood 1229706 0.000171 \n",
"37 Donald_Trump 1217332 0.000169 \n",
"38 Joaquin_Phoenix 1206288 0.000168 \n",
"39 Moving_average 1186636 0.000165 \n",
"40 Apple_Network_Server 1157855 0.000161 \n",
"41 Eddie_Money 1151955 0.000160 \n",
"42 Sylvester_Stallone 1141540 0.000159 \n",
"43 Judy_Garland 1128921 0.000157 \n",
"44 Elizabeth_II 1117237 0.000155 \n",
"45 Atlanta_murders_of_1979–1981 1116224 0.000155 \n",
"46 YouTube 1114222 0.000155 \n",
"47 Charles_Manson 1099406 0.000153 \n",
"48 Clash_of_Champions_(2019) 1097502 0.000152 \n",
"49 Clint_Eastwood 1074402 0.000149 \n",
"\n",
" predicted \n",
"0 Internet culture \n",
"1 Language and literature \n",
"2 Broadcasting \n",
"3 Entertainment \n",
"4 Time \n",
"5 History and society \n",
"6 Entertainment \n",
"7 Visual arts \n",
"8 Politics and government \n",
"9 Sports \n",
"10 Mathematics \n",
"11 Space \n",
"12 Entertainment \n",
"13 The_Bahamas \n",
"14 Military and warfare \n",
"15 Internet culture \n",
"16 Entertainment \n",
"17 Internet culture \n",
"18 Sports \n",
"19 Performing arts \n",
"20 Sports \n",
"21 Broadcasting \n",
"22 Technology \n",
"23 Performing arts \n",
"24 Broadcasting \n",
"25 Space \n",
"26 Performing arts \n",
"27 Sports \n",
"28 Entertainment \n",
"29 United_States \n",
"30 Entertainment \n",
"31 Politics and government \n",
"32 Politics and government \n",
"33 Broadcasting \n",
"34 History and society \n",
"35 History and society \n",
"36 Entertainment \n",
"37 Politics and government \n",
"38 Entertainment \n",
"39 Mathematics \n",
"40 Technology \n",
"41 Performing arts \n",
"42 Entertainment \n",
"43 History and society \n",
"44 Language and literature \n",
"45 History and society \n",
"46 Entertainment \n",
"47 History and society \n",
"48 Entertainment \n",
"49 Entertainment "
]
},
"execution_count": 61,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"enwiki_page_sept_summary"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### September Top 50 Topics Viewed\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The table below shows page views for the top 50 topics on English Wikipedia in September 2019. The Main Page is excluded from this table."
]
},
{
"cell_type": "code",
"execution_count": 65,
"metadata": {},
"outputs": [],
"source": [
"# Pageviews and share per predicted topic, excluding the Main Page, largest first.\n",
"# The aggregated columns are selected with a list: selecting several groupby columns\n",
"# with bare comma-separated keys is deprecated and rejected by newer pandas.\n",
"enwiki_topic_sept_summary = (enwiki_pv_topic_sept[enwiki_pv_topic_sept.page_title != 'Main_Page']\n",
"    .groupby('predicted', as_index = False)[['pageviews', 'proportion']]\n",
"    .sum()\n",
"    .sort_values(by='pageviews', ascending=False))"
]
},
{
"cell_type": "code",
"execution_count": 66,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 10 topics account for 62.49% of total page views in September.\n",
"Top 50 topics account for 92.03% of total page views in September.\n"
]
}
],
"source": [
"# Cumulative share of the N most-viewed topics (the summary is sorted by pageviews).\n",
"for top_n in (10, 50):\n",
"    share_pct = round(enwiki_topic_sept_summary['proportion'].head(top_n).sum() * 100, 2)\n",
"    print('Top ' + str(top_n) + ' topics account for ' + str(share_pct) + '% of total page views in September.')"
]
},
{
"cell_type": "code",
"execution_count": 67,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" predicted | \n",
" pageviews | \n",
" proportion | \n",
"
\n",
" \n",
" \n",
" \n",
" 148 | \n",
" Entertainment | \n",
" 951015501 | \n",
" 0.132101 | \n",
"
\n",
" \n",
" 424 | \n",
" Sports | \n",
" 651186372 | \n",
" 0.090453 | \n",
"
\n",
" \n",
" 339 | \n",
" Performing arts | \n",
" 582187240 | \n",
" 0.080869 | \n",
"
\n",
" \n",
" 75 | \n",
" Broadcasting | \n",
" 508963501 | \n",
" 0.070698 | \n",
"
\n",
" \n",
" 202 | \n",
" History and society | \n",
" 490649682 | \n",
" 0.068154 | \n",
"
\n",
" \n",
" 347 | \n",
" Politics and government | \n",
" 390908702 | \n",
" 0.054299 | \n",
"
\n",
" \n",
" 245 | \n",
" Language and literature | \n",
" 233216220 | \n",
" 0.032395 | \n",
"
\n",
" \n",
" 446 | \n",
" Technology | \n",
" 233007905 | \n",
" 0.032366 | \n",
"
\n",
" \n",
" 83 | \n",
" Business and economics | \n",
" 228801818 | \n",
" 0.031782 | \n",
"
\n",
" \n",
" 342 | \n",
" Philosophy and religion | \n",
" 228716606 | \n",
" 0.031770 | \n",
"
\n",
" \n",
" 56 | \n",
" Biology | \n",
" 220055114 | \n",
" 0.030567 | \n",
"
\n",
" \n",
" 278 | \n",
" Medicine | \n",
" 197890857 | \n",
" 0.027488 | \n",
"
\n",
" \n",
" 459 | \n",
" Transportation | \n",
" 189356967 | \n",
" 0.026303 | \n",
"
\n",
" \n",
" 282 | \n",
" Military and warfare | \n",
" 187945486 | \n",
" 0.026107 | \n",
"
\n",
" \n",
" 503 | \n",
" Visual arts | \n",
" 161411619 | \n",
" 0.022421 | \n",
"
\n",
" \n",
" 355 | \n",
" Regional geography | \n",
" 115483591 | \n",
" 0.016041 | \n",
"
\n",
" \n",
" 357 | \n",
" Regional society | \n",
" 110243831 | \n",
" 0.015313 | \n",
"
\n",
" \n",
" 167 | \n",
" Food and drink | \n",
" 99778456 | \n",
" 0.013860 | \n",
"
\n",
" \n",
" 215 | \n",
" Internet culture | \n",
" 99516902 | \n",
" 0.013823 | \n",
"
\n",
" \n",
" 429 | \n",
" Structures of note | \n",
" 80883092 | \n",
" 0.011235 | \n",
"
\n",
" \n",
" 142 | \n",
" Education | \n",
" 73448968 | \n",
" 0.010202 | \n",
"
\n",
" \n",
" 478 | \n",
" United States | \n",
" 59122893 | \n",
" 0.008212 | \n",
"
\n",
" \n",
" 130 | \n",
" Disambiguation | \n",
" 58367391 | \n",
" 0.008108 | \n",
"
\n",
" \n",
" 343 | \n",
" Physics | \n",
" 56172269 | \n",
" 0.007803 | \n",
"
\n",
" \n",
" 98 | \n",
" Chemistry | \n",
" 54165745 | \n",
" 0.007524 | \n",
"
\n",
" \n",
" 422 | \n",
" Space | \n",
" 43268636 | \n",
" 0.006010 | \n",
"
\n",
" \n",
" 272 | \n",
" Mathematics | \n",
" 41116650 | \n",
" 0.005711 | \n",
"
\n",
" \n",
" 182 | \n",
" Geosciences | \n",
" 27172192 | \n",
" 0.003774 | \n",
"
\n",
" \n",
" 279 | \n",
" Meteorology | \n",
" 22631546 | \n",
" 0.003144 | \n",
"
\n",
" \n",
" 393 | \n",
" Science | \n",
" 22147222 | \n",
" 0.003076 | \n",
"
\n",
" \n",
" 211 | \n",
" India | \n",
" 20943288 | \n",
" 0.002909 | \n",
"
\n",
" \n",
" 58 | \n",
" Bodies of water | \n",
" 18631407 | \n",
" 0.002588 | \n",
"
\n",
" \n",
" 455 | \n",
" Time | \n",
" 18066422 | \n",
" 0.002510 | \n",
"
\n",
" \n",
" 146 | \n",
" Engineering | \n",
" 16869893 | \n",
" 0.002343 | \n",
"
\n",
" \n",
" 294 | \n",
" Music | \n",
" 15193024 | \n",
" 0.002110 | \n",
"
\n",
" \n",
" 244 | \n",
" Landforms | \n",
" 14819421 | \n",
" 0.002058 | \n",
"
\n",
" \n",
" 277 | \n",
" Media | \n",
" 13913078 | \n",
" 0.001933 | \n",
"
\n",
" \n",
" 115 | \n",
" Crafts and hobbies | \n",
" 13450923 | \n",
" 0.001868 | \n",
"
\n",
" \n",
" 492 | \n",
" Unknown | \n",
" 12500400 | \n",
" 0.001736 | \n",
"
\n",
" \n",
" 267 | \n",
" Maps | \n",
" 9156431 | \n",
" 0.001272 | \n",
"
\n",
" \n",
" 90 | \n",
" Canada | \n",
" 7718540 | \n",
" 0.001072 | \n",
"
\n",
" \n",
" 36 | \n",
" Australia | \n",
" 7413482 | \n",
" 0.001030 | \n",
"
\n",
" \n",
" 33 | \n",
" Arts | \n",
" 6264661 | \n",
" 0.000870 | \n",
"
\n",
" \n",
" 168 | \n",
" France | \n",
" 5750101 | \n",
" 0.000799 | \n",
"
\n",
" \n",
" 214 | \n",
" Information science | \n",
" 5459799 | \n",
" 0.000758 | \n",
"
\n",
" \n",
" 184 | \n",
" Germany | \n",
" 5000979 | \n",
" 0.000695 | \n",
"
\n",
" \n",
" 223 | \n",
" Italy | \n",
" 4353786 | \n",
" 0.000605 | \n",
"
\n",
" \n",
" 178 | \n",
" Games and toys | \n",
" 4268883 | \n",
" 0.000593 | \n",
"
\n",
" \n",
" 371 | \n",
" Russia | \n",
" 3669942 | \n",
" 0.000510 | \n",
"
\n",
" \n",
" 100 | \n",
" China | \n",
" 3425639 | \n",
" 0.000476 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" predicted pageviews proportion\n",
"148 Entertainment 951015501 0.132101\n",
"424 Sports 651186372 0.090453\n",
"339 Performing arts 582187240 0.080869\n",
"75 Broadcasting 508963501 0.070698\n",
"202 History and society 490649682 0.068154\n",
"347 Politics and government 390908702 0.054299\n",
"245 Language and literature 233216220 0.032395\n",
"446 Technology 233007905 0.032366\n",
"83 Business and economics 228801818 0.031782\n",
"342 Philosophy and religion 228716606 0.031770\n",
"56 Biology 220055114 0.030567\n",
"278 Medicine 197890857 0.027488\n",
"459 Transportation 189356967 0.026303\n",
"282 Military and warfare 187945486 0.026107\n",
"503 Visual arts 161411619 0.022421\n",
"355 Regional geography 115483591 0.016041\n",
"357 Regional society 110243831 0.015313\n",
"167 Food and drink 99778456 0.013860\n",
"215 Internet culture 99516902 0.013823\n",
"429 Structures of note 80883092 0.011235\n",
"142 Education 73448968 0.010202\n",
"478 United States 59122893 0.008212\n",
"130 Disambiguation 58367391 0.008108\n",
"343 Physics 56172269 0.007803\n",
"98 Chemistry 54165745 0.007524\n",
"422 Space 43268636 0.006010\n",
"272 Mathematics 41116650 0.005711\n",
"182 Geosciences 27172192 0.003774\n",
"279 Meteorology 22631546 0.003144\n",
"393 Science 22147222 0.003076\n",
"211 India 20943288 0.002909\n",
"58 Bodies of water 18631407 0.002588\n",
"455 Time 18066422 0.002510\n",
"146 Engineering 16869893 0.002343\n",
"294 Music 15193024 0.002110\n",
"244 Landforms 14819421 0.002058\n",
"277 Media 13913078 0.001933\n",
"115 Crafts and hobbies 13450923 0.001868\n",
"492 Unknown 12500400 0.001736\n",
"267 Maps 9156431 0.001272\n",
"90 Canada 7718540 0.001072\n",
"36 Australia 7413482 0.001030\n",
"33 Arts 6264661 0.000870\n",
"168 France 5750101 0.000799\n",
"214 Information science 5459799 0.000758\n",
"184 Germany 5000979 0.000695\n",
"223 Italy 4353786 0.000605\n",
"178 Games and toys 4268883 0.000593\n",
"371 Russia 3669942 0.000510\n",
"100 China 3425639 0.000476"
]
},
"execution_count": 67,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"enwiki_topic_sept_summary.head(50)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## September & August 2019 Topic Data Comparison"
]
},
{
"cell_type": "code",
"execution_count": 72,
"metadata": {},
"outputs": [],
"source": [
"enwiki_pv_aug_all = hive.run([\n",
" \"SET mapreduce.map.memory.mb=4096\", \n",
" pageview_query.format(\n",
" year = 2019,\n",
" month = 8,\n",
" wiki = \"en.wikipedia\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 73,
"metadata": {},
"outputs": [],
"source": [
"# Add each page's share of total August pageviews, then order most-viewed first.\n",
"enwiki_pv_aug_all = (\n",
"    enwiki_pv_aug_all\n",
"    .assign(proportion=lambda df: df['pageviews'] / df['pageviews'].sum())\n",
"    .sort_values(by='pageviews', ascending=False)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 74,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Total page views in August: 7212202447\n"
]
}
],
"source": [
"# Report the August total across all pages.\n",
"print('Total page views in August: {}'.format(enwiki_pv_aug_all['pageviews'].sum()))"
]
},
{
"cell_type": "code",
"execution_count": 76,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of unqiue pages in August: 8813929\n"
]
}
],
"source": [
"# One row per page_id in the query result, so the row count is the number of pages.\n",
"print('Number of unique pages in August: ' + str(len(enwiki_pv_aug_all)))"
]
},
{
"cell_type": "code",
"execution_count": 69,
"metadata": {},
"outputs": [],
"source": [
"enwiki_pv_aug = hive.run([\n",
" \"SET mapreduce.map.memory.mb=4096\", \n",
" pageview_title_query.format(\n",
" year = 2019,\n",
" month = 8,\n",
" wiki = \"en.wikipedia\",\n",
" snapshot = \"2019-09\")\n",
"])"
]
},
{
"cell_type": "code",
"execution_count": 77,
"metadata": {},
"outputs": [],
"source": [
"# Attach the topic prediction to every page viewed in August; pages without a prediction are kept.\n",
"enwiki_pv_topic_aug = pd.merge(enwiki_pv_aug, topic, how='left', on='page_id')"
]
},
{
"cell_type": "code",
"execution_count": 79,
"metadata": {},
"outputs": [],
"source": [
"# Label pages that have no prediction as 'Unknown' and compute each row's share\n",
"# of the total August pageviews in the merged frame.\n",
"enwiki_pv_topic_aug = enwiki_pv_topic_aug.assign(\n",
"    predicted=lambda df: df['predicted'].fillna(value='Unknown'),\n",
"    proportion=lambda df: df['pageviews'] / df['pageviews'].sum(),\n",
")"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### August Top 50 Topics Viewed"
]
},
{
"cell_type": "code",
"execution_count": 83,
"metadata": {},
"outputs": [],
"source": [
"# Pageviews and share per predicted topic for August, excluding the Main Page, largest first.\n",
"# The aggregated columns are selected with a list: selecting several groupby columns\n",
"# with bare comma-separated keys is deprecated and rejected by newer pandas.\n",
"enwiki_topic_aug_summary = (enwiki_pv_topic_aug[enwiki_pv_topic_aug.page_title != 'Main_Page']\n",
"    .groupby('predicted', as_index = False)[['pageviews', 'proportion']]\n",
"    .sum()\n",
"    .sort_values(by='pageviews', ascending=False))"
]
},
{
"cell_type": "code",
"execution_count": 84,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Top 10 topics account for 62.48% of total page views in August\n",
"Top 50 topics account for 91.99% of total page views in August\n"
]
}
],
"source": [
"# Cumulative share of the N most-viewed topics (the summary is sorted by pageviews).\n",
"for top_n in (10, 50):\n",
"    share_pct = round(enwiki_topic_aug_summary['proportion'].head(top_n).sum() * 100, 2)\n",
"    print('Top ' + str(top_n) + ' topics account for ' + str(share_pct) + '% of total page views in August')"
]
},
{
"cell_type": "code",
"execution_count": 85,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" predicted | \n",
" pageviews | \n",
" proportion | \n",
"
\n",
" \n",
" \n",
" \n",
" 148 | \n",
" Entertainment | \n",
" 951015501 | \n",
" 0.132101 | \n",
"
\n",
" \n",
" 424 | \n",
" Sports | \n",
" 651186372 | \n",
" 0.090453 | \n",
"
\n",
" \n",
" 339 | \n",
" Performing arts | \n",
" 582187240 | \n",
" 0.080869 | \n",
"
\n",
" \n",
" 75 | \n",
" Broadcasting | \n",
" 508963501 | \n",
" 0.070698 | \n",
"
\n",
" \n",
" 202 | \n",
" History and society | \n",
" 490649682 | \n",
" 0.068154 | \n",
"
\n",
" \n",
" 347 | \n",
" Politics and government | \n",
" 390908702 | \n",
" 0.054299 | \n",
"
\n",
" \n",
" 245 | \n",
" Language and literature | \n",
" 233216220 | \n",
" 0.032395 | \n",
"
\n",
" \n",
" 446 | \n",
" Technology | \n",
" 233007905 | \n",
" 0.032366 | \n",
"
\n",
" \n",
" 83 | \n",
" Business and economics | \n",
" 228801818 | \n",
" 0.031782 | \n",
"
\n",
" \n",
" 342 | \n",
" Philosophy and religion | \n",
" 228716606 | \n",
" 0.031770 | \n",
"
\n",
" \n",
" 56 | \n",
" Biology | \n",
" 220055114 | \n",
" 0.030567 | \n",
"
\n",
" \n",
" 278 | \n",
" Medicine | \n",
" 197890857 | \n",
" 0.027488 | \n",
"
\n",
" \n",
" 459 | \n",
" Transportation | \n",
" 189356967 | \n",
" 0.026303 | \n",
"
\n",
" \n",
" 282 | \n",
" Military and warfare | \n",
" 187945486 | \n",
" 0.026107 | \n",
"
\n",
" \n",
" 503 | \n",
" Visual arts | \n",
" 161411619 | \n",
" 0.022421 | \n",
"
\n",
" \n",
" 355 | \n",
" Regional geography | \n",
" 115483591 | \n",
" 0.016041 | \n",
"
\n",
" \n",
" 357 | \n",
" Regional society | \n",
" 110243831 | \n",
" 0.015313 | \n",
"
\n",
" \n",
" 167 | \n",
" Food and drink | \n",
" 99778456 | \n",
" 0.013860 | \n",
"
\n",
" \n",
" 215 | \n",
" Internet culture | \n",
" 99516902 | \n",
" 0.013823 | \n",
"
\n",
" \n",
" 429 | \n",
" Structures of note | \n",
" 80883092 | \n",
" 0.011235 | \n",
"
\n",
" \n",
" 142 | \n",
" Education | \n",
" 73448968 | \n",
" 0.010202 | \n",
"
\n",
" \n",
" 478 | \n",
" United States | \n",
" 59122893 | \n",
" 0.008212 | \n",
"
\n",
" \n",
" 130 | \n",
" Disambiguation | \n",
" 58367391 | \n",
" 0.008108 | \n",
"
\n",
" \n",
" 343 | \n",
" Physics | \n",
" 56172269 | \n",
" 0.007803 | \n",
"
\n",
" \n",
" 98 | \n",
" Chemistry | \n",
" 54165745 | \n",
" 0.007524 | \n",
"
\n",
" \n",
" 422 | \n",
" Space | \n",
" 43268636 | \n",
" 0.006010 | \n",
"
\n",
" \n",
" 272 | \n",
" Mathematics | \n",
" 41116650 | \n",
" 0.005711 | \n",
"
\n",
" \n",
" 182 | \n",
" Geosciences | \n",
" 27172192 | \n",
" 0.003774 | \n",
"
\n",
" \n",
" 279 | \n",
" Meteorology | \n",
" 22631546 | \n",
" 0.003144 | \n",
"
\n",
" \n",
" 393 | \n",
" Science | \n",
" 22147222 | \n",
" 0.003076 | \n",
"
\n",
" \n",
" 211 | \n",
" India | \n",
" 20943288 | \n",
" 0.002909 | \n",
"
\n",
" \n",
" 58 | \n",
" Bodies of water | \n",
" 18631407 | \n",
" 0.002588 | \n",
"
\n",
" \n",
" 455 | \n",
" Time | \n",
" 18066422 | \n",
" 0.002510 | \n",
"
\n",
" \n",
" 146 | \n",
" Engineering | \n",
" 16869893 | \n",
" 0.002343 | \n",
"
\n",
" \n",
" 294 | \n",
" Music | \n",
" 15193024 | \n",
" 0.002110 | \n",
"
\n",
" \n",
" 244 | \n",
" Landforms | \n",
" 14819421 | \n",
" 0.002058 | \n",
"
\n",
" \n",
" 277 | \n",
" Media | \n",
" 13913078 | \n",
" 0.001933 | \n",
"
\n",
" \n",
" 115 | \n",
" Crafts and hobbies | \n",
" 13450923 | \n",
" 0.001868 | \n",
"
\n",
" \n",
" 492 | \n",
" Unknown | \n",
" 12500400 | \n",
" 0.001736 | \n",
"
\n",
" \n",
" 267 | \n",
" Maps | \n",
" 9156431 | \n",
" 0.001272 | \n",
"
\n",
" \n",
" 90 | \n",
" Canada | \n",
" 7718540 | \n",
" 0.001072 | \n",
"
\n",
" \n",
" 36 | \n",
" Australia | \n",
" 7413482 | \n",
" 0.001030 | \n",
"
\n",
" \n",
" 33 | \n",
" Arts | \n",
" 6264661 | \n",
" 0.000870 | \n",
"
\n",
" \n",
" 168 | \n",
" France | \n",
" 5750101 | \n",
" 0.000799 | \n",
"
\n",
" \n",
" 214 | \n",
" Information science | \n",
" 5459799 | \n",
" 0.000758 | \n",
"
\n",
" \n",
" 184 | \n",
" Germany | \n",
" 5000979 | \n",
" 0.000695 | \n",
"
\n",
" \n",
" 223 | \n",
" Italy | \n",
" 4353786 | \n",
" 0.000605 | \n",
"
\n",
" \n",
" 178 | \n",
" Games and toys | \n",
" 4268883 | \n",
" 0.000593 | \n",
"
\n",
" \n",
" 371 | \n",
" Russia | \n",
" 3669942 | \n",
" 0.000510 | \n",
"
\n",
" \n",
" 100 | \n",
" China | \n",
" 3425639 | \n",
" 0.000476 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" predicted pageviews proportion\n",
"148 Entertainment 951015501 0.132101\n",
"424 Sports 651186372 0.090453\n",
"339 Performing arts 582187240 0.080869\n",
"75 Broadcasting 508963501 0.070698\n",
"202 History and society 490649682 0.068154\n",
"347 Politics and government 390908702 0.054299\n",
"245 Language and literature 233216220 0.032395\n",
"446 Technology 233007905 0.032366\n",
"83 Business and economics 228801818 0.031782\n",
"342 Philosophy and religion 228716606 0.031770\n",
"56 Biology 220055114 0.030567\n",
"278 Medicine 197890857 0.027488\n",
"459 Transportation 189356967 0.026303\n",
"282 Military and warfare 187945486 0.026107\n",
"503 Visual arts 161411619 0.022421\n",
"355 Regional geography 115483591 0.016041\n",
"357 Regional society 110243831 0.015313\n",
"167 Food and drink 99778456 0.013860\n",
"215 Internet culture 99516902 0.013823\n",
"429 Structures of note 80883092 0.011235\n",
"142 Education 73448968 0.010202\n",
"478 United States 59122893 0.008212\n",
"130 Disambiguation 58367391 0.008108\n",
"343 Physics 56172269 0.007803\n",
"98 Chemistry 54165745 0.007524\n",
"422 Space 43268636 0.006010\n",
"272 Mathematics 41116650 0.005711\n",
"182 Geosciences 27172192 0.003774\n",
"279 Meteorology 22631546 0.003144\n",
"393 Science 22147222 0.003076\n",
"211 India 20943288 0.002909\n",
"58 Bodies of water 18631407 0.002588\n",
"455 Time 18066422 0.002510\n",
"146 Engineering 16869893 0.002343\n",
"294 Music 15193024 0.002110\n",
"244 Landforms 14819421 0.002058\n",
"277 Media 13913078 0.001933\n",
"115 Crafts and hobbies 13450923 0.001868\n",
"492 Unknown 12500400 0.001736\n",
"267 Maps 9156431 0.001272\n",
"90 Canada 7718540 0.001072\n",
"36 Australia 7413482 0.001030\n",
"33 Arts 6264661 0.000870\n",
"168 France 5750101 0.000799\n",
"214 Information science 5459799 0.000758\n",
"184 Germany 5000979 0.000695\n",
"223 Italy 4353786 0.000605\n",
"178 Games and toys 4268883 0.000593\n",
"371 Russia 3669942 0.000510\n",
"100 China 3425639 0.000476"
]
},
"execution_count": 85,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"enwiki_topic_sept_summary.head(50)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Top Topics Rank Comparison September vs. August"
]
},
{
"cell_type": "code",
"execution_count": 86,
"metadata": {},
"outputs": [],
"source": [
"enwiki_topic_sept_summary[\"sept_rank\"] = enwiki_topic_sept_summary[\"proportion\"].rank(ascending=0) \n",
"enwiki_topic_aug_summary[\"aug_rank\"] = enwiki_topic_aug_summary[\"proportion\"].rank(ascending=0) "
]
},
{
"cell_type": "code",
"execution_count": 87,
"metadata": {},
"outputs": [],
"source": [
"topic_rank = enwiki_topic_sept_summary.merge(enwiki_topic_aug_summary, how = 'left', on = 'predicted')\n",
"topic_rank = topic_rank.rename(columns={'predicted': 'topic', 'proportion_x': 'proportion_sept','proportion_y': 'proportion_aug','pageviews_x':'pageviews_sept','pageviews_y':'pageviews_aug'})"
]
},
{
"cell_type": "code",
"execution_count": 88,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" topic | \n",
" proportion_sept | \n",
" proportion_aug | \n",
" sept_rank | \n",
" aug_rank | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" Entertainment | \n",
" 0.132101 | \n",
" 0.138937 | \n",
" 1.0 | \n",
" 1.0 | \n",
"
\n",
" \n",
" 1 | \n",
" Sports | \n",
" 0.090453 | \n",
" 0.087186 | \n",
" 2.0 | \n",
" 2.0 | \n",
"
\n",
" \n",
" 2 | \n",
" Performing arts | \n",
" 0.080869 | \n",
" 0.081190 | \n",
" 3.0 | \n",
" 3.0 | \n",
"
\n",
" \n",
" 3 | \n",
" Broadcasting | \n",
" 0.070698 | \n",
" 0.071853 | \n",
" 4.0 | \n",
" 4.0 | \n",
"
\n",
" \n",
" 4 | \n",
" History and society | \n",
" 0.068154 | \n",
" 0.069659 | \n",
" 5.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
" 5 | \n",
" Politics and government | \n",
" 0.054299 | \n",
" 0.052139 | \n",
" 6.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 6 | \n",
" Language and literature | \n",
" 0.032395 | \n",
" 0.032178 | \n",
" 7.0 | \n",
" 7.0 | \n",
"
\n",
" \n",
" 7 | \n",
" Technology | \n",
" 0.032366 | \n",
" 0.030678 | \n",
" 8.0 | \n",
" 9.0 | \n",
"
\n",
" \n",
" 8 | \n",
" Business and economics | \n",
" 0.031782 | \n",
" 0.030793 | \n",
" 9.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 9 | \n",
" Philosophy and religion | \n",
" 0.031770 | \n",
" 0.030154 | \n",
" 10.0 | \n",
" 11.0 | \n",
"
\n",
" \n",
" 10 | \n",
" Biology | \n",
" 0.030567 | \n",
" 0.030221 | \n",
" 11.0 | \n",
" 10.0 | \n",
"
\n",
" \n",
" 11 | \n",
" Medicine | \n",
" 0.027488 | \n",
" 0.025845 | \n",
" 12.0 | \n",
" 14.0 | \n",
"
\n",
" \n",
" 12 | \n",
" Transportation | \n",
" 0.026303 | \n",
" 0.027177 | \n",
" 13.0 | \n",
" 12.0 | \n",
"
\n",
" \n",
" 13 | \n",
" Military and warfare | \n",
" 0.026107 | \n",
" 0.026282 | \n",
" 14.0 | \n",
" 13.0 | \n",
"
\n",
" \n",
" 14 | \n",
" Visual arts | \n",
" 0.022421 | \n",
" 0.023707 | \n",
" 15.0 | \n",
" 15.0 | \n",
"
\n",
" \n",
" 15 | \n",
" Regional geography | \n",
" 0.016041 | \n",
" 0.017046 | \n",
" 16.0 | \n",
" 16.0 | \n",
"
\n",
" \n",
" 16 | \n",
" Regional society | \n",
" 0.015313 | \n",
" 0.016365 | \n",
" 17.0 | \n",
" 17.0 | \n",
"
\n",
" \n",
" 17 | \n",
" Food and drink | \n",
" 0.013860 | \n",
" 0.014437 | \n",
" 18.0 | \n",
" 19.0 | \n",
"
\n",
" \n",
" 18 | \n",
" Internet culture | \n",
" 0.013823 | \n",
" 0.014654 | \n",
" 19.0 | \n",
" 18.0 | \n",
"
\n",
" \n",
" 19 | \n",
" Structures of note | \n",
" 0.011235 | \n",
" 0.011200 | \n",
" 20.0 | \n",
" 20.0 | \n",
"
\n",
" \n",
" 20 | \n",
" Education | \n",
" 0.010202 | \n",
" 0.009655 | \n",
" 21.0 | \n",
" 21.0 | \n",
"
\n",
" \n",
" 21 | \n",
" United States | \n",
" 0.008212 | \n",
" 0.008730 | \n",
" 22.0 | \n",
" 22.0 | \n",
"
\n",
" \n",
" 22 | \n",
" Disambiguation | \n",
" 0.008108 | \n",
" 0.008134 | \n",
" 23.0 | \n",
" 23.0 | \n",
"
\n",
" \n",
" 23 | \n",
" Physics | \n",
" 0.007803 | \n",
" 0.006779 | \n",
" 24.0 | \n",
" 24.0 | \n",
"
\n",
" \n",
" 24 | \n",
" Chemistry | \n",
" 0.007524 | \n",
" 0.006209 | \n",
" 25.0 | \n",
" 25.0 | \n",
"
\n",
" \n",
" 25 | \n",
" Space | \n",
" 0.006010 | \n",
" 0.005227 | \n",
" 26.0 | \n",
" 26.0 | \n",
"
\n",
" \n",
" 26 | \n",
" Mathematics | \n",
" 0.005711 | \n",
" 0.004364 | \n",
" 27.0 | \n",
" 27.0 | \n",
"
\n",
" \n",
" 27 | \n",
" Geosciences | \n",
" 0.003774 | \n",
" 0.003477 | \n",
" 28.0 | \n",
" 28.0 | \n",
"
\n",
" \n",
" 28 | \n",
" Meteorology | \n",
" 0.003144 | \n",
" 0.002263 | \n",
" 29.0 | \n",
" 35.0 | \n",
"
\n",
" \n",
" 29 | \n",
" Science | \n",
" 0.003076 | \n",
" 0.002795 | \n",
" 30.0 | \n",
" 32.0 | \n",
"
\n",
" \n",
" 30 | \n",
" India | \n",
" 0.002909 | \n",
" 0.003165 | \n",
" 31.0 | \n",
" 29.0 | \n",
"
\n",
" \n",
" 31 | \n",
" Bodies of water | \n",
" 0.002588 | \n",
" 0.002971 | \n",
" 32.0 | \n",
" 30.0 | \n",
"
\n",
" \n",
" 32 | \n",
" Time | \n",
" 0.002510 | \n",
" 0.002395 | \n",
" 33.0 | \n",
" 33.0 | \n",
"
\n",
" \n",
" 33 | \n",
" Engineering | \n",
" 0.002343 | \n",
" 0.002810 | \n",
" 34.0 | \n",
" 31.0 | \n",
"
\n",
" \n",
" 34 | \n",
" Music | \n",
" 0.002110 | \n",
" 0.002175 | \n",
" 35.0 | \n",
" 37.0 | \n",
"
\n",
" \n",
" 35 | \n",
" Landforms | \n",
" 0.002058 | \n",
" 0.002204 | \n",
" 36.0 | \n",
" 36.0 | \n",
"
\n",
" \n",
" 36 | \n",
" Media | \n",
" 0.001933 | \n",
" 0.001866 | \n",
" 37.0 | \n",
" 39.0 | \n",
"
\n",
" \n",
" 37 | \n",
" Crafts and hobbies | \n",
" 0.001868 | \n",
" 0.001894 | \n",
" 38.0 | \n",
" 38.0 | \n",
"
\n",
" \n",
" 38 | \n",
" Unknown | \n",
" 0.001736 | \n",
" 0.002364 | \n",
" 39.0 | \n",
" 34.0 | \n",
"
\n",
" \n",
" 39 | \n",
" Maps | \n",
" 0.001272 | \n",
" 0.001173 | \n",
" 40.0 | \n",
" 40.0 | \n",
"
\n",
" \n",
" 40 | \n",
" Canada | \n",
" 0.001072 | \n",
" 0.001172 | \n",
" 41.0 | \n",
" 41.0 | \n",
"
\n",
" \n",
" 41 | \n",
" Australia | \n",
" 0.001030 | \n",
" 0.001022 | \n",
" 42.0 | \n",
" 42.0 | \n",
"
\n",
" \n",
" 42 | \n",
" Arts | \n",
" 0.000870 | \n",
" 0.000856 | \n",
" 43.0 | \n",
" 44.0 | \n",
"
\n",
" \n",
" 43 | \n",
" France | \n",
" 0.000799 | \n",
" 0.000862 | \n",
" 44.0 | \n",
" 43.0 | \n",
"
\n",
" \n",
" 44 | \n",
" Information science | \n",
" 0.000758 | \n",
" 0.000655 | \n",
" 45.0 | \n",
" 46.0 | \n",
"
\n",
" \n",
" 45 | \n",
" Germany | \n",
" 0.000695 | \n",
" 0.000740 | \n",
" 46.0 | \n",
" 45.0 | \n",
"
\n",
" \n",
" 46 | \n",
" Italy | \n",
" 0.000605 | \n",
" 0.000627 | \n",
" 47.0 | \n",
" 47.0 | \n",
"
\n",
" \n",
" 47 | \n",
" Games and toys | \n",
" 0.000593 | \n",
" 0.000594 | \n",
" 48.0 | \n",
" 48.0 | \n",
"
\n",
" \n",
" 48 | \n",
" Russia | \n",
" 0.000510 | \n",
" 0.000516 | \n",
" 49.0 | \n",
" 49.0 | \n",
"
\n",
" \n",
" 49 | \n",
" China | \n",
" 0.000476 | \n",
" 0.000515 | \n",
" 50.0 | \n",
" 50.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" topic proportion_sept proportion_aug sept_rank \\\n",
"0 Entertainment 0.132101 0.138937 1.0 \n",
"1 Sports 0.090453 0.087186 2.0 \n",
"2 Performing arts 0.080869 0.081190 3.0 \n",
"3 Broadcasting 0.070698 0.071853 4.0 \n",
"4 History and society 0.068154 0.069659 5.0 \n",
"5 Politics and government 0.054299 0.052139 6.0 \n",
"6 Language and literature 0.032395 0.032178 7.0 \n",
"7 Technology 0.032366 0.030678 8.0 \n",
"8 Business and economics 0.031782 0.030793 9.0 \n",
"9 Philosophy and religion 0.031770 0.030154 10.0 \n",
"10 Biology 0.030567 0.030221 11.0 \n",
"11 Medicine 0.027488 0.025845 12.0 \n",
"12 Transportation 0.026303 0.027177 13.0 \n",
"13 Military and warfare 0.026107 0.026282 14.0 \n",
"14 Visual arts 0.022421 0.023707 15.0 \n",
"15 Regional geography 0.016041 0.017046 16.0 \n",
"16 Regional society 0.015313 0.016365 17.0 \n",
"17 Food and drink 0.013860 0.014437 18.0 \n",
"18 Internet culture 0.013823 0.014654 19.0 \n",
"19 Structures of note 0.011235 0.011200 20.0 \n",
"20 Education 0.010202 0.009655 21.0 \n",
"21 United States 0.008212 0.008730 22.0 \n",
"22 Disambiguation 0.008108 0.008134 23.0 \n",
"23 Physics 0.007803 0.006779 24.0 \n",
"24 Chemistry 0.007524 0.006209 25.0 \n",
"25 Space 0.006010 0.005227 26.0 \n",
"26 Mathematics 0.005711 0.004364 27.0 \n",
"27 Geosciences 0.003774 0.003477 28.0 \n",
"28 Meteorology 0.003144 0.002263 29.0 \n",
"29 Science 0.003076 0.002795 30.0 \n",
"30 India 0.002909 0.003165 31.0 \n",
"31 Bodies of water 0.002588 0.002971 32.0 \n",
"32 Time 0.002510 0.002395 33.0 \n",
"33 Engineering 0.002343 0.002810 34.0 \n",
"34 Music 0.002110 0.002175 35.0 \n",
"35 Landforms 0.002058 0.002204 36.0 \n",
"36 Media 0.001933 0.001866 37.0 \n",
"37 Crafts and hobbies 0.001868 0.001894 38.0 \n",
"38 Unknown 0.001736 0.002364 39.0 \n",
"39 Maps 0.001272 0.001173 40.0 \n",
"40 Canada 0.001072 0.001172 41.0 \n",
"41 Australia 0.001030 0.001022 42.0 \n",
"42 Arts 0.000870 0.000856 43.0 \n",
"43 France 0.000799 0.000862 44.0 \n",
"44 Information science 0.000758 0.000655 45.0 \n",
"45 Germany 0.000695 0.000740 46.0 \n",
"46 Italy 0.000605 0.000627 47.0 \n",
"47 Games and toys 0.000593 0.000594 48.0 \n",
"48 Russia 0.000510 0.000516 49.0 \n",
"49 China 0.000476 0.000515 50.0 \n",
"\n",
" aug_rank \n",
"0 1.0 \n",
"1 2.0 \n",
"2 3.0 \n",
"3 4.0 \n",
"4 5.0 \n",
"5 6.0 \n",
"6 7.0 \n",
"7 9.0 \n",
"8 8.0 \n",
"9 11.0 \n",
"10 10.0 \n",
"11 14.0 \n",
"12 12.0 \n",
"13 13.0 \n",
"14 15.0 \n",
"15 16.0 \n",
"16 17.0 \n",
"17 19.0 \n",
"18 18.0 \n",
"19 20.0 \n",
"20 21.0 \n",
"21 22.0 \n",
"22 23.0 \n",
"23 24.0 \n",
"24 25.0 \n",
"25 26.0 \n",
"26 27.0 \n",
"27 28.0 \n",
"28 35.0 \n",
"29 32.0 \n",
"30 29.0 \n",
"31 30.0 \n",
"32 33.0 \n",
"33 31.0 \n",
"34 37.0 \n",
"35 36.0 \n",
"36 39.0 \n",
"37 38.0 \n",
"38 34.0 \n",
"39 40.0 \n",
"40 41.0 \n",
"41 42.0 \n",
"42 44.0 \n",
"43 43.0 \n",
"44 46.0 \n",
"45 45.0 \n",
"46 47.0 \n",
"47 48.0 \n",
"48 49.0 \n",
"49 50.0 "
]
},
"execution_count": 88,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"topic_rank[['topic','proportion_sept','proportion_aug','sept_rank','aug_rank']].head(50)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"The changes in proportion and rank between September and August for top 50 topics are not very noticeable. "
]
},
{
"cell_type": "code",
"execution_count": 89,
"metadata": {},
"outputs": [],
"source": [
"topic_rank['rank_diff_abs'] = abs(topic_rank['sept_rank'] - topic_rank['aug_rank'])\n"
]
},
{
"cell_type": "code",
"execution_count": 91,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" topic | \n",
" proportion_sept | \n",
" proportion_aug | \n",
" sept_rank | \n",
" aug_rank | \n",
" rank_diff_abs | \n",
"
\n",
" \n",
" \n",
" \n",
" 59 | \n",
" The_Bahamas | \n",
" 0.000239 | \n",
" 0.000022 | \n",
" 60.0 | \n",
" 196.0 | \n",
" 136.0 | \n",
"
\n",
" \n",
" 99 | \n",
" Hong_Kong | \n",
" 0.000074 | \n",
" 0.000156 | \n",
" 100.0 | \n",
" 73.0 | \n",
" 27.0 | \n",
"
\n",
" \n",
" 78 | \n",
" predicted | \n",
" 0.000133 | \n",
" 0.000104 | \n",
" 79.0 | \n",
" 89.0 | \n",
" 10.0 | \n",
"
\n",
" \n",
" 73 | \n",
" Netherlands | \n",
" 0.000147 | \n",
" 0.000127 | \n",
" 74.0 | \n",
" 82.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 76 | \n",
" Syria | \n",
" 0.000139 | \n",
" 0.000120 | \n",
" 77.0 | \n",
" 85.0 | \n",
" 8.0 | \n",
"
\n",
" \n",
" 28 | \n",
" Meteorology | \n",
" 0.003144 | \n",
" 0.002263 | \n",
" 29.0 | \n",
" 35.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 69 | \n",
" Israel | \n",
" 0.000164 | \n",
" 0.000146 | \n",
" 70.0 | \n",
" 76.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 84 | \n",
" Denmark | \n",
" 0.000105 | \n",
" 0.000138 | \n",
" 85.0 | \n",
" 79.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 94 | \n",
" Saudi Arabia | \n",
" 0.000088 | \n",
" 0.000075 | \n",
" 95.0 | \n",
" 101.0 | \n",
" 6.0 | \n",
"
\n",
" \n",
" 86 | \n",
" Argentina | \n",
" 0.000104 | \n",
" 0.000100 | \n",
" 87.0 | \n",
" 92.0 | \n",
" 5.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" topic proportion_sept proportion_aug sept_rank aug_rank \\\n",
"59 The_Bahamas 0.000239 0.000022 60.0 196.0 \n",
"99 Hong_Kong 0.000074 0.000156 100.0 73.0 \n",
"78 predicted 0.000133 0.000104 79.0 89.0 \n",
"73 Netherlands 0.000147 0.000127 74.0 82.0 \n",
"76 Syria 0.000139 0.000120 77.0 85.0 \n",
"28 Meteorology 0.003144 0.002263 29.0 35.0 \n",
"69 Israel 0.000164 0.000146 70.0 76.0 \n",
"84 Denmark 0.000105 0.000138 85.0 79.0 \n",
"94 Saudi Arabia 0.000088 0.000075 95.0 101.0 \n",
"86 Argentina 0.000104 0.000100 87.0 92.0 \n",
"\n",
" rank_diff_abs \n",
"59 136.0 \n",
"99 27.0 \n",
"78 10.0 \n",
"73 8.0 \n",
"76 8.0 \n",
"28 6.0 \n",
"69 6.0 \n",
"84 6.0 \n",
"94 6.0 \n",
"86 5.0 "
]
},
"execution_count": 91,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"topic_rank[['topic','proportion_sept','proportion_aug','sept_rank','aug_rank','rank_diff_abs']].head(100).sort_values(by='rank_diff_abs', ascending=False).head(10)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"By looking at top 10 topics change in rank from Auguat to September, the topics related to \"Country/Region\" changes the most between two month."
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Top Topics Pageviews Comparison September vs. August"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compare changes in pageviews for top 10 topics between Spetember and August 2019."
]
},
{
"cell_type": "code",
"execution_count": 92,
"metadata": {},
"outputs": [],
"source": [
"## Load the RPython library so we can use R for graphs\n",
"\n",
"%load_ext rpy2.ipython"
]
},
{
"cell_type": "code",
"execution_count": 97,
"metadata": {},
"outputs": [],
"source": [
"%%R\n",
"library(ggplot2)\n",
"library (tidyverse)\n",
"library(data.table)"
]
},
{
"cell_type": "code",
"execution_count": 98,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAMAAABKCk6nAAAC+lBMVEUAAAAAv8QBAQECAgIDAwMEBAQFBQUGBgYHBwcICAgJCQkKCgoLCwsMDAwNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcYGBgZGRkaGhobGxscHBwdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUmJiYnJycoKCgpKSkqKiorKyssLCwtLS0uLi4vLy8xMTEyMjIzMzM0NDQ1NTU2NjY3Nzc4ODg5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9AQEBBQUFCQkJDQ0NERERGRkZHR0dISEhJSUlKSkpLS0tMTExNTU1OTk5PT09QUFBRUVFSUlJTU1NUVFRVVVVWVlZXV1dYWFhZWVlaWlpbW1tcXFxdXV1eXl5fX19gYGBhYWFiYmJjY2NkZGRlZWVmZmZnZ2doaGhpaWlqampra2tsbGxtbW1ubm5vb29wcHBxcXFycnJzc3N0dHR1dXV2dnZ3d3d4eHh5eXl6enp7e3t8fHx9fX1+fn5/f3+AgICBgYGCgoKDg4OEhISFhYWGhoaHh4eIiIiJiYmKioqLi4uMjIyNjY2Ojo6Pj4+QkJCRkZGSkpKTk5OUlJSVlZWWlpaXl5eYmJiampqbm5udnZ2enp6fn5+goKChoaGioqKjo6OkpKSlpaWmpqanp6eoqKipqamqqqqrq6usrKytra2urq6vr6+wsLCxsbGysrKzs7O0tLS1tbW2tra3t7e4uLi5ubm6urq7u7u8vLy9vb2+vr6/v7/AwMDBwcHCwsLDw8PExMTFxcXGxsbHx8fIyMjJycnKysrLy8vMzMzNzc3Ozs7Pz8/Q0NDR0dHS0tLT09PU1NTV1dXW1tbX19fY2NjZ2dna2trb29vc3Nzd3d3e3t7f39/g4ODh4eHi4uLj4+Pk5OTl5eXm5ubn5+fo6Ojp6enq6urr6+vs7Ozt7e3u7u7v7+/w8PDx8fHy8vLz8/P09PT19fX29vb39/f4dm34+Pj5+fn6+vr7+/v8/Pz9/f3+/v7///9zY+HcAAAe2klEQVR4nO2dC3hU5ZnHZy1UQbQC3tdbUbt1a3Er2trLrpfSrtqTcDEm4JUVLdfFYJWL2JZSUAtoi1UXLWG9gK4uSq2tVlpR1CIoahAEkYKCYDDkNplsJsnM+zx7zvm+c857JjM530wmM99M/r/H5Nze7/0m55dzMhDPnxCBoiaU7xcAehYILnJ6n+A8fcXzL++BppfeGVhSdIIDv6CuCrpxNgKGho/ZnnnvlGw9NhJUoq/gkCDFIWsRv23QoJlxa23bSa32gbqhB4P7BnVPMtvx136u9JpT898l9uLuPgvTG1c/8yv9B5W+lORV2Z8vXRHUQF/B1NWLs4/817k7d577kL19xVJ7cef4NJqqfulm3Z6ysYrFqSh7xPocO/2+02NpjfvhjdujNU9cmPRVmSwbE9RAd8Gt0044fpp5eYbuPm7w5Cg/QvTt54me/669/dY/WectevIH1pHYvC8PGhemM94nqiKqPtPZQdFJg4//VaiL7uau9jtOHbiI6KVv9DvtIW+u/UfTttGDjxp5wBwzYdDxd7uz1B5j3TNqjzuYbF7WZMgO6/Nz59L5f3BefsjrZXZw+rAxFv3r5YrzNciXKm8/289QOIf6Yr64O4Z/8snFPzVXh+/ZM/zn/AjRl8zTXTNQ7PjBU+anZaPsI4u/v/Pg1dNp0gP0yZGNdP9kZwf9bPieTy4OddHd3PXLi3YcvJnoxKdad9/gzfXZMXTOXyL1U8w7xJxL9+79gTfLhLvMw3fenHRe1qR/s/X50t/RssvJE+z0MnH6sDH2iCvXiR+zztfAXqpJuL/COdQX88Wdbl4O1ea3achcbj6DHyH6QhtRWx+x46/fND8N/Zt95KwPzEvuNHq2nO469iG6crWzw762qkNddDd3nVltHzxlycesbm
9Fhb3acLI5ZivR+94sW09tp/Yhu5LOy5r0swR/dGwLtRy70xPs9DJx+rAx9ox3/Eu/039S7zbnL9Wk+XCFc6gv5ovr10LU0s9clUvviP8Kpgv+Sn+8SBzpb92/vkBNp9LQ575DpzY5O0SzUBfd5S6Tt0Yc/ZXnZV0odNxVNbThkkGhkPnt1K9VNHGaXrqSVoxNPi9rMuRD89NP7Dd2t3qCnV4Wsg8bI4ltvu5ybzr2Uk22n65wDvWFX2NbzO/1hCuY/Qwmevbf6ZI/iiNf/bvYddET36RvrrrY23GG2WRz0itYdjd3faVaHo4/dwKbyxy8vLbjILvqnKYvfJsueDv5vKxJWZXp5VirwLqMret5n/8KdvqwMS51R3rNnZd6iL21rELhHOqL+eJuN39KXjLHXP3hnj0/+Ck/QvSg9y7aPC1frzpHHvn18C3R98qJ7jppCf3m5Lu9HXeYTYaHuuhu7pp/0UfWz+CK96PPncjmIjphVetHo60x5s/NH7JZ6Owlw1PMy5os/5Fp40f26qVV9N254Z0lrJeN6OOMkXsvfGJ/284fX+o1d17qsVusw5c/pnAO9cV8cS1Tjz9+aov95nHQxFZnv3gPGb914MAZcaf4kb6PyjGxJWcdNvRZok19a+izvu94O8w3rcctDHXR3dzVNvvkQfcQrfhqv2/8xa2z+MNXv3jqEmvMTYOO+8UXvab00BdeSDEva9J8zDY6T9x6V59P1Rf0O+1B1stG9HHGyFnXjBzUb8jkWq+581IXDzQrPijkv+jw07OvM93u7w5Je4oFl2Wnl++lXlY8f1WpkeCb9+36fmWWJk6/V7onAoLT7v7rk4+7sTlLE6ffq1gFgwyB4CIHgoscCC5yILjIgeAiR1/BTQc4kciBYFRqoo0KRc0KNR11wTV1CpN9Tp8HFzUeVJisI2GHfRohOBkQnAMgmAHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1IUHwP4BAILjIgeAiB4KLHAguciC4yIHgIgeCixwILnJ6TrBh4W0lHk26mrKXXEJw2vSg4C62lKwmqYbgtOlxwcbK8nEb7MuZ6uddOWWbueeZcSXi6jZKxr9tF4oic1Fxzesrxly3kdxabzQEZ0SP36KNR1vW/VjoXrimbeNUc/W+ekd/vHqiECyLzMUo82OCV+uNpvdmzHilldOR75NXCPjOWGtjNgXLRZRiJWJrrGm81FwNy6Obpoy0t70ic2F9lHq13mja+/TT7zdxov8HOuM7RZGYb7OprgcEiw9bcK13wPo0bn00YrhHvYX1wWqNlD+D830utcR3inry14V+weX7iRYvrj+wUB6wtsdWR5amEsxq5WgIViN3guXPYKHo8XKDGheNGb9OCra211x17YpUglmtHA3BauRMcE8AwcFAcJEDwUUOBBc5EFzkQHCRA8FFDgQXOcUkGE82BE4GwRYQrAsQzIBgUaRQA8G6AMEMCBZFCjUQrAsQzIBgUaRQA8G6AMEMCBZFCjUQrAsQzIBgUaRQA8G6AMEMCBZFCjUQrAsQzIBgUaRQA8G6AMGMXiA430/uZQgEpwSCIbgAgOCUQDAEFwAQnBIIhuACAIJTAsEFKXjfgqtHzXwz6aGEOC0ILkjB01bUt1XPTXoIgotBcPln0mZV2fT9RJ/NKptdw/LSXp886oYXILiABT87du7D71iCH21ePo9o3vLmqvksL23s5o6aJeayraGh9nNOoQr+PJBaqg0uaqoLrqnvSNiRF8HU+LcnJi23E9LCFUQVYWoaw/LSHr75AUs/vTRs2Er/uHw/yNc9cn2WLZrtz/l4F11fzgWHx7C8NNq/+tbFoqqoHh/tPbfoudXR8FNTrFt0xL5Fm4v5LC/tt3s7to6F4AIWvGHmqLE/293pTZZ1yEpA+9ONoye/AcEFLNghOEMagiG4cOh9goOBYAguHCC4MxAMwYUDBHcGgiG4cIDgzk
Bw7xKMJxsCJ4NgCwjWBQhmQLAoUqiBYF2AYAYEiyKFGgjWBQhmQLAoUqiBYF2AYAYEiyKFGgjWBQhmQLAoUqiBYF2AYAYEiyKFGgjWBQhmQLAoUqiBYF2AYEYvEJzv50DTAIKVgGAI1hQIVgKCIVhTIFgJCIZgTSlmwW+MC37wVxEIzrtgwxg5fYd/141bU1cHtvNvQnD+BVPb6mn+XSXx7rTzAcEaCKaW0UT1866cso2l1bH0OjJWVlzz+oox1220q42V5eM2EO2rLKtybBol498utqS7IhLcbl3BC9e0bZzK0up86XWPtqwbZX5MEILNtR8TzX20ebl7ucarJyZJuvtwyZKNEU57vq2lgfeqW6ORYEihJtoSXNMa9283dF+wMbLyQ1OKeemVsrQ6X3pdlGLWR6kQbK6VEJU3U1gK3jRlpG+sk3TXWXC+nw9Uh5/zAhcsFmNrvS0p2JdeJz8Md40JHrc+GjGKLOmO3zUL/RZts3hx/YGFTBJPr0sqeO5jkUecb47qyFJHcLEk3fFzXhSCGxeNGb+OCeXpdUkF760sW1omxq656toVjuBiSbrj57ywBWdO+6oZgTUQzCgwwUbp1I8CiyCYUWCClYBgBgRrBD/nEJwaCGZAsEbwcw7BqYFgBgRrBD/nEJwa/I/vDAgWRQo1EKwLEMyAYFGkUAPBugDBDAgWRQo1EKwLEMyAYFGkUAPBugDBDAgWRQo1EKwLEMyAYFGkUAPBugDBDAgWRQo1EKwLEMyAYFGkUAPBugDBjF4gON+PDKYg4JxDcGogmAHB+SLgnENwaiCYAcH5IuCcQ3BqIJgBwfki4JxDcGogmFEIgtONwINghhaCDcO4onJT6sOpdyc9BMEMPQQTRV+9uqvDaeyGYB/aCLYTz6yIO5nEIpPs9k+3s+0OzBlluPtEqJ2db5c0BA+CGXoINinbKmPrZNydTLKbJ7Lt5j/cSu4+GWrnZPD4Q/BeGjZspb97vk2mIHunL+s025+zfAWvmiVj62TcnUyyKw/b0WcVTVad3CdD7RzB/hC8yJ49NQc5Lfl+KDQZB4NoCAeWHKyj4JqD4frgmsaYf/vzHhBsh5Pat1gZdyeT7HyC5T4ZauelaCWE4BXA88HBd83iukWbV7AVTmoLknF3MsnO2jIXC5ZFyd0nQ+2sfLukIXgQzNBDsGGMvm2nFCzfZMkkO+dN1uwRhrtPhtpZ+XZJQ/AgmKGF4OzghuBBMKN4BHsheBDMKB7BHhDMgOC8EHzOITg1EMyA4LwQfM4hODUQzIDgvBB8ziE4NXiygQHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1AYIZECyKFGogWBcgmNELBOf7McLOQHD3gGAGBOcBCO4eEMyA4DwAwd0DghkQnAcgmJFuChpBsI8cCDbkJ8O/J53RQYf8RRDMyKFgJWldWFQugmBGbq9gFmEmoziMZ8YZE+NEbdd/KortqDMZbiYDOizESGeUyEUz/6ufd+WUbXZD1gSCGbkQbGP7YBFmMu/Myj6btZboxUWy2o46k+FmMgXNQo6Uo0Qumnlo4Zq2jVPtNafJ3qeffr+JE823zs40NwXS3BJcE6bgmqYWhckiMf92XcZXMIswk3lnVvZZ9aR4fPJuu1ZGnTnhZmE3+0qOlKNEbJb1HWN+65Taa06T92bMeKWV05HvJwkTaG2NR1sDaWsPrmklhZp2lcni/u3GjAWzCDOZd2YfnP3qmwtErYw6c8LNPMFypBzlCa51p/Ca6P34aPH9NokJZhFmMu/MPrh5yswdolZGnUnBMgXNQo6Uo0Qumnlo8eL6AwtFQ68JBDNyK5hFmDlvsuySmXNlrYw6k4LZmyw50nmTZeeimf81Lhozfp1o6DWBYIYWf9HRfu+ubDaBYIYOgo3SNVltAsEMHQRnGwhmQHCugeBuA8EMCM41EN
xtIJgBwbkGgrsNBDN6gWA82RA4GQRbQLAuQDADgkWRQg0E6wIEMyBYFCnUQLAuQDADgkWRQg0E6wIEMyBYFCnUQLAuQDADgkWRQg0E6wIEMyBYFCnUQLAuQDADgkWRQg0E60JeHx8NPp0Q3F0gmAHBEAzBEKwXEMyAYAiGYAjWCwhm9IjgdNPpMkiz6woIZmRJsGGMnL6DC/NyGBTIkmCnDQQzsiWY2lZPSzjT6toguPM5104wtYwmlqxiiDw7w0mlk0l1ojhFmt2+SnvVbtI4toGoYWyjiLKzAvFKnBHGyoprXl8x5rqNTtCdPCDi1iDYT9YEt1tXsBNfx2/RIpVOJtUJUqTZzRWrosmSJ4meuM+Nsruv3h1hLUaZHxO8oDt5wGryWknJ6g5OPLeCO4KhWHBNTKGmgxRqVBrFEhqFkwg2RlZ+6MXXccEitEwm1VmkTLMrb7ZXRZOd13d0XP93N8rOmlOOsBbWR6kXdCcPWE0atmzZU8dpyf0DhHVdE2sMKDBpbA6uqafgmrrmBoXJYv7t2qS3aAsvvi5RsEyqs0iZZucJtprMfGXtLF+UnTPC8FbZUZZXnPfHRwPuiIV5i7bx4usMET9nOKl0MqnOImWa3dzHIo8YbpN1t9zymhtll0IwOyonhOAEsiuYv8my4+cMJ5VOJtVZpEyz21tZtrTMbdIxblzMjbJLIZgdlRNCcAJ6/U1W+6oZWegCwQytBBulUz/KQhsIZmglOEtAMAOCIRiCIVgvIJgBwRAMwRCsF3iygQHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1AYIZECyKFGogWBcgmAHBokihBoJ1AYIZECyKFGogWBcgmNELBGfjmUGrEQTrAgQzIBiCIRiC9QKCGRAMwRAMwVnG8C3SBoIZ2RKc1bA6T7CRsnMXE0IwQ3PBqTtDcF4E+zLOZLCZm6lhH5M7ZYRZ50Ess8PLU+NxaKLciVmzi6zdrCEEM7J9BfOMMxls5mZW2sfkThlh1nkQC0bzbtE8Ds0pJ/fD3u00rF2/flc9pzUbgq1GbZH6YFoVamJNwTVNzcE1DaQwWbPKZDH/9sGUghMyzkTuldQgj8mdMsIsySAvN8sTzOPQnHImOOyV0Pqrr/5TOyeWjecFrUbxjvZgYgo1Ko06VCYjhRqVRh1x/3ZTSsEJGWfCZUmMooZ7zBFc6w5NGYzGBLM4NFYuO9u7WcMeeHzUaoRbdGLGmQw2u2ldy+OGe0zulBFmnQexYDQvT43Hoclya7fsbO9mDSGYkTXB9rsef8aZDDZbf335k4abfyZ3yggzi5TBaF6eGo9Dk+XWbtnZ3s0aQjCjZ/+iI2mwWXbSzroAghk9KjhpsFmW0s66AIIZWv5VZTeBYAYEQzAEQ7BeQDADgiEYgiFYLyCY0QsE48mGwMkg2AKCdQGCGRAsihRqIFgXIJgBwaJIoQaCdQGCGRAsihRqIFgXIJgBwaJIoQaCdQGCGRAsihRqIFgXIJgBwaJIoQaCdQGCGRAsihRqIFgXsvz4qNMIgnUBghkQDMEQLIBgXYBgBgRDMAQLIFgXIJhRCIKTRmLlLifLaQTBWcEQBLqE4AIVnMQdBCenkAXL3LoDc0YZbgaeXHw2q2x2jV0k1+zsvPcmxonarv8UghPQVLDMrZv/cCu5GXhOFN7y5qr5InFJrInsvFlriV5cRPT2xIkvtXFi3RXsNupoC0alJt4eXNOu0ogUalT6tMf928mC0LIsWObWVdhTyQw8uagIU9MYu0iuiWi16knx+OTd5mX95z9vb+REs/F0odWovaUxmFaFmlhzcE04ElzTRAqTRcLBNc0x/3ayKMNsCxa5dVIwuZF3Qmt4DF+T2XmzX31zgRiPx0cZmt6iZW7dgmXRToKtKLz5fE1m522eMnMHBHdCU8Eyt+7A7BFGouBOb7Jkdh7NnEsQ3AkdBaeNnZ3Xfu8uuQnBjGIQbG
fnGaVrnG0IZhSD4EQgmAHBEAzBEKwXEMyAYAiGYAjWCzzZwIBgUaRQA8G6AMEMCBZFCjUQrAsQzIBgUaRQA8G6AMEMCBZFCjV5EJzjE+fObm9BcDIgOAdAcIa4s9tbEJwMCM4BEJwh7uz2FgQnA4JzAARniDu7vQXByYDgHADBGeLObm9BcDIgOAdk+fFR5/lRCNaFYhOc+P8PxXrqxEFwXgTHITgBCM4QCIZgLYBgykwPBBeQ4DRwTfa04C6SkdICgtOja8GGYVxRuSlxTEauMhmUbEwvERyaMeDrb5mLQ4a8QBvOHjAjRNsvGjB0/QcntlDkxG1yi0749MXQS3uOo18eE5LG7DVx0G4RCjlHkgsmir56dQZq1GRlQm8RfH/zkvPMZezls2jYg833h+hfq1qfP4dK76MlI8jZumztZTMvf/kSOvKhiDw99po4KFoEXMGm4HUT3ASN1yePuuEFEakhcstkpJncLxdiZMn4t92y/dPLqqRgXwiak8jhD0FbWXHN6yvGXLfRnkgUss69RXCEmvvTA0P6hvpQf3M9RAPMa7EPvXZK+JTXydm649YzY2fe+p/02AUDfybG2WvioGgReIs2yra6gsdu7qhZIgSL3DIZaSb3y4UgXj3Rizez08xsfCFoTuyZPwTN3BplfojvK1HodG7YsmVPHael24Jlo7bmumBaFGpijcE1jfZkXQt+MHL/efSlP4R/HzKv4Ih5BX+vqtk68J1LvmvpEVurDr2TFhy61FzbMMA58eaaOCha9P9U7ncmr028glfNcgU/fPMD74g1mVsmI83kfrmw2DRlpHVAlpWH7bArC18ImhN75g9BM7esj1IRl2UXOp1fKylZ3cHpdH4yQDSKdQQTV6ghhUYxu6ZrwTMON38G/+KoE+eGaMPXDp92BO287EvWj9NnQs9aesTWx333076+5hUdOma+HGetiYOixS1HSJfO5OEEwdQymqgkRlFrff/qWxfz1CMZaSb3OwuTceujEa+MC+YhaF7smT8EjWUriULWOfuPj+p5iyZOdNH3KG0S/iSU/BZt9l49jeimdS2Pm+u/3duxdSwXLCPN5H65sD1WR5b6482kYF8Imhd75g9BY4JFIevcGwWH+pzzdrBP7w1zGoINY/RtO4nWX1/+pHm2/3Tj6MlvcMEy0kzulwuLNVddu8IrY2+yfCFoXuwZddqSs4hC1rmXCM4+SQVrCARnCATnRTAlvgHrsRMHwfkRHE0g3lMnDoLzIjie+Ody3KIhODMgGIK1AIKpV/3Cv3cKzgQIhmAtKLInG1IJzvQX/mKEtRGS4xzc2UVZDpWlR28RnOEv/MUIIViMc3BnF/1z5StteovgDH/hL0aY+mIhOc7BnV30z6m0dOgtgjP8hb8Y8Y9/jNznjHNwZxf9cyYsXXqL4Ax/4S9GrDzhqPnOOAd3dtE/p9LSobcI9n3RGf3CP2GcO3uS/jrRGwVn9gv/hHHu7J37a0UvEZx93NntLQhOBgTngCITTBCcQLEJzhnu7PYWBCcDgnMABGeIO7u9BcHJgOAcAMEZ4s5ub0FwMiA4B2T38dF8Cu4a/JsNNhAcPBkEW0CwLkAwA4IhGIIFEKwLEMyAYAhOV7CRZC2gMJ2S4FEQzMimYMMYOX2Ha8DoOcEB30IQzMiqYGqz4lhwBQdQwILtQCWZSmfw2BSZQ+eEqFSVTd8vC9+bGCdqu15kcaUKvzOeGVfiJOCJvvsq7YOdsvAgOIHsCm63r2CRSidOv8ymkzl0blJd8/J5TuGstUQvLpItUoTfGffVuwl4ou9ccdBIzMKjrfPnv9HCae+mYLdRrK0lmHaFmng0uCaqMhkp1LS1KkwW9283pBJsjKz80E2lM5zoMiubTubQOUl1YQpXOIXVk+LxybvtBinD7wwre00m4Im+5c32QSMxC492VVW9G+a0ZeHpQrtRR2s4mDaFmngkuCaiMhkp1LSqTBbzb9envkU7C4MJtrLpZA6dk1QnBY
vC2a++uUCMTBl+Zy9kAl4SwTz9rvMtOguC7Ua4RScTLLPpZA6dk1RnLua5hZunzNwhRqYMv7MXMgFP3qIfizzimwGCk9PTguVbIJlDl/AmSxbPnCsbpAy/sxcyAU8U7K0sW1qWLP0Ogv3k5W+y/H/Sab93VyZN2lfNSHEEghn5F2yUrsmoR+nUj1IcgmBGL/i7aAgOnAyCLSBYFyCYAcEQDMEQrBcQzOgFggv4yYaugWAbCA6eDIItIFgXIJgBwaJIoQaCdQGCGRAsihRqIFgXIJgBwaJIoQaCdQGCGRAsihRqIFgXIJhR/IL37Qv+Cg+EFWr27s9So08UGh1sUGi0uya4pkFB8Gd7EnbYp1FfwX5++5ssNar8fZYaGVuy0ycyLJqdRtUjk+2F4EyB4Kzyl4z+p80kPPFulhrdvy87faLzs/RPCe99INneQhEMMgSCi5wCEdw4Z9ScpkwHr5t0xcxd1mOThuG1Ml4x19cqPL3O8HfIvJHVpzwbL4g1lIsrKjf5DxWI4GVLGpcsy3TwXbta/3eK+ySGbGVMilN8UpqCfR260YjoyQey8YISXpr5Kfrq1f4DBSJ4wif08YRujG+scM+nbGVMe4XWTstIsOzQjUYUv/HDbLyghJdmCV6XcJoKRHBZlKJl3Ri/con55VeMXXDAbWW8M6lj0jtpCvZ16EYjem9qp3YZ9fFemv3JpGyr/0DvEPxCpf2PdtY/fLt3Pulnd/1cJUHGB+/QnUaLV3dql9kLcnCv4FWz/AcKRHD3btGrJ4u8Cmou8+6ItGvErvTPJ+vQjUbhisZsvSCJI9gO0OEUiODfdedN1hMTD4qV8PKZbitxJtM9n7xDNxo996tsvSAH9wq2AnQ4BSK48faRtzdmOtj60WS0mIvyufvdVpmcT3+HbjSatqlzu+4INsSfuMxPo2/b6T9UIIJBpkBwkQPBRQ4EFzkQXORAcCh06PlvdV2RUdfO/1B3Xs41BIeo9Tff6JnGOqDHq8gn5hkI97eWhwx5gWjHeQNmmHtqjCOHrv988AGiA4NrnW168d+o5vAa+vI2evrsw057SI6fMeD8nU4Fhe45+RCv8a4Lj7hot1ti7vhk+GE5PuMQHKKouIJjL59FdPmchlnmORlT1fr8OXTDfKJfjid3u+Go2D197m06Kk6D17btvkGON0eUOBUUGl/jNSZjVsOMEW6JuWPELc05//J6O+bP4GEbiF4cemioD9FR9VRnnpOjzZ+ffeidk9rbTnqX3G362pahd53ztwuJpp878c/O+DqqG+hWmFtuY6KBdXRwkFti7TiY8y8v1xNqhzwDJ69uaQy5ggfvtXf+64rHL7Qr5PZ/TPsa/fOkSnNt56+/c5UcL+zJipCvsSm4DoLzjDwDR7/cOM1cveynjbebi6uuqvl4DNH/fOtbT9kVcntp33vpnr6PEt20ve21wXL8nEbr/isr/IIN89gIt8TcMfK2SH6+vF6MPANVx5w411zdPmzAtCOIascOGvIkUfspp3TYFXK7+rBaqj1si2n6jH5nr5LjxTsoWeEX/PcLByS8ybr4i3iTlWeii76XVr3CGcznSYbgBEJ9znk7vQFZKekxILjIgeAiB4KLHAguciC4yPl/tjplKwG0pJYAAAAASUVORK5CYII=\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%R -i topic_rank\n",
"\n",
"data.table(topic_rank)[1:10] %>%\n",
" melt(id.vars = c(\"topic\"), measure.vars = c(\"pageviews_sept\", \"pageviews_aug\"),variable.name = \"month\", value.name = 'count') %>%\n",
" ggplot(aes(fill=month, y=count, x=reorder(topic,count))) + \n",
" geom_bar(position=\"dodge\", stat=\"identity\",width = 0.6) + coord_flip() +\n",
" scale_y_continuous(\"Pageviews per Topic\",\n",
" labels = polloi::compress) +\n",
" theme(axis.title.y=element_blank(),\n",
" axis.text=element_text(size=11),\n",
" legend.position = c(0.8, 0.15), legend.title = element_blank(),legend.text =element_text( hjust = 0,size = 10))+\n",
" labs(color = \"type\",\n",
" title = \"Top 10 Viewed Topics Pageviews (Aug vs. Sept)\")\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Compare topics with top 10 changes in pageview percentage between Spetember and August 2019."
]
},
{
"cell_type": "code",
"execution_count": 99,
"metadata": {},
"outputs": [],
"source": [
"topic_rank['pv_diff_pct'] = abs(topic_rank['pageviews_aug'] / topic_rank['pageviews_sept']-1)\n"
]
},
{
"cell_type": "code",
"execution_count": 105,
"metadata": {},
"outputs": [],
"source": [
"pv_diff = topic_rank[['topic','pageviews_sept','pageviews_aug','pv_diff_pct']].head(50).sort_values(by='pv_diff_pct', ascending=False).head(10)\n"
]
},
{
"cell_type": "code",
"execution_count": 106,
"metadata": {},
"outputs": [
{
"data": {
"image/png": "iVBORw0KGgoAAAANSUhEUgAAAeAAAAHgCAMAAABKCk6nAAAC91BMVEUAAAAAv8QBAQECAgIDAwMEBAQFBQUGBgYHBwcICAgJCQkKCgoLCwsMDAwNDQ0ODg4PDw8QEBARERESEhITExMUFBQVFRUWFhYXFxcYGBgZGRkaGhobGxscHBwdHR0eHh4fHx8gICAhISEiIiIjIyMkJCQlJSUnJycoKCgpKSkqKiorKyssLCwtLS0uLi4vLy8wMDAxMTEyMjIzMzM0NDQ1NTU2NjY3Nzc4ODg5OTk6Ojo7Ozs8PDw9PT0+Pj4/Pz9AQEBBQUFCQkJDQ0NERERGRkZHR0dJSUlKSkpLS0tMTExNTU1OTk5PT09QUFBRUVFSUlJTU1NUVFRVVVVWVlZXV1dYWFhZWVlaWlpbW1tcXFxdXV1eXl5fX19gYGBhYWFiYmJjY2NkZGRlZWVmZmZnZ2doaGhpaWlqampra2tsbGxtbW1ubm5vb29wcHBxcXFycnJzc3N0dHR1dXV2dnZ3d3d4eHh5eXl6enp7e3t8fHx9fX1+fn5/f3+AgICBgYGCgoKDg4OEhISFhYWGhoaHh4eIiIiJiYmLi4uMjIyNjY2Ojo6Pj4+QkJCRkZGSkpKTk5OUlJSVlZWWlpaXl5eYmJiampqbm5ucnJydnZ2enp6fn5+goKChoaGioqKjo6OkpKSlpaWmpqanp6eoqKipqamqqqqrq6usrKytra2urq6vr6+wsLCxsbGysrKzs7O0tLS1tbW2tra3t7e4uLi5ubm6urq7u7u8vLy9vb2+vr6/v7/AwMDBwcHCwsLDw8PExMTFxcXGxsbHx8fIyMjJycnKysrLy8vMzMzNzc3Ozs7Pz8/Q0NDR0dHS0tLT09PU1NTV1dXW1tbX19fY2NjZ2dna2trb29vc3Nzd3d3e3t7f39/g4ODh4eHi4uLj4+Pk5OTl5eXm5ubn5+fo6Ojp6enq6urr6+vs7Ozt7e3u7u7v7+/w8PDx8fHy8vLz8/P09PT19fX29vb39/f4dm34+Pj5+fn6+vr7+/v8/Pz9/f3+/v7///99LQhCAAAbEklEQVR4nO2dfXgU1b3H9ypUQVReRJQWtKitV4ttrfZF2+tLadFOCKExIWptLqCQYAO2yItYpRhEwZfgy9VSwi0VvNdW4VKuCKlCKZSigAahoNKAIBCaEJJNcvO6vz/uOTNnZmeWSbI7nDm7O/l+nifZmTPnzHfO+Ti7q3nmZ4hAoAkl+wKAv0BwwIFgnQ2XhPSfBCm+3YdrGfm4zLMFTLCn6bBBN75p/CR24vDAfV7yumDPhQ0Sz5ZEwSGDDg7xl8iD/fvPiFhNF/3kn6eXWDZ0yJ/Yy5KvtegnPOP8r02vNI70bTZ+Or+sWP4zQ395oseTiV1HzYwv9e4/av2pB4zUkcsTO12nJPcO7jhdP/If1+7ff+3LVsOh7LzTi7t69cqvEB0c9IE4Yfi9n33+H1Zc9GLiXZTs3/Lf7Zctuqw9oev44YR9zZWv3nTqASN4ydiEztY5SRfcVHTxRUVNbPOJQQMKm+1HiL6zhmjNjdGGoxfQ3jED+o4+zoZN7H/RE6ytfe4X++eHqwZWs8NVg6qtFrr8Q6JSovIraP3Xe11q/FNydnNTL4qMeDyaQA/fxbdCztvW5bJYU+vDl/RbQLbT0bCP+e/V19L1fzRPGIpeGrsg87JsYzi9a8SGuFYzRVzCvsvlrXDyBT884tNPb/kl2xxx6NCIR+1HiM5nJiv7RRuODaRr/tRQM3k80eyRhw//gLUt/P7+6rseoInz2eHHp1C0peAF+vS8Wnq+kAa/1nRgnH6Oq9as+Qot+k5bNIEOXKxvnXIHx14Wa3rs5o+rp5DtdNS7nv8e+RtacjtFBZuXxjAvyzZGH3HHJuNjVlyrPYUR7i1lcW0LmTRY+mXsTitn/8iG2Ouuy+1HiM5kn5UtPayGw7m5+ubJIWzYHqIPWduVf2c39q
W055JWah1WQdGWlTk0/8KX6Y5VNLTkoDjpuiFDyj4auHfdkKFvmzNv+Zy74NjLYk1XlOsHo6ejXlzwJxc2UuOF+6OCzUtjmJdlG6NP4OGv9rrsFzXWtdpTGPXnyFnd6EImDZbeq5GosRfbFK/RIzF3cCg06M5K2nZr/1CIOe/F3j4bWafe/G3tTHZTrKDlefow0VJ3CQ1ffQNdUkfvZV7wpTXmedtvKKGrVq8ebt3Bg90Fx16WaGLYTjfsI/brF/qb+7SoYPPSOOKyHJdgXMeue26PXr0thbHvMlnrSykg2LpVdrN/7mPu4NjPYMblS6vaqm23yZeNL0m09jv07e16L7Pl5le/Sd98/Ra+GVl9sXnex2+JGB/F5mfw3V3cweKyWNOXysVh63TZpczLhTyP38b8fj7ivIPNy3JcguDEedFrNVPO0PeW5Ca8kB2TdMEPsQ+7W2ezzR8eOvSDX9qPEL3o/BbNufj1pk/G8GHsg+6H7PWZEbubP8hhB64uGWH0Mlvmf6GEnh3yBFHuh82rB4vTfjCwQnwU867124v4t2g3wbGXxZqKb/6EfwbbTrf0R8zGj/TNkaV045zw/gzbpekYl2WOEa03vXq0Zf99I6PXaqZcuJsfvv138lY4+YIb77/oovsb9S+S/Sc1me3G98nItH79pkesrpw/fvlzl5TwYff2H/Qr9vnZXnLl2cNXsgMvn7nW6GW27OhZScd67iRa/uVeX/+TMbrla78m46NY//fgc7/64DFyFxx7WaypZdaQ/k85Tlc/cC9dZ7z1rrqeyr/d69IXbZemY1yWOUZElI3u32tYYVX0Ws2Uhf1Yj78H5T90OPFwIe8Pk38ZsXR+WfNu6+BAgpfmSLktmP+pMtELmXKk4vtTfbkSB17WJ/FL89FC2gp+ZsigCfW+XIkDL+uT+KV1B8HAHyA44EBwwIHggAPBAQeCA04SBdcdd1AVOe6RBo/jqttVJ55QmgjBEAzBchMhWNnkORAMwZITIVjZ5DkQDMGSEyFY2eQ5EAzBkhMhWNnkORAMwZITIVjZ5DkQDMGSEyFY2eQ5EKxW8L8ANyA44EBwwIHggAPBAQeCAw4EBxwIDjgQHHBST7AGwTJJIcGaNvqBj90Fu7VBcFykkmBqWVWEO1gyKSWYGsewlxU5+ds+mBQhavnpZ5sLs8at1Q8dn53Ffot9CI6blBLcqt/Byxo33UczNxC9tYDydrVVluiCixfzEitinzbccssfIk6SvZIpSsQr9fIFa6OnfsRemqk9g8oLIpHCA7R4ygs7dfeUW8f7iH1qOHSostpBzf+B08O5ntX/lC84+sJ+Zv35b/PY1tFV0xbaBIt9TuxbdLLXJ+1R8RlsE7xr8gz2lfq5w2178vT9eUt4LVmxD8E+oFgwzZjDfr05YUzhFuNL1qxMzdqHYB/wXbCT1qcrOu8AwZJRK1gbVdZFDwiWjOI7uEsgWDIQHHAgOOBAcMCB4IADwQEHggNOqgvGs0mSEyEYgiFYbiIEK5s8B4IhWHIiBCubPAeCIVhyIgQrmzwHgiFYciIEK5s8B4IhWHIiBCubPAeCIVhyIgQrmzwHgiFYciIEK5s8B4IhWHIiBCubPAeCIVhyYvcWnOwnrZMNBAccCA44EBxwIDjgQHDAgeCAA8EBB4IDTgoLjrc6JQR3RioKfqdw9IT1nQjWXLYguANSUPD2/B2NR56J6w6G4C5JQcEztgp7vL4sUc3cOybv5Xu5d29ePvaed3WtRnlZjUHaG/maKEILwaeSgoLz6oRgvb4sPVnW8u79xl4W+5moC46WmyVtUY1ZhJb+kpGxqs1Jshc42bR1RqTTo66EJQrW68tSHrtNRxl7/GeUrjVabpY0FimK0NLJ3bsPnXBwMtkPyKclJzqm6vQFW2/Rxk9elX2P//AXq9ys8TksitASSjhIwd+36PfG7Ww68qxlb+HCmuNPxgoW5WVzjgrBRhFaCJaEv4Lp7YLMe8ssnbULxo7fFC
tYlJd9JUcT36T1IrQQLAmfBXsgWoQWgiWQcoJtRWghWAIpJ9gGBEsAggMOBAccCA44EBxwIDjgQHDASSfBeDZJciIEQzAEy02EYGWT50AwBEtOhGBlk+dAMARLToRgZZPnQDAES06EYGWT50AwBEtOhGBlk+dAMARLToRgZZPnQDAES06EYGWT50AwBEtOhGBlk+dAMARLTuzegpP9ALZPQDAEQ3A6A8EQDMHpDARDMASnMxAMwRCczkAwBKeb4CPz7sqa8TcINgig4KLlNS3lc7rsBsHpKjjnmPGqlWY/cJS/ZozfTnR8dpZmVaSF4HQWvDJvzmK9AOmy+qVzeUOkfBJR8eImsirS0ta77nqz1UmyTfiEfYpt1OqR9sSH1PkmmGr/+mrBUr26bDiXaMfk0bwIba4eKCrSUtXWrRU1DmqT/bB8kqiJi6b4utmp9k8woybHEpy/tblBswRXWV1QwkEnDd+i55Q3h1+bzN+iG/hbdF55w0tM8LwlzWRVpIVgizQUvG1GVt4jB6wvWWV3/mQ5E3x8Vma0Ii0EW6ShYJMu/l8dEKwDwQEnjQV3AQTrQHDAgeCAA8EBB4IDDgQHHAgOOBAccLqPYDx8JjkRgiEYguUmQrCyyXMgGIIlJ0KwsslzIBiCJSdCsLLJcyAYgiUnQrCyyXMgGIIlJ0KwsslzIBiCJSdCsLLJcyAYgiUnQrCyyXMgGIIlJ0KwsslzIFit4GQ/qe2FBJccgtONBJccgtONBJccgtONBJccgtONBJccgtONBJccgtONBJccgtONBJccgtONBJc8PQV3UYoDgqOkvmBtI/u1IapUi0NwR30gOAGUCS6IUKQgccFuQHACKBNctJE2FOlaeQlZjUHaipz8bVYdWW1F7t2bl4+9511nH7Pa7ObCrHFrIThhlAneWdBWsFO/J/USsrrqZY2b7rPqyPK9LPYz0dnHrDabt6utsoS9bp80aX2Lg7SsNtuSGK2RBAdYtCc+xFO1WY0emf8o1yVKyOrymqk9w6ojy/f4zyhnH7MY6eIpL/BKw3Rs3bp9tQ7CyX4SWyG1CdOc+BBP1WY1qsis4LpECVnzM1iz6sjaWpx9DMF0dNW0hca5uvET/in8Fm3+FiVkc45aAkUdWbtyRx+j2uxzh9v25EFw6gsuM0rIvpKjmTpFHVmbYGcfo9rsmxPGFG6B4JQVLBMIhuCgAsEBB4IDDgQHHAgOOBAccCA44EBwwIHggNM9BePhM8mJEAzBECw3EYKVTZ4DwRAsORGClU2eA8EQLDkRgpVNngPBECw5EYKVTZ4DwRAsORGClU2eA8EQLDkRgpVNngPBECw5EYKVTZ4DwRAsORGClU2eA8FqBSfxOW4Vy82BYAiWmAjBEAzB8pabA8EQLDERgiEYguUtNweCIVhiIgRD8Kl0UZhyS378tWch2OfErgVrmvbjqTtiBXemcMKeDs4EwZyUE0zU/Oe7ujRlIyMCwZ2QioJ5VdFjM7NnVRIdfSC71LiDRWVZW+lYo4teWZaxYwbV/LiGxh2yV5w1y9G+kZ8BwSoS4xDMyGZvunOX1pcWs5dl9UsNwaKyrFk6lqwu4k6tz4m8MWplYw6/n62Ks2Y52kU1bGdPcfGWRgdNSRTcmCCtiQ4w5xjxONBL4sn47uDXZxLlhqluLFFOmMKGYFFZ1iwdS1YX86244ODk1+7fO9NRcdYsRxvmHSpKS98PO6hX+AC2kdgQCXukxeM4pYk1cQmmxjG6vbBTsFFZ1lY6VnQxBT/7UgEVvPhrR8VZWzlanSQ+4W8k4u/B/A5eVcTfmxuWFhsvmr2yrK10rOhi2nszcyW9kfm2o+KsrRwtBKtIjEOwpo15cL/LlyxRWdZWOlZ0Me1VZNVSbdZBR8VZWzlaCFaR2LVgf4FgnxMhGIIhWN5ycyAYgiUmQjAEQ7C85eZAMARLTIRgCIZgecvNgWAIlpiYaoLx8JnkRAiGYAiWmwjByibPgWAIlpwIwcomz4
FgCJacCMHKJs+BYAiWnAjByibPgWAIlpwIwcomz4FgCJacCMHKJs+BYAiWnAjByibPgWAIlpwIwcomz4FgtYJ9fcTbbQEgGII7AIIh2BUIhmCJiRAMwRDsDgRDsCsQDMESEyEYgk+fTmpbQrDPiR4FaxvZrw1RcZ2XJ4VgO+khuCBCkQII9kJ6CC7aSBuKrJKz9kKyZjU0Xk82WhlNbB2Zml2qfTApQtTy088gWEGiV8E7C9oKdkZLztoKyZolZ3k92Wj5WbE1R69VO3MD0VsLiD4qKXm3wUGjr4IbXGiMuLXGQ6vHcU0qE+OoNusumB6Z/6it5KytkKxZcpbXk42WnzVr1dbzUqblBZFI4QFXwQofAHfH1+XWSRPBFZkVtpKztkKy9pKz0W2zVq0umGb9+W/zjBMl8Ql/d/x9w+Skx1u0+VtUj7UVkrWXnI1ui605v2v4LdvbNXnGxxCcMMkQLKrH2grJ2kvOnvIl6/DU7Jey2YEZcwiCEyZl/0OHndbXp7NfT1dAcOKkg2Bt1P2fsF9l5j4EJ0A6CI4FghMAgiH4FCAYgiFY3nJzIBiCJSZCMARDsLzl5kAwBEtMTDXBePhMciIEQzAEy02EYGWT50AwBEtOhGBlk+dAMARLToRgZZPnQDAES06EYGWT50AwBEtOhGBlk+dAMARLToRgZZPnQDAES06EYGWT50AwBEtOhGBlk+dAsFrBvj4A3gEKl5sDwRAsMRGCIRiC5S03B4IhWGIiBEMwBMtbbg4EQ7DERAiGYAiWt9ycNBGs6fUpYxs7HQHBOuki2MMYCOakl2BtRU7+NlFFVm8UDaL0rHjRS89qsb0hWEGid8HGW7S2rHHTfWYVWYo2iNKzZjFaXnr2lN5UUVr6fthBfTIEh73R4nFcQ8TjQC+JNad9BzdTe4ZVZFSzGkTpWbMYbZjsB0Vv2lNcvKXRQZPvD3g3xtAUiW2Jl1aP45Qmeq02GxVs/NgEGw2i9KytGK3toCmYkvCEf+xbGP4eHJdgUUU26lCUnhUvMYJFbwhWkOhdsPgMNpSJKrLRBlF6VrzECDZrzkKw/4meBceiV5FNvDcE+5woS7BeRdZDbwj2OVGWYK9AsM+JEAzBEOwOBEOwKxAMwRITIRiCIdgdCIZgV7q3YDx8JjkRgiEYguUmQrCyyXMgGIIlJ0KwsslzIBiCJSdCsLLJcyAYgiUnQrCyyXMgGIIlJ0KwsslzIBiCJSdCsLLJcyAYgiUnQrCyyXMgGIIlJ0Kwsslz/BesdkFd01NIcMJPbye63LFAsL9AsGRc0yHYCxAcDxAsGdd0CPYCBMcDBEvGNR2CvQDB8QDBknFNh2AvQLA7ziKXECwZ13Spgt8pHD1hfSfHu6fg9tiH4GQuuR3XdJmCt+fvaDzyTCcduqfgtsAInrFVbGgZ47db9WXF3tEHjDKzxh4HgiXjmi5TcF6duRUpn2TVlxV7c0WZWWOPDv/+9x/WOQgnLNgc2VLnjfqIx4FxJ+qLoUxwTPgJvdEPwTsmj9brzBr1ZcVeTlivQir26IPp0zc2OWhO5Flux8i2Jm80RzwOjDtRXw5vgj2IiQmv9XqeDjHfovO3NjdEq1OKPSFY7HFO5wl/T2+YsQTlLdp06JouU/B743Y2HXmW3cnlDS/Zqgsbe3OXNSyN7nEgWBLKBNPbBZn3lhGV3fmT5VHBYk98yRJ7nO4uODS9z1feYy9nDFtL267uMz1E+27uM3zr3wc3UsPgvWKPLv7srdD6Q4PosYEh4UrfMg7qpwiFxBHX9BT6L1ndTvDz9SXXsdf2d66kb7xY/3yIvlfatOYaGrWISjLJ3Lttw20zbn/nVjrv5QYxTt8yDhqnUHcHJ0a3F9xA9b3phWE9Qz2oN9sOUR92L/agvwwND91M5t7D065ov2Laz+h33+73iDFO3zIOGqeA4A5ItuAXG56/js7/Y/h/QuwObmB38HdL6/mBG269kYsx9l4/63Gad9ZLbGtbH3Pl2J
Zx0DhF78+MZtd0CPaCpM/gc9hn8K/6Dp4Tom1XnVN0Lu2/7Xz+cfpGaCUXY+wd7HmUjvRkd3RoYLEYx7eMg8Ypfn4uPoNdSbZgx2o0L/hu4kvoPIVrOgR7Qb7gUI9rtne5YiHrC7PbKSA4lqD8e7CJazoEewGC46G7CKa2GPxaUNd0CPZCQoIjzTH4taCu6SkkOKgPn7XF/pnTrwV1TYdgPxP1eUKwAQSfLq7pEOxnoj5Pb4I9iHFNh2A/E/V54g42gODTxTUdgv1M1OfZkWCvf/A3RvCdkBgncE2HYD8T9Xl2KNjjH/yNEYZgY5zANR2C/UzU59mhYI9/8DdGMHHtITFO4JoOwX4m6vPsULDHP/gbIz7/vw2LzHEC13QI9jNRn2fHn8He/uBvjFhxcd9ic5zANR2C/UzU59mhYMdqePqDv3OcazoE+5mozzMuwd7+4O8c55oOwX4m6vPEvwcbQPDp4poOwX4m6vNsh2CdwApWhms6BPuZqHZBXdNTSHDsO5kNOcsdCwT7CwRLxjUdgr0AwfEAwZJxTYdgL+D/2QDBrkAwBEtMhGAIhmC5iakgeEu+dooKCJaUmDzBUakT9nQlRqPYsqId9YLgGFJBcEbk9NXFBwT7nBgj2CgOqzHo2MzsWZWs5Y38DNace/fm5WPveZdEpVi9B+tvdhI1ZRmbC7PGrWX/vj07yyiDVjP3jsl7rR5Gs2iDYAWJsYKN4rD8/py7tL60mG0uqjGas9jPRN7NqCJr9Dc7mTVlifJ2tVWWEBUvbjJ6PFnW8u79Vg+jWbTRsXXr9tU66KzabG2nNHd+uEPCEY8D0yOxOlawURyW68sNU91Ythkmo5n/jIpWkTX6m52MYZzFU17YyUfXiTPmsXs9WnXWaBZttH3SpPUtDlq7eui7pSPaOzzSOa3kcaD3xIjCxLpYwdEX5i48VnzYatEfZxVZWyfzU/noqmkL7YKryHYGIbiqw8/ghJ7qP923L053e4uOvvDKsMWnChaVYnOOGm/R0U5C8HOH2/bkEc1b0my0LVxYc/xJq4fRLNogWEFix4LN708xgsuMSrGv5Di+ZEUFvzlhTOEW9m1qVqZxntoFY8dvsnoYzaINghUk2gUnAwj2ORGCIRiC5SZCMARLTIRgCIZguYkQDMESEyEYgiFYbiIEQ7DExFQTHMCHz2KBYGWT50AwBEtOhGBlk+dAMARLToRgZZPnQDAES06EYGWT50AwBEtOhGBlk+dAMARLToRgZZPnQDAES07s1oIrD3idfNjjuMqDqhOPfaoyMdmCY/jse6oTD3xfdeK+kaoTIVgp3VpwzZOqE6ueUp14rER1YgoJBr4AwQEnZQTXzs6aXackaVPBj2dUWIHaRta0we/KQm9nkeJEk5QRvKSktmSJkqT5FU1/mGwFagURihT4vNz7+IPxShMtUkbwxE/p4ERVYbW5VqBWtJE2FPm73NVTjj9KShOjpIzg7GZqzlYVtqLECtR2FrQV7PR1uVtm7aFCUploo1sKXju1Ibrc9Mj8R6XV/nLl2TKiHFKZaCNlBCt8i15VeDIaqFFFZoW/y63pqEy0kTKCf6PsS9ark6ptgcZC+77cmvJEQcoIrn1o9EO1SpL0+6nRDFQoWG2iIGUEA3+A4IADwQEHggMOBAecbig4FDrr+vc67+HprKf+/7lTYXFT4RoUE6KmZ7/uz4lTkJS8KH9hUw735q9nDFtL9PF1faazlkrtvOFb/zngONHxAVXmPr31b1R5TiV9cS/9/uqzL31ZjJ/e5/r9Zg8KPTXkjOiJK2469+YDVhfW8OmIs5O7xN1ScLNxB7e/cyXR7bNPzmSLMLa0ac01NK6Y6LHxZO2f7Nv+VI+n6/pGaMCGlgPjxHg2IsPsQaHxldETkzbz5PRMqwtryPx5fXJmabuo7gb7DP7GNqK3hp8V6kHUt4ZOsEW4gH1+9qCdX2ht+cL7ZO3TVbuHz7/mrzcRPXDtpHXm+BN0op/Vg+1ZJy
bqd4Kq+1tdeEN1ciZpv6juhpjykFWNtSFL8IDDeuP3lr9yk95D7P970VX0rwVT2db+Z264U4w37IkeIceJmeATEJxkxJQveKe2iG3e9svah9jLnXdWHhxL9N/f+tZreg+x/1LPp+mpnsuI7t3X8pcBYvzsWv7+K3o4BWvsWKbVhTWMfrBB/QztdF/BpQMHz2Gb+77Rp+hcoqq8/sP+i6h16NA2vYfYLz+7iqrO3s1MX97r6tfFeOMblOjhFPyPm/rEfMm65XP4kpVkmhd8N6H+cSxZCq1qCl1Kkgj1uGZ7YgOkdFFFCl0K8AMIDjgQHHAgOOBAcMD5f/vAR4NhzeHfAAAAAElFTkSuQmCC\n"
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"%%R -i pv_diff\n",
"\n",
"# Horizontal grouped bar chart of September vs. August pageviews per topic.\n",
"# `pv_diff` is pushed into the R session from the Python kernel by `-i`.\n",
"# NOTE(review): the title says 'Top 10 ... %Diff', but this cell applies no top-N\n",
"# filter and plots raw pageview counts; presumably `pv_diff` is already limited\n",
"# to the top 10 topics upstream. Confirm against the cell that builds it.\n",
"data.table(pv_diff) %>%\n",
"  # Leave out rows whose topic is 'Unknown'.\n",
"  filter(topic != 'Unknown') %>%\n",
"  # Wide -> long: one row per (topic, month) so the two months can be drawn side by side.\n",
"  melt(id.vars = c(\"topic\"),\n",
"       measure.vars = c(\"pageviews_sept\", \"pageviews_aug\"),\n",
"       variable.name = \"month\",\n",
"       value.name = 'count') %>%\n",
"  # reorder() orders the topics by their mean count over both months (its default FUN).\n",
"  ggplot(aes(fill = month, y = count, x = reorder(topic, count))) +\n",
"  geom_bar(position = \"dodge\", stat = \"identity\", width = 0.6) +\n",
"  # Flip the axes so topic names sit on the vertical axis.\n",
"  coord_flip() +\n",
"  # polloi::compress formats the tick labels (presumably abbreviating large numbers; confirm).\n",
"  scale_y_continuous(\"Pageviews per Topic\",\n",
"                     labels = polloi::compress) +\n",
"  theme(axis.title.y = element_blank(),\n",
"        axis.text = element_text(size = 11),\n",
"        # Legend is placed inside the panel, towards the lower right, without a title.\n",
"        legend.position = c(0.8, 0.15),\n",
"        legend.title = element_blank(),\n",
"        legend.text = element_text(hjust = 0, size = 10)) +\n",
"  # Only the title is set: the plot maps `fill`, not `colour`, and the legend title\n",
"  # is blanked above, so a colour label would have no effect.\n",
"  labs(title = \"Top 10 Pageviews %Diff Topics (Aug vs. Sept)\")\n"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}