{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "from datetime import date as dt\n", "import re " ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Page2016-01-01 00:00:002016-01-02 00:00:002016-01-03 00:00:002016-01-04 00:00:002016-01-05 00:00:002016-01-06 00:00:002016-01-07 00:00:002016-01-08 00:00:002016-01-09 00:00:00...2016-12-22 00:00:002016-12-23 00:00:002016-12-24 00:00:002016-12-25 00:00:002016-12-26 00:00:002016-12-27 00:00:002016-12-28 00:00:002016-12-29 00:00:002016-12-30 00:00:002016-12-31 00:00:00
0.xxx_en.wikipedia.org_all-access_all-agents708975927436703264986887720970227194...4931.04034.05204.04331.04572.04509.05392.04950.02931.04101.0
1.xxx_en.wikipedia.org_mobile-web_all-agents618264476437591252055770599859486165...4617.03663.04882.04028.04263.04176.05068.04539.02666.03754.0
21._Juli_de.wikipedia.org_desktop_all-agents204933493426662818...26.020.019.043.024.041.039.039.031.031.0
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...536362806297628860526200593559586200...3356.03366.02912.02850.03140.03379.03497.03338.03329.03550.0
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...318637843832405940224213398538233794...1719.01593.01255.01160.01311.01468.01728.01494.01515.01464.0
\n", "

5 rows × 367 columns

\n", "
" ], "text/plain": [ " Page 2016-01-01 00:00:00 \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 7089 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 6182 \n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 20 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 5363 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 3186 \n", "\n", " 2016-01-02 00:00:00 2016-01-03 00:00:00 2016-01-04 00:00:00 \\\n", "0 7592 7436 7032 \n", "1 6447 6437 5912 \n", "2 49 33 49 \n", "3 6280 6297 6288 \n", "4 3784 3832 4059 \n", "\n", " 2016-01-05 00:00:00 2016-01-06 00:00:00 2016-01-07 00:00:00 \\\n", "0 6498 6887 7209 \n", "1 5205 5770 5998 \n", "2 34 26 66 \n", "3 6052 6200 5935 \n", "4 4022 4213 3985 \n", "\n", " 2016-01-08 00:00:00 2016-01-09 00:00:00 ... 2016-12-22 00:00:00 \\\n", "0 7022 7194 ... 4931.0 \n", "1 5948 6165 ... 4617.0 \n", "2 28 18 ... 26.0 \n", "3 5958 6200 ... 3356.0 \n", "4 3823 3794 ... 1719.0 \n", "\n", " 2016-12-23 00:00:00 2016-12-24 00:00:00 2016-12-25 00:00:00 \\\n", "0 4034.0 5204.0 4331.0 \n", "1 3663.0 4882.0 4028.0 \n", "2 20.0 19.0 43.0 \n", "3 3366.0 2912.0 2850.0 \n", "4 1593.0 1255.0 1160.0 \n", "\n", " 2016-12-26 00:00:00 2016-12-27 00:00:00 2016-12-28 00:00:00 \\\n", "0 4572.0 4509.0 5392.0 \n", "1 4263.0 4176.0 5068.0 \n", "2 24.0 41.0 39.0 \n", "3 3140.0 3379.0 3497.0 \n", "4 1311.0 1468.0 1728.0 \n", "\n", " 2016-12-29 00:00:00 2016-12-30 00:00:00 2016-12-31 00:00:00 \n", "0 4950.0 2931.0 4101.0 \n", "1 4539.0 2666.0 3754.0 \n", "2 39.0 31.0 31.0 \n", "3 3338.0 3329.0 3550.0 \n", "4 1494.0 1515.0 1464.0 \n", "\n", "[5 rows x 367 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Load the wide-format page-view workbook (one row per page, one column per day) and preview the first rows\n", "source_workbook = 'wikipedia_dataset.xlsx'\n", "wikipedia_pivoted = pd.read_excel(source_workbook)\n", "wikipedia_pivoted.head()" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisits
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0
21._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0
............
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0
\n", "

549000 rows × 3 columns

\n", "
" ], "text/plain": [ " Page Date Visits\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0\n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0\n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 20.0\n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0\n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0\n", "... ... ... ...\n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0\n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0\n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0\n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0\n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0\n", "\n", "[549000 rows x 3 columns]" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Unpivot the wide table (one column per day) into long format: one row per (Page, Date)\n", "## The date labels come from the spreadsheet header, so coerce them explicitly to datetime;\n", "## the Weekday/Month cell further down uses the `.dt` accessor, which needs a datetime column\n", "wikipedia_unpivot = (\n", "    wikipedia_pivoted\n", "    .melt(id_vars='Page', var_name='Date', value_name='Visits')\n", "    .assign(Date=lambda frame: pd.to_datetime(frame['Date']))\n", ")\n", "wikipedia_unpivot" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Page 0\n", "Date 0\n", "Visits 87\n", "dtype: int64" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Count the missing values per column before imputing\n", "wikipedia_unpivot.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Page 0\n", "Date 0\n", "Visits 0\n", "dtype: int64" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Treat a missing visit count as zero visits and validate the result.\n", "## The fill is restricted to `Visits` so that a missing Page or Date would still show up\n", "## in the null check below instead of silently becoming 0\n", "wiki = wikipedia_unpivot.fillna({'Visits': 0})\n", "wiki.isnull().sum()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisitsWeekdayMonth
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary
21._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary
..................
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember
\n", "

549000 rows × 5 columns

\n", "
" ], "text/plain": [ " Page Date Visits \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0 \n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 20.0 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0 \n", "... ... ... ... \n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0 \n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0 \n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0 \n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0 \n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0 \n", "\n", " Weekday Month \n", "0 Friday January \n", "1 Friday January \n", "2 Friday January \n", "3 Friday January \n", "4 Friday January \n", "... ... ... \n", "548995 Saturday December \n", "548996 Saturday December \n", "548997 Saturday December \n", "548998 Saturday December \n", "548999 Saturday December \n", "\n", "[549000 rows x 5 columns]" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Derive two calendar labels from each visit date: the day-of-week name and the month name\n", "visit_dates = wiki['Date']\n", "wiki['Weekday'] = visit_dates.dt.day_name()\n", "wiki['Month'] = visit_dates.dt.month_name()\n", "wiki" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisitsWeekdayMonthLanguage_Codes
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.
21._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary_de.
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.
.....................
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.
\n", "

549000 rows × 6 columns

\n", "
" ], "text/plain": [ " Page Date Visits \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0 \n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 20.0 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0 \n", "... ... ... ... \n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0 \n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0 \n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0 \n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0 \n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0 \n", "\n", " Weekday Month Language_Codes \n", "0 Friday January _en. \n", "1 Friday January _en. \n", "2 Friday January _de. \n", "3 Friday January _en. \n", "4 Friday January _en. \n", "... ... ... ... \n", "548995 Saturday December _en. \n", "548996 Saturday December _en. \n", "548997 Saturday December _en. \n", "548998 Saturday December _en. \n", "548999 Saturday December _en. \n", "\n", "[549000 rows x 6 columns]" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Extract the language code from the Page string, e.g. '_en.' from 'Zendaya_en.wikipedia.org_...'\n", "## The pattern is tied to the 'wikipedia.org_' suffix so that a title which itself contains\n", "## '_xx.' (for example '..._vs._...') is not mistaken for the language code. Only the code,\n", "## with its leading '_' and trailing '.', is captured; pages that do not match get NaN\n", "wiki['Language_Codes'] = wiki['Page'].str.extract(r\"(_[a-z]{2}\\.)wikipedia\\.org_\", expand=False)\n", "wiki" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisitsWeekdayMonthLanguage_CodesLanguage_Names
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.English
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.English
21._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary_de.German
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.English
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.English
........................
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.English
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.English
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.English
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.English
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.English
\n", "

549000 rows × 7 columns

\n", "
" ], "text/plain": [ " Page Date Visits \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0 \n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 20.0 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0 \n", "... ... ... ... \n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0 \n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0 \n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0 \n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0 \n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0 \n", "\n", " Weekday Month Language_Codes Language_Names \n", "0 Friday January _en. English \n", "1 Friday January _en. English \n", "2 Friday January _de. German \n", "3 Friday January _en. English \n", "4 Friday January _en. English \n", "... ... ... ... ... \n", "548995 Saturday December _en. English \n", "548996 Saturday December _en. English \n", "548997 Saturday December _en. English \n", "548998 Saturday December _en. English \n", "548999 Saturday December _en. English \n", "\n", "[549000 rows x 7 columns]" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Translate each language code into a readable language name\n", "## (codes without an entry in the lookup are left as they are)\n", "language_lookup = {\n", "    '_de.': 'German',\n", "    '_es.': 'Spanish',\n", "    '_en.': 'English',\n", "    '_fr.': 'French',\n", "    '_ru.': 'Russian',\n", "    '_ja.': 'Japanese',\n", "    '_zh.': 'Chinese',\n", "}\n", "wiki['Language_Names'] = wiki['Language_Codes'].replace(language_lookup)\n", "wiki" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisitsWeekdayMonthLanguage_CodesLanguage_NamesTitleDevice
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.English.xxx_en.wikipediaall-access_all-agents
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.English.xxx_en.wikipediamobile-web_all-agents
21._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary_de.German1._Juli_de.wikipediadesktop_all-agents
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediaall-access_all-agents
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediadesktop_all-agents
..............................
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.EnglishZac_Efron_en.wikipediaall-access_all-agents
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.EnglishZayn_Malik_en.wikipediaall-access_all-agents
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.EnglishZendaya_en.wikipediaall-access_all-agents
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.EnglishZendaya_en.wikipediamobile-web_all-agents
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.EnglishZodiac_Killer_en.wikipediaall-access_all-agents
\n", "

549000 rows × 9 columns

\n", "
" ], "text/plain": [ " Page Date Visits \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0 \n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 20.0 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0 \n", "... ... ... ... \n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0 \n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0 \n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0 \n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0 \n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0 \n", "\n", " Weekday Month Language_Codes Language_Names \\\n", "0 Friday January _en. English \n", "1 Friday January _en. English \n", "2 Friday January _de. German \n", "3 Friday January _en. English \n", "4 Friday January _en. English \n", "... ... ... ... ... \n", "548995 Saturday December _en. English \n", "548996 Saturday December _en. English \n", "548997 Saturday December _en. English \n", "548998 Saturday December _en. English \n", "548999 Saturday December _en. English \n", "\n", " Title Device \n", "0 .xxx_en.wikipedia all-access_all-agents \n", "1 .xxx_en.wikipedia mobile-web_all-agents \n", "2 1._Juli_de.wikipedia desktop_all-agents \n", "3 2014_FIFA_World_Cup_en.wikipedia all-access_all-agents \n", "4 2014_FIFA_World_Cup_en.wikipedia desktop_all-agents \n", "... ... ... 
\n", "548995 Zac_Efron_en.wikipedia all-access_all-agents \n", "548996 Zayn_Malik_en.wikipedia all-access_all-agents \n", "548997 Zendaya_en.wikipedia all-access_all-agents \n", "548998 Zendaya_en.wikipedia mobile-web_all-agents \n", "548999 Zodiac_Killer_en.wikipedia all-access_all-agents \n", "\n", "[549000 rows x 9 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Create two new columns by splitting the Page column into a title part and a device part\n", "## The Page column contains the title searched for and the device used, separated by '.org_'\n", "## `rsplit` treats '.org_' as literal text (`split` would read a multi-character pattern as a\n", "## regex, where '.' matches any character), and n=1 caps the result at two pieces per page\n", "wiki[['Title', 'Device']] = wiki['Page'].str.rsplit('.org_', n=1, expand=True)\n", "wiki" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Page 0\n", "Date 0\n", "Visits 0\n", "Weekday 0\n", "Month 0\n", "Language_Codes 6222\n", "Language_Names 6222\n", "Title 0\n", "Device 0\n", "dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "wiki.isna().sum()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Device\n", "all-access_all-agents 340014\n", "all-access_spider 2196\n", "desktop_all-agents 99186\n", "mobile-web_all-agents 107604\n", "dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "##Group Devices\n", "wiki.groupby('Device').size()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisitsWeekdayMonthLanguage_CodesLanguage_NamesTitleDeviceDevice_Type
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.English.xxx_en.wikipediaall-access_all-agentsAll Access
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.English.xxx_en.wikipediamobile-web_all-agentsMobile
21._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary_de.German1._Juli_de.wikipediadesktop_all-agentsDesktop
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediaall-access_all-agentsAll Access
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediadesktop_all-agentsDesktop
.................................
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.EnglishZac_Efron_en.wikipediaall-access_all-agentsAll Access
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.EnglishZayn_Malik_en.wikipediaall-access_all-agentsAll Access
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.EnglishZendaya_en.wikipediaall-access_all-agentsAll Access
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.EnglishZendaya_en.wikipediamobile-web_all-agentsMobile
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.EnglishZodiac_Killer_en.wikipediaall-access_all-agentsAll Access
\n", "

549000 rows × 10 columns

\n", "
" ], "text/plain": [ " Page Date Visits \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0 \n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 20.0 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0 \n", "... ... ... ... \n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0 \n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0 \n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0 \n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0 \n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0 \n", "\n", " Weekday Month Language_Codes Language_Names \\\n", "0 Friday January _en. English \n", "1 Friday January _en. English \n", "2 Friday January _de. German \n", "3 Friday January _en. English \n", "4 Friday January _en. English \n", "... ... ... ... ... \n", "548995 Saturday December _en. English \n", "548996 Saturday December _en. English \n", "548997 Saturday December _en. English \n", "548998 Saturday December _en. English \n", "548999 Saturday December _en. English \n", "\n", " Title Device Device_Type \n", "0 .xxx_en.wikipedia all-access_all-agents All Access \n", "1 .xxx_en.wikipedia mobile-web_all-agents Mobile \n", "2 1._Juli_de.wikipedia desktop_all-agents Desktop \n", "3 2014_FIFA_World_Cup_en.wikipedia all-access_all-agents All Access \n", "4 2014_FIFA_World_Cup_en.wikipedia desktop_all-agents Desktop \n", "... ... ... ... 
\n", "548995 Zac_Efron_en.wikipedia all-access_all-agents All Access \n", "548996 Zayn_Malik_en.wikipedia all-access_all-agents All Access \n", "548997 Zendaya_en.wikipedia all-access_all-agents All Access \n", "548998 Zendaya_en.wikipedia mobile-web_all-agents Mobile \n", "548999 Zodiac_Killer_en.wikipedia all-access_all-agents All Access \n", "\n", "[549000 rows x 10 columns]" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Collapse the raw strings in the Device column into three readable device labels\n", "## (both 'all-access' variants, regular agents and spiders, share the 'All Access' label)\n", "device_labels = {\n", "    'all-access_all-agents': 'All Access',\n", "    'all-access_spider': 'All Access',\n", "    'mobile-web_all-agents': 'Mobile',\n", "    'desktop_all-agents': 'Desktop',\n", "}\n", "wiki['Device_Type'] = wiki['Device'].replace(device_labels)\n", "wiki" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisitsWeekdayMonthLanguage_CodesLanguage_NamesTitleDeviceDevice_Type
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.English.xxx_en.wikipediaall-access_all-agentsAll Access
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.English.xxx_en.wikipediamobile-web_all-agentsMobile
21._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary_de.German1._Juli_de.wikipediadesktop_all-agentsDesktop
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediaall-access_all-agentsAll Access
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediadesktop_all-agentsDesktop
.................................
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.EnglishZac_Efron_en.wikipediaall-access_all-agentsAll Access
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.EnglishZayn_Malik_en.wikipediaall-access_all-agentsAll Access
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.EnglishZendaya_en.wikipediaall-access_all-agentsAll Access
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.EnglishZendaya_en.wikipediamobile-web_all-agentsMobile
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.EnglishZodiac_Killer_en.wikipediaall-access_all-agentsAll Access
\n", "

542778 rows × 10 columns

\n", "
" ], "text/plain": [ " Page Date Visits \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0 \n", "2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 20.0 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0 \n", "... ... ... ... \n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0 \n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0 \n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0 \n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0 \n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0 \n", "\n", " Weekday Month Language_Codes Language_Names \\\n", "0 Friday January _en. English \n", "1 Friday January _en. English \n", "2 Friday January _de. German \n", "3 Friday January _en. English \n", "4 Friday January _en. English \n", "... ... ... ... ... \n", "548995 Saturday December _en. English \n", "548996 Saturday December _en. English \n", "548997 Saturday December _en. English \n", "548998 Saturday December _en. English \n", "548999 Saturday December _en. English \n", "\n", " Title Device Device_Type \n", "0 .xxx_en.wikipedia all-access_all-agents All Access \n", "1 .xxx_en.wikipedia mobile-web_all-agents Mobile \n", "2 1._Juli_de.wikipedia desktop_all-agents Desktop \n", "3 2014_FIFA_World_Cup_en.wikipedia all-access_all-agents All Access \n", "4 2014_FIFA_World_Cup_en.wikipedia desktop_all-agents Desktop \n", "... ... ... ... 
\n", "548995 Zac_Efron_en.wikipedia all-access_all-agents All Access \n", "548996 Zayn_Malik_en.wikipedia all-access_all-agents All Access \n", "548997 Zendaya_en.wikipedia all-access_all-agents All Access \n", "548998 Zendaya_en.wikipedia mobile-web_all-agents Mobile \n", "548999 Zodiac_Killer_en.wikipedia all-access_all-agents All Access \n", "\n", "[542778 rows x 10 columns]" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Assigning a variable for only wikipedia pages\n", "## Since we are only working with wikipedia pages, we keep just those rows and drop everything else\n", "only_wikipedia = wiki[wiki['Page'].str.contains(\"wikipedia.org\")]\n", "only_wikipedia" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexPageDateVisitsWeekdayMonthLanguage_CodesLanguage_NamesTitleDeviceDevice_Type
00.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.English.xxx_en.wikipediaall-access_all-agentsAll Access
11.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.English.xxx_en.wikipediamobile-web_all-agentsMobile
221._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary_de.German1._Juli_de.wikipediadesktop_all-agentsDesktop
332014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediaall-access_all-agentsAll Access
442014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediadesktop_all-agentsDesktop
....................................
542773548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.EnglishZac_Efron_en.wikipediaall-access_all-agentsAll Access
542774548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.EnglishZayn_Malik_en.wikipediaall-access_all-agentsAll Access
542775548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.EnglishZendaya_en.wikipediaall-access_all-agentsAll Access
542776548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.EnglishZendaya_en.wikipediamobile-web_all-agentsMobile
542777548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.EnglishZodiac_Killer_en.wikipediaall-access_all-agentsAll Access
\n", "

542778 rows × 11 columns

\n", "
" ], "text/plain": [ " index Page Date \\\n", "0 0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 \n", "1 1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 \n", "2 2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 \n", "3 3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 \n", "4 4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 \n", "... ... ... ... \n", "542773 548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 \n", "542774 548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 \n", "542775 548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 \n", "542776 548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 \n", "542777 548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 \n", "\n", " Visits Weekday Month Language_Codes Language_Names \\\n", "0 7089.0 Friday January _en. English \n", "1 6182.0 Friday January _en. English \n", "2 20.0 Friday January _de. German \n", "3 5363.0 Friday January _en. English \n", "4 3186.0 Friday January _en. English \n", "... ... ... ... ... ... \n", "542773 13146.0 Saturday December _en. English \n", "542774 12597.0 Saturday December _en. English \n", "542775 6217.0 Saturday December _en. English \n", "542776 4650.0 Saturday December _en. English \n", "542777 6659.0 Saturday December _en. English \n", "\n", " Title Device Device_Type \n", "0 .xxx_en.wikipedia all-access_all-agents All Access \n", "1 .xxx_en.wikipedia mobile-web_all-agents Mobile \n", "2 1._Juli_de.wikipedia desktop_all-agents Desktop \n", "3 2014_FIFA_World_Cup_en.wikipedia all-access_all-agents All Access \n", "4 2014_FIFA_World_Cup_en.wikipedia desktop_all-agents Desktop \n", "... ... ... ... 
\n", "542773 Zac_Efron_en.wikipedia all-access_all-agents All Access \n", "542774 Zayn_Malik_en.wikipedia all-access_all-agents All Access \n", "542775 Zendaya_en.wikipedia all-access_all-agents All Access \n", "542776 Zendaya_en.wikipedia mobile-web_all-agents Mobile \n", "542777 Zodiac_Killer_en.wikipedia all-access_all-agents All Access \n", "\n", "[542778 rows x 11 columns]" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Creating a new index with ordered sequence\n", "wikipedia_all = only_wikipedia.reset_index() \n", "wikipedia_all" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
indexPageDateVisitsWeekdayMonthLanguage_CodesLanguage_NamesTitleDeviceDevice_Type
Row
00.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.English.xxx_en.wikipediaall-access_all-agentsAll Access
11.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.English.xxx_en.wikipediamobile-web_all-agentsMobile
221._Juli_de.wikipedia.org_desktop_all-agents2016-01-0120.0FridayJanuary_de.German1._Juli_de.wikipediadesktop_all-agentsDesktop
332014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediaall-access_all-agentsAll Access
442014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediadesktop_all-agentsDesktop
....................................
542773548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.EnglishZac_Efron_en.wikipediaall-access_all-agentsAll Access
542774548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.EnglishZayn_Malik_en.wikipediaall-access_all-agentsAll Access
542775548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.EnglishZendaya_en.wikipediaall-access_all-agentsAll Access
542776548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.EnglishZendaya_en.wikipediamobile-web_all-agentsMobile
542777548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.EnglishZodiac_Killer_en.wikipediaall-access_all-agentsAll Access
\n", "

542778 rows × 11 columns

\n", "
" ], "text/plain": [ " index Page Date \\\n", "Row \n", "0 0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 \n", "1 1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 \n", "2 2 1._Juli_de.wikipedia.org_desktop_all-agents 2016-01-01 \n", "3 3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 \n", "4 4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 \n", "... ... ... ... \n", "542773 548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 \n", "542774 548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 \n", "542775 548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 \n", "542776 548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 \n", "542777 548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 \n", "\n", " Visits Weekday Month Language_Codes Language_Names \\\n", "Row \n", "0 7089.0 Friday January _en. English \n", "1 6182.0 Friday January _en. English \n", "2 20.0 Friday January _de. German \n", "3 5363.0 Friday January _en. English \n", "4 3186.0 Friday January _en. English \n", "... ... ... ... ... ... \n", "542773 13146.0 Saturday December _en. English \n", "542774 12597.0 Saturday December _en. English \n", "542775 6217.0 Saturday December _en. English \n", "542776 4650.0 Saturday December _en. English \n", "542777 6659.0 Saturday December _en. English \n", "\n", " Title Device Device_Type \n", "Row \n", "0 .xxx_en.wikipedia all-access_all-agents All Access \n", "1 .xxx_en.wikipedia mobile-web_all-agents Mobile \n", "2 1._Juli_de.wikipedia desktop_all-agents Desktop \n", "3 2014_FIFA_World_Cup_en.wikipedia all-access_all-agents All Access \n", "4 2014_FIFA_World_Cup_en.wikipedia desktop_all-agents Desktop \n", "... ... ... ... 
\n", "542773 Zac_Efron_en.wikipedia all-access_all-agents All Access \n", "542774 Zayn_Malik_en.wikipedia all-access_all-agents All Access \n", "542775 Zendaya_en.wikipedia all-access_all-agents All Access \n", "542776 Zendaya_en.wikipedia mobile-web_all-agents Mobile \n", "542777 Zodiac_Killer_en.wikipedia all-access_all-agents All Access \n", "\n", "[542778 rows x 11 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Naming the index\n", "wikipedia_all.index.name = 'Row'\n", "wikipedia_all" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "6222\n" ] } ], "source": [ "## Row-count difference before and after the filter; this confirms that the values left missing when the language codes were extracted belong to pages that are not wikipedia pages\n", "print(549000 - 542778)" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Language_Names\n", "Chinese 2.600405e+08\n", "English 2.404128e+10\n", "French 1.392899e+09\n", "German 2.593808e+09\n", "Japanese 5.279525e+08\n", "Russian 1.889219e+09\n", "Spanish 1.408561e+09\n", "Name: Visits, dtype: float64" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Total visits for each language represented\n", "Languages = wikipedia_all.groupby(['Language_Names'])['Visits'].sum()\n", "Languages" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Weekday\n", "Monday 4.870715e+09\n", "Tuesday 4.721588e+09\n", "Wednesday 4.641320e+09\n", "Thursday 4.537628e+09\n", "Sunday 4.522586e+09\n", "Saturday 4.442689e+09\n", "Friday 4.377232e+09\n", "Name: Visits, dtype: float64" ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## The day of the week most and least popular for visiting wikipedia\n", "Day = 
wikipedia_all.groupby(['Weekday'])['Visits'].sum().sort_values(ascending=False)\n", "Day" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Device_Type\n", "All Access 1.736010e+10\n", "Desktop 1.015998e+10\n", "Mobile 4.593680e+09\n", "Name: Visits, dtype: float64" ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "##Device type used more frequently in visiting wikipedia\n", "Device = wikipedia_all.groupby(['Device_Type'])['Visits'].sum()\n", "Device" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PageDateVisitsWeekdayMonthLanguage_CodesLanguage_NamesTitleDeviceDevice_Type
0.xxx_en.wikipedia.org_all-access_all-agents2016-01-017089.0FridayJanuary_en.English.xxx_en.wikipediaall-access_all-agentsAll Access
1.xxx_en.wikipedia.org_mobile-web_all-agents2016-01-016182.0FridayJanuary_en.English.xxx_en.wikipediamobile-web_all-agentsMobile
32014_FIFA_World_Cup_en.wikipedia.org_all-acces...2016-01-015363.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediaall-access_all-agentsAll Access
42014_FIFA_World_Cup_en.wikipedia.org_desktop_a...2016-01-013186.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediadesktop_all-agentsDesktop
52014_FIFA_World_Cup_en.wikipedia.org_mobile-we...2016-01-012136.0FridayJanuary_en.English2014_FIFA_World_Cup_en.wikipediamobile-web_all-agentsMobile
.................................
548995Zac_Efron_en.wikipedia.org_all-access_all-agents2016-12-3113146.0SaturdayDecember_en.EnglishZac_Efron_en.wikipediaall-access_all-agentsAll Access
548996Zayn_Malik_en.wikipedia.org_all-access_all-agents2016-12-3112597.0SaturdayDecember_en.EnglishZayn_Malik_en.wikipediaall-access_all-agentsAll Access
548997Zendaya_en.wikipedia.org_all-access_all-agents2016-12-316217.0SaturdayDecember_en.EnglishZendaya_en.wikipediaall-access_all-agentsAll Access
548998Zendaya_en.wikipedia.org_mobile-web_all-agents2016-12-314650.0SaturdayDecember_en.EnglishZendaya_en.wikipediamobile-web_all-agentsMobile
548999Zodiac_Killer_en.wikipedia.org_all-access_all-...2016-12-316659.0SaturdayDecember_en.EnglishZodiac_Killer_en.wikipediaall-access_all-agentsAll Access
\n", "

418338 rows × 10 columns

\n", "
" ], "text/plain": [ " Page Date Visits \\\n", "0 .xxx_en.wikipedia.org_all-access_all-agents 2016-01-01 7089.0 \n", "1 .xxx_en.wikipedia.org_mobile-web_all-agents 2016-01-01 6182.0 \n", "3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 2016-01-01 5363.0 \n", "4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 2016-01-01 3186.0 \n", "5 2014_FIFA_World_Cup_en.wikipedia.org_mobile-we... 2016-01-01 2136.0 \n", "... ... ... ... \n", "548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 2016-12-31 13146.0 \n", "548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 2016-12-31 12597.0 \n", "548997 Zendaya_en.wikipedia.org_all-access_all-agents 2016-12-31 6217.0 \n", "548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 2016-12-31 4650.0 \n", "548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 2016-12-31 6659.0 \n", "\n", " Weekday Month Language_Codes Language_Names \\\n", "0 Friday January _en. English \n", "1 Friday January _en. English \n", "3 Friday January _en. English \n", "4 Friday January _en. English \n", "5 Friday January _en. English \n", "... ... ... ... ... \n", "548995 Saturday December _en. English \n", "548996 Saturday December _en. English \n", "548997 Saturday December _en. English \n", "548998 Saturday December _en. English \n", "548999 Saturday December _en. English \n", "\n", " Title Device Device_Type \n", "0 .xxx_en.wikipedia all-access_all-agents All Access \n", "1 .xxx_en.wikipedia mobile-web_all-agents Mobile \n", "3 2014_FIFA_World_Cup_en.wikipedia all-access_all-agents All Access \n", "4 2014_FIFA_World_Cup_en.wikipedia desktop_all-agents Desktop \n", "5 2014_FIFA_World_Cup_en.wikipedia mobile-web_all-agents Mobile \n", "... ... ... ... 
\n", "548995 Zac_Efron_en.wikipedia all-access_all-agents All Access \n", "548996 Zayn_Malik_en.wikipedia all-access_all-agents All Access \n", "548997 Zendaya_en.wikipedia all-access_all-agents All Access \n", "548998 Zendaya_en.wikipedia mobile-web_all-agents Mobile \n", "548999 Zodiac_Killer_en.wikipedia all-access_all-agents All Access \n", "\n", "[418338 rows x 10 columns]" ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Creating a variable for only wikipedia pages written in English.\n", "En_wikipedia = wiki.query(\"Language_Names == 'English'\")\n", "En_wikipedia" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Title\n", "Main_Page_en.wikipedia 32579831.0\n", "Special:Search_en.wikipedia 3588408.0\n", "Special:Book_en.wikipedia 963706.0\n", "Star_Wars:_The_Force_Awakens_en.wikipedia 475826.0\n", "Star_Wars_en.wikipedia 212609.0\n", " ... \n", "Eduardo_Vargas_en.wikipedia 807.0\n", "Eugenie_Bouchard_en.wikipedia 728.0\n", "Missy_Franklin_en.wikipedia 431.0\n", "Nick_Kyrgios_en.wikipedia 272.0\n", "Canada_Day_en.wikipedia 169.0\n", "Name: Visits, Length: 757, dtype: float64" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Trending search topics on January 1, 2016\n", "## First keep only the rows for New Year's Day, then group by the Title column and sum the visits per title, highest first\n", "new_year_visits = En_wikipedia.query(\"Date == '2016-01-01'\").groupby(['Title'])['Visits'].sum().sort_values(ascending=False)\n", "new_year_visits" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Title\n", "Main_Page_en.wikipedia 49232448.0\n", "Special:Search_en.wikipedia 4493900.0\n", "United_States_presidential_election,_2016_en.wikipedia 1519180.0\n", "Donald_Trump_en.wikipedia 1057298.0\n", "Special:RecentChangesLinked_en.wikipedia 1024401.0\n", " ... 
\n", "Lycos_en.wikipedia 320.0\n", "Missy_Franklin_en.wikipedia 308.0\n", "Nick_Kyrgios_en.wikipedia 176.0\n", "Elena_Delle_Donne_en.wikipedia 160.0\n", "User:GoogleAnalitycsRoman/google-api_en.wikipedia 0.0\n", "Name: Visits, Length: 757, dtype: float64" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "## Trending search topics on November 8, 2016\n", "## First keep only the rows for 8th November, then group by the Title column and sum the visits per title, highest first\n", "November8 = En_wikipedia.query(\"Date == '2016-11-8'\").groupby(['Title'])['Visits'].sum().sort_values(ascending=False)\n", "November8" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "## Exporting my tables to be visualized\n", "Day.to_csv('Day.csv')\n", "Device.to_csv('Device.csv')\n", "Languages.to_csv('Languages.csv')\n", "new_year_visits.to_csv('new_year_visits.csv')\n", "November8.to_csv('November8.csv')\n" ] } ], "metadata": { "interpreter": { "hash": "7bec828fe05d00453f80cfcce5b2e09f7fc84f6636438ea32bff4faccc640861" }, "kernelspec": { "display_name": "Python 3.9.7 ('base')", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.7" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }