{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import sqlite3 as sql "
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"wikipedia = pd.read_excel('wikipedia_dataset_flat.xlsx')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" date | \n",
" page | \n",
" visits | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 2016-01-01 | \n",
" .xxx_en.wikipedia.org_all-access_all-agents | \n",
" 7089.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2016-01-01 | \n",
" .xxx_en.wikipedia.org_mobile-web_all-agents | \n",
" 6182.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 2016-01-01 | \n",
" 1._Juli_de.wikipedia.org_desktop_all-agents | \n",
" 20.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 2016-01-01 | \n",
" 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | \n",
" 5363.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 2016-01-01 | \n",
" 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | \n",
" 3186.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Unnamed: 0 date page \\\n",
"0 0 2016-01-01 .xxx_en.wikipedia.org_all-access_all-agents \n",
"1 1 2016-01-01 .xxx_en.wikipedia.org_mobile-web_all-agents \n",
"2 2 2016-01-01 1._Juli_de.wikipedia.org_desktop_all-agents \n",
"3 3 2016-01-01 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... \n",
"4 4 2016-01-01 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... \n",
"\n",
" visits \n",
"0 7089.0 \n",
"1 6182.0 \n",
"2 20.0 \n",
"3 5363.0 \n",
"4 3186.0 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wikipedia.head()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"##Creating a SQLite Database \n",
"## STEP 1: Assigning the new SQLte database we are about to create to the variable db_conn.\n",
"db_conn = sql.connect(\"wikipedia_dataset.db\")"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"##Since the new database file is empty,(i.e, it has no tables), we need to establish a cursor object that will execute the SQL codes to create the data tables\n",
"## STEP 2: The cursor is assigned to the variable c\n",
"c = db_conn.cursor()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
""
]
},
"execution_count": 24,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"## STEP 4: Create the table that will be included in the database. the will result in empty tables\n",
"c.execute(\n",
" \"\"\"\n",
" CREATE TABLE wikipedia_pages (\n",
" date INTEGER,\n",
" page TEXT NOT NULL,\n",
" visits INTEGER\n",
" );\n",
" \"\"\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [],
"source": [
"##STEP 5: Populate the table in the database with the relevant data\n",
"wikipedia.to_sql('wikipedia', db_conn)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Unnamed: 0 | \n",
" date | \n",
" page | \n",
" visits | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 2016-01-01 | \n",
" .xxx_en.wikipedia.org_all-access_all-agents | \n",
" 7089.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 2016-01-01 | \n",
" .xxx_en.wikipedia.org_mobile-web_all-agents | \n",
" 6182.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 2016-01-01 | \n",
" 1._Juli_de.wikipedia.org_desktop_all-agents | \n",
" 20.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 2016-01-01 | \n",
" 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | \n",
" 5363.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 2016-01-01 | \n",
" 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | \n",
" 3186.0 | \n",
"
\n",
" \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
" 548995 | \n",
" 548995 | \n",
" 2016-12-31 | \n",
" Zac_Efron_en.wikipedia.org_all-access_all-agents | \n",
" 13146.0 | \n",
"
\n",
" \n",
" 548996 | \n",
" 548996 | \n",
" 2016-12-31 | \n",
" Zayn_Malik_en.wikipedia.org_all-access_all-agents | \n",
" 12597.0 | \n",
"
\n",
" \n",
" 548997 | \n",
" 548997 | \n",
" 2016-12-31 | \n",
" Zendaya_en.wikipedia.org_all-access_all-agents | \n",
" 6217.0 | \n",
"
\n",
" \n",
" 548998 | \n",
" 548998 | \n",
" 2016-12-31 | \n",
" Zendaya_en.wikipedia.org_mobile-web_all-agents | \n",
" 4650.0 | \n",
"
\n",
" \n",
" 548999 | \n",
" 548999 | \n",
" 2016-12-31 | \n",
" Zodiac_Killer_en.wikipedia.org_all-access_all-... | \n",
" 6659.0 | \n",
"
\n",
" \n",
"
\n",
"
549000 rows × 4 columns
\n",
"
"
],
"text/plain": [
" Unnamed: 0 date \\\n",
"0 0 2016-01-01 \n",
"1 1 2016-01-01 \n",
"2 2 2016-01-01 \n",
"3 3 2016-01-01 \n",
"4 4 2016-01-01 \n",
"... ... ... \n",
"548995 548995 2016-12-31 \n",
"548996 548996 2016-12-31 \n",
"548997 548997 2016-12-31 \n",
"548998 548998 2016-12-31 \n",
"548999 548999 2016-12-31 \n",
"\n",
" page visits \n",
"0 .xxx_en.wikipedia.org_all-access_all-agents 7089.0 \n",
"1 .xxx_en.wikipedia.org_mobile-web_all-agents 6182.0 \n",
"2 1._Juli_de.wikipedia.org_desktop_all-agents 20.0 \n",
"3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 5363.0 \n",
"4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 3186.0 \n",
"... ... ... \n",
"548995 Zac_Efron_en.wikipedia.org_all-access_all-agents 13146.0 \n",
"548996 Zayn_Malik_en.wikipedia.org_all-access_all-agents 12597.0 \n",
"548997 Zendaya_en.wikipedia.org_all-access_all-agents 6217.0 \n",
"548998 Zendaya_en.wikipedia.org_mobile-web_all-agents 4650.0 \n",
"548999 Zodiac_Killer_en.wikipedia.org_all-access_all-... 6659.0 \n",
"\n",
"[549000 rows x 4 columns]"
]
},
"execution_count": 31,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"wikipedia"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" index | \n",
" Unnamed: 0 | \n",
" date | \n",
" page | \n",
" visits | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" 0 | \n",
" 2016-01-01 00:00:00 | \n",
" .xxx_en.wikipedia.org_all-access_all-agents | \n",
" 7089.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 2016-01-01 00:00:00 | \n",
" .xxx_en.wikipedia.org_mobile-web_all-agents | \n",
" 6182.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" 2 | \n",
" 2016-01-01 00:00:00 | \n",
" 1._Juli_de.wikipedia.org_desktop_all-agents | \n",
" 20.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" 3 | \n",
" 2016-01-01 00:00:00 | \n",
" 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... | \n",
" 5363.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" 4 | \n",
" 2016-01-01 00:00:00 | \n",
" 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... | \n",
" 3186.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" index Unnamed: 0 date \\\n",
"0 0 0 2016-01-01 00:00:00 \n",
"1 1 1 2016-01-01 00:00:00 \n",
"2 2 2 2016-01-01 00:00:00 \n",
"3 3 3 2016-01-01 00:00:00 \n",
"4 4 4 2016-01-01 00:00:00 \n",
"\n",
" page visits \n",
"0 .xxx_en.wikipedia.org_all-access_all-agents 7089.0 \n",
"1 .xxx_en.wikipedia.org_mobile-web_all-agents 6182.0 \n",
"2 1._Juli_de.wikipedia.org_desktop_all-agents 20.0 \n",
"3 2014_FIFA_World_Cup_en.wikipedia.org_all-acces... 5363.0 \n",
"4 2014_FIFA_World_Cup_en.wikipedia.org_desktop_a... 3186.0 "
]
},
"execution_count": 35,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.read_sql(\"SELECT * FROM wikipedia LIMIT 5\", db_conn)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"interpreter": {
"hash": "7bec828fe05d00453f80cfcce5b2e09f7fc84f6636438ea32bff4faccc640861"
},
"kernelspec": {
"display_name": "Python 3.9.7 ('base')",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.7"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}