{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyzing Database with SQL and Connecting Database to Jupyter Notebook" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ">- This notebook shows how to connect database to Jupyter Notebook, and join multi tables, and write complex queries to analyze data. ", "--by Lu Tang" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Analyzing data in 'factbook.db', only two tables" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Imports \n", "import numpy as np # linear algebra\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "import sqlite3\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typenametbl_namerootpagesql
0tablesqlite_sequencesqlite_sequence3CREATE TABLE sqlite_sequence(name,seq)
1tablefactsfacts47CREATE TABLE \"facts\" (\"id\" INTEGER PRIMARY KEY...
2tablecitiescities2CREATE TABLE cities (\\n id integer prim...
\n", "
" ], "text/plain": [ " type name tbl_name rootpage \\\n", "0 table sqlite_sequence sqlite_sequence 3 \n", "1 table facts facts 47 \n", "2 table cities cities 2 \n", "\n", " sql \n", "0 CREATE TABLE sqlite_sequence(name,seq) \n", "1 CREATE TABLE \"facts\" (\"id\" INTEGER PRIMARY KEY... \n", "2 CREATE TABLE cities (\\n id integer prim... " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create the connection to the database, and see what tables we have\n", "conn = sqlite3.connect('factbook.db')\n", "\n", "tables = pd.read_sql(\"\"\"SELECT *\n", " FROM sqlite_master\n", " WHERE type='table';\"\"\", conn)\n", "tables" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "397\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamepopulationcapitalfacts_id
01Oranjestad370001216
12Saint John'S2700016
23Abu Dhabi9420001184
34Dubai19780000184
45Sharjah9830000184
\n", "
" ], "text/plain": [ " id name population capital facts_id\n", "0 1 Oranjestad 37000 1 216\n", "1 2 Saint John'S 27000 1 6\n", "2 3 Abu Dhabi 942000 1 184\n", "3 4 Dubai 1978000 0 184\n", "4 5 Sharjah 983000 0 184" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# List of cities\n", "cities = pd.read_sql(\"\"\"SELECT *\n", " FROM cities;\"\"\", conn)\n", "print(len(cities))\n", "cities.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "397\n", "396\n", "210\n" ] } ], "source": [ "print(cities.id.nunique())\n", "print(cities.name.nunique())\n", "# SELECT COUNT(DISTINCT(facts_id)) FROM cities;\n", "print(cities.facts_id.nunique())\n", "\n", "# several cities share the same facts" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "261\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcodenameareaarea_landarea_waterpopulationpopulation_growthbirth_ratedeath_ratemigration_rate
01afAfghanistan652230.0652230.00.032564342.02.3238.5713.891.51
12alAlbania28748.027398.01350.03029278.00.3012.926.583.30
23agAlgeria2381741.02381741.00.039542166.01.8423.674.310.92
34anAndorra468.0468.00.085580.00.128.136.960.00
45aoAngola1246700.01246700.00.019625353.02.7838.7811.490.46
\n", "
" ], "text/plain": [ " id code name area area_land area_water population \\\n", "0 1 af Afghanistan 652230.0 652230.0 0.0 32564342.0 \n", "1 2 al Albania 28748.0 27398.0 1350.0 3029278.0 \n", "2 3 ag Algeria 2381741.0 2381741.0 0.0 39542166.0 \n", "3 4 an Andorra 468.0 468.0 0.0 85580.0 \n", "4 5 ao Angola 1246700.0 1246700.0 0.0 19625353.0 \n", "\n", " population_growth birth_rate death_rate migration_rate \n", "0 2.32 38.57 13.89 1.51 \n", "1 0.30 12.92 6.58 3.30 \n", "2 1.84 23.67 4.31 0.92 \n", "3 0.12 8.13 6.96 0.00 \n", "4 2.78 38.78 11.49 0.46 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# view table of facts\n", "facts = pd.read_sql(\"\"\"SELECT *\n", " FROM facts;\"\"\", conn)\n", "print(len(facts))\n", "facts.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.1 (INNER) JOIN" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "397\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcodenameareaarea_landarea_waterpopulationpopulation_growthbirth_ratedeath_ratemigration_rateidnamepopulationcapitalfacts_id
0216aaAruba180.0180.00.01121621.3312.568.188.921Oranjestad370001216
16acAntigua and Barbuda442.0442.00.0924361.2415.855.692.212Saint John'S2700016
2184aeUnited Arab Emirates83600.083600.00.057797602.5815.431.9712.363Abu Dhabi9420001184
3184aeUnited Arab Emirates83600.083600.00.057797602.5815.431.9712.364Dubai19780000184
4184aeUnited Arab Emirates83600.083600.00.057797602.5815.431.9712.365Sharjah9830000184
\n", "
" ], "text/plain": [ " id code name area area_land area_water population \\\n", "0 216 aa Aruba 180.0 180.0 0.0 112162 \n", "1 6 ac Antigua and Barbuda 442.0 442.0 0.0 92436 \n", "2 184 ae United Arab Emirates 83600.0 83600.0 0.0 5779760 \n", "3 184 ae United Arab Emirates 83600.0 83600.0 0.0 5779760 \n", "4 184 ae United Arab Emirates 83600.0 83600.0 0.0 5779760 \n", "\n", " population_growth birth_rate death_rate migration_rate id \\\n", "0 1.33 12.56 8.18 8.92 1 \n", "1 1.24 15.85 5.69 2.21 2 \n", "2 2.58 15.43 1.97 12.36 3 \n", "3 2.58 15.43 1.97 12.36 4 \n", "4 2.58 15.43 1.97 12.36 5 \n", "\n", " name population capital facts_id \n", "0 Oranjestad 37000 1 216 \n", "1 Saint John'S 27000 1 6 \n", "2 Abu Dhabi 942000 1 184 \n", "3 Dubai 1978000 0 184 \n", "4 Sharjah 983000 0 184 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inner Join\n", "q=\"\"\"SELECT * \n", "FROM facts \n", "INNER JOIN cities \n", "ON cities.facts_id = facts.id\"\"\"\n", "\n", "inner_join = pd.read_sql(q, conn)\n", "\n", "print(len(inner_join))\n", "# view joined table \n", "inner_join.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "210" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Not all the facts_id is in the cities table\n", "inner_join['facts_id'].nunique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.2 LEFT JOIN" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "448\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idcodenameareaarea_landarea_waterpopulationpopulation_growthbirth_ratedeath_ratemigration_rateidnamepopulationcapitalfacts_id
01afAfghanistan652230.0652230.00.032564342.02.3238.5713.891.516.0Kabul3097000.01.01
12alAlbania28748.027398.01350.03029278.00.3012.926.583.3010.0Tirana419000.01.02
23agAlgeria2381741.02381741.00.039542166.01.8423.674.310.927.0Algiers2916000.01.03
33agAlgeria2381741.02381741.00.039542166.01.8423.674.310.928.0Oran783000.00.03
44anAndorra468.0468.00.085580.00.128.136.960.0012.0Andorra La Vella23000.01.04
\n", "
" ], "text/plain": [ " id code name area area_land area_water population \\\n", "0 1 af Afghanistan 652230.0 652230.0 0.0 32564342.0 \n", "1 2 al Albania 28748.0 27398.0 1350.0 3029278.0 \n", "2 3 ag Algeria 2381741.0 2381741.0 0.0 39542166.0 \n", "3 3 ag Algeria 2381741.0 2381741.0 0.0 39542166.0 \n", "4 4 an Andorra 468.0 468.0 0.0 85580.0 \n", "\n", " population_growth birth_rate death_rate migration_rate id \\\n", "0 2.32 38.57 13.89 1.51 6.0 \n", "1 0.30 12.92 6.58 3.30 10.0 \n", "2 1.84 23.67 4.31 0.92 7.0 \n", "3 1.84 23.67 4.31 0.92 8.0 \n", "4 0.12 8.13 6.96 0.00 12.0 \n", "\n", " name population capital facts_id \n", "0 Kabul 3097000.0 1.0 1 \n", "1 Tirana 419000.0 1.0 2 \n", "2 Algiers 2916000.0 1.0 3 \n", "3 Oran 783000.0 0.0 3 \n", "4 Andorra La Vella 23000.0 1.0 4 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Left Join\n", "q2=\"\"\"SELECT * \n", "FROM facts\n", "LEFT JOIN cities \n", "ON cities.facts_id = facts.id;\"\"\"\n", "\n", "left_join = pd.read_sql(q2, conn)\n", "\n", "print(len(left_join))\n", "left_join.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ">- For left join, the order of FROM and LEFT JOIN matters\n", ">- FROM facts means every rows in the facts will be included, and some same facts have different cities, and some facts do not have cities at all." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3 Inner Join with a subquery" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "208\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countrycapital_city
0ArubaOranjestad
1Antigua and BarbudaSaint John'S
2United Arab EmiratesAbu Dhabi
3AfghanistanKabul
4AlgeriaAlgiers
\n", "
" ], "text/plain": [ " country capital_city\n", "0 Aruba Oranjestad\n", "1 Antigua and Barbuda Saint John'S\n", "2 United Arab Emirates Abu Dhabi\n", "3 Afghanistan Kabul\n", "4 Algeria Algiers" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# name subquery table as c, and from facts join c\n", "q3=\"\"\"\n", "SELECT f.name country, c.name capital_city\n", "FROM facts f\n", "JOIN (\n", " SELECT * FROM cities\n", " WHERE capital = 1) c\n", "ON c.facts_id = f.id;\n", "\"\"\"\n", "subquery_join = pd.read_sql(q3, conn)\n", "\n", "print(len(subquery_join))\n", "subquery_join.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Analyzing data in 'chinook.db' with 11 tables" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
typenametbl_namerootpagesql
0tablealbumalbum2CREATE TABLE [album]\\n(\\n [album_id] INTEGE...
1tableartistartist3CREATE TABLE [artist]\\n(\\n [artist_id] INTE...
2tablecustomercustomer4CREATE TABLE [customer]\\n(\\n [customer_id] ...
3tableemployeeemployee5CREATE TABLE [employee]\\n(\\n [employee_id] ...
4tablegenregenre6CREATE TABLE [genre]\\n(\\n [genre_id] INTEGE...
5tableinvoiceinvoice7CREATE TABLE [invoice]\\n(\\n [invoice_id] IN...
6tableinvoice_lineinvoice_line8CREATE TABLE [invoice_line]\\n(\\n [invoice_l...
7tablemedia_typemedia_type9CREATE TABLE [media_type]\\n(\\n [media_type_...
8tableplaylistplaylist10CREATE TABLE [playlist]\\n(\\n [playlist_id] ...
9tableplaylist_trackplaylist_track11CREATE TABLE [playlist_track]\\n(\\n [playlis...
10tabletracktrack13CREATE TABLE [track]\\n(\\n [track_id] INTEGE...
\n", "
" ], "text/plain": [ " type name tbl_name rootpage \\\n", "0 table album album 2 \n", "1 table artist artist 3 \n", "2 table customer customer 4 \n", "3 table employee employee 5 \n", "4 table genre genre 6 \n", "5 table invoice invoice 7 \n", "6 table invoice_line invoice_line 8 \n", "7 table media_type media_type 9 \n", "8 table playlist playlist 10 \n", "9 table playlist_track playlist_track 11 \n", "10 table track track 13 \n", "\n", " sql \n", "0 CREATE TABLE [album]\\n(\\n [album_id] INTEGE... \n", "1 CREATE TABLE [artist]\\n(\\n [artist_id] INTE... \n", "2 CREATE TABLE [customer]\\n(\\n [customer_id] ... \n", "3 CREATE TABLE [employee]\\n(\\n [employee_id] ... \n", "4 CREATE TABLE [genre]\\n(\\n [genre_id] INTEGE... \n", "5 CREATE TABLE [invoice]\\n(\\n [invoice_id] IN... \n", "6 CREATE TABLE [invoice_line]\\n(\\n [invoice_l... \n", "7 CREATE TABLE [media_type]\\n(\\n [media_type_... \n", "8 CREATE TABLE [playlist]\\n(\\n [playlist_id] ... \n", "9 CREATE TABLE [playlist_track]\\n(\\n [playlis... \n", "10 CREATE TABLE [track]\\n(\\n [track_id] INTEGE... " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build connection\n", "conn=sqlite3.connect('chinook.db')\n", "# view all tables\n", "tables = pd.read_sql(\"\"\"SELECT *\n", " FROM sqlite_master\n", " WHERE type='table';\"\"\", conn)\n", "tables" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
track_idtrack_nametrack_typeunit_pricequantity
03448Lamentations of Jeremiah, First Set \\ Incipit ...Protected AAC audio file0.991
12560Violent PornographyMPEG audio file0.991
23336War PigsPurchased AAC audio file0.991
3829Let's Get RockedMPEG audio file0.991
41872AttitudeMPEG audio file0.991
5748DealerMPEG audio file0.991
61778You're What's Happening (In The World Today)MPEG audio file0.991
72514SpoonmanMPEG audio file0.991
\n", "
" ], "text/plain": [ " track_id track_name \\\n", "0 3448 Lamentations of Jeremiah, First Set \\ Incipit ... \n", "1 2560 Violent Pornography \n", "2 3336 War Pigs \n", "3 829 Let's Get Rocked \n", "4 1872 Attitude \n", "5 748 Dealer \n", "6 1778 You're What's Happening (In The World Today) \n", "7 2514 Spoonman \n", "\n", " track_type unit_price quantity \n", "0 Protected AAC audio file 0.99 1 \n", "1 MPEG audio file 0.99 1 \n", "2 Purchased AAC audio file 0.99 1 \n", "3 MPEG audio file 0.99 1 \n", "4 MPEG audio file 0.99 1 \n", "5 MPEG audio file 0.99 1 \n", "6 MPEG audio file 0.99 1 \n", "7 MPEG audio file 0.99 1 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join three tables\n", "q_1=\"\"\"\n", "SELECT t.track_id, t.name track_name, m.name track_type, il.unit_price, il.quantity\n", "FROM invoice_line il\n", "JOIN track t \n", "ON t.track_id = il.track_id\n", "JOIN media_type m \n", "ON m.media_type_id=t.media_type_id\n", "WHERE il.invoice_id = 4\"\"\"\n", "\n", "table_1=pd.read_sql(q_1, conn)\n", "table_1" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
track_idtrack_nameartist_nametrack_typeunit_pricequantity
03448Lamentations of Jeremiah, First Set \\ Incipit ...The King's SingersProtected AAC audio file0.991
12560Violent PornographySystem Of A DownMPEG audio file0.991
23336War PigsCakePurchased AAC audio file0.991
3829Let's Get RockedDef LeppardMPEG audio file0.991
41872AttitudeMetallicaMPEG audio file0.991
5748DealerDeep PurpleMPEG audio file0.991
61778You're What's Happening (In The World Today)Marvin GayeMPEG audio file0.991
72514SpoonmanSoundgardenMPEG audio file0.991
\n", "
" ], "text/plain": [ " track_id track_name \\\n", "0 3448 Lamentations of Jeremiah, First Set \\ Incipit ... \n", "1 2560 Violent Pornography \n", "2 3336 War Pigs \n", "3 829 Let's Get Rocked \n", "4 1872 Attitude \n", "5 748 Dealer \n", "6 1778 You're What's Happening (In The World Today) \n", "7 2514 Spoonman \n", "\n", " artist_name track_type unit_price quantity \n", "0 The King's Singers Protected AAC audio file 0.99 1 \n", "1 System Of A Down MPEG audio file 0.99 1 \n", "2 Cake Purchased AAC audio file 0.99 1 \n", "3 Def Leppard MPEG audio file 0.99 1 \n", "4 Metallica MPEG audio file 0.99 1 \n", "5 Deep Purple MPEG audio file 0.99 1 \n", "6 Marvin Gaye MPEG audio file 0.99 1 \n", "7 Soundgarden MPEG audio file 0.99 1 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join five tables\n", "q_2= \"\"\"\n", "SELECT\n", " il.track_id,\n", " t.name track_name,\n", " ar.name artist_name,\n", " mt.name track_type,\n", " il.unit_price,\n", " il.quantity\n", "FROM invoice_line il\n", "INNER JOIN track t ON t.track_id = il.track_id\n", "INNER JOIN media_type mt ON mt.media_type_id = t.media_type_id\n", "INNER JOIN album al ON al.album_id = t.album_id\n", "INNER JOIN artist ar on ar.artist_id = al.artist_id\n", "WHERE il.invoice_id = 4;\"\"\"\n", "\n", "table_2=pd.read_sql(q_2, conn)\n", "table_2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.1 Lists the top 10 artists, calculated by the number of times a track by that artist has been purchased." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "invoice_line table contains each individual song from each customer purchase, it contains information about which songs are purchased the most. We can use the table to find out which artists are purchased the most. " ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3503\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
track_idartist_name
01AC/DC
16AC/DC
27AC/DC
38AC/DC
49AC/DC
\n", "
" ], "text/plain": [ " track_id artist_name\n", "0 1 AC/DC\n", "1 6 AC/DC\n", "2 7 AC/DC\n", "3 8 AC/DC\n", "4 9 AC/DC" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_q = \"\"\"SELECT\n", " t.track_id,\n", " ar.name artist_name \n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\"\"\"\n", "sub_table=pd.read_sql(sub_q, conn)\n", "print(len(sub_table))\n", "sub_table.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4757\n", "130\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
artistquantity
0Guns N' Roses1
1Guns N' Roses1
2Guns N' Roses1
3Guns N' Roses1
4Guns N' Roses1
\n", "
" ], "text/plain": [ " artist quantity\n", "0 Guns N' Roses 1\n", "1 Guns N' Roses 1\n", "2 Guns N' Roses 1\n", "3 Guns N' Roses 1\n", "4 Guns N' Roses 1" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_3= \"\"\"\n", "SELECT\n", " ta.artist_name artist, il.quantity\n", "FROM invoice_line il\n", "INNER JOIN (\n", " SELECT\n", " t.track_id,\n", " ar.name artist_name\n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\n", " ) ta\n", "ON ta.track_id = il.track_id;\"\"\"\n", "\n", "table_3=pd.read_sql(q_3, conn)\n", "print(len(table_3))\n", "print(table_3.artist.nunique())\n", "table_3.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "130\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
artisttracks_purchased
0Queen192
1Jimi Hendrix187
2Nirvana130
3Red Hot Chili Peppers130
4Pearl Jam129
5AC/DC124
6Guns N' Roses124
7Foo Fighters121
8The Rolling Stones117
9Metallica106
\n", "
" ], "text/plain": [ " artist tracks_purchased\n", "0 Queen 192\n", "1 Jimi Hendrix 187\n", "2 Nirvana 130\n", "3 Red Hot Chili Peppers 130\n", "4 Pearl Jam 129\n", "5 AC/DC 124\n", "6 Guns N' Roses 124\n", "7 Foo Fighters 121\n", "8 The Rolling Stones 117\n", "9 Metallica 106" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join table with subquery and group by\n", "q_4=\"\"\"\n", "SELECT\n", " ta.artist_name artist,\n", " COUNT(*) tracks_purchased\n", "FROM invoice_line il\n", "INNER JOIN (\n", " SELECT\n", " t.track_id,\n", " ar.name artist_name\n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\n", " ) ta\n", " ON ta.track_id = il.track_id\n", "GROUP BY 1\n", "ORDER BY 2 DESC;\"\"\"\n", "\n", "table_4 = pd.read_sql(q_4, conn)\n", "print(len(table_4))\n", "table_4.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.2 Write a query that returns the top 5 albums, as calculated by the number of times a track from that album has been purchased." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "251\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
albumartisttracks_purchased
0Are You Experienced?Jimi Hendrix187
1FacelessGodsmack96
2MezmerizeSystem Of A Down93
3Get BornJET90
4The DoorsThe Doors83
\n", "
" ], "text/plain": [ " album artist tracks_purchased\n", "0 Are You Experienced? Jimi Hendrix 187\n", "1 Faceless Godsmack 96\n", "2 Mezmerize System Of A Down 93\n", "3 Get Born JET 90\n", "4 The Doors The Doors 83" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_5=\"\"\"\n", "SELECT al.title album, ar.name artist, COUNT(*) tracks_purchased\n", "FROM invoice_line il\n", "JOIN track t ON t.track_id=il.track_id\n", "JOIN album al ON al.album_id= t.album_id\n", "JOIN artist ar ON ar.artist_id=al.artist_id\n", "GROUP BY 1\n", "ORDER BY 3 DESC;\"\"\"\n", "\n", "table_5 = pd.read_sql(q_5, conn)\n", "print(len(table_5))\n", "table_5.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "251\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
albumartisttracks_purchased
0Are You Experienced?Jimi Hendrix187
1FacelessGodsmack96
2MezmerizeSystem Of A Down93
3Get BornJET90
4The DoorsThe Doors83
\n", "
" ], "text/plain": [ " album artist tracks_purchased\n", "0 Are You Experienced? Jimi Hendrix 187\n", "1 Faceless Godsmack 96\n", "2 Mezmerize System Of A Down 93\n", "3 Get Born JET 90\n", "4 The Doors The Doors 83" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# same as q_5\n", "q_6=\"\"\"\n", "SELECT\n", " ta.album_title album,\n", " ta.artist_name artist,\n", " COUNT(*) tracks_purchased\n", "FROM invoice_line il\n", "INNER JOIN (\n", " SELECT\n", " t.track_id,\n", " al.title album_title,\n", " ar.name artist_name\n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\n", " ) ta\n", " ON ta.track_id = il.track_id\n", "GROUP BY 1, 2\n", "ORDER BY 3 DESC;\"\"\"\n", "\n", "table_6 = pd.read_sql(q_6, conn)\n", "print(len(table_6))\n", "table_6.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.3 Recursive Joins" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
employee_idlast_namefirst_nametitlereports_tobirthdatehire_dateaddresscitystatecountrypostal_codephonefaxemail
01AdamsAndrewGeneral ManagerNaN1962-02-18 00:00:002016-08-14 00:00:0011120 Jasper Ave NWEdmontonABCanadaT5K 2N1+1 (780) 428-9482+1 (780) 428-3457andrew@chinookcorp.com
12EdwardsNancySales Manager1.01958-12-08 00:00:002016-05-01 00:00:00825 8 Ave SWCalgaryABCanadaT2P 2T3+1 (403) 262-3443+1 (403) 262-3322nancy@chinookcorp.com
23PeacockJaneSales Support Agent2.01973-08-29 00:00:002017-04-01 00:00:001111 6 Ave SWCalgaryABCanadaT2P 5M5+1 (403) 262-3443+1 (403) 262-6712jane@chinookcorp.com
34ParkMargaretSales Support Agent2.01947-09-19 00:00:002017-05-03 00:00:00683 10 Street SWCalgaryABCanadaT2P 5G3+1 (403) 263-4423+1 (403) 263-4289margaret@chinookcorp.com
45JohnsonSteveSales Support Agent2.01965-03-03 00:00:002017-10-17 00:00:007727B 41 AveCalgaryABCanadaT3B 1Y71 (780) 836-99871 (780) 836-9543steve@chinookcorp.com
56MitchellMichaelIT Manager1.01973-07-01 00:00:002016-10-17 00:00:005827 Bowness Road NWCalgaryABCanadaT3B 0C5+1 (403) 246-9887+1 (403) 246-9899michael@chinookcorp.com
67KingRobertIT Staff6.01970-05-29 00:00:002017-01-02 00:00:00590 Columbia Boulevard WestLethbridgeABCanadaT1K 5N8+1 (403) 456-9986+1 (403) 456-8485robert@chinookcorp.com
78CallahanLauraIT Staff6.01968-01-09 00:00:002017-03-04 00:00:00923 7 ST NWLethbridgeABCanadaT1H 1Y8+1 (403) 467-3351+1 (403) 467-8772laura@chinookcorp.com
\n", "
" ], "text/plain": [ " employee_id last_name first_name title reports_to \\\n", "0 1 Adams Andrew General Manager NaN \n", "1 2 Edwards Nancy Sales Manager 1.0 \n", "2 3 Peacock Jane Sales Support Agent 2.0 \n", "3 4 Park Margaret Sales Support Agent 2.0 \n", "4 5 Johnson Steve Sales Support Agent 2.0 \n", "5 6 Mitchell Michael IT Manager 1.0 \n", "6 7 King Robert IT Staff 6.0 \n", "7 8 Callahan Laura IT Staff 6.0 \n", "\n", " birthdate hire_date address \\\n", "0 1962-02-18 00:00:00 2016-08-14 00:00:00 11120 Jasper Ave NW \n", "1 1958-12-08 00:00:00 2016-05-01 00:00:00 825 8 Ave SW \n", "2 1973-08-29 00:00:00 2017-04-01 00:00:00 1111 6 Ave SW \n", "3 1947-09-19 00:00:00 2017-05-03 00:00:00 683 10 Street SW \n", "4 1965-03-03 00:00:00 2017-10-17 00:00:00 7727B 41 Ave \n", "5 1973-07-01 00:00:00 2016-10-17 00:00:00 5827 Bowness Road NW \n", "6 1970-05-29 00:00:00 2017-01-02 00:00:00 590 Columbia Boulevard West \n", "7 1968-01-09 00:00:00 2017-03-04 00:00:00 923 7 ST NW \n", "\n", " city state country postal_code phone fax \\\n", "0 Edmonton AB Canada T5K 2N1 +1 (780) 428-9482 +1 (780) 428-3457 \n", "1 Calgary AB Canada T2P 2T3 +1 (403) 262-3443 +1 (403) 262-3322 \n", "2 Calgary AB Canada T2P 5M5 +1 (403) 262-3443 +1 (403) 262-6712 \n", "3 Calgary AB Canada T2P 5G3 +1 (403) 263-4423 +1 (403) 263-4289 \n", "4 Calgary AB Canada T3B 1Y7 1 (780) 836-9987 1 (780) 836-9543 \n", "5 Calgary AB Canada T3B 0C5 +1 (403) 246-9887 +1 (403) 246-9899 \n", "6 Lethbridge AB Canada T1K 5N8 +1 (403) 456-9986 +1 (403) 456-8485 \n", "7 Lethbridge AB Canada T1H 1Y8 +1 (403) 467-3351 +1 (403) 467-8772 \n", "\n", " email \n", "0 andrew@chinookcorp.com \n", "1 nancy@chinookcorp.com \n", "2 jane@chinookcorp.com \n", "3 margaret@chinookcorp.com \n", "4 steve@chinookcorp.com \n", "5 michael@chinookcorp.com \n", "6 robert@chinookcorp.com \n", "7 laura@chinookcorp.com " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# view the table employee\n", "q_7=\"\"\"\n", "SELECT *\n", "FROM employee\"\"\"\n", "\n", "table_7=pd.read_sql(q_7, conn)\n", "table_7" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
employee_nameemployee_titlesupervisor_namesupervisor_title
0Andrew AdamsGeneral ManagerNoneNone
1Jane PeacockSales Support AgentNancy EdwardsSales Manager
2Laura CallahanIT StaffMichael MitchellIT Manager
3Margaret ParkSales Support AgentNancy EdwardsSales Manager
4Michael MitchellIT ManagerAndrew AdamsGeneral Manager
5Nancy EdwardsSales ManagerAndrew AdamsGeneral Manager
6Robert KingIT StaffMichael MitchellIT Manager
7Steve JohnsonSales Support AgentNancy EdwardsSales Manager
\n", "
" ], "text/plain": [ " employee_name employee_title supervisor_name supervisor_title\n", "0 Andrew Adams General Manager None None\n", "1 Jane Peacock Sales Support Agent Nancy Edwards Sales Manager\n", "2 Laura Callahan IT Staff Michael Mitchell IT Manager\n", "3 Margaret Park Sales Support Agent Nancy Edwards Sales Manager\n", "4 Michael Mitchell IT Manager Andrew Adams General Manager\n", "5 Nancy Edwards Sales Manager Andrew Adams General Manager\n", "6 Robert King IT Staff Michael Mitchell IT Manager\n", "7 Steve Johnson Sales Support Agent Nancy Edwards Sales Manager" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Recursive Joins\n", "q_8=\"\"\"\n", "SELECT\n", " e1.first_name || \" \" || e1.last_name employee_name,\n", " e1.title employee_title,\n", " e2.first_name || \" \" || e2.last_name supervisor_name,\n", " e2.title supervisor_title\n", "FROM employee e1\n", "LEFT JOIN employee e2 ON e1.reports_to = e2.employee_id\n", "ORDER BY 1;\"\"\"\n", "\n", "table_8=pd.read_sql(q_8, conn)\n", "table_8" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
invoice_idcustomer_idinvoice_datebilling_addressbilling_citybilling_statebilling_countrybilling_postal_codetotalcustomer_idfirst_namelast_namecompanyaddresscitystatecountrypostal_codephonefaxemailsupport_rep_id
01182017-01-03 00:00:00627 BroadwayNew YorkNYUSA10012-261215.8418MichelleBrooksNone627 BroadwayNew YorkNYUSA10012-2612+1 (212) 221-3546+1 (212) 221-4679michelleb@aol.com3
12302017-01-03 00:00:00230 Elgin StreetOttawaONCanadaK2P 1L79.9030EdwardFrancisNone230 Elgin StreetOttawaONCanadaK2P 1L7+1 (613) 234-3322Noneedfrancis@yachoo.ca3
23402017-01-05 00:00:008, Rue HanovreParisNoneFrance750021.9840DominiqueLefebvreNone8, Rue HanovreParisNoneFrance75002+33 01 47 42 71 71Nonedominiquelefebvre@gmail.com4
34182017-01-06 00:00:00627 BroadwayNew YorkNYUSA10012-26127.9218MichelleBrooksNone627 BroadwayNew YorkNYUSA10012-2612+1 (212) 221-3546+1 (212) 221-4679michelleb@aol.com3
45272017-01-07 00:00:001033 N Park AveTucsonAZUSA8571916.8327PatrickGrayNone1033 N Park AveTucsonAZUSA85719+1 (520) 622-4200Nonepatrick.gray@aol.com4
\n", "
" ], "text/plain": [ " invoice_id customer_id invoice_date billing_address \\\n", "0 1 18 2017-01-03 00:00:00 627 Broadway \n", "1 2 30 2017-01-03 00:00:00 230 Elgin Street \n", "2 3 40 2017-01-05 00:00:00 8, Rue Hanovre \n", "3 4 18 2017-01-06 00:00:00 627 Broadway \n", "4 5 27 2017-01-07 00:00:00 1033 N Park Ave \n", "\n", " billing_city billing_state billing_country billing_postal_code total \\\n", "0 New York NY USA 10012-2612 15.84 \n", "1 Ottawa ON Canada K2P 1L7 9.90 \n", "2 Paris None France 75002 1.98 \n", "3 New York NY USA 10012-2612 7.92 \n", "4 Tucson AZ USA 85719 16.83 \n", "\n", " customer_id first_name last_name company address city state \\\n", "0 18 Michelle Brooks None 627 Broadway New York NY \n", "1 30 Edward Francis None 230 Elgin Street Ottawa ON \n", "2 40 Dominique Lefebvre None 8, Rue Hanovre Paris None \n", "3 18 Michelle Brooks None 627 Broadway New York NY \n", "4 27 Patrick Gray None 1033 N Park Ave Tucson AZ \n", "\n", " country postal_code phone fax \\\n", "0 USA 10012-2612 +1 (212) 221-3546 +1 (212) 221-4679 \n", "1 Canada K2P 1L7 +1 (613) 234-3322 None \n", "2 France 75002 +33 01 47 42 71 71 None \n", "3 USA 10012-2612 +1 (212) 221-3546 +1 (212) 221-4679 \n", "4 USA 85719 +1 (520) 622-4200 None \n", "\n", " email support_rep_id \n", "0 michelleb@aol.com 3 \n", "1 edfrancis@yachoo.ca 3 \n", "2 dominiquelefebvre@gmail.com 4 \n", "3 michelleb@aol.com 3 \n", "4 patrick.gray@aol.com 4 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub=\"\"\"\n", "SELECT *\n", "FROM invoice i\n", "JOIN customer c\n", "ON c.customer_id = i.customer_id;\"\"\"\n", "customer_invoice = pd.read_sql(sub, conn)\n", "# to avoid truncated output \n", "pd.options.display.max_columns = 150 \n", "customer_invoice.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_namenumber_of_purchasestotal_spentcustomer_category
0Aaron Mitchell870.29regular
1Alexandre Rocha1069.30regular
2Astrid Gruber969.30regular
3Bjørn Hansen972.27regular
4Camille Bernard979.20regular
\n", "
" ], "text/plain": [ " customer_name number_of_purchases total_spent customer_category\n", "0 Aaron Mitchell 8 70.29 regular\n", "1 Alexandre Rocha 10 69.30 regular\n", "2 Astrid Gruber 9 69.30 regular\n", "3 Bjørn Hansen 9 72.27 regular\n", "4 Camille Bernard 9 79.20 regular" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# join customer and invoice tabel, groupby customer name and create a new column\n", "q_9=\"\"\"\n", "SELECT\n", " c.first_name || \" \" || c.last_name customer_name,\n", " COUNT(i.invoice_id) number_of_purchases,\n", " SUM(i.total) total_spent,\n", " CASE\n", " WHEN sum(i.total) < 40 THEN 'small spender'\n", " WHEN sum(i.total) > 100 THEN 'big spender'\n", " ELSE 'regular'\n", " END\n", " AS customer_category\n", "FROM invoice i\n", "INNER JOIN customer c ON i.customer_id = c.customer_id\n", "GROUP BY 1 ORDER BY 1;\"\"\"\n", "\n", "table_9=pd.read_sql(q_9, conn)\n", "table_9.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3.Building and Organizing Complex Queries" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3503\n", "14\n" ] } ], "source": [ "# view the table playlist\n", "q=\"\"\"\n", "SELECT *\n", "FROM playlist_track\"\"\"\n", "playlist_track=pd.read_sql(q,conn)\n", "print(playlist_track.track_id.nunique())\n", "print(playlist_track.playlist_id.nunique())\n", "# total length of playlist is 18, means not all playlist is in playlist_track." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
track_idnamealbum_idmedia_type_idgenre_idcomposermillisecondsbytesunit_priceplaylist_idtrack_idplaylist_idname
03402Band Members Discuss Tracks from \"Revelations\"271323None294294611188910.99134021Music
13389Revelations271223None25237641110510.99133891Music
23390One and the Same271223None21773235590400.99133901Music
33391Sound of a Gun271223None26015442349900.99133911Music
43392Until We Fall271223None23075837666050.99133921Music
\n", "
" ], "text/plain": [ " track_id name album_id \\\n", "0 3402 Band Members Discuss Tracks from \"Revelations\" 271 \n", "1 3389 Revelations 271 \n", "2 3390 One and the Same 271 \n", "3 3391 Sound of a Gun 271 \n", "4 3392 Until We Fall 271 \n", "\n", " media_type_id genre_id composer milliseconds bytes unit_price \\\n", "0 3 23 None 294294 61118891 0.99 \n", "1 2 23 None 252376 4111051 0.99 \n", "2 2 23 None 217732 3559040 0.99 \n", "3 2 23 None 260154 4234990 0.99 \n", "4 2 23 None 230758 3766605 0.99 \n", "\n", " playlist_id track_id playlist_id name \n", "0 1 3402 1 Music \n", "1 1 3389 1 Music \n", "2 1 3390 1 Music \n", "3 1 3391 1 Music \n", "4 1 3392 1 Music " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# (inner) join three tables \n", "join_from_track=\"\"\"\n", "SELECT * \n", " FROM track t\n", " JOIN playlist_track pt ON t.track_id=pt.track_id\n", " JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "join_from_track=pd.read_sql(join_from_track,conn)\n", "print(len(join_from_track))\n", "join_from_track.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idnameplaylist_idtrack_idtrack_idnamealbum_idmedia_type_idgenre_idcomposermillisecondsbytesunit_price
01Music134023402Band Members Discuss Tracks from \"Revelations\"271323None294294611188910.99
11Music133893389Revelations271223None25237641110510.99
21Music133903390One and the Same271223None21773235590400.99
31Music133913391Sound of a Gun271223None26015442349900.99
41Music133923392Until We Fall271223None23075837666050.99
\n", "
" ], "text/plain": [ " playlist_id name playlist_id track_id track_id \\\n", "0 1 Music 1 3402 3402 \n", "1 1 Music 1 3389 3389 \n", "2 1 Music 1 3390 3390 \n", "3 1 Music 1 3391 3391 \n", "4 1 Music 1 3392 3392 \n", "\n", " name album_id media_type_id \\\n", "0 Band Members Discuss Tracks from \"Revelations\" 271 3 \n", "1 Revelations 271 2 \n", "2 One and the Same 271 2 \n", "3 Sound of a Gun 271 2 \n", "4 Until We Fall 271 2 \n", "\n", " genre_id composer milliseconds bytes unit_price \n", "0 23 None 294294 61118891 0.99 \n", "1 23 None 252376 4111051 0.99 \n", "2 23 None 217732 3559040 0.99 \n", "3 23 None 260154 4234990 0.99 \n", "4 23 None 230758 3766605 0.99 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# same as above, this is to show, for (inner) join, table order does not matter\n", "join_from_playlist=\"\"\"\n", "SELECT * \n", " FROM playlist p\n", " JOIN playlist_track pt ON p.playlist_id=pt.playlist_id\n", " JOIN track t ON t.track_id=pt.track_id;\"\"\"\n", "\n", "join_from_playlist=pd.read_sql(join_from_playlist,conn)\n", "print(len(join_from_playlist))\n", "join_from_playlist.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
track_idnamealbum_idmedia_type_idgenre_idcomposermillisecondsbytesunit_priceplaylist_idtrack_idplaylist_idname
01For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson343719111703340.99111Music
11For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson343719111703340.99818Music
21For Those About To Rock (We Salute You)111Angus Young, Malcolm Young, Brian Johnson343719111703340.9917117Heavy Metal Classic
32Balls to the Wall221None34256255104240.99121Music
42Balls to the Wall221None34256255104240.99828Music
\n", "
" ], "text/plain": [ " track_id name album_id media_type_id \\\n", "0 1 For Those About To Rock (We Salute You) 1 1 \n", "1 1 For Those About To Rock (We Salute You) 1 1 \n", "2 1 For Those About To Rock (We Salute You) 1 1 \n", "3 2 Balls to the Wall 2 2 \n", "4 2 Balls to the Wall 2 2 \n", "\n", " genre_id composer milliseconds \\\n", "0 1 Angus Young, Malcolm Young, Brian Johnson 343719 \n", "1 1 Angus Young, Malcolm Young, Brian Johnson 343719 \n", "2 1 Angus Young, Malcolm Young, Brian Johnson 343719 \n", "3 1 None 342562 \n", "4 1 None 342562 \n", "\n", " bytes unit_price playlist_id track_id playlist_id \\\n", "0 11170334 0.99 1 1 1 \n", "1 11170334 0.99 8 1 8 \n", "2 11170334 0.99 17 1 17 \n", "3 5510424 0.99 1 2 1 \n", "4 5510424 0.99 8 2 8 \n", "\n", " name \n", "0 Music \n", "1 Music \n", "2 Heavy Metal Classic \n", "3 Music \n", "4 Music " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# left join 3 tables\n", "LEFT_join_from_track=\"\"\"\n", "SELECT * \n", " FROM track t\n", " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n", " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "LEFT_join_from_track=pd.read_sql(LEFT_join_from_track,conn)\n", "print(len(LEFT_join_from_track))\n", "LEFT_join_from_track.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8719\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idnameplaylist_idtrack_idtrack_idnamealbum_idmedia_type_idgenre_idcomposermillisecondsbytesunit_price
01Music1.01.01.0For Those About To Rock (We Salute You)1.01.01.0Angus Young, Malcolm Young, Brian Johnson343719.011170334.00.99
11Music1.02.02.0Balls to the Wall2.02.01.0None342562.05510424.00.99
21Music1.03.03.0Fast As a Shark3.02.01.0F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...230619.03990994.00.99
31Music1.04.04.0Restless and Wild3.02.01.0F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...252051.04331779.00.99
41Music1.05.05.0Princess of the Dawn3.02.01.0Deaffy & R.A. Smith-Diesel375418.06290521.00.99
\n", "
" ], "text/plain": [ " playlist_id name playlist_id track_id track_id \\\n", "0 1 Music 1.0 1.0 1.0 \n", "1 1 Music 1.0 2.0 2.0 \n", "2 1 Music 1.0 3.0 3.0 \n", "3 1 Music 1.0 4.0 4.0 \n", "4 1 Music 1.0 5.0 5.0 \n", "\n", " name album_id media_type_id genre_id \\\n", "0 For Those About To Rock (We Salute You) 1.0 1.0 1.0 \n", "1 Balls to the Wall 2.0 2.0 1.0 \n", "2 Fast As a Shark 3.0 2.0 1.0 \n", "3 Restless and Wild 3.0 2.0 1.0 \n", "4 Princess of the Dawn 3.0 2.0 1.0 \n", "\n", " composer milliseconds \\\n", "0 Angus Young, Malcolm Young, Brian Johnson 343719.0 \n", "1 None 342562.0 \n", "2 F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho... 230619.0 \n", "3 F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D... 252051.0 \n", "4 Deaffy & R.A. Smith-Diesel 375418.0 \n", "\n", " bytes unit_price \n", "0 11170334.0 0.99 \n", "1 5510424.0 0.99 \n", "2 3990994.0 0.99 \n", "3 4331779.0 0.99 \n", "4 6290521.0 0.99 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this is to show different order in table can have different result for left join\n", "LEFT_join_from_playlist=\"\"\"\n", "SELECT *\n", "FROM playlist p\n", "LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n", "LEFT JOIN track t ON t.track_id = pt.track_id\"\"\"\n", "\n", "LEFT_join_from_playlist=pd.read_sql(LEFT_join_from_playlist,conn)\n", "print(len(LEFT_join_from_playlist))\n", "LEFT_join_from_playlist.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ">- The result has 8719 rows, instead of 8715 rows for left_join_from_track" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**sub_join_from_track**" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idplaylist_nametrack_namelength_seconds
01MusicBand Members Discuss Tracks from \"Revelations\"294
11MusicRevelations252
21MusicOne and the Same217
31MusicSound of a Gun260
41MusicUntil We Fall230
\n", "
" ], "text/plain": [ " playlist_id playlist_name track_name \\\n", "0 1 Music Band Members Discuss Tracks from \"Revelations\" \n", "1 1 Music Revelations \n", "2 1 Music One and the Same \n", "3 1 Music Sound of a Gun \n", "4 1 Music Until We Fall \n", "\n", " length_seconds \n", "0 294 \n", "1 252 \n", "2 217 \n", "3 260 \n", "4 230 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub=\"\"\"\n", "SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " JOIN playlist_track pt ON t.track_id=pt.track_id\n", " JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "sub=pd.read_sql(sub,conn)\n", "print(len(sub))\n", "sub.head()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "14\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idplaylist_namenumber_of_trackslength_seconds
01Music3290876049
13TV Shows213500987
2590’s Music1477397970
38Music3290876049
49Music Videos1294
510TV Shows213500987
611Brazilian Music399464
712Classical7521736
813Classical 101 - Deep Cuts256742
914Classical 101 - Next Steps257565
1015Classical 101 - The Basics257429
1116Grunge154114
1217Heavy Metal Classic268189
1318On-The-Go 11197
\n", "
" ], "text/plain": [ " playlist_id playlist_name number_of_tracks length_seconds\n", "0 1 Music 3290 876049\n", "1 3 TV Shows 213 500987\n", "2 5 90’s Music 1477 397970\n", "3 8 Music 3290 876049\n", "4 9 Music Videos 1 294\n", "5 10 TV Shows 213 500987\n", "6 11 Brazilian Music 39 9464\n", "7 12 Classical 75 21736\n", "8 13 Classical 101 - Deep Cuts 25 6742\n", "9 14 Classical 101 - Next Steps 25 7565\n", "10 15 Classical 101 - The Basics 25 7429\n", "11 16 Grunge 15 4114\n", "12 17 Heavy Metal Classic 26 8189\n", "13 18 On-The-Go 1 1 197" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# name subquery using WITH and AS \n", "q=\"\"\"\n", "WITH sub AS\n", "(SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " JOIN playlist_track pt ON t.track_id=pt.track_id\n", " JOIN playlist p ON p.playlist_id=pt.playlist_id\n", ")\n", "SELECT\n", " playlist_id,\n", " playlist_name,\n", " COUNT(playlist_id) number_of_tracks,\n", " SUM(length_seconds) length_seconds\n", "FROM sub\n", "GROUP BY 1\n", "ORDER BY 1\n", "\"\"\"\n", "table_sub=pd.read_sql(q,conn)\n", "print(len(table_sub))\n", "table_sub\n", "\n", "# GROUP BY 1 and GROUP BY 1,2 are the same\n", "# COUNT(playlist_id), same as COUNT(track_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**sub_LEFT_from_track**" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idplaylist_nametrack_namelength_seconds
01MusicFor Those About To Rock (We Salute You)343
18MusicFor Those About To Rock (We Salute You)343
217Heavy Metal ClassicFor Those About To Rock (We Salute You)343
31MusicBalls to the Wall342
48MusicBalls to the Wall342
\n", "
" ], "text/plain": [ " playlist_id playlist_name track_name \\\n", "0 1 Music For Those About To Rock (We Salute You) \n", "1 8 Music For Those About To Rock (We Salute You) \n", "2 17 Heavy Metal Classic For Those About To Rock (We Salute You) \n", "3 1 Music Balls to the Wall \n", "4 8 Music Balls to the Wall \n", "\n", " length_seconds \n", "0 343 \n", "1 343 \n", "2 343 \n", "3 342 \n", "4 342 " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_LEFT_from_track=\"\"\"\n", "SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n", " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "sub_LEFT_from_track=pd.read_sql(sub_LEFT_from_track,conn)\n", "print(len(sub_LEFT_from_track))\n", "sub_LEFT_from_track.head()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "14\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idplaylist_namenumber_of_trackslength_seconds
01Music3290876049
13TV Shows213500987
2590’s Music1477397970
38Music3290876049
49Music Videos1294
510TV Shows213500987
611Brazilian Music399464
712Classical7521736
813Classical 101 - Deep Cuts256742
914Classical 101 - Next Steps257565
1015Classical 101 - The Basics257429
1116Grunge154114
1217Heavy Metal Classic268189
1318On-The-Go 11197
\n", "
" ], "text/plain": [ " playlist_id playlist_name number_of_tracks length_seconds\n", "0 1 Music 3290 876049\n", "1 3 TV Shows 213 500987\n", "2 5 90’s Music 1477 397970\n", "3 8 Music 3290 876049\n", "4 9 Music Videos 1 294\n", "5 10 TV Shows 213 500987\n", "6 11 Brazilian Music 39 9464\n", "7 12 Classical 75 21736\n", "8 13 Classical 101 - Deep Cuts 25 6742\n", "9 14 Classical 101 - Next Steps 25 7565\n", "10 15 Classical 101 - The Basics 25 7429\n", "11 16 Grunge 15 4114\n", "12 17 Heavy Metal Classic 26 8189\n", "13 18 On-The-Go 1 1 197" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q=\"\"\"\n", "WITH sub_LEFT AS\n", "(SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n", " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id\n", ")\n", "SELECT\n", " playlist_id,\n", " playlist_name,\n", " COUNT(track_name) number_of_tracks,\n", " SUM(length_seconds) length_seconds\n", "FROM sub_LEFT\n", "GROUP BY 1, 2\n", "ORDER BY 1\n", "\"\"\"\n", "table_sub_LEFT_from_track=pd.read_sql(q,conn)\n", "print(len(table_sub_LEFT_from_track))\n", "table_sub_LEFT_from_track" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**sub_LEFT_from_playlist**" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8719\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idplaylist_nametrack_namelength_seconds
01MusicFor Those About To Rock (We Salute You)343.0
11MusicBalls to the Wall342.0
21MusicFast As a Shark230.0
31MusicRestless and Wild252.0
41MusicPrincess of the Dawn375.0
\n", "
" ], "text/plain": [ " playlist_id playlist_name track_name \\\n", "0 1 Music For Those About To Rock (We Salute You) \n", "1 1 Music Balls to the Wall \n", "2 1 Music Fast As a Shark \n", "3 1 Music Restless and Wild \n", "4 1 Music Princess of the Dawn \n", "\n", " length_seconds \n", "0 343.0 \n", "1 342.0 \n", "2 230.0 \n", "3 252.0 \n", "4 375.0 " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_LEFT_from_playlist=\"\"\"\n", "SELECT\n", " p.playlist_id,\n", " p.name playlist_name,\n", " t.name track_name,\n", " (t.milliseconds / 1000) length_seconds\n", "FROM playlist p\n", "LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n", "LEFT JOIN track t ON t.track_id = pt.track_id\"\"\"\n", "\n", "sub_LEFT_from_playlist=pd.read_sql(sub_LEFT_from_playlist, conn)\n", "print(len(sub_LEFT_from_playlist))\n", "sub_LEFT_from_playlist.head()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "18\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
playlist_idplaylist_namenumber_of_trackslength_seconds
01Music3290876049.0
12Movies0NaN
23TV Shows213500987.0
34Audiobooks0NaN
4590’s Music1477397970.0
56Audiobooks0NaN
67Movies0NaN
78Music3290876049.0
89Music Videos1294.0
910TV Shows213500987.0
1011Brazilian Music399464.0
1112Classical7521736.0
1213Classical 101 - Deep Cuts256742.0
1314Classical 101 - Next Steps257565.0
1415Classical 101 - The Basics257429.0
1516Grunge154114.0
1617Heavy Metal Classic268189.0
1718On-The-Go 11197.0
\n", "
" ], "text/plain": [ " playlist_id playlist_name number_of_tracks length_seconds\n", "0 1 Music 3290 876049.0\n", "1 2 Movies 0 NaN\n", "2 3 TV Shows 213 500987.0\n", "3 4 Audiobooks 0 NaN\n", "4 5 90’s Music 1477 397970.0\n", "5 6 Audiobooks 0 NaN\n", "6 7 Movies 0 NaN\n", "7 8 Music 3290 876049.0\n", "8 9 Music Videos 1 294.0\n", "9 10 TV Shows 213 500987.0\n", "10 11 Brazilian Music 39 9464.0\n", "11 12 Classical 75 21736.0\n", "12 13 Classical 101 - Deep Cuts 25 6742.0\n", "13 14 Classical 101 - Next Steps 25 7565.0\n", "14 15 Classical 101 - The Basics 25 7429.0\n", "15 16 Grunge 15 4114.0\n", "16 17 Heavy Metal Classic 26 8189.0\n", "17 18 On-The-Go 1 1 197.0" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q=\"\"\"\n", "WITH playlist_info AS\n", " (\n", " SELECT\n", " p.playlist_id,\n", " p.name playlist_name,\n", " t.name track_name,\n", " (t.milliseconds / 1000) length_seconds\n", " FROM playlist p\n", " LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n", " LEFT JOIN track t ON t.track_id = pt.track_id\n", " )\n", "\n", "SELECT\n", " playlist_id,\n", " playlist_name,\n", " COUNT(track_name) number_of_tracks,\n", " SUM(length_seconds) length_seconds\n", "FROM playlist_info\n", "GROUP BY 1,2\n", "ORDER BY 1;\"\"\"\n", "\n", "table_sub_LEFT_from_playlist=pd.read_sql(q, conn)\n", "print(len(table_sub_LEFT_from_playlist))\n", "table_sub_LEFT_from_playlist" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**CREATE VIEW (create a table can be reused), view can not be run on Jupyter notebook**" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "18\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
customer_idfirst_namelast_namecompanyaddresscitystatecountrypostal_codephonefaxemailsupport_rep_idinvoice_idcustomer_idinvoice_datebilling_addressbilling_citybilling_statebilling_countrybilling_postal_codetotal
01LuísGonçalvesEmbraer - Empresa Brasileira de Aeronáutica S.A.Av. Brigadeiro Faria Lima, 2170São José dos CamposSPBrazil12227-000+55 (12) 3923-5555+55 (12) 3923-5566luisg@embraer.com.br31612017-01-26 00:00:00Av. Brigadeiro Faria Lima, 2170São José dos CamposSPBrazil12227-0008.91
13FrançoisTremblayNone1498 rue BélangerMontréalQCCanadaH2G 1A7+1 (514) 721-4711Noneftremblay@gmail.com33132017-02-21 00:00:001498 rue BélangerMontréalQCCanadaH2G 1A719.80
25FrantišekWichterlováJetBrains s.r.o.Klanova 9/506PragueNoneCzech Republic14700+420 2 4172 5555+420 2 4172 5555frantisekw@jetbrains.com47852017-05-29 00:00:00Klanova 9/506PragueNoneCzech Republic147008.91
36HelenaHolýNoneRilská 3174/6PragueNoneCzech Republic14300+420 2 4177 0449Nonehholy@gmail.com511262017-08-31 00:00:00Rilská 3174/6PragueNoneCzech Republic1430016.83
413FernandaRamosNoneQe 7 Bloco GBrasíliaDFBrazil71020-677+55 (61) 3363-5547+55 (61) 3363-7855fernadaramos4@uol.com.br473132017-05-18 00:00:00Qe 7 Bloco GBrasíliaDFBrazil71020-67714.85
517JackSmithMicrosoft Corporation1 Microsoft WayRedmondWAUSA98052-8300+1 (425) 882-8080+1 (425) 882-8081jacksmith@microsoft.com598172017-07-23 00:00:001 Microsoft WayRedmondWAUSA98052-83009.90
620DanMillerNone541 Del Medio AvenueMountain ViewCAUSA94040-111+1 (650) 644-3358Nonedmiller@comcast.com417202017-01-28 00:00:00541 Del Medio AvenueMountain ViewCAUSA94040-11110.89
721KathyChaseNone801 W 4th StreetRenoNVUSA89503+1 (775) 223-7665Nonekachase@hotmail.com544212017-03-20 00:00:00801 W 4th StreetRenoNVUSA8950311.88
822HeatherLeacockNone120 S Orange AveOrlandoFLUSA32801+1 (407) 999-7788Nonehleacock@gmail.com443222017-03-19 00:00:00120 S Orange AveOrlandoFLUSA328016.93
930EdwardFrancisNone230 Elgin StreetOttawaONCanadaK2P 1L7+1 (613) 234-3322Noneedfrancis@yachoo.ca32302017-01-03 00:00:00230 Elgin StreetOttawaONCanadaK2P 1L79.90
1034JoãoFernandesNoneRua da Assunção 53LisbonNonePortugalNone+351 (213) 466-111Nonejfernandes@yahoo.pt435342017-02-25 00:00:00Rua da Assunção 53LisbonNonePortugalNone4.95
1137FynnZimmermannNoneBerger Straße 10FrankfurtNoneGermany60316+49 069 40598889Nonefzimmermann@yahoo.de340372017-03-13 00:00:00Berger Straße 10FrankfurtNoneGermany603169.90
1242WyattGirardNone9, Place Louis BarthouBordeauxNoneFrance33000+33 05 56 96 96 96Nonewyatt.girard@yahoo.fr312422017-01-21 00:00:009, Place Louis BarthouBordeauxNoneFrance330003.96
1346HughO'ReillyNone3 Chatham StreetDublinDublinIrelandNone+353 01 6792424Nonehughoreilly@apple.ie330462017-02-18 00:00:003 Chatham StreetDublinDublinIrelandNone10.89
1450EnriqueMuñozNoneC/ San Bernardo 85MadridNoneSpain28015+34 914 454 454Noneenrique_munoz@yahoo.es541502017-03-17 00:00:00C/ San Bernardo 85MadridNoneSpain280157.92
1553PhilHughesNone113 Lupus StLondonNoneUnited KingdomSW1V 3EN+44 020 7976 5722Nonephil.hughes@gmail.com323532017-02-09 00:00:00113 Lupus StLondonNoneUnited KingdomSW1V 3EN9.90
1657LuisRojasNoneCalle Lira, 198SantiagoNoneChileNone+56 (0)2 635 4444Noneluisrojas@yahoo.cl547572017-03-25 00:00:00Calle Lira, 198SantiagoNoneChileNone5.94
1758ManojPareekNone12,Community CentreDelhiNoneIndia110017+91 0124 39883988Nonemanoj.pareek@rediff.com333582017-02-21 00:00:0012,Community CentreDelhiNoneIndia1100173.96
\n", "
" ], "text/plain": [ " customer_id first_name last_name \\\n", "0 1 Luís Gonçalves \n", "1 3 François Tremblay \n", "2 5 František Wichterlová \n", "3 6 Helena Holý \n", "4 13 Fernanda Ramos \n", "5 17 Jack Smith \n", "6 20 Dan Miller \n", "7 21 Kathy Chase \n", "8 22 Heather Leacock \n", "9 30 Edward Francis \n", "10 34 João Fernandes \n", "11 37 Fynn Zimmermann \n", "12 42 Wyatt Girard \n", "13 46 Hugh O'Reilly \n", "14 50 Enrique Muñoz \n", "15 53 Phil Hughes \n", "16 57 Luis Rojas \n", "17 58 Manoj Pareek \n", "\n", " company \\\n", "0 Embraer - Empresa Brasileira de Aeronáutica S.A. \n", "1 None \n", "2 JetBrains s.r.o. \n", "3 None \n", "4 None \n", "5 Microsoft Corporation \n", "6 None \n", "7 None \n", "8 None \n", "9 None \n", "10 None \n", "11 None \n", "12 None \n", "13 None \n", "14 None \n", "15 None \n", "16 None \n", "17 None \n", "\n", " address city state \\\n", "0 Av. Brigadeiro Faria Lima, 2170 São José dos Campos SP \n", "1 1498 rue Bélanger Montréal QC \n", "2 Klanova 9/506 Prague None \n", "3 Rilská 3174/6 Prague None \n", "4 Qe 7 Bloco G Brasília DF \n", "5 1 Microsoft Way Redmond WA \n", "6 541 Del Medio Avenue Mountain View CA \n", "7 801 W 4th Street Reno NV \n", "8 120 S Orange Ave Orlando FL \n", "9 230 Elgin Street Ottawa ON \n", "10 Rua da Assunção 53 Lisbon None \n", "11 Berger Straße 10 Frankfurt None \n", "12 9, Place Louis Barthou Bordeaux None \n", "13 3 Chatham Street Dublin Dublin \n", "14 C/ San Bernardo 85 Madrid None \n", "15 113 Lupus St London None \n", "16 Calle Lira, 198 Santiago None \n", "17 12,Community Centre Delhi None \n", "\n", " country postal_code phone fax \\\n", "0 Brazil 12227-000 +55 (12) 3923-5555 +55 (12) 3923-5566 \n", "1 Canada H2G 1A7 +1 (514) 721-4711 None \n", "2 Czech Republic 14700 +420 2 4172 5555 +420 2 4172 5555 \n", "3 Czech Republic 14300 +420 2 4177 0449 None \n", "4 Brazil 71020-677 +55 (61) 3363-5547 +55 (61) 3363-7855 \n", "5 USA 98052-8300 +1 (425) 882-8080 +1 (425) 882-8081 \n", "6 USA 94040-111 +1 (650) 644-3358 None \n", "7 USA 89503 +1 (775) 223-7665 None \n", "8 USA 32801 +1 (407) 999-7788 None \n", "9 Canada K2P 1L7 +1 (613) 234-3322 None \n", "10 Portugal None +351 (213) 466-111 None \n", "11 Germany 60316 +49 069 40598889 None \n", "12 France 33000 +33 05 56 96 96 96 None \n", "13 Ireland None +353 01 6792424 None \n", "14 Spain 28015 +34 914 454 454 None \n", "15 United Kingdom SW1V 3EN +44 020 7976 5722 None \n", "16 Chile None +56 (0)2 635 4444 None \n", "17 India 110017 +91 0124 39883988 None \n", "\n", " email support_rep_id invoice_id customer_id \\\n", "0 luisg@embraer.com.br 3 16 1 \n", "1 ftremblay@gmail.com 3 31 3 \n", "2 frantisekw@jetbrains.com 4 78 5 \n", "3 hholy@gmail.com 5 112 6 \n", "4 fernadaramos4@uol.com.br 4 73 13 \n", "5 jacksmith@microsoft.com 5 98 17 \n", "6 dmiller@comcast.com 4 17 20 \n", "7 kachase@hotmail.com 5 44 21 \n", "8 hleacock@gmail.com 4 43 22 \n", "9 edfrancis@yachoo.ca 3 2 30 \n", "10 jfernandes@yahoo.pt 4 35 34 \n", "11 fzimmermann@yahoo.de 3 40 37 \n", "12 wyatt.girard@yahoo.fr 3 12 42 \n", "13 hughoreilly@apple.ie 3 30 46 \n", "14 enrique_munoz@yahoo.es 5 41 50 \n", "15 phil.hughes@gmail.com 3 23 53 \n", "16 luisrojas@yahoo.cl 5 47 57 \n", "17 manoj.pareek@rediff.com 3 33 58 \n", "\n", " invoice_date billing_address billing_city \\\n", "0 2017-01-26 00:00:00 Av. Brigadeiro Faria Lima, 2170 São José dos Campos \n", "1 2017-02-21 00:00:00 1498 rue Bélanger Montréal \n", "2 2017-05-29 00:00:00 Klanova 9/506 Prague \n", "3 2017-08-31 00:00:00 Rilská 3174/6 Prague \n", "4 2017-05-18 00:00:00 Qe 7 Bloco G Brasília \n", "5 2017-07-23 00:00:00 1 Microsoft Way Redmond \n", "6 2017-01-28 00:00:00 541 Del Medio Avenue Mountain View \n", "7 2017-03-20 00:00:00 801 W 4th Street Reno \n", "8 2017-03-19 00:00:00 120 S Orange Ave Orlando \n", "9 2017-01-03 00:00:00 230 Elgin Street Ottawa \n", "10 2017-02-25 00:00:00 Rua da Assunção 53 Lisbon \n", "11 2017-03-13 00:00:00 Berger Straße 10 Frankfurt \n", "12 2017-01-21 00:00:00 9, Place Louis Barthou Bordeaux \n", "13 2017-02-18 00:00:00 3 Chatham Street Dublin \n", "14 2017-03-17 00:00:00 C/ San Bernardo 85 Madrid \n", "15 2017-02-09 00:00:00 113 Lupus St London \n", "16 2017-03-25 00:00:00 Calle Lira, 198 Santiago \n", "17 2017-02-21 00:00:00 12,Community Centre Delhi \n", "\n", " billing_state billing_country billing_postal_code total \n", "0 SP Brazil 12227-000 8.91 \n", "1 QC Canada H2G 1A7 19.80 \n", "2 None Czech Republic 14700 8.91 \n", "3 None Czech Republic 14300 16.83 \n", "4 DF Brazil 71020-677 14.85 \n", "5 WA USA 98052-8300 9.90 \n", "6 CA USA 94040-111 10.89 \n", "7 NV USA 89503 11.88 \n", "8 FL USA 32801 6.93 \n", "9 ON Canada K2P 1L7 9.90 \n", "10 None Portugal None 4.95 \n", "11 None Germany 60316 9.90 \n", "12 None France 33000 3.96 \n", "13 Dublin Ireland None 10.89 \n", "14 None Spain 28015 7.92 \n", "15 None United Kingdom SW1V 3EN 9.90 \n", "16 None Chile None 5.94 \n", "17 None India 110017 3.96 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q=\"\"\"\n", "SELECT c.*, i.*\n", "FROM invoice i\n", "INNER JOIN customer c ON i.customer_id = c.customer_id\n", "GROUP BY 1\n", "HAVING SUM(i.total) > 90\n", "\"\"\"\n", "i_c_90=pd.read_sql(q,conn)\n", "print(len(i_c_90))\n", "i_c_90" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }