{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Analyzing Database with SQL and Connecting Database to Jupyter Notebook" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ">- This notebook shows how to connect database to Jupyter Notebook, and join multi tables, and write complex queries to analyze data. ", "--by Lu Tang" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 1. Analyzing data in 'factbook.db', only two tables" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# Imports \n", "import numpy as np # linear algebra\n", "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n", "import sqlite3\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>type</th>\n", " <th>name</th>\n", " <th>tbl_name</th>\n", " <th>rootpage</th>\n", " <th>sql</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>table</td>\n", " <td>sqlite_sequence</td>\n", " <td>sqlite_sequence</td>\n", " <td>3</td>\n", " <td>CREATE TABLE sqlite_sequence(name,seq)</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>table</td>\n", " <td>facts</td>\n", " <td>facts</td>\n", " <td>47</td>\n", " <td>CREATE TABLE \"facts\" (\"id\" INTEGER PRIMARY KEY...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>table</td>\n", " <td>cities</td>\n", " <td>cities</td>\n", " <td>2</td>\n", " <td>CREATE TABLE cities (\\n id integer prim...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " type name tbl_name rootpage \\\n", "0 table sqlite_sequence sqlite_sequence 3 \n", "1 table facts facts 47 \n", "2 table cities cities 2 \n", "\n", " sql \n", "0 CREATE TABLE sqlite_sequence(name,seq) \n", "1 CREATE TABLE \"facts\" (\"id\" INTEGER PRIMARY KEY... \n", "2 CREATE TABLE cities (\\n id integer prim... " ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Create the connection to the database, and see what tables we have\n", "conn = sqlite3.connect('factbook.db')\n", "\n", "tables = pd.read_sql(\"\"\"SELECT *\n", " FROM sqlite_master\n", " WHERE type='table';\"\"\", conn)\n", "tables" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "397\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>name</th>\n", " <th>population</th>\n", " <th>capital</th>\n", " <th>facts_id</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Oranjestad</td>\n", " <td>37000</td>\n", " <td>1</td>\n", " <td>216</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>Saint John'S</td>\n", " <td>27000</td>\n", " <td>1</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>Abu Dhabi</td>\n", " <td>942000</td>\n", " <td>1</td>\n", " <td>184</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Dubai</td>\n", " <td>1978000</td>\n", " <td>0</td>\n", " <td>184</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>Sharjah</td>\n", " <td>983000</td>\n", " <td>0</td>\n", " <td>184</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " id name population capital facts_id\n", "0 1 Oranjestad 37000 1 216\n", "1 2 Saint John'S 27000 1 6\n", "2 3 Abu Dhabi 942000 1 184\n", "3 4 Dubai 1978000 0 184\n", "4 5 Sharjah 983000 0 184" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# List of cities\n", "cities = pd.read_sql(\"\"\"SELECT *\n", " FROM cities;\"\"\", conn)\n", "print(len(cities))\n", "cities.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "397\n", "396\n", "210\n" ] } ], "source": [ "print(cities.id.nunique())\n", "print(cities.name.nunique())\n", "# SELECT COUNT(DISTINCT(facts_id)) FROM cities;\n", "print(cities.facts_id.nunique())\n", "\n", "# several cities share the same facts" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "261\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>code</th>\n", " <th>name</th>\n", " <th>area</th>\n", " <th>area_land</th>\n", " <th>area_water</th>\n", " <th>population</th>\n", " <th>population_growth</th>\n", " <th>birth_rate</th>\n", " <th>death_rate</th>\n", " <th>migration_rate</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>af</td>\n", " <td>Afghanistan</td>\n", " <td>652230.0</td>\n", " <td>652230.0</td>\n", " <td>0.0</td>\n", " <td>32564342.0</td>\n", " <td>2.32</td>\n", " <td>38.57</td>\n", " <td>13.89</td>\n", " <td>1.51</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>al</td>\n", " <td>Albania</td>\n", " <td>28748.0</td>\n", " <td>27398.0</td>\n", " <td>1350.0</td>\n", " <td>3029278.0</td>\n", " <td>0.30</td>\n", " <td>12.92</td>\n", " <td>6.58</td>\n", " <td>3.30</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>ag</td>\n", " <td>Algeria</td>\n", " <td>2381741.0</td>\n", " <td>2381741.0</td>\n", " <td>0.0</td>\n", " <td>39542166.0</td>\n", " <td>1.84</td>\n", " <td>23.67</td>\n", " <td>4.31</td>\n", " <td>0.92</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>an</td>\n", " <td>Andorra</td>\n", " <td>468.0</td>\n", " <td>468.0</td>\n", " <td>0.0</td>\n", " <td>85580.0</td>\n", " <td>0.12</td>\n", " <td>8.13</td>\n", " <td>6.96</td>\n", " <td>0.00</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>ao</td>\n", " <td>Angola</td>\n", " <td>1246700.0</td>\n", " <td>1246700.0</td>\n", " <td>0.0</td>\n", " <td>19625353.0</td>\n", " <td>2.78</td>\n", " <td>38.78</td>\n", " <td>11.49</td>\n", " <td>0.46</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " id code name area area_land area_water population \\\n", "0 1 af Afghanistan 652230.0 652230.0 0.0 32564342.0 \n", "1 2 al Albania 28748.0 27398.0 1350.0 3029278.0 \n", "2 3 ag Algeria 2381741.0 2381741.0 0.0 39542166.0 \n", "3 4 an Andorra 468.0 468.0 0.0 85580.0 \n", "4 5 ao Angola 1246700.0 1246700.0 0.0 19625353.0 \n", "\n", " population_growth birth_rate death_rate migration_rate \n", "0 2.32 38.57 13.89 1.51 \n", "1 0.30 12.92 6.58 3.30 \n", "2 1.84 23.67 4.31 0.92 \n", "3 0.12 8.13 6.96 0.00 \n", "4 2.78 38.78 11.49 0.46 " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# view table of facts\n", "facts = pd.read_sql(\"\"\"SELECT *\n", " FROM facts;\"\"\", conn)\n", "print(len(facts))\n", "facts.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.1 (INNER) JOIN" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "397\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>code</th>\n", " <th>name</th>\n", " <th>area</th>\n", " <th>area_land</th>\n", " <th>area_water</th>\n", " <th>population</th>\n", " <th>population_growth</th>\n", " <th>birth_rate</th>\n", " <th>death_rate</th>\n", " <th>migration_rate</th>\n", " <th>id</th>\n", " <th>name</th>\n", " <th>population</th>\n", " <th>capital</th>\n", " <th>facts_id</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>216</td>\n", " <td>aa</td>\n", " <td>Aruba</td>\n", " <td>180.0</td>\n", " <td>180.0</td>\n", " <td>0.0</td>\n", " <td>112162</td>\n", " <td>1.33</td>\n", " <td>12.56</td>\n", " <td>8.18</td>\n", " <td>8.92</td>\n", " <td>1</td>\n", " <td>Oranjestad</td>\n", " <td>37000</td>\n", " <td>1</td>\n", " <td>216</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>6</td>\n", " <td>ac</td>\n", " <td>Antigua and Barbuda</td>\n", " <td>442.0</td>\n", " <td>442.0</td>\n", " <td>0.0</td>\n", " <td>92436</td>\n", " <td>1.24</td>\n", " <td>15.85</td>\n", " <td>5.69</td>\n", " <td>2.21</td>\n", " <td>2</td>\n", " <td>Saint John'S</td>\n", " <td>27000</td>\n", " <td>1</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>184</td>\n", " <td>ae</td>\n", " <td>United Arab Emirates</td>\n", " <td>83600.0</td>\n", " <td>83600.0</td>\n", " <td>0.0</td>\n", " <td>5779760</td>\n", " <td>2.58</td>\n", " <td>15.43</td>\n", " <td>1.97</td>\n", " <td>12.36</td>\n", " <td>3</td>\n", " <td>Abu Dhabi</td>\n", " <td>942000</td>\n", " <td>1</td>\n", " <td>184</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>184</td>\n", " <td>ae</td>\n", " <td>United Arab Emirates</td>\n", " <td>83600.0</td>\n", " <td>83600.0</td>\n", " <td>0.0</td>\n", " <td>5779760</td>\n", " <td>2.58</td>\n", " <td>15.43</td>\n", " <td>1.97</td>\n", " <td>12.36</td>\n", " <td>4</td>\n", " <td>Dubai</td>\n", " <td>1978000</td>\n", " <td>0</td>\n", " <td>184</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>184</td>\n", " <td>ae</td>\n", " <td>United Arab Emirates</td>\n", " <td>83600.0</td>\n", " <td>83600.0</td>\n", " <td>0.0</td>\n", " <td>5779760</td>\n", " <td>2.58</td>\n", " <td>15.43</td>\n", " <td>1.97</td>\n", " <td>12.36</td>\n", " <td>5</td>\n", " <td>Sharjah</td>\n", " <td>983000</td>\n", " <td>0</td>\n", " <td>184</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " id code name area area_land area_water population \\\n", "0 216 aa Aruba 180.0 180.0 0.0 112162 \n", "1 6 ac Antigua and Barbuda 442.0 442.0 0.0 92436 \n", "2 184 ae United Arab Emirates 83600.0 83600.0 0.0 5779760 \n", "3 184 ae United Arab Emirates 83600.0 83600.0 0.0 5779760 \n", "4 184 ae United Arab Emirates 83600.0 83600.0 0.0 5779760 \n", "\n", " population_growth birth_rate death_rate migration_rate id \\\n", "0 1.33 12.56 8.18 8.92 1 \n", "1 1.24 15.85 5.69 2.21 2 \n", "2 2.58 15.43 1.97 12.36 3 \n", "3 2.58 15.43 1.97 12.36 4 \n", "4 2.58 15.43 1.97 12.36 5 \n", "\n", " name population capital facts_id \n", "0 Oranjestad 37000 1 216 \n", "1 Saint John'S 27000 1 6 \n", "2 Abu Dhabi 942000 1 184 \n", "3 Dubai 1978000 0 184 \n", "4 Sharjah 983000 0 184 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Inner Join\n", "q=\"\"\"SELECT * \n", "FROM facts \n", "INNER JOIN cities \n", "ON cities.facts_id = facts.id\"\"\"\n", "\n", "inner_join = pd.read_sql(q, conn)\n", "\n", "print(len(inner_join))\n", "# view joined table \n", "inner_join.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "210" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Not all the facts_id is in the cities table\n", "inner_join['facts_id'].nunique()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.2 LEFT JOIN" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "448\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>id</th>\n", " <th>code</th>\n", " <th>name</th>\n", " <th>area</th>\n", " <th>area_land</th>\n", " <th>area_water</th>\n", " <th>population</th>\n", " <th>population_growth</th>\n", " <th>birth_rate</th>\n", " <th>death_rate</th>\n", " <th>migration_rate</th>\n", " <th>id</th>\n", " <th>name</th>\n", " <th>population</th>\n", " <th>capital</th>\n", " <th>facts_id</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>af</td>\n", " <td>Afghanistan</td>\n", " <td>652230.0</td>\n", " <td>652230.0</td>\n", " <td>0.0</td>\n", " <td>32564342.0</td>\n", " <td>2.32</td>\n", " <td>38.57</td>\n", " <td>13.89</td>\n", " <td>1.51</td>\n", " <td>6.0</td>\n", " <td>Kabul</td>\n", " <td>3097000.0</td>\n", " <td>1.0</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>al</td>\n", " <td>Albania</td>\n", " <td>28748.0</td>\n", " <td>27398.0</td>\n", " <td>1350.0</td>\n", " <td>3029278.0</td>\n", " <td>0.30</td>\n", " <td>12.92</td>\n", " <td>6.58</td>\n", " <td>3.30</td>\n", " <td>10.0</td>\n", " <td>Tirana</td>\n", " <td>419000.0</td>\n", " <td>1.0</td>\n", " <td>2</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>ag</td>\n", " <td>Algeria</td>\n", " <td>2381741.0</td>\n", " <td>2381741.0</td>\n", " <td>0.0</td>\n", " <td>39542166.0</td>\n", " <td>1.84</td>\n", " <td>23.67</td>\n", " <td>4.31</td>\n", " <td>0.92</td>\n", " <td>7.0</td>\n", " <td>Algiers</td>\n", " <td>2916000.0</td>\n", " <td>1.0</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3</td>\n", " <td>ag</td>\n", " <td>Algeria</td>\n", " <td>2381741.0</td>\n", " <td>2381741.0</td>\n", " <td>0.0</td>\n", " <td>39542166.0</td>\n", " <td>1.84</td>\n", " <td>23.67</td>\n", " <td>4.31</td>\n", " <td>0.92</td>\n", " <td>8.0</td>\n", " <td>Oran</td>\n", " <td>783000.0</td>\n", " <td>0.0</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>4</td>\n", " <td>an</td>\n", " <td>Andorra</td>\n", " <td>468.0</td>\n", " <td>468.0</td>\n", " <td>0.0</td>\n", " <td>85580.0</td>\n", " <td>0.12</td>\n", " <td>8.13</td>\n", " <td>6.96</td>\n", " <td>0.00</td>\n", " <td>12.0</td>\n", " <td>Andorra La Vella</td>\n", " <td>23000.0</td>\n", " <td>1.0</td>\n", " <td>4</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " id code name area area_land area_water population \\\n", "0 1 af Afghanistan 652230.0 652230.0 0.0 32564342.0 \n", "1 2 al Albania 28748.0 27398.0 1350.0 3029278.0 \n", "2 3 ag Algeria 2381741.0 2381741.0 0.0 39542166.0 \n", "3 3 ag Algeria 2381741.0 2381741.0 0.0 39542166.0 \n", "4 4 an Andorra 468.0 468.0 0.0 85580.0 \n", "\n", " population_growth birth_rate death_rate migration_rate id \\\n", "0 2.32 38.57 13.89 1.51 6.0 \n", "1 0.30 12.92 6.58 3.30 10.0 \n", "2 1.84 23.67 4.31 0.92 7.0 \n", "3 1.84 23.67 4.31 0.92 8.0 \n", "4 0.12 8.13 6.96 0.00 12.0 \n", "\n", " name population capital facts_id \n", "0 Kabul 3097000.0 1.0 1 \n", "1 Tirana 419000.0 1.0 2 \n", "2 Algiers 2916000.0 1.0 3 \n", "3 Oran 783000.0 0.0 3 \n", "4 Andorra La Vella 23000.0 1.0 4 " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Left Join\n", "q2=\"\"\"SELECT * \n", "FROM facts\n", "LEFT JOIN cities \n", "ON cities.facts_id = facts.id;\"\"\"\n", "\n", "left_join = pd.read_sql(q2, conn)\n", "\n", "print(len(left_join))\n", "left_join.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ">- For left join, the order of FROM and LEFT JOIN matters\n", ">- FROM facts means every rows in the facts will be included, and some same facts have different cities, and some facts do not have cities at all." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 1.3 Inner Join with a subquery" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "208\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>country</th>\n", " <th>capital_city</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Aruba</td>\n", " <td>Oranjestad</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Antigua and Barbuda</td>\n", " <td>Saint John'S</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>United Arab Emirates</td>\n", " <td>Abu Dhabi</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Afghanistan</td>\n", " <td>Kabul</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Algeria</td>\n", " <td>Algiers</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " country capital_city\n", "0 Aruba Oranjestad\n", "1 Antigua and Barbuda Saint John'S\n", "2 United Arab Emirates Abu Dhabi\n", "3 Afghanistan Kabul\n", "4 Algeria Algiers" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# name subquery table as c, and from facts join c\n", "q3=\"\"\"\n", "SELECT f.name country, c.name capital_city\n", "FROM facts f\n", "JOIN (\n", " SELECT * FROM cities\n", " WHERE capital = 1) c\n", "ON c.facts_id = f.id;\n", "\"\"\"\n", "subquery_join = pd.read_sql(q3, conn)\n", "\n", "print(len(subquery_join))\n", "subquery_join.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 2. Analyzing data in 'chinook.db' with 11 tables" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>type</th>\n", " <th>name</th>\n", " <th>tbl_name</th>\n", " <th>rootpage</th>\n", " <th>sql</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>table</td>\n", " <td>album</td>\n", " <td>album</td>\n", " <td>2</td>\n", " <td>CREATE TABLE [album]\\n(\\n [album_id] INTEGE...</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>table</td>\n", " <td>artist</td>\n", " <td>artist</td>\n", " <td>3</td>\n", " <td>CREATE TABLE [artist]\\n(\\n [artist_id] INTE...</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>table</td>\n", " <td>customer</td>\n", " <td>customer</td>\n", " <td>4</td>\n", " <td>CREATE TABLE [customer]\\n(\\n [customer_id] ...</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>table</td>\n", " <td>employee</td>\n", " <td>employee</td>\n", " <td>5</td>\n", " <td>CREATE TABLE [employee]\\n(\\n [employee_id] ...</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>table</td>\n", " <td>genre</td>\n", " <td>genre</td>\n", " <td>6</td>\n", " <td>CREATE TABLE [genre]\\n(\\n [genre_id] INTEGE...</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>table</td>\n", " <td>invoice</td>\n", " <td>invoice</td>\n", " <td>7</td>\n", " <td>CREATE TABLE [invoice]\\n(\\n [invoice_id] IN...</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>table</td>\n", " <td>invoice_line</td>\n", " <td>invoice_line</td>\n", " <td>8</td>\n", " <td>CREATE TABLE [invoice_line]\\n(\\n [invoice_l...</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>table</td>\n", " <td>media_type</td>\n", " <td>media_type</td>\n", " <td>9</td>\n", " <td>CREATE TABLE [media_type]\\n(\\n [media_type_...</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>table</td>\n", " <td>playlist</td>\n", " <td>playlist</td>\n", " <td>10</td>\n", " <td>CREATE TABLE [playlist]\\n(\\n [playlist_id] ...</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>table</td>\n", " <td>playlist_track</td>\n", " <td>playlist_track</td>\n", " <td>11</td>\n", " <td>CREATE TABLE [playlist_track]\\n(\\n [playlis...</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>table</td>\n", " <td>track</td>\n", " <td>track</td>\n", " <td>13</td>\n", " <td>CREATE TABLE [track]\\n(\\n [track_id] INTEGE...</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " type name tbl_name rootpage \\\n", "0 table album album 2 \n", "1 table artist artist 3 \n", "2 table customer customer 4 \n", "3 table employee employee 5 \n", "4 table genre genre 6 \n", "5 table invoice invoice 7 \n", "6 table invoice_line invoice_line 8 \n", "7 table media_type media_type 9 \n", "8 table playlist playlist 10 \n", "9 table playlist_track playlist_track 11 \n", "10 table track track 13 \n", "\n", " sql \n", "0 CREATE TABLE [album]\\n(\\n [album_id] INTEGE... \n", "1 CREATE TABLE [artist]\\n(\\n [artist_id] INTE... \n", "2 CREATE TABLE [customer]\\n(\\n [customer_id] ... \n", "3 CREATE TABLE [employee]\\n(\\n [employee_id] ... \n", "4 CREATE TABLE [genre]\\n(\\n [genre_id] INTEGE... \n", "5 CREATE TABLE [invoice]\\n(\\n [invoice_id] IN... \n", "6 CREATE TABLE [invoice_line]\\n(\\n [invoice_l... \n", "7 CREATE TABLE [media_type]\\n(\\n [media_type_... \n", "8 CREATE TABLE [playlist]\\n(\\n [playlist_id] ... \n", "9 CREATE TABLE [playlist_track]\\n(\\n [playlis... \n", "10 CREATE TABLE [track]\\n(\\n [track_id] INTEGE... " ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Build connection\n", "conn=sqlite3.connect('chinook.db')\n", "# view all tables\n", "tables = pd.read_sql(\"\"\"SELECT *\n", " FROM sqlite_master\n", " WHERE type='table';\"\"\", conn)\n", "tables" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>track_id</th>\n", " <th>track_name</th>\n", " <th>track_type</th>\n", " <th>unit_price</th>\n", " <th>quantity</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>3448</td>\n", " <td>Lamentations of Jeremiah, First Set \\ Incipit ...</td>\n", " <td>Protected AAC audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2560</td>\n", " <td>Violent Pornography</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3336</td>\n", " <td>War Pigs</td>\n", " <td>Purchased AAC audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>829</td>\n", " <td>Let's Get Rocked</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1872</td>\n", " <td>Attitude</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>748</td>\n", " <td>Dealer</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>1778</td>\n", " <td>You're What's Happening (In The World Today)</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>2514</td>\n", " <td>Spoonman</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " track_id track_name \\\n", "0 3448 Lamentations of Jeremiah, First Set \\ Incipit ... \n", "1 2560 Violent Pornography \n", "2 3336 War Pigs \n", "3 829 Let's Get Rocked \n", "4 1872 Attitude \n", "5 748 Dealer \n", "6 1778 You're What's Happening (In The World Today) \n", "7 2514 Spoonman \n", "\n", " track_type unit_price quantity \n", "0 Protected AAC audio file 0.99 1 \n", "1 MPEG audio file 0.99 1 \n", "2 Purchased AAC audio file 0.99 1 \n", "3 MPEG audio file 0.99 1 \n", "4 MPEG audio file 0.99 1 \n", "5 MPEG audio file 0.99 1 \n", "6 MPEG audio file 0.99 1 \n", "7 MPEG audio file 0.99 1 " ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join three tables\n", "q_1=\"\"\"\n", "SELECT t.track_id, t.name track_name, m.name track_type, il.unit_price, il.quantity\n", "FROM invoice_line il\n", "JOIN track t \n", "ON t.track_id = il.track_id\n", "JOIN media_type m \n", "ON m.media_type_id=t.media_type_id\n", "WHERE il.invoice_id = 4\"\"\"\n", "\n", "table_1=pd.read_sql(q_1, conn)\n", "table_1" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>track_id</th>\n", " <th>track_name</th>\n", " <th>artist_name</th>\n", " <th>track_type</th>\n", " <th>unit_price</th>\n", " <th>quantity</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>3448</td>\n", " <td>Lamentations of Jeremiah, First Set \\ Incipit ...</td>\n", " <td>The King's Singers</td>\n", " <td>Protected AAC audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2560</td>\n", " <td>Violent Pornography</td>\n", " <td>System Of A Down</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3336</td>\n", " <td>War Pigs</td>\n", " <td>Cake</td>\n", " <td>Purchased AAC audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>829</td>\n", " <td>Let's Get Rocked</td>\n", " <td>Def Leppard</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1872</td>\n", " <td>Attitude</td>\n", " <td>Metallica</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>748</td>\n", " <td>Dealer</td>\n", " <td>Deep Purple</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>1778</td>\n", " <td>You're What's Happening (In The World Today)</td>\n", " <td>Marvin Gaye</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>2514</td>\n", " <td>Spoonman</td>\n", " <td>Soundgarden</td>\n", " <td>MPEG audio file</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " track_id track_name \\\n", "0 3448 Lamentations of Jeremiah, First Set \\ Incipit ... \n", "1 2560 Violent Pornography \n", "2 3336 War Pigs \n", "3 829 Let's Get Rocked \n", "4 1872 Attitude \n", "5 748 Dealer \n", "6 1778 You're What's Happening (In The World Today) \n", "7 2514 Spoonman \n", "\n", " artist_name track_type unit_price quantity \n", "0 The King's Singers Protected AAC audio file 0.99 1 \n", "1 System Of A Down MPEG audio file 0.99 1 \n", "2 Cake Purchased AAC audio file 0.99 1 \n", "3 Def Leppard MPEG audio file 0.99 1 \n", "4 Metallica MPEG audio file 0.99 1 \n", "5 Deep Purple MPEG audio file 0.99 1 \n", "6 Marvin Gaye MPEG audio file 0.99 1 \n", "7 Soundgarden MPEG audio file 0.99 1 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join five tables\n", "q_2= \"\"\"\n", "SELECT\n", " il.track_id,\n", " t.name track_name,\n", " ar.name artist_name,\n", " mt.name track_type,\n", " il.unit_price,\n", " il.quantity\n", "FROM invoice_line il\n", "INNER JOIN track t ON t.track_id = il.track_id\n", "INNER JOIN media_type mt ON mt.media_type_id = t.media_type_id\n", "INNER JOIN album al ON al.album_id = t.album_id\n", "INNER JOIN artist ar on ar.artist_id = al.artist_id\n", "WHERE il.invoice_id = 4;\"\"\"\n", "\n", "table_2=pd.read_sql(q_2, conn)\n", "table_2" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.1 Lists the top 10 artists, calculated by the number of times a track by that artist has been purchased." ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "invoice_line table contains each individual song from each customer purchase, it contains information about which songs are purchased the most. We can use the table to find out which artists are purchased the most. " ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3503\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>track_id</th>\n", " <th>artist_name</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>AC/DC</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>6</td>\n", " <td>AC/DC</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>7</td>\n", " <td>AC/DC</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>8</td>\n", " <td>AC/DC</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>9</td>\n", " <td>AC/DC</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " track_id artist_name\n", "0 1 AC/DC\n", "1 6 AC/DC\n", "2 7 AC/DC\n", "3 8 AC/DC\n", "4 9 AC/DC" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_q = \"\"\"SELECT\n", " t.track_id,\n", " ar.name artist_name \n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\"\"\"\n", "sub_table=pd.read_sql(sub_q, conn)\n", "print(len(sub_table))\n", "sub_table.head()" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "4757\n", "130\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>artist</th>\n", " <th>quantity</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Guns N' Roses</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Guns N' Roses</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Guns N' Roses</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Guns N' Roses</td>\n", " <td>1</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Guns N' Roses</td>\n", " <td>1</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " artist quantity\n", "0 Guns N' Roses 1\n", "1 Guns N' Roses 1\n", "2 Guns N' Roses 1\n", "3 Guns N' Roses 1\n", "4 Guns N' Roses 1" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_3= \"\"\"\n", "SELECT\n", " ta.artist_name artist, il.quantity\n", "FROM invoice_line il\n", "INNER JOIN (\n", " SELECT\n", " t.track_id,\n", " ar.name artist_name\n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\n", " ) ta\n", "ON ta.track_id = il.track_id;\"\"\"\n", "\n", "table_3=pd.read_sql(q_3, conn)\n", "print(len(table_3))\n", "print(table_3.artist.nunique())\n", "table_3.head()" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "130\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>artist</th>\n", " <th>tracks_purchased</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Queen</td>\n", " <td>192</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Jimi Hendrix</td>\n", " <td>187</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Nirvana</td>\n", " <td>130</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Red Hot Chili Peppers</td>\n", " <td>130</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Pearl Jam</td>\n", " <td>129</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>AC/DC</td>\n", " <td>124</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>Guns N' Roses</td>\n", " <td>124</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>Foo Fighters</td>\n", " <td>121</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>The Rolling Stones</td>\n", " <td>117</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>Metallica</td>\n", " <td>106</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " artist tracks_purchased\n", "0 Queen 192\n", "1 Jimi Hendrix 187\n", "2 Nirvana 130\n", "3 Red Hot Chili Peppers 130\n", "4 Pearl Jam 129\n", "5 AC/DC 124\n", "6 Guns N' Roses 124\n", "7 Foo Fighters 121\n", "8 The Rolling Stones 117\n", "9 Metallica 106" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Join table with subquery and group by\n", "q_4=\"\"\"\n", "SELECT\n", " ta.artist_name artist,\n", " COUNT(*) tracks_purchased\n", "FROM invoice_line il\n", "INNER JOIN (\n", " SELECT\n", " t.track_id,\n", " ar.name artist_name\n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\n", " ) ta\n", " ON ta.track_id = il.track_id\n", "GROUP BY 1\n", "ORDER BY 2 DESC;\"\"\"\n", "\n", "table_4 = pd.read_sql(q_4, conn)\n", "print(len(table_4))\n", "table_4.head(10)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.2 Write a query that returns the top 5 albums, as calculated by the number of times a track from that album has been purchased." ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "251\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>album</th>\n", " <th>artist</th>\n", " <th>tracks_purchased</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Are You Experienced?</td>\n", " <td>Jimi Hendrix</td>\n", " <td>187</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Faceless</td>\n", " <td>Godsmack</td>\n", " <td>96</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Mezmerize</td>\n", " <td>System Of A Down</td>\n", " <td>93</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Get Born</td>\n", " <td>JET</td>\n", " <td>90</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>The Doors</td>\n", " <td>The Doors</td>\n", " <td>83</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " album artist tracks_purchased\n", "0 Are You Experienced? Jimi Hendrix 187\n", "1 Faceless Godsmack 96\n", "2 Mezmerize System Of A Down 93\n", "3 Get Born JET 90\n", "4 The Doors The Doors 83" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q_5=\"\"\"\n", "SELECT al.title album, ar.name artist, COUNT(*) tracks_purchased\n", "FROM invoice_line il\n", "JOIN track t ON t.track_id=il.track_id\n", "JOIN album al ON al.album_id= t.album_id\n", "JOIN artist ar ON ar.artist_id=al.artist_id\n", "GROUP BY 1\n", "ORDER BY 3 DESC;\"\"\"\n", "\n", "table_5 = pd.read_sql(q_5, conn)\n", "print(len(table_5))\n", "table_5.head()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "251\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>album</th>\n", " <th>artist</th>\n", " <th>tracks_purchased</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Are You Experienced?</td>\n", " <td>Jimi Hendrix</td>\n", " <td>187</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Faceless</td>\n", " <td>Godsmack</td>\n", " <td>96</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Mezmerize</td>\n", " <td>System Of A Down</td>\n", " <td>93</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Get Born</td>\n", " <td>JET</td>\n", " <td>90</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>The Doors</td>\n", " <td>The Doors</td>\n", " <td>83</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " album artist tracks_purchased\n", "0 Are You Experienced? Jimi Hendrix 187\n", "1 Faceless Godsmack 96\n", "2 Mezmerize System Of A Down 93\n", "3 Get Born JET 90\n", "4 The Doors The Doors 83" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# same as q_5\n", "q_6=\"\"\"\n", "SELECT\n", " ta.album_title album,\n", " ta.artist_name artist,\n", " COUNT(*) tracks_purchased\n", "FROM invoice_line il\n", "INNER JOIN (\n", " SELECT\n", " t.track_id,\n", " al.title album_title,\n", " ar.name artist_name\n", " FROM track t\n", " INNER JOIN album al ON al.album_id = t.album_id\n", " INNER JOIN artist ar ON ar.artist_id = al.artist_id\n", " ) ta\n", " ON ta.track_id = il.track_id\n", "GROUP BY 1, 2\n", "ORDER BY 3 DESC;\"\"\"\n", "\n", "table_6 = pd.read_sql(q_6, conn)\n", "print(len(table_6))\n", "table_6.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### 2.3 Recursive Joins" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>employee_id</th>\n", " <th>last_name</th>\n", " <th>first_name</th>\n", " <th>title</th>\n", " <th>reports_to</th>\n", " <th>birthdate</th>\n", " <th>hire_date</th>\n", " <th>address</th>\n", " <th>city</th>\n", " <th>state</th>\n", " <th>country</th>\n", " <th>postal_code</th>\n", " <th>phone</th>\n", " <th>fax</th>\n", " <th>email</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Adams</td>\n", " <td>Andrew</td>\n", " <td>General Manager</td>\n", " <td>NaN</td>\n", " <td>1962-02-18 00:00:00</td>\n", " <td>2016-08-14 00:00:00</td>\n", " <td>11120 Jasper Ave NW</td>\n", " <td>Edmonton</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T5K 2N1</td>\n", " <td>+1 (780) 428-9482</td>\n", " <td>+1 (780) 428-3457</td>\n", " <td>andrew@chinookcorp.com</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>Edwards</td>\n", " <td>Nancy</td>\n", " <td>Sales Manager</td>\n", " <td>1.0</td>\n", " <td>1958-12-08 00:00:00</td>\n", " <td>2016-05-01 00:00:00</td>\n", " <td>825 8 Ave SW</td>\n", " <td>Calgary</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T2P 2T3</td>\n", " <td>+1 (403) 262-3443</td>\n", " <td>+1 (403) 262-3322</td>\n", " <td>nancy@chinookcorp.com</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>Peacock</td>\n", " <td>Jane</td>\n", " <td>Sales Support Agent</td>\n", " <td>2.0</td>\n", " <td>1973-08-29 00:00:00</td>\n", " <td>2017-04-01 00:00:00</td>\n", " <td>1111 6 Ave SW</td>\n", " <td>Calgary</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T2P 5M5</td>\n", " <td>+1 (403) 262-3443</td>\n", " <td>+1 (403) 262-6712</td>\n", " <td>jane@chinookcorp.com</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Park</td>\n", " <td>Margaret</td>\n", " <td>Sales Support Agent</td>\n", " <td>2.0</td>\n", " <td>1947-09-19 00:00:00</td>\n", " <td>2017-05-03 00:00:00</td>\n", " <td>683 10 Street SW</td>\n", " <td>Calgary</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T2P 5G3</td>\n", " <td>+1 (403) 263-4423</td>\n", " <td>+1 (403) 263-4289</td>\n", " <td>margaret@chinookcorp.com</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>Johnson</td>\n", " <td>Steve</td>\n", " <td>Sales Support Agent</td>\n", " <td>2.0</td>\n", " <td>1965-03-03 00:00:00</td>\n", " <td>2017-10-17 00:00:00</td>\n", " <td>7727B 41 Ave</td>\n", " <td>Calgary</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T3B 1Y7</td>\n", " <td>1 (780) 836-9987</td>\n", " <td>1 (780) 836-9543</td>\n", " <td>steve@chinookcorp.com</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>6</td>\n", " <td>Mitchell</td>\n", " <td>Michael</td>\n", " <td>IT Manager</td>\n", " <td>1.0</td>\n", " <td>1973-07-01 00:00:00</td>\n", " <td>2016-10-17 00:00:00</td>\n", " <td>5827 Bowness Road NW</td>\n", " <td>Calgary</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T3B 0C5</td>\n", " <td>+1 (403) 246-9887</td>\n", " <td>+1 (403) 246-9899</td>\n", " <td>michael@chinookcorp.com</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>7</td>\n", " <td>King</td>\n", " <td>Robert</td>\n", " <td>IT Staff</td>\n", " <td>6.0</td>\n", " <td>1970-05-29 00:00:00</td>\n", " <td>2017-01-02 00:00:00</td>\n", " <td>590 Columbia Boulevard West</td>\n", " <td>Lethbridge</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T1K 5N8</td>\n", " <td>+1 (403) 456-9986</td>\n", " <td>+1 (403) 456-8485</td>\n", " <td>robert@chinookcorp.com</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>8</td>\n", " <td>Callahan</td>\n", " <td>Laura</td>\n", " <td>IT Staff</td>\n", " <td>6.0</td>\n", " <td>1968-01-09 00:00:00</td>\n", " <td>2017-03-04 00:00:00</td>\n", " <td>923 7 ST NW</td>\n", " <td>Lethbridge</td>\n", " <td>AB</td>\n", " <td>Canada</td>\n", " <td>T1H 1Y8</td>\n", " <td>+1 (403) 467-3351</td>\n", " <td>+1 (403) 467-8772</td>\n", " <td>laura@chinookcorp.com</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " employee_id last_name first_name title reports_to \\\n", "0 1 Adams Andrew General Manager NaN \n", "1 2 Edwards Nancy Sales Manager 1.0 \n", "2 3 Peacock Jane Sales Support Agent 2.0 \n", "3 4 Park Margaret Sales Support Agent 2.0 \n", "4 5 Johnson Steve Sales Support Agent 2.0 \n", "5 6 Mitchell Michael IT Manager 1.0 \n", "6 7 King Robert IT Staff 6.0 \n", "7 8 Callahan Laura IT Staff 6.0 \n", "\n", " birthdate hire_date address \\\n", "0 1962-02-18 00:00:00 2016-08-14 00:00:00 11120 Jasper Ave NW \n", "1 1958-12-08 00:00:00 2016-05-01 00:00:00 825 8 Ave SW \n", "2 1973-08-29 00:00:00 2017-04-01 00:00:00 1111 6 Ave SW \n", "3 1947-09-19 00:00:00 2017-05-03 00:00:00 683 10 Street SW \n", "4 1965-03-03 00:00:00 2017-10-17 00:00:00 7727B 41 Ave \n", "5 1973-07-01 00:00:00 2016-10-17 00:00:00 5827 Bowness Road NW \n", "6 1970-05-29 00:00:00 2017-01-02 00:00:00 590 Columbia Boulevard West \n", "7 1968-01-09 00:00:00 2017-03-04 00:00:00 923 7 ST NW \n", "\n", " city state country postal_code phone fax \\\n", "0 Edmonton AB Canada T5K 2N1 +1 (780) 428-9482 +1 (780) 428-3457 \n", "1 Calgary AB Canada T2P 2T3 +1 (403) 262-3443 +1 (403) 262-3322 \n", "2 Calgary AB Canada T2P 5M5 +1 (403) 262-3443 +1 (403) 262-6712 \n", "3 Calgary AB Canada T2P 5G3 +1 (403) 263-4423 +1 (403) 263-4289 \n", "4 Calgary AB Canada T3B 1Y7 1 (780) 836-9987 1 (780) 836-9543 \n", "5 Calgary AB Canada T3B 0C5 +1 (403) 246-9887 +1 (403) 246-9899 \n", "6 Lethbridge AB Canada T1K 5N8 +1 (403) 456-9986 +1 (403) 456-8485 \n", "7 Lethbridge AB Canada T1H 1Y8 +1 (403) 467-3351 +1 (403) 467-8772 \n", "\n", " email \n", "0 andrew@chinookcorp.com \n", "1 nancy@chinookcorp.com \n", "2 jane@chinookcorp.com \n", "3 margaret@chinookcorp.com \n", "4 steve@chinookcorp.com \n", "5 michael@chinookcorp.com \n", "6 robert@chinookcorp.com \n", "7 laura@chinookcorp.com " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# view the table employee\n", "q_7=\"\"\"\n", "SELECT *\n", "FROM employee\"\"\"\n", "\n", "table_7=pd.read_sql(q_7, conn)\n", "table_7" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>employee_name</th>\n", " <th>employee_title</th>\n", " <th>supervisor_name</th>\n", " <th>supervisor_title</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Andrew Adams</td>\n", " <td>General Manager</td>\n", " <td>None</td>\n", " <td>None</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Jane Peacock</td>\n", " <td>Sales Support Agent</td>\n", " <td>Nancy Edwards</td>\n", " <td>Sales Manager</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Laura Callahan</td>\n", " <td>IT Staff</td>\n", " <td>Michael Mitchell</td>\n", " <td>IT Manager</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Margaret Park</td>\n", " <td>Sales Support Agent</td>\n", " <td>Nancy Edwards</td>\n", " <td>Sales Manager</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Michael Mitchell</td>\n", " <td>IT Manager</td>\n", " <td>Andrew Adams</td>\n", " <td>General Manager</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>Nancy Edwards</td>\n", " <td>Sales Manager</td>\n", " <td>Andrew Adams</td>\n", " <td>General Manager</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>Robert King</td>\n", " <td>IT Staff</td>\n", " <td>Michael Mitchell</td>\n", " <td>IT Manager</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>Steve Johnson</td>\n", " <td>Sales Support Agent</td>\n", " <td>Nancy Edwards</td>\n", " <td>Sales Manager</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " employee_name employee_title supervisor_name supervisor_title\n", "0 Andrew Adams General Manager None None\n", "1 Jane Peacock Sales Support Agent Nancy Edwards Sales Manager\n", "2 Laura Callahan IT Staff Michael Mitchell IT Manager\n", "3 Margaret Park Sales Support Agent Nancy Edwards Sales Manager\n", "4 Michael Mitchell IT Manager Andrew Adams General Manager\n", "5 Nancy Edwards Sales Manager Andrew Adams General Manager\n", "6 Robert King IT Staff Michael Mitchell IT Manager\n", "7 Steve Johnson Sales Support Agent Nancy Edwards Sales Manager" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Recursive Joins\n", "q_8=\"\"\"\n", "SELECT\n", " e1.first_name || \" \" || e1.last_name employee_name,\n", " e1.title employee_title,\n", " e2.first_name || \" \" || e2.last_name supervisor_name,\n", " e2.title supervisor_title\n", "FROM employee e1\n", "LEFT JOIN employee e2 ON e1.reports_to = e2.employee_id\n", "ORDER BY 1;\"\"\"\n", "\n", "table_8=pd.read_sql(q_8, conn)\n", "table_8" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>invoice_id</th>\n", " <th>customer_id</th>\n", " <th>invoice_date</th>\n", " <th>billing_address</th>\n", " <th>billing_city</th>\n", " <th>billing_state</th>\n", " <th>billing_country</th>\n", " <th>billing_postal_code</th>\n", " <th>total</th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>company</th>\n", " <th>address</th>\n", " <th>city</th>\n", " <th>state</th>\n", " <th>country</th>\n", " <th>postal_code</th>\n", " <th>phone</th>\n", " <th>fax</th>\n", " <th>email</th>\n", " <th>support_rep_id</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>18</td>\n", " <td>2017-01-03 00:00:00</td>\n", " <td>627 Broadway</td>\n", " <td>New York</td>\n", " <td>NY</td>\n", " <td>USA</td>\n", " <td>10012-2612</td>\n", " <td>15.84</td>\n", " <td>18</td>\n", " <td>Michelle</td>\n", " <td>Brooks</td>\n", " <td>None</td>\n", " <td>627 Broadway</td>\n", " <td>New York</td>\n", " <td>NY</td>\n", " <td>USA</td>\n", " <td>10012-2612</td>\n", " <td>+1 (212) 221-3546</td>\n", " <td>+1 (212) 221-4679</td>\n", " <td>michelleb@aol.com</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>30</td>\n", " <td>2017-01-03 00:00:00</td>\n", " <td>230 Elgin Street</td>\n", " <td>Ottawa</td>\n", " <td>ON</td>\n", " <td>Canada</td>\n", " <td>K2P 1L7</td>\n", " <td>9.90</td>\n", " <td>30</td>\n", " <td>Edward</td>\n", " <td>Francis</td>\n", " <td>None</td>\n", " <td>230 Elgin Street</td>\n", " <td>Ottawa</td>\n", " <td>ON</td>\n", " <td>Canada</td>\n", " <td>K2P 1L7</td>\n", " <td>+1 (613) 234-3322</td>\n", " <td>None</td>\n", " <td>edfrancis@yachoo.ca</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>40</td>\n", " <td>2017-01-05 00:00:00</td>\n", " <td>8, Rue Hanovre</td>\n", " <td>Paris</td>\n", " <td>None</td>\n", " <td>France</td>\n", " <td>75002</td>\n", " <td>1.98</td>\n", " <td>40</td>\n", " <td>Dominique</td>\n", " <td>Lefebvre</td>\n", " <td>None</td>\n", " <td>8, Rue Hanovre</td>\n", " <td>Paris</td>\n", " <td>None</td>\n", " <td>France</td>\n", " <td>75002</td>\n", " <td>+33 01 47 42 71 71</td>\n", " <td>None</td>\n", " <td>dominiquelefebvre@gmail.com</td>\n", " <td>4</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>18</td>\n", " <td>2017-01-06 00:00:00</td>\n", " <td>627 Broadway</td>\n", " <td>New York</td>\n", " <td>NY</td>\n", " <td>USA</td>\n", " <td>10012-2612</td>\n", " <td>7.92</td>\n", " <td>18</td>\n", " <td>Michelle</td>\n", " <td>Brooks</td>\n", " <td>None</td>\n", " <td>627 Broadway</td>\n", " <td>New York</td>\n", " <td>NY</td>\n", " <td>USA</td>\n", " <td>10012-2612</td>\n", " <td>+1 (212) 221-3546</td>\n", " <td>+1 (212) 221-4679</td>\n", " <td>michelleb@aol.com</td>\n", " <td>3</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>27</td>\n", " <td>2017-01-07 00:00:00</td>\n", " <td>1033 N Park Ave</td>\n", " <td>Tucson</td>\n", " <td>AZ</td>\n", " <td>USA</td>\n", " <td>85719</td>\n", " <td>16.83</td>\n", " <td>27</td>\n", " <td>Patrick</td>\n", " <td>Gray</td>\n", " <td>None</td>\n", " <td>1033 N Park Ave</td>\n", " <td>Tucson</td>\n", " <td>AZ</td>\n", " <td>USA</td>\n", " <td>85719</td>\n", " <td>+1 (520) 622-4200</td>\n", " <td>None</td>\n", " <td>patrick.gray@aol.com</td>\n", " <td>4</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " invoice_id customer_id invoice_date billing_address \\\n", "0 1 18 2017-01-03 00:00:00 627 Broadway \n", "1 2 30 2017-01-03 00:00:00 230 Elgin Street \n", "2 3 40 2017-01-05 00:00:00 8, Rue Hanovre \n", "3 4 18 2017-01-06 00:00:00 627 Broadway \n", "4 5 27 2017-01-07 00:00:00 1033 N Park Ave \n", "\n", " billing_city billing_state billing_country billing_postal_code total \\\n", "0 New York NY USA 10012-2612 15.84 \n", "1 Ottawa ON Canada K2P 1L7 9.90 \n", "2 Paris None France 75002 1.98 \n", "3 New York NY USA 10012-2612 7.92 \n", "4 Tucson AZ USA 85719 16.83 \n", "\n", " customer_id first_name last_name company address city state \\\n", "0 18 Michelle Brooks None 627 Broadway New York NY \n", "1 30 Edward Francis None 230 Elgin Street Ottawa ON \n", "2 40 Dominique Lefebvre None 8, Rue Hanovre Paris None \n", "3 18 Michelle Brooks None 627 Broadway New York NY \n", "4 27 Patrick Gray None 1033 N Park Ave Tucson AZ \n", "\n", " country postal_code phone fax \\\n", "0 USA 10012-2612 +1 (212) 221-3546 +1 (212) 221-4679 \n", "1 Canada K2P 1L7 +1 (613) 234-3322 None \n", "2 France 75002 +33 01 47 42 71 71 None \n", "3 USA 10012-2612 +1 (212) 221-3546 +1 (212) 221-4679 \n", "4 USA 85719 +1 (520) 622-4200 None \n", "\n", " email support_rep_id \n", "0 michelleb@aol.com 3 \n", "1 edfrancis@yachoo.ca 3 \n", "2 dominiquelefebvre@gmail.com 4 \n", "3 michelleb@aol.com 3 \n", "4 patrick.gray@aol.com 4 " ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub=\"\"\"\n", "SELECT *\n", "FROM invoice i\n", "JOIN customer c\n", "ON c.customer_id = i.customer_id;\"\"\"\n", "customer_invoice = pd.read_sql(sub, conn)\n", "# to avoid truncated output \n", "pd.options.display.max_columns = 150 \n", "customer_invoice.head()" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_name</th>\n", " <th>number_of_purchases</th>\n", " <th>total_spent</th>\n", " <th>customer_category</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>Aaron Mitchell</td>\n", " <td>8</td>\n", " <td>70.29</td>\n", " <td>regular</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>Alexandre Rocha</td>\n", " <td>10</td>\n", " <td>69.30</td>\n", " <td>regular</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>Astrid Gruber</td>\n", " <td>9</td>\n", " <td>69.30</td>\n", " <td>regular</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>Bjørn Hansen</td>\n", " <td>9</td>\n", " <td>72.27</td>\n", " <td>regular</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>Camille Bernard</td>\n", " <td>9</td>\n", " <td>79.20</td>\n", " <td>regular</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " customer_name number_of_purchases total_spent customer_category\n", "0 Aaron Mitchell 8 70.29 regular\n", "1 Alexandre Rocha 10 69.30 regular\n", "2 Astrid Gruber 9 69.30 regular\n", "3 Bjørn Hansen 9 72.27 regular\n", "4 Camille Bernard 9 79.20 regular" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# join customer and invoice tabel, groupby customer name and create a new column\n", "q_9=\"\"\"\n", "SELECT\n", " c.first_name || \" \" || c.last_name customer_name,\n", " COUNT(i.invoice_id) number_of_purchases,\n", " SUM(i.total) total_spent,\n", " CASE\n", " WHEN sum(i.total) < 40 THEN 'small spender'\n", " WHEN sum(i.total) > 100 THEN 'big spender'\n", " ELSE 'regular'\n", " END\n", " AS customer_category\n", "FROM invoice i\n", "INNER JOIN customer c ON i.customer_id = c.customer_id\n", "GROUP BY 1 ORDER BY 1;\"\"\"\n", "\n", "table_9=pd.read_sql(q_9, conn)\n", "table_9.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## 3.Building and Organizing Complex Queries" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3503\n", "14\n" ] } ], "source": [ "# view the table playlist\n", "q=\"\"\"\n", "SELECT *\n", "FROM playlist_track\"\"\"\n", "playlist_track=pd.read_sql(q,conn)\n", "print(playlist_track.track_id.nunique())\n", "print(playlist_track.playlist_id.nunique())\n", "# total length of playlist is 18, means not all playlist is in playlist_track." ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>track_id</th>\n", " <th>name</th>\n", " <th>album_id</th>\n", " <th>media_type_id</th>\n", " <th>genre_id</th>\n", " <th>composer</th>\n", " <th>milliseconds</th>\n", " <th>bytes</th>\n", " <th>unit_price</th>\n", " <th>playlist_id</th>\n", " <th>track_id</th>\n", " <th>playlist_id</th>\n", " <th>name</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>3402</td>\n", " <td>Band Members Discuss Tracks from \"Revelations\"</td>\n", " <td>271</td>\n", " <td>3</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>294294</td>\n", " <td>61118891</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " <td>3402</td>\n", " <td>1</td>\n", " <td>Music</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>3389</td>\n", " <td>Revelations</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>252376</td>\n", " <td>4111051</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " <td>3389</td>\n", " <td>1</td>\n", " <td>Music</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3390</td>\n", " <td>One and the Same</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>217732</td>\n", " <td>3559040</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " <td>3390</td>\n", " <td>1</td>\n", " <td>Music</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>3391</td>\n", " <td>Sound of a Gun</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>260154</td>\n", " <td>4234990</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " <td>3391</td>\n", " <td>1</td>\n", " <td>Music</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>3392</td>\n", " <td>Until We Fall</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>230758</td>\n", " <td>3766605</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " <td>3392</td>\n", " <td>1</td>\n", " <td>Music</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " track_id name album_id \\\n", "0 3402 Band Members Discuss Tracks from \"Revelations\" 271 \n", "1 3389 Revelations 271 \n", "2 3390 One and the Same 271 \n", "3 3391 Sound of a Gun 271 \n", "4 3392 Until We Fall 271 \n", "\n", " media_type_id genre_id composer milliseconds bytes unit_price \\\n", "0 3 23 None 294294 61118891 0.99 \n", "1 2 23 None 252376 4111051 0.99 \n", "2 2 23 None 217732 3559040 0.99 \n", "3 2 23 None 260154 4234990 0.99 \n", "4 2 23 None 230758 3766605 0.99 \n", "\n", " playlist_id track_id playlist_id name \n", "0 1 3402 1 Music \n", "1 1 3389 1 Music \n", "2 1 3390 1 Music \n", "3 1 3391 1 Music \n", "4 1 3392 1 Music " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# (inner) join three tables \n", "join_from_track=\"\"\"\n", "SELECT * \n", " FROM track t\n", " JOIN playlist_track pt ON t.track_id=pt.track_id\n", " JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "join_from_track=pd.read_sql(join_from_track,conn)\n", "print(len(join_from_track))\n", "join_from_track.head()" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>name</th>\n", " <th>playlist_id</th>\n", " <th>track_id</th>\n", " <th>track_id</th>\n", " <th>name</th>\n", " <th>album_id</th>\n", " <th>media_type_id</th>\n", " <th>genre_id</th>\n", " <th>composer</th>\n", " <th>milliseconds</th>\n", " <th>bytes</th>\n", " <th>unit_price</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1</td>\n", " <td>3402</td>\n", " <td>3402</td>\n", " <td>Band Members Discuss Tracks from \"Revelations\"</td>\n", " <td>271</td>\n", " <td>3</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>294294</td>\n", " <td>61118891</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1</td>\n", " <td>3389</td>\n", " <td>3389</td>\n", " <td>Revelations</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>252376</td>\n", " <td>4111051</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1</td>\n", " <td>3390</td>\n", " <td>3390</td>\n", " <td>One and the Same</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>217732</td>\n", " <td>3559040</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1</td>\n", " <td>3391</td>\n", " <td>3391</td>\n", " <td>Sound of a Gun</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>260154</td>\n", " <td>4234990</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1</td>\n", " <td>3392</td>\n", " <td>3392</td>\n", " <td>Until We Fall</td>\n", " <td>271</td>\n", " <td>2</td>\n", " <td>23</td>\n", " <td>None</td>\n", " <td>230758</td>\n", " <td>3766605</td>\n", " <td>0.99</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id name playlist_id track_id track_id \\\n", "0 1 Music 1 3402 3402 \n", "1 1 Music 1 3389 3389 \n", "2 1 Music 1 3390 3390 \n", "3 1 Music 1 3391 3391 \n", "4 1 Music 1 3392 3392 \n", "\n", " name album_id media_type_id \\\n", "0 Band Members Discuss Tracks from \"Revelations\" 271 3 \n", "1 Revelations 271 2 \n", "2 One and the Same 271 2 \n", "3 Sound of a Gun 271 2 \n", "4 Until We Fall 271 2 \n", "\n", " genre_id composer milliseconds bytes unit_price \n", "0 23 None 294294 61118891 0.99 \n", "1 23 None 252376 4111051 0.99 \n", "2 23 None 217732 3559040 0.99 \n", "3 23 None 260154 4234990 0.99 \n", "4 23 None 230758 3766605 0.99 " ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# same as above, this is to show, for (inner) join, table order does not matter\n", "join_from_playlist=\"\"\"\n", "SELECT * \n", " FROM playlist p\n", " JOIN playlist_track pt ON p.playlist_id=pt.playlist_id\n", " JOIN track t ON t.track_id=pt.track_id;\"\"\"\n", "\n", "join_from_playlist=pd.read_sql(join_from_playlist,conn)\n", "print(len(join_from_playlist))\n", "join_from_playlist.head()" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>track_id</th>\n", " <th>name</th>\n", " <th>album_id</th>\n", " <th>media_type_id</th>\n", " <th>genre_id</th>\n", " <th>composer</th>\n", " <th>milliseconds</th>\n", " <th>bytes</th>\n", " <th>unit_price</th>\n", " <th>playlist_id</th>\n", " <th>track_id</th>\n", " <th>playlist_id</th>\n", " <th>name</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>Angus Young, Malcolm Young, Brian Johnson</td>\n", " <td>343719</td>\n", " <td>11170334</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>Music</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>Angus Young, Malcolm Young, Brian Johnson</td>\n", " <td>343719</td>\n", " <td>11170334</td>\n", " <td>0.99</td>\n", " <td>8</td>\n", " <td>1</td>\n", " <td>8</td>\n", " <td>Music</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>Angus Young, Malcolm Young, Brian Johnson</td>\n", " <td>343719</td>\n", " <td>11170334</td>\n", " <td>0.99</td>\n", " <td>17</td>\n", " <td>1</td>\n", " <td>17</td>\n", " <td>Heavy Metal Classic</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>2</td>\n", " <td>Balls to the Wall</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>1</td>\n", " <td>None</td>\n", " <td>342562</td>\n", " <td>5510424</td>\n", " <td>0.99</td>\n", " <td>1</td>\n", " <td>2</td>\n", " <td>1</td>\n", " <td>Music</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>2</td>\n", " <td>Balls to the Wall</td>\n", " <td>2</td>\n", " <td>2</td>\n", " <td>1</td>\n", " <td>None</td>\n", " <td>342562</td>\n", " <td>5510424</td>\n", " <td>0.99</td>\n", " <td>8</td>\n", " <td>2</td>\n", " <td>8</td>\n", " <td>Music</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " track_id name album_id media_type_id \\\n", "0 1 For Those About To Rock (We Salute You) 1 1 \n", "1 1 For Those About To Rock (We Salute You) 1 1 \n", "2 1 For Those About To Rock (We Salute You) 1 1 \n", "3 2 Balls to the Wall 2 2 \n", "4 2 Balls to the Wall 2 2 \n", "\n", " genre_id composer milliseconds \\\n", "0 1 Angus Young, Malcolm Young, Brian Johnson 343719 \n", "1 1 Angus Young, Malcolm Young, Brian Johnson 343719 \n", "2 1 Angus Young, Malcolm Young, Brian Johnson 343719 \n", "3 1 None 342562 \n", "4 1 None 342562 \n", "\n", " bytes unit_price playlist_id track_id playlist_id \\\n", "0 11170334 0.99 1 1 1 \n", "1 11170334 0.99 8 1 8 \n", "2 11170334 0.99 17 1 17 \n", "3 5510424 0.99 1 2 1 \n", "4 5510424 0.99 8 2 8 \n", "\n", " name \n", "0 Music \n", "1 Music \n", "2 Heavy Metal Classic \n", "3 Music \n", "4 Music " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# left join 3 tables\n", "LEFT_join_from_track=\"\"\"\n", "SELECT * \n", " FROM track t\n", " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n", " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "LEFT_join_from_track=pd.read_sql(LEFT_join_from_track,conn)\n", "print(len(LEFT_join_from_track))\n", "LEFT_join_from_track.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8719\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>name</th>\n", " <th>playlist_id</th>\n", " <th>track_id</th>\n", " <th>track_id</th>\n", " <th>name</th>\n", " <th>album_id</th>\n", " <th>media_type_id</th>\n", " <th>genre_id</th>\n", " <th>composer</th>\n", " <th>milliseconds</th>\n", " <th>bytes</th>\n", " <th>unit_price</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>1.0</td>\n", " <td>Angus Young, Malcolm Young, Brian Johnson</td>\n", " <td>343719.0</td>\n", " <td>11170334.0</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1.0</td>\n", " <td>2.0</td>\n", " <td>2.0</td>\n", " <td>Balls to the Wall</td>\n", " <td>2.0</td>\n", " <td>2.0</td>\n", " <td>1.0</td>\n", " <td>None</td>\n", " <td>342562.0</td>\n", " <td>5510424.0</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1.0</td>\n", " <td>3.0</td>\n", " <td>3.0</td>\n", " <td>Fast As a Shark</td>\n", " <td>3.0</td>\n", " <td>2.0</td>\n", " <td>1.0</td>\n", " <td>F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...</td>\n", " <td>230619.0</td>\n", " <td>3990994.0</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1.0</td>\n", " <td>4.0</td>\n", " <td>4.0</td>\n", " <td>Restless and Wild</td>\n", " <td>3.0</td>\n", " <td>2.0</td>\n", " <td>1.0</td>\n", " <td>F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...</td>\n", " <td>252051.0</td>\n", " <td>4331779.0</td>\n", " <td>0.99</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>1.0</td>\n", " <td>5.0</td>\n", " <td>5.0</td>\n", " <td>Princess of the Dawn</td>\n", " <td>3.0</td>\n", " <td>2.0</td>\n", " <td>1.0</td>\n", " <td>Deaffy & R.A. Smith-Diesel</td>\n", " <td>375418.0</td>\n", " <td>6290521.0</td>\n", " <td>0.99</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id name playlist_id track_id track_id \\\n", "0 1 Music 1.0 1.0 1.0 \n", "1 1 Music 1.0 2.0 2.0 \n", "2 1 Music 1.0 3.0 3.0 \n", "3 1 Music 1.0 4.0 4.0 \n", "4 1 Music 1.0 5.0 5.0 \n", "\n", " name album_id media_type_id genre_id \\\n", "0 For Those About To Rock (We Salute You) 1.0 1.0 1.0 \n", "1 Balls to the Wall 2.0 2.0 1.0 \n", "2 Fast As a Shark 3.0 2.0 1.0 \n", "3 Restless and Wild 3.0 2.0 1.0 \n", "4 Princess of the Dawn 3.0 2.0 1.0 \n", "\n", " composer milliseconds \\\n", "0 Angus Young, Malcolm Young, Brian Johnson 343719.0 \n", "1 None 342562.0 \n", "2 F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho... 230619.0 \n", "3 F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D... 252051.0 \n", "4 Deaffy & R.A. Smith-Diesel 375418.0 \n", "\n", " bytes unit_price \n", "0 11170334.0 0.99 \n", "1 5510424.0 0.99 \n", "2 3990994.0 0.99 \n", "3 4331779.0 0.99 \n", "4 6290521.0 0.99 " ] }, "execution_count": 26, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# this is to show different order in table can have different result for left join\n", "LEFT_join_from_playlist=\"\"\"\n", "SELECT *\n", "FROM playlist p\n", "LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n", "LEFT JOIN track t ON t.track_id = pt.track_id\"\"\"\n", "\n", "LEFT_join_from_playlist=pd.read_sql(LEFT_join_from_playlist,conn)\n", "print(len(LEFT_join_from_playlist))\n", "LEFT_join_from_playlist.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ ">- The result has 8719 rows, instead of 8715 rows for left_join_from_track" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**sub_join_from_track**" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>playlist_name</th>\n", " <th>track_name</th>\n", " <th>length_seconds</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Band Members Discuss Tracks from \"Revelations\"</td>\n", " <td>294</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Revelations</td>\n", " <td>252</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>One and the Same</td>\n", " <td>217</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Sound of a Gun</td>\n", " <td>260</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Until We Fall</td>\n", " <td>230</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id playlist_name track_name \\\n", "0 1 Music Band Members Discuss Tracks from \"Revelations\" \n", "1 1 Music Revelations \n", "2 1 Music One and the Same \n", "3 1 Music Sound of a Gun \n", "4 1 Music Until We Fall \n", "\n", " length_seconds \n", "0 294 \n", "1 252 \n", "2 217 \n", "3 260 \n", "4 230 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub=\"\"\"\n", "SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " JOIN playlist_track pt ON t.track_id=pt.track_id\n", " JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "sub=pd.read_sql(sub,conn)\n", "print(len(sub))\n", "sub.head()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "14\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>playlist_name</th>\n", " <th>number_of_tracks</th>\n", " <th>length_seconds</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>3290</td>\n", " <td>876049</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>3</td>\n", " <td>TV Shows</td>\n", " <td>213</td>\n", " <td>500987</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>5</td>\n", " <td>90’s Music</td>\n", " <td>1477</td>\n", " <td>397970</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>8</td>\n", " <td>Music</td>\n", " <td>3290</td>\n", " <td>876049</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>9</td>\n", " <td>Music Videos</td>\n", " <td>1</td>\n", " <td>294</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>10</td>\n", " <td>TV Shows</td>\n", " <td>213</td>\n", " <td>500987</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>11</td>\n", " <td>Brazilian Music</td>\n", " <td>39</td>\n", " <td>9464</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>12</td>\n", " <td>Classical</td>\n", " <td>75</td>\n", " <td>21736</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>13</td>\n", " <td>Classical 101 - Deep Cuts</td>\n", " <td>25</td>\n", " <td>6742</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>14</td>\n", " <td>Classical 101 - Next Steps</td>\n", " <td>25</td>\n", " <td>7565</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>15</td>\n", " <td>Classical 101 - The Basics</td>\n", " <td>25</td>\n", " <td>7429</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>16</td>\n", " <td>Grunge</td>\n", " <td>15</td>\n", " <td>4114</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>17</td>\n", " <td>Heavy Metal Classic</td>\n", " <td>26</td>\n", " <td>8189</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>18</td>\n", " <td>On-The-Go 1</td>\n", " <td>1</td>\n", " <td>197</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id playlist_name number_of_tracks length_seconds\n", "0 1 Music 3290 876049\n", "1 3 TV Shows 213 500987\n", "2 5 90’s Music 1477 397970\n", "3 8 Music 3290 876049\n", "4 9 Music Videos 1 294\n", "5 10 TV Shows 213 500987\n", "6 11 Brazilian Music 39 9464\n", "7 12 Classical 75 21736\n", "8 13 Classical 101 - Deep Cuts 25 6742\n", "9 14 Classical 101 - Next Steps 25 7565\n", "10 15 Classical 101 - The Basics 25 7429\n", "11 16 Grunge 15 4114\n", "12 17 Heavy Metal Classic 26 8189\n", "13 18 On-The-Go 1 1 197" ] }, "execution_count": 28, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# name subquery using WITH and AS \n", "q=\"\"\"\n", "WITH sub AS\n", "(SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " JOIN playlist_track pt ON t.track_id=pt.track_id\n", " JOIN playlist p ON p.playlist_id=pt.playlist_id\n", ")\n", "SELECT\n", " playlist_id,\n", " playlist_name,\n", " COUNT(playlist_id) number_of_tracks,\n", " SUM(length_seconds) length_seconds\n", "FROM sub\n", "GROUP BY 1\n", "ORDER BY 1\n", "\"\"\"\n", "table_sub=pd.read_sql(q,conn)\n", "print(len(table_sub))\n", "table_sub\n", "\n", "# GROUP BY 1 and GROUP BY 1,2 are the same\n", "# COUNT(playlist_id), same as COUNT(track_name)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**sub_LEFT_from_track**" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8715\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>playlist_name</th>\n", " <th>track_name</th>\n", " <th>length_seconds</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>343</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>8</td>\n", " <td>Music</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>343</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>17</td>\n", " <td>Heavy Metal Classic</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>343</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Balls to the Wall</td>\n", " <td>342</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>8</td>\n", " <td>Music</td>\n", " <td>Balls to the Wall</td>\n", " <td>342</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id playlist_name track_name \\\n", "0 1 Music For Those About To Rock (We Salute You) \n", "1 8 Music For Those About To Rock (We Salute You) \n", "2 17 Heavy Metal Classic For Those About To Rock (We Salute You) \n", "3 1 Music Balls to the Wall \n", "4 8 Music Balls to the Wall \n", "\n", " length_seconds \n", "0 343 \n", "1 343 \n", "2 343 \n", "3 342 \n", "4 342 " ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_LEFT_from_track=\"\"\"\n", "SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n", " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n", "\n", "sub_LEFT_from_track=pd.read_sql(sub_LEFT_from_track,conn)\n", "print(len(sub_LEFT_from_track))\n", "sub_LEFT_from_track.head()" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "14\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>playlist_name</th>\n", " <th>number_of_tracks</th>\n", " <th>length_seconds</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>3290</td>\n", " <td>876049</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>3</td>\n", " <td>TV Shows</td>\n", " <td>213</td>\n", " <td>500987</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>5</td>\n", " <td>90’s Music</td>\n", " <td>1477</td>\n", " <td>397970</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>8</td>\n", " <td>Music</td>\n", " <td>3290</td>\n", " <td>876049</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>9</td>\n", " <td>Music Videos</td>\n", " <td>1</td>\n", " <td>294</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>10</td>\n", " <td>TV Shows</td>\n", " <td>213</td>\n", " <td>500987</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>11</td>\n", " <td>Brazilian Music</td>\n", " <td>39</td>\n", " <td>9464</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>12</td>\n", " <td>Classical</td>\n", " <td>75</td>\n", " <td>21736</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>13</td>\n", " <td>Classical 101 - Deep Cuts</td>\n", " <td>25</td>\n", " <td>6742</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>14</td>\n", " <td>Classical 101 - Next Steps</td>\n", " <td>25</td>\n", " <td>7565</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>15</td>\n", " <td>Classical 101 - The Basics</td>\n", " <td>25</td>\n", " <td>7429</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>16</td>\n", " <td>Grunge</td>\n", " <td>15</td>\n", " <td>4114</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>17</td>\n", " <td>Heavy Metal Classic</td>\n", " <td>26</td>\n", " <td>8189</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>18</td>\n", " <td>On-The-Go 1</td>\n", " <td>1</td>\n", " <td>197</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id playlist_name number_of_tracks length_seconds\n", "0 1 Music 3290 876049\n", "1 3 TV Shows 213 500987\n", "2 5 90’s Music 1477 397970\n", "3 8 Music 3290 876049\n", "4 9 Music Videos 1 294\n", "5 10 TV Shows 213 500987\n", "6 11 Brazilian Music 39 9464\n", "7 12 Classical 75 21736\n", "8 13 Classical 101 - Deep Cuts 25 6742\n", "9 14 Classical 101 - Next Steps 25 7565\n", "10 15 Classical 101 - The Basics 25 7429\n", "11 16 Grunge 15 4114\n", "12 17 Heavy Metal Classic 26 8189\n", "13 18 On-The-Go 1 1 197" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q=\"\"\"\n", "WITH sub_LEFT AS\n", "(SELECT \n", " p.playlist_id, \n", " p.name AS playlist_name, \n", " t.name AS track_name,\n", " (t.milliseconds / 1000) length_seconds \n", " FROM track t\n", " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n", " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id\n", ")\n", "SELECT\n", " playlist_id,\n", " playlist_name,\n", " COUNT(track_name) number_of_tracks,\n", " SUM(length_seconds) length_seconds\n", "FROM sub_LEFT\n", "GROUP BY 1, 2\n", "ORDER BY 1\n", "\"\"\"\n", "table_sub_LEFT_from_track=pd.read_sql(q,conn)\n", "print(len(table_sub_LEFT_from_track))\n", "table_sub_LEFT_from_track" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**sub_LEFT_from_playlist**" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "8719\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>playlist_name</th>\n", " <th>track_name</th>\n", " <th>length_seconds</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>For Those About To Rock (We Salute You)</td>\n", " <td>343.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Balls to the Wall</td>\n", " <td>342.0</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Fast As a Shark</td>\n", " <td>230.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Restless and Wild</td>\n", " <td>252.0</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>Princess of the Dawn</td>\n", " <td>375.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id playlist_name track_name \\\n", "0 1 Music For Those About To Rock (We Salute You) \n", "1 1 Music Balls to the Wall \n", "2 1 Music Fast As a Shark \n", "3 1 Music Restless and Wild \n", "4 1 Music Princess of the Dawn \n", "\n", " length_seconds \n", "0 343.0 \n", "1 342.0 \n", "2 230.0 \n", "3 252.0 \n", "4 375.0 " ] }, "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sub_LEFT_from_playlist=\"\"\"\n", "SELECT\n", " p.playlist_id,\n", " p.name playlist_name,\n", " t.name track_name,\n", " (t.milliseconds / 1000) length_seconds\n", "FROM playlist p\n", "LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n", "LEFT JOIN track t ON t.track_id = pt.track_id\"\"\"\n", "\n", "sub_LEFT_from_playlist=pd.read_sql(sub_LEFT_from_playlist, conn)\n", "print(len(sub_LEFT_from_playlist))\n", "sub_LEFT_from_playlist.head()" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "18\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>playlist_id</th>\n", " <th>playlist_name</th>\n", " <th>number_of_tracks</th>\n", " <th>length_seconds</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Music</td>\n", " <td>3290</td>\n", " <td>876049.0</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>Movies</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>TV Shows</td>\n", " <td>213</td>\n", " <td>500987.0</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>Audiobooks</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>90’s Music</td>\n", " <td>1477</td>\n", " <td>397970.0</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>6</td>\n", " <td>Audiobooks</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>7</td>\n", " <td>Movies</td>\n", " <td>0</td>\n", " <td>NaN</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>8</td>\n", " <td>Music</td>\n", " <td>3290</td>\n", " <td>876049.0</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>9</td>\n", " <td>Music Videos</td>\n", " <td>1</td>\n", " <td>294.0</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>10</td>\n", " <td>TV Shows</td>\n", " <td>213</td>\n", " <td>500987.0</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>11</td>\n", " <td>Brazilian Music</td>\n", " <td>39</td>\n", " <td>9464.0</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>12</td>\n", " <td>Classical</td>\n", " <td>75</td>\n", " <td>21736.0</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>13</td>\n", " <td>Classical 101 - Deep Cuts</td>\n", " <td>25</td>\n", " <td>6742.0</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>14</td>\n", " <td>Classical 101 - Next Steps</td>\n", " <td>25</td>\n", " <td>7565.0</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>15</td>\n", " <td>Classical 101 - The Basics</td>\n", " <td>25</td>\n", " <td>7429.0</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>16</td>\n", " <td>Grunge</td>\n", " <td>15</td>\n", " <td>4114.0</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>17</td>\n", " <td>Heavy Metal Classic</td>\n", " <td>26</td>\n", " <td>8189.0</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>18</td>\n", " <td>On-The-Go 1</td>\n", " <td>1</td>\n", " <td>197.0</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " playlist_id playlist_name number_of_tracks length_seconds\n", "0 1 Music 3290 876049.0\n", "1 2 Movies 0 NaN\n", "2 3 TV Shows 213 500987.0\n", "3 4 Audiobooks 0 NaN\n", "4 5 90’s Music 1477 397970.0\n", "5 6 Audiobooks 0 NaN\n", "6 7 Movies 0 NaN\n", "7 8 Music 3290 876049.0\n", "8 9 Music Videos 1 294.0\n", "9 10 TV Shows 213 500987.0\n", "10 11 Brazilian Music 39 9464.0\n", "11 12 Classical 75 21736.0\n", "12 13 Classical 101 - Deep Cuts 25 6742.0\n", "13 14 Classical 101 - Next Steps 25 7565.0\n", "14 15 Classical 101 - The Basics 25 7429.0\n", "15 16 Grunge 15 4114.0\n", "16 17 Heavy Metal Classic 26 8189.0\n", "17 18 On-The-Go 1 1 197.0" ] }, "execution_count": 32, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q=\"\"\"\n", "WITH playlist_info AS\n", " (\n", " SELECT\n", " p.playlist_id,\n", " p.name playlist_name,\n", " t.name track_name,\n", " (t.milliseconds / 1000) length_seconds\n", " FROM playlist p\n", " LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n", " LEFT JOIN track t ON t.track_id = pt.track_id\n", " )\n", "\n", "SELECT\n", " playlist_id,\n", " playlist_name,\n", " COUNT(track_name) number_of_tracks,\n", " SUM(length_seconds) length_seconds\n", "FROM playlist_info\n", "GROUP BY 1,2\n", "ORDER BY 1;\"\"\"\n", "\n", "table_sub_LEFT_from_playlist=pd.read_sql(q, conn)\n", "print(len(table_sub_LEFT_from_playlist))\n", "table_sub_LEFT_from_playlist" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**CREATE VIEW (create a table can be reused), view can not be run on Jupyter notebook**" ] }, { "cell_type": "code", "execution_count": 33, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "18\n" ] }, { "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>customer_id</th>\n", " <th>first_name</th>\n", " <th>last_name</th>\n", " <th>company</th>\n", " <th>address</th>\n", " <th>city</th>\n", " <th>state</th>\n", " <th>country</th>\n", " <th>postal_code</th>\n", " <th>phone</th>\n", " <th>fax</th>\n", " <th>email</th>\n", " <th>support_rep_id</th>\n", " <th>invoice_id</th>\n", " <th>customer_id</th>\n", " <th>invoice_date</th>\n", " <th>billing_address</th>\n", " <th>billing_city</th>\n", " <th>billing_state</th>\n", " <th>billing_country</th>\n", " <th>billing_postal_code</th>\n", " <th>total</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>Luís</td>\n", " <td>Gonçalves</td>\n", " <td>Embraer - Empresa Brasileira de Aeronáutica S.A.</td>\n", " <td>Av. Brigadeiro Faria Lima, 2170</td>\n", " <td>São José dos Campos</td>\n", " <td>SP</td>\n", " <td>Brazil</td>\n", " <td>12227-000</td>\n", " <td>+55 (12) 3923-5555</td>\n", " <td>+55 (12) 3923-5566</td>\n", " <td>luisg@embraer.com.br</td>\n", " <td>3</td>\n", " <td>16</td>\n", " <td>1</td>\n", " <td>2017-01-26 00:00:00</td>\n", " <td>Av. Brigadeiro Faria Lima, 2170</td>\n", " <td>São José dos Campos</td>\n", " <td>SP</td>\n", " <td>Brazil</td>\n", " <td>12227-000</td>\n", " <td>8.91</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>3</td>\n", " <td>François</td>\n", " <td>Tremblay</td>\n", " <td>None</td>\n", " <td>1498 rue Bélanger</td>\n", " <td>Montréal</td>\n", " <td>QC</td>\n", " <td>Canada</td>\n", " <td>H2G 1A7</td>\n", " <td>+1 (514) 721-4711</td>\n", " <td>None</td>\n", " <td>ftremblay@gmail.com</td>\n", " <td>3</td>\n", " <td>31</td>\n", " <td>3</td>\n", " <td>2017-02-21 00:00:00</td>\n", " <td>1498 rue Bélanger</td>\n", " <td>Montréal</td>\n", " <td>QC</td>\n", " <td>Canada</td>\n", " <td>H2G 1A7</td>\n", " <td>19.80</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>5</td>\n", " <td>František</td>\n", " <td>Wichterlová</td>\n", " <td>JetBrains s.r.o.</td>\n", " <td>Klanova 9/506</td>\n", " <td>Prague</td>\n", " <td>None</td>\n", " <td>Czech Republic</td>\n", " <td>14700</td>\n", " <td>+420 2 4172 5555</td>\n", " <td>+420 2 4172 5555</td>\n", " <td>frantisekw@jetbrains.com</td>\n", " <td>4</td>\n", " <td>78</td>\n", " <td>5</td>\n", " <td>2017-05-29 00:00:00</td>\n", " <td>Klanova 9/506</td>\n", " <td>Prague</td>\n", " <td>None</td>\n", " <td>Czech Republic</td>\n", " <td>14700</td>\n", " <td>8.91</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>6</td>\n", " <td>Helena</td>\n", " <td>Holý</td>\n", " <td>None</td>\n", " <td>Rilská 3174/6</td>\n", " <td>Prague</td>\n", " <td>None</td>\n", " <td>Czech Republic</td>\n", " <td>14300</td>\n", " <td>+420 2 4177 0449</td>\n", " <td>None</td>\n", " <td>hholy@gmail.com</td>\n", " <td>5</td>\n", " <td>112</td>\n", " <td>6</td>\n", " <td>2017-08-31 00:00:00</td>\n", " <td>Rilská 3174/6</td>\n", " <td>Prague</td>\n", " <td>None</td>\n", " <td>Czech Republic</td>\n", " <td>14300</td>\n", " <td>16.83</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>13</td>\n", " <td>Fernanda</td>\n", " <td>Ramos</td>\n", " <td>None</td>\n", " <td>Qe 7 Bloco G</td>\n", " <td>Brasília</td>\n", " <td>DF</td>\n", " <td>Brazil</td>\n", " <td>71020-677</td>\n", " <td>+55 (61) 3363-5547</td>\n", " <td>+55 (61) 3363-7855</td>\n", " <td>fernadaramos4@uol.com.br</td>\n", " <td>4</td>\n", " <td>73</td>\n", " <td>13</td>\n", " <td>2017-05-18 00:00:00</td>\n", " <td>Qe 7 Bloco G</td>\n", " <td>Brasília</td>\n", " <td>DF</td>\n", " <td>Brazil</td>\n", " <td>71020-677</td>\n", " <td>14.85</td>\n", " </tr>\n", " <tr>\n", " <th>5</th>\n", " <td>17</td>\n", " <td>Jack</td>\n", " <td>Smith</td>\n", " <td>Microsoft Corporation</td>\n", " <td>1 Microsoft Way</td>\n", " <td>Redmond</td>\n", " <td>WA</td>\n", " <td>USA</td>\n", " <td>98052-8300</td>\n", " <td>+1 (425) 882-8080</td>\n", " <td>+1 (425) 882-8081</td>\n", " <td>jacksmith@microsoft.com</td>\n", " <td>5</td>\n", " <td>98</td>\n", " <td>17</td>\n", " <td>2017-07-23 00:00:00</td>\n", " <td>1 Microsoft Way</td>\n", " <td>Redmond</td>\n", " <td>WA</td>\n", " <td>USA</td>\n", " <td>98052-8300</td>\n", " <td>9.90</td>\n", " </tr>\n", " <tr>\n", " <th>6</th>\n", " <td>20</td>\n", " <td>Dan</td>\n", " <td>Miller</td>\n", " <td>None</td>\n", " <td>541 Del Medio Avenue</td>\n", " <td>Mountain View</td>\n", " <td>CA</td>\n", " <td>USA</td>\n", " <td>94040-111</td>\n", " <td>+1 (650) 644-3358</td>\n", " <td>None</td>\n", " <td>dmiller@comcast.com</td>\n", " <td>4</td>\n", " <td>17</td>\n", " <td>20</td>\n", " <td>2017-01-28 00:00:00</td>\n", " <td>541 Del Medio Avenue</td>\n", " <td>Mountain View</td>\n", " <td>CA</td>\n", " <td>USA</td>\n", " <td>94040-111</td>\n", " <td>10.89</td>\n", " </tr>\n", " <tr>\n", " <th>7</th>\n", " <td>21</td>\n", " <td>Kathy</td>\n", " <td>Chase</td>\n", " <td>None</td>\n", " <td>801 W 4th Street</td>\n", " <td>Reno</td>\n", " <td>NV</td>\n", " <td>USA</td>\n", " <td>89503</td>\n", " <td>+1 (775) 223-7665</td>\n", " <td>None</td>\n", " <td>kachase@hotmail.com</td>\n", " <td>5</td>\n", " <td>44</td>\n", " <td>21</td>\n", " <td>2017-03-20 00:00:00</td>\n", " <td>801 W 4th Street</td>\n", " <td>Reno</td>\n", " <td>NV</td>\n", " <td>USA</td>\n", " <td>89503</td>\n", " <td>11.88</td>\n", " </tr>\n", " <tr>\n", " <th>8</th>\n", " <td>22</td>\n", " <td>Heather</td>\n", " <td>Leacock</td>\n", " <td>None</td>\n", " <td>120 S Orange Ave</td>\n", " <td>Orlando</td>\n", " <td>FL</td>\n", " <td>USA</td>\n", " <td>32801</td>\n", " <td>+1 (407) 999-7788</td>\n", " <td>None</td>\n", " <td>hleacock@gmail.com</td>\n", " <td>4</td>\n", " <td>43</td>\n", " <td>22</td>\n", " <td>2017-03-19 00:00:00</td>\n", " <td>120 S Orange Ave</td>\n", " <td>Orlando</td>\n", " <td>FL</td>\n", " <td>USA</td>\n", " <td>32801</td>\n", " <td>6.93</td>\n", " </tr>\n", " <tr>\n", " <th>9</th>\n", " <td>30</td>\n", " <td>Edward</td>\n", " <td>Francis</td>\n", " <td>None</td>\n", " <td>230 Elgin Street</td>\n", " <td>Ottawa</td>\n", " <td>ON</td>\n", " <td>Canada</td>\n", " <td>K2P 1L7</td>\n", " <td>+1 (613) 234-3322</td>\n", " <td>None</td>\n", " <td>edfrancis@yachoo.ca</td>\n", " <td>3</td>\n", " <td>2</td>\n", " <td>30</td>\n", " <td>2017-01-03 00:00:00</td>\n", " <td>230 Elgin Street</td>\n", " <td>Ottawa</td>\n", " <td>ON</td>\n", " <td>Canada</td>\n", " <td>K2P 1L7</td>\n", " <td>9.90</td>\n", " </tr>\n", " <tr>\n", " <th>10</th>\n", " <td>34</td>\n", " <td>João</td>\n", " <td>Fernandes</td>\n", " <td>None</td>\n", " <td>Rua da Assunção 53</td>\n", " <td>Lisbon</td>\n", " <td>None</td>\n", " <td>Portugal</td>\n", " <td>None</td>\n", " <td>+351 (213) 466-111</td>\n", " <td>None</td>\n", " <td>jfernandes@yahoo.pt</td>\n", " <td>4</td>\n", " <td>35</td>\n", " <td>34</td>\n", " <td>2017-02-25 00:00:00</td>\n", " <td>Rua da Assunção 53</td>\n", " <td>Lisbon</td>\n", " <td>None</td>\n", " <td>Portugal</td>\n", " <td>None</td>\n", " <td>4.95</td>\n", " </tr>\n", " <tr>\n", " <th>11</th>\n", " <td>37</td>\n", " <td>Fynn</td>\n", " <td>Zimmermann</td>\n", " <td>None</td>\n", " <td>Berger Straße 10</td>\n", " <td>Frankfurt</td>\n", " <td>None</td>\n", " <td>Germany</td>\n", " <td>60316</td>\n", " <td>+49 069 40598889</td>\n", " <td>None</td>\n", " <td>fzimmermann@yahoo.de</td>\n", " <td>3</td>\n", " <td>40</td>\n", " <td>37</td>\n", " <td>2017-03-13 00:00:00</td>\n", " <td>Berger Straße 10</td>\n", " <td>Frankfurt</td>\n", " <td>None</td>\n", " <td>Germany</td>\n", " <td>60316</td>\n", " <td>9.90</td>\n", " </tr>\n", " <tr>\n", " <th>12</th>\n", " <td>42</td>\n", " <td>Wyatt</td>\n", " <td>Girard</td>\n", " <td>None</td>\n", " <td>9, Place Louis Barthou</td>\n", " <td>Bordeaux</td>\n", " <td>None</td>\n", " <td>France</td>\n", " <td>33000</td>\n", " <td>+33 05 56 96 96 96</td>\n", " <td>None</td>\n", " <td>wyatt.girard@yahoo.fr</td>\n", " <td>3</td>\n", " <td>12</td>\n", " <td>42</td>\n", " <td>2017-01-21 00:00:00</td>\n", " <td>9, Place Louis Barthou</td>\n", " <td>Bordeaux</td>\n", " <td>None</td>\n", " <td>France</td>\n", " <td>33000</td>\n", " <td>3.96</td>\n", " </tr>\n", " <tr>\n", " <th>13</th>\n", " <td>46</td>\n", " <td>Hugh</td>\n", " <td>O'Reilly</td>\n", " <td>None</td>\n", " <td>3 Chatham Street</td>\n", " <td>Dublin</td>\n", " <td>Dublin</td>\n", " <td>Ireland</td>\n", " <td>None</td>\n", " <td>+353 01 6792424</td>\n", " <td>None</td>\n", " <td>hughoreilly@apple.ie</td>\n", " <td>3</td>\n", " <td>30</td>\n", " <td>46</td>\n", " <td>2017-02-18 00:00:00</td>\n", " <td>3 Chatham Street</td>\n", " <td>Dublin</td>\n", " <td>Dublin</td>\n", " <td>Ireland</td>\n", " <td>None</td>\n", " <td>10.89</td>\n", " </tr>\n", " <tr>\n", " <th>14</th>\n", " <td>50</td>\n", " <td>Enrique</td>\n", " <td>Muñoz</td>\n", " <td>None</td>\n", " <td>C/ San Bernardo 85</td>\n", " <td>Madrid</td>\n", " <td>None</td>\n", " <td>Spain</td>\n", " <td>28015</td>\n", " <td>+34 914 454 454</td>\n", " <td>None</td>\n", " <td>enrique_munoz@yahoo.es</td>\n", " <td>5</td>\n", " <td>41</td>\n", " <td>50</td>\n", " <td>2017-03-17 00:00:00</td>\n", " <td>C/ San Bernardo 85</td>\n", " <td>Madrid</td>\n", " <td>None</td>\n", " <td>Spain</td>\n", " <td>28015</td>\n", " <td>7.92</td>\n", " </tr>\n", " <tr>\n", " <th>15</th>\n", " <td>53</td>\n", " <td>Phil</td>\n", " <td>Hughes</td>\n", " <td>None</td>\n", " <td>113 Lupus St</td>\n", " <td>London</td>\n", " <td>None</td>\n", " <td>United Kingdom</td>\n", " <td>SW1V 3EN</td>\n", " <td>+44 020 7976 5722</td>\n", " <td>None</td>\n", " <td>phil.hughes@gmail.com</td>\n", " <td>3</td>\n", " <td>23</td>\n", " <td>53</td>\n", " <td>2017-02-09 00:00:00</td>\n", " <td>113 Lupus St</td>\n", " <td>London</td>\n", " <td>None</td>\n", " <td>United Kingdom</td>\n", " <td>SW1V 3EN</td>\n", " <td>9.90</td>\n", " </tr>\n", " <tr>\n", " <th>16</th>\n", " <td>57</td>\n", " <td>Luis</td>\n", " <td>Rojas</td>\n", " <td>None</td>\n", " <td>Calle Lira, 198</td>\n", " <td>Santiago</td>\n", " <td>None</td>\n", " <td>Chile</td>\n", " <td>None</td>\n", " <td>+56 (0)2 635 4444</td>\n", " <td>None</td>\n", " <td>luisrojas@yahoo.cl</td>\n", " <td>5</td>\n", " <td>47</td>\n", " <td>57</td>\n", " <td>2017-03-25 00:00:00</td>\n", " <td>Calle Lira, 198</td>\n", " <td>Santiago</td>\n", " <td>None</td>\n", " <td>Chile</td>\n", " <td>None</td>\n", " <td>5.94</td>\n", " </tr>\n", " <tr>\n", " <th>17</th>\n", " <td>58</td>\n", " <td>Manoj</td>\n", " <td>Pareek</td>\n", " <td>None</td>\n", " <td>12,Community Centre</td>\n", " <td>Delhi</td>\n", " <td>None</td>\n", " <td>India</td>\n", " <td>110017</td>\n", " <td>+91 0124 39883988</td>\n", " <td>None</td>\n", " <td>manoj.pareek@rediff.com</td>\n", " <td>3</td>\n", " <td>33</td>\n", " <td>58</td>\n", " <td>2017-02-21 00:00:00</td>\n", " <td>12,Community Centre</td>\n", " <td>Delhi</td>\n", " <td>None</td>\n", " <td>India</td>\n", " <td>110017</td>\n", " <td>3.96</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " customer_id first_name last_name \\\n", "0 1 Luís Gonçalves \n", "1 3 François Tremblay \n", "2 5 František Wichterlová \n", "3 6 Helena Holý \n", "4 13 Fernanda Ramos \n", "5 17 Jack Smith \n", "6 20 Dan Miller \n", "7 21 Kathy Chase \n", "8 22 Heather Leacock \n", "9 30 Edward Francis \n", "10 34 João Fernandes \n", "11 37 Fynn Zimmermann \n", "12 42 Wyatt Girard \n", "13 46 Hugh O'Reilly \n", "14 50 Enrique Muñoz \n", "15 53 Phil Hughes \n", "16 57 Luis Rojas \n", "17 58 Manoj Pareek \n", "\n", " company \\\n", "0 Embraer - Empresa Brasileira de Aeronáutica S.A. \n", "1 None \n", "2 JetBrains s.r.o. \n", "3 None \n", "4 None \n", "5 Microsoft Corporation \n", "6 None \n", "7 None \n", "8 None \n", "9 None \n", "10 None \n", "11 None \n", "12 None \n", "13 None \n", "14 None \n", "15 None \n", "16 None \n", "17 None \n", "\n", " address city state \\\n", "0 Av. Brigadeiro Faria Lima, 2170 São José dos Campos SP \n", "1 1498 rue Bélanger Montréal QC \n", "2 Klanova 9/506 Prague None \n", "3 Rilská 3174/6 Prague None \n", "4 Qe 7 Bloco G Brasília DF \n", "5 1 Microsoft Way Redmond WA \n", "6 541 Del Medio Avenue Mountain View CA \n", "7 801 W 4th Street Reno NV \n", "8 120 S Orange Ave Orlando FL \n", "9 230 Elgin Street Ottawa ON \n", "10 Rua da Assunção 53 Lisbon None \n", "11 Berger Straße 10 Frankfurt None \n", "12 9, Place Louis Barthou Bordeaux None \n", "13 3 Chatham Street Dublin Dublin \n", "14 C/ San Bernardo 85 Madrid None \n", "15 113 Lupus St London None \n", "16 Calle Lira, 198 Santiago None \n", "17 12,Community Centre Delhi None \n", "\n", " country postal_code phone fax \\\n", "0 Brazil 12227-000 +55 (12) 3923-5555 +55 (12) 3923-5566 \n", "1 Canada H2G 1A7 +1 (514) 721-4711 None \n", "2 Czech Republic 14700 +420 2 4172 5555 +420 2 4172 5555 \n", "3 Czech Republic 14300 +420 2 4177 0449 None \n", "4 Brazil 71020-677 +55 (61) 3363-5547 +55 (61) 3363-7855 \n", "5 USA 98052-8300 +1 (425) 882-8080 +1 (425) 882-8081 \n", "6 USA 94040-111 +1 (650) 644-3358 None \n", "7 USA 89503 +1 (775) 223-7665 None \n", "8 USA 32801 +1 (407) 999-7788 None \n", "9 Canada K2P 1L7 +1 (613) 234-3322 None \n", "10 Portugal None +351 (213) 466-111 None \n", "11 Germany 60316 +49 069 40598889 None \n", "12 France 33000 +33 05 56 96 96 96 None \n", "13 Ireland None +353 01 6792424 None \n", "14 Spain 28015 +34 914 454 454 None \n", "15 United Kingdom SW1V 3EN +44 020 7976 5722 None \n", "16 Chile None +56 (0)2 635 4444 None \n", "17 India 110017 +91 0124 39883988 None \n", "\n", " email support_rep_id invoice_id customer_id \\\n", "0 luisg@embraer.com.br 3 16 1 \n", "1 ftremblay@gmail.com 3 31 3 \n", "2 frantisekw@jetbrains.com 4 78 5 \n", "3 hholy@gmail.com 5 112 6 \n", "4 fernadaramos4@uol.com.br 4 73 13 \n", "5 jacksmith@microsoft.com 5 98 17 \n", "6 dmiller@comcast.com 4 17 20 \n", "7 kachase@hotmail.com 5 44 21 \n", "8 hleacock@gmail.com 4 43 22 \n", "9 edfrancis@yachoo.ca 3 2 30 \n", "10 jfernandes@yahoo.pt 4 35 34 \n", "11 fzimmermann@yahoo.de 3 40 37 \n", "12 wyatt.girard@yahoo.fr 3 12 42 \n", "13 hughoreilly@apple.ie 3 30 46 \n", "14 enrique_munoz@yahoo.es 5 41 50 \n", "15 phil.hughes@gmail.com 3 23 53 \n", "16 luisrojas@yahoo.cl 5 47 57 \n", "17 manoj.pareek@rediff.com 3 33 58 \n", "\n", " invoice_date billing_address billing_city \\\n", "0 2017-01-26 00:00:00 Av. Brigadeiro Faria Lima, 2170 São José dos Campos \n", "1 2017-02-21 00:00:00 1498 rue Bélanger Montréal \n", "2 2017-05-29 00:00:00 Klanova 9/506 Prague \n", "3 2017-08-31 00:00:00 Rilská 3174/6 Prague \n", "4 2017-05-18 00:00:00 Qe 7 Bloco G Brasília \n", "5 2017-07-23 00:00:00 1 Microsoft Way Redmond \n", "6 2017-01-28 00:00:00 541 Del Medio Avenue Mountain View \n", "7 2017-03-20 00:00:00 801 W 4th Street Reno \n", "8 2017-03-19 00:00:00 120 S Orange Ave Orlando \n", "9 2017-01-03 00:00:00 230 Elgin Street Ottawa \n", "10 2017-02-25 00:00:00 Rua da Assunção 53 Lisbon \n", "11 2017-03-13 00:00:00 Berger Straße 10 Frankfurt \n", "12 2017-01-21 00:00:00 9, Place Louis Barthou Bordeaux \n", "13 2017-02-18 00:00:00 3 Chatham Street Dublin \n", "14 2017-03-17 00:00:00 C/ San Bernardo 85 Madrid \n", "15 2017-02-09 00:00:00 113 Lupus St London \n", "16 2017-03-25 00:00:00 Calle Lira, 198 Santiago \n", "17 2017-02-21 00:00:00 12,Community Centre Delhi \n", "\n", " billing_state billing_country billing_postal_code total \n", "0 SP Brazil 12227-000 8.91 \n", "1 QC Canada H2G 1A7 19.80 \n", "2 None Czech Republic 14700 8.91 \n", "3 None Czech Republic 14300 16.83 \n", "4 DF Brazil 71020-677 14.85 \n", "5 WA USA 98052-8300 9.90 \n", "6 CA USA 94040-111 10.89 \n", "7 NV USA 89503 11.88 \n", "8 FL USA 32801 6.93 \n", "9 ON Canada K2P 1L7 9.90 \n", "10 None Portugal None 4.95 \n", "11 None Germany 60316 9.90 \n", "12 None France 33000 3.96 \n", "13 Dublin Ireland None 10.89 \n", "14 None Spain 28015 7.92 \n", "15 None United Kingdom SW1V 3EN 9.90 \n", "16 None Chile None 5.94 \n", "17 None India 110017 3.96 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "q=\"\"\"\n", "SELECT c.*, i.*\n", "FROM invoice i\n", "INNER JOIN customer c ON i.customer_id = c.customer_id\n", "GROUP BY 1\n", "HAVING SUM(i.total) > 90\n", "\"\"\"\n", "i_c_90=pd.read_sql(q,conn)\n", "print(len(i_c_90))\n", "i_c_90" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.0" } }, "nbformat": 4, "nbformat_minor": 2 }