{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Analyzing Database with SQL and Connecting Database to Jupyter Notebook"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ">- This notebook shows how to connect database to Jupyter Notebook, and join multi tables, and write complex queries to analyze data. ",
     "--by Lu Tang"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 1. Analyzing data in 'factbook.db', only two tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Imports \n",
    "import numpy as np # linear algebra\n",
    "import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)\n",
    "import sqlite3\n",
    "import matplotlib.pyplot as plt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>tbl_name</th>\n",
       "      <th>rootpage</th>\n",
       "      <th>sql</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table</td>\n",
       "      <td>sqlite_sequence</td>\n",
       "      <td>sqlite_sequence</td>\n",
       "      <td>3</td>\n",
       "      <td>CREATE TABLE sqlite_sequence(name,seq)</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table</td>\n",
       "      <td>facts</td>\n",
       "      <td>facts</td>\n",
       "      <td>47</td>\n",
       "      <td>CREATE TABLE \"facts\" (\"id\" INTEGER PRIMARY KEY...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>table</td>\n",
       "      <td>cities</td>\n",
       "      <td>cities</td>\n",
       "      <td>2</td>\n",
       "      <td>CREATE TABLE cities (\\n        id integer prim...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    type             name         tbl_name  rootpage  \\\n",
       "0  table  sqlite_sequence  sqlite_sequence         3   \n",
       "1  table            facts            facts        47   \n",
       "2  table           cities           cities         2   \n",
       "\n",
       "                                                 sql  \n",
       "0             CREATE TABLE sqlite_sequence(name,seq)  \n",
       "1  CREATE TABLE \"facts\" (\"id\" INTEGER PRIMARY KEY...  \n",
       "2  CREATE TABLE cities (\\n        id integer prim...  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Create the connection to the database, and see what tables we have\n",
    "conn = sqlite3.connect('factbook.db')\n",
    "\n",
    "tables = pd.read_sql(\"\"\"SELECT *\n",
    "                        FROM sqlite_master\n",
    "                        WHERE type='table';\"\"\", conn)\n",
    "tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "397\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>population</th>\n",
       "      <th>capital</th>\n",
       "      <th>facts_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Oranjestad</td>\n",
       "      <td>37000</td>\n",
       "      <td>1</td>\n",
       "      <td>216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Saint John'S</td>\n",
       "      <td>27000</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Abu Dhabi</td>\n",
       "      <td>942000</td>\n",
       "      <td>1</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Dubai</td>\n",
       "      <td>1978000</td>\n",
       "      <td>0</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Sharjah</td>\n",
       "      <td>983000</td>\n",
       "      <td>0</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id          name  population  capital facts_id\n",
       "0   1    Oranjestad       37000        1      216\n",
       "1   2  Saint John'S       27000        1        6\n",
       "2   3     Abu Dhabi      942000        1      184\n",
       "3   4         Dubai     1978000        0      184\n",
       "4   5       Sharjah      983000        0      184"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# List of cities\n",
    "cities = pd.read_sql(\"\"\"SELECT *\n",
    "                        FROM cities;\"\"\", conn)\n",
    "print(len(cities))\n",
    "cities.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "397\n",
      "396\n",
      "210\n"
     ]
    }
   ],
   "source": [
    "print(cities.id.nunique())\n",
    "print(cities.name.nunique())\n",
    "# SELECT COUNT(DISTINCT(facts_id)) FROM cities;\n",
    "print(cities.facts_id.nunique())\n",
    "\n",
    "# several cities share the same facts"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "261\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>code</th>\n",
       "      <th>name</th>\n",
       "      <th>area</th>\n",
       "      <th>area_land</th>\n",
       "      <th>area_water</th>\n",
       "      <th>population</th>\n",
       "      <th>population_growth</th>\n",
       "      <th>birth_rate</th>\n",
       "      <th>death_rate</th>\n",
       "      <th>migration_rate</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>af</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>652230.0</td>\n",
       "      <td>652230.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32564342.0</td>\n",
       "      <td>2.32</td>\n",
       "      <td>38.57</td>\n",
       "      <td>13.89</td>\n",
       "      <td>1.51</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>al</td>\n",
       "      <td>Albania</td>\n",
       "      <td>28748.0</td>\n",
       "      <td>27398.0</td>\n",
       "      <td>1350.0</td>\n",
       "      <td>3029278.0</td>\n",
       "      <td>0.30</td>\n",
       "      <td>12.92</td>\n",
       "      <td>6.58</td>\n",
       "      <td>3.30</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>ag</td>\n",
       "      <td>Algeria</td>\n",
       "      <td>2381741.0</td>\n",
       "      <td>2381741.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>39542166.0</td>\n",
       "      <td>1.84</td>\n",
       "      <td>23.67</td>\n",
       "      <td>4.31</td>\n",
       "      <td>0.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>an</td>\n",
       "      <td>Andorra</td>\n",
       "      <td>468.0</td>\n",
       "      <td>468.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>85580.0</td>\n",
       "      <td>0.12</td>\n",
       "      <td>8.13</td>\n",
       "      <td>6.96</td>\n",
       "      <td>0.00</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>ao</td>\n",
       "      <td>Angola</td>\n",
       "      <td>1246700.0</td>\n",
       "      <td>1246700.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>19625353.0</td>\n",
       "      <td>2.78</td>\n",
       "      <td>38.78</td>\n",
       "      <td>11.49</td>\n",
       "      <td>0.46</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id code         name       area  area_land  area_water  population  \\\n",
       "0   1   af  Afghanistan   652230.0   652230.0         0.0  32564342.0   \n",
       "1   2   al      Albania    28748.0    27398.0      1350.0   3029278.0   \n",
       "2   3   ag      Algeria  2381741.0  2381741.0         0.0  39542166.0   \n",
       "3   4   an      Andorra      468.0      468.0         0.0     85580.0   \n",
       "4   5   ao       Angola  1246700.0  1246700.0         0.0  19625353.0   \n",
       "\n",
       "   population_growth  birth_rate  death_rate  migration_rate  \n",
       "0               2.32       38.57       13.89            1.51  \n",
       "1               0.30       12.92        6.58            3.30  \n",
       "2               1.84       23.67        4.31            0.92  \n",
       "3               0.12        8.13        6.96            0.00  \n",
       "4               2.78       38.78       11.49            0.46  "
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# view table of facts\n",
    "facts = pd.read_sql(\"\"\"SELECT *\n",
    "                        FROM facts;\"\"\", conn)\n",
    "print(len(facts))\n",
    "facts.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.1 (INNER) JOIN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "397\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>code</th>\n",
       "      <th>name</th>\n",
       "      <th>area</th>\n",
       "      <th>area_land</th>\n",
       "      <th>area_water</th>\n",
       "      <th>population</th>\n",
       "      <th>population_growth</th>\n",
       "      <th>birth_rate</th>\n",
       "      <th>death_rate</th>\n",
       "      <th>migration_rate</th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>population</th>\n",
       "      <th>capital</th>\n",
       "      <th>facts_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>216</td>\n",
       "      <td>aa</td>\n",
       "      <td>Aruba</td>\n",
       "      <td>180.0</td>\n",
       "      <td>180.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>112162</td>\n",
       "      <td>1.33</td>\n",
       "      <td>12.56</td>\n",
       "      <td>8.18</td>\n",
       "      <td>8.92</td>\n",
       "      <td>1</td>\n",
       "      <td>Oranjestad</td>\n",
       "      <td>37000</td>\n",
       "      <td>1</td>\n",
       "      <td>216</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>ac</td>\n",
       "      <td>Antigua and Barbuda</td>\n",
       "      <td>442.0</td>\n",
       "      <td>442.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>92436</td>\n",
       "      <td>1.24</td>\n",
       "      <td>15.85</td>\n",
       "      <td>5.69</td>\n",
       "      <td>2.21</td>\n",
       "      <td>2</td>\n",
       "      <td>Saint John'S</td>\n",
       "      <td>27000</td>\n",
       "      <td>1</td>\n",
       "      <td>6</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>184</td>\n",
       "      <td>ae</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>83600.0</td>\n",
       "      <td>83600.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5779760</td>\n",
       "      <td>2.58</td>\n",
       "      <td>15.43</td>\n",
       "      <td>1.97</td>\n",
       "      <td>12.36</td>\n",
       "      <td>3</td>\n",
       "      <td>Abu Dhabi</td>\n",
       "      <td>942000</td>\n",
       "      <td>1</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>184</td>\n",
       "      <td>ae</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>83600.0</td>\n",
       "      <td>83600.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5779760</td>\n",
       "      <td>2.58</td>\n",
       "      <td>15.43</td>\n",
       "      <td>1.97</td>\n",
       "      <td>12.36</td>\n",
       "      <td>4</td>\n",
       "      <td>Dubai</td>\n",
       "      <td>1978000</td>\n",
       "      <td>0</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>184</td>\n",
       "      <td>ae</td>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>83600.0</td>\n",
       "      <td>83600.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>5779760</td>\n",
       "      <td>2.58</td>\n",
       "      <td>15.43</td>\n",
       "      <td>1.97</td>\n",
       "      <td>12.36</td>\n",
       "      <td>5</td>\n",
       "      <td>Sharjah</td>\n",
       "      <td>983000</td>\n",
       "      <td>0</td>\n",
       "      <td>184</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    id code                  name     area  area_land  area_water  population  \\\n",
       "0  216   aa                 Aruba    180.0      180.0         0.0      112162   \n",
       "1    6   ac   Antigua and Barbuda    442.0      442.0         0.0       92436   \n",
       "2  184   ae  United Arab Emirates  83600.0    83600.0         0.0     5779760   \n",
       "3  184   ae  United Arab Emirates  83600.0    83600.0         0.0     5779760   \n",
       "4  184   ae  United Arab Emirates  83600.0    83600.0         0.0     5779760   \n",
       "\n",
       "   population_growth  birth_rate  death_rate  migration_rate  id  \\\n",
       "0               1.33       12.56        8.18            8.92   1   \n",
       "1               1.24       15.85        5.69            2.21   2   \n",
       "2               2.58       15.43        1.97           12.36   3   \n",
       "3               2.58       15.43        1.97           12.36   4   \n",
       "4               2.58       15.43        1.97           12.36   5   \n",
       "\n",
       "           name  population  capital facts_id  \n",
       "0    Oranjestad       37000        1      216  \n",
       "1  Saint John'S       27000        1        6  \n",
       "2     Abu Dhabi      942000        1      184  \n",
       "3         Dubai     1978000        0      184  \n",
       "4       Sharjah      983000        0      184  "
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Inner Join\n",
    "q=\"\"\"SELECT * \n",
    "FROM facts  \n",
    "INNER JOIN cities \n",
    "ON cities.facts_id = facts.id\"\"\"\n",
    "\n",
    "inner_join = pd.read_sql(q, conn)\n",
    "\n",
    "print(len(inner_join))\n",
    "# view joined table \n",
    "inner_join.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "210"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Not all the facts_id is in the cities table\n",
    "inner_join['facts_id'].nunique()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.2 LEFT JOIN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "448\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>id</th>\n",
       "      <th>code</th>\n",
       "      <th>name</th>\n",
       "      <th>area</th>\n",
       "      <th>area_land</th>\n",
       "      <th>area_water</th>\n",
       "      <th>population</th>\n",
       "      <th>population_growth</th>\n",
       "      <th>birth_rate</th>\n",
       "      <th>death_rate</th>\n",
       "      <th>migration_rate</th>\n",
       "      <th>id</th>\n",
       "      <th>name</th>\n",
       "      <th>population</th>\n",
       "      <th>capital</th>\n",
       "      <th>facts_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>af</td>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>652230.0</td>\n",
       "      <td>652230.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>32564342.0</td>\n",
       "      <td>2.32</td>\n",
       "      <td>38.57</td>\n",
       "      <td>13.89</td>\n",
       "      <td>1.51</td>\n",
       "      <td>6.0</td>\n",
       "      <td>Kabul</td>\n",
       "      <td>3097000.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>al</td>\n",
       "      <td>Albania</td>\n",
       "      <td>28748.0</td>\n",
       "      <td>27398.0</td>\n",
       "      <td>1350.0</td>\n",
       "      <td>3029278.0</td>\n",
       "      <td>0.30</td>\n",
       "      <td>12.92</td>\n",
       "      <td>6.58</td>\n",
       "      <td>3.30</td>\n",
       "      <td>10.0</td>\n",
       "      <td>Tirana</td>\n",
       "      <td>419000.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>ag</td>\n",
       "      <td>Algeria</td>\n",
       "      <td>2381741.0</td>\n",
       "      <td>2381741.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>39542166.0</td>\n",
       "      <td>1.84</td>\n",
       "      <td>23.67</td>\n",
       "      <td>4.31</td>\n",
       "      <td>0.92</td>\n",
       "      <td>7.0</td>\n",
       "      <td>Algiers</td>\n",
       "      <td>2916000.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3</td>\n",
       "      <td>ag</td>\n",
       "      <td>Algeria</td>\n",
       "      <td>2381741.0</td>\n",
       "      <td>2381741.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>39542166.0</td>\n",
       "      <td>1.84</td>\n",
       "      <td>23.67</td>\n",
       "      <td>4.31</td>\n",
       "      <td>0.92</td>\n",
       "      <td>8.0</td>\n",
       "      <td>Oran</td>\n",
       "      <td>783000.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>4</td>\n",
       "      <td>an</td>\n",
       "      <td>Andorra</td>\n",
       "      <td>468.0</td>\n",
       "      <td>468.0</td>\n",
       "      <td>0.0</td>\n",
       "      <td>85580.0</td>\n",
       "      <td>0.12</td>\n",
       "      <td>8.13</td>\n",
       "      <td>6.96</td>\n",
       "      <td>0.00</td>\n",
       "      <td>12.0</td>\n",
       "      <td>Andorra La Vella</td>\n",
       "      <td>23000.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   id code         name       area  area_land  area_water  population  \\\n",
       "0   1   af  Afghanistan   652230.0   652230.0         0.0  32564342.0   \n",
       "1   2   al      Albania    28748.0    27398.0      1350.0   3029278.0   \n",
       "2   3   ag      Algeria  2381741.0  2381741.0         0.0  39542166.0   \n",
       "3   3   ag      Algeria  2381741.0  2381741.0         0.0  39542166.0   \n",
       "4   4   an      Andorra      468.0      468.0         0.0     85580.0   \n",
       "\n",
       "   population_growth  birth_rate  death_rate  migration_rate    id  \\\n",
       "0               2.32       38.57       13.89            1.51   6.0   \n",
       "1               0.30       12.92        6.58            3.30  10.0   \n",
       "2               1.84       23.67        4.31            0.92   7.0   \n",
       "3               1.84       23.67        4.31            0.92   8.0   \n",
       "4               0.12        8.13        6.96            0.00  12.0   \n",
       "\n",
       "               name  population  capital facts_id  \n",
       "0             Kabul   3097000.0      1.0        1  \n",
       "1            Tirana    419000.0      1.0        2  \n",
       "2           Algiers   2916000.0      1.0        3  \n",
       "3              Oran    783000.0      0.0        3  \n",
       "4  Andorra La Vella     23000.0      1.0        4  "
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Left Join\n",
    "q2=\"\"\"SELECT * \n",
    "FROM facts\n",
    "LEFT JOIN cities  \n",
    "ON cities.facts_id = facts.id;\"\"\"\n",
    "\n",
    "left_join = pd.read_sql(q2, conn)\n",
    "\n",
    "print(len(left_join))\n",
    "left_join.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ">- For left join, the order of FROM and LEFT JOIN matters\n",
    ">- FROM facts means every rows in the facts will be included, and some same facts have different cities, and some facts do not have cities at all."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 1.3 Inner Join with a subquery"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "208\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>country</th>\n",
       "      <th>capital_city</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Aruba</td>\n",
       "      <td>Oranjestad</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Antigua and Barbuda</td>\n",
       "      <td>Saint John'S</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>United Arab Emirates</td>\n",
       "      <td>Abu Dhabi</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Afghanistan</td>\n",
       "      <td>Kabul</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Algeria</td>\n",
       "      <td>Algiers</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                country  capital_city\n",
       "0                 Aruba    Oranjestad\n",
       "1   Antigua and Barbuda  Saint John'S\n",
       "2  United Arab Emirates     Abu Dhabi\n",
       "3           Afghanistan         Kabul\n",
       "4               Algeria       Algiers"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# name subquery table as c, and from facts join c\n",
    "q3=\"\"\"\n",
    "SELECT f.name country, c.name capital_city\n",
    "FROM facts f\n",
    "JOIN (\n",
    "      SELECT * FROM cities\n",
    "      WHERE capital = 1) c\n",
    "ON c.facts_id = f.id;\n",
    "\"\"\"\n",
    "subquery_join = pd.read_sql(q3, conn)\n",
    "\n",
    "print(len(subquery_join))\n",
    "subquery_join.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 2. Analyzing data in 'chinook.db' with 11 tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>type</th>\n",
       "      <th>name</th>\n",
       "      <th>tbl_name</th>\n",
       "      <th>rootpage</th>\n",
       "      <th>sql</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>table</td>\n",
       "      <td>album</td>\n",
       "      <td>album</td>\n",
       "      <td>2</td>\n",
       "      <td>CREATE TABLE [album]\\n(\\n    [album_id] INTEGE...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>table</td>\n",
       "      <td>artist</td>\n",
       "      <td>artist</td>\n",
       "      <td>3</td>\n",
       "      <td>CREATE TABLE [artist]\\n(\\n    [artist_id] INTE...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>table</td>\n",
       "      <td>customer</td>\n",
       "      <td>customer</td>\n",
       "      <td>4</td>\n",
       "      <td>CREATE TABLE [customer]\\n(\\n    [customer_id] ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>table</td>\n",
       "      <td>employee</td>\n",
       "      <td>employee</td>\n",
       "      <td>5</td>\n",
       "      <td>CREATE TABLE [employee]\\n(\\n    [employee_id] ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>table</td>\n",
       "      <td>genre</td>\n",
       "      <td>genre</td>\n",
       "      <td>6</td>\n",
       "      <td>CREATE TABLE [genre]\\n(\\n    [genre_id] INTEGE...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>table</td>\n",
       "      <td>invoice</td>\n",
       "      <td>invoice</td>\n",
       "      <td>7</td>\n",
       "      <td>CREATE TABLE [invoice]\\n(\\n    [invoice_id] IN...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>table</td>\n",
       "      <td>invoice_line</td>\n",
       "      <td>invoice_line</td>\n",
       "      <td>8</td>\n",
       "      <td>CREATE TABLE [invoice_line]\\n(\\n    [invoice_l...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>table</td>\n",
       "      <td>media_type</td>\n",
       "      <td>media_type</td>\n",
       "      <td>9</td>\n",
       "      <td>CREATE TABLE [media_type]\\n(\\n    [media_type_...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>table</td>\n",
       "      <td>playlist</td>\n",
       "      <td>playlist</td>\n",
       "      <td>10</td>\n",
       "      <td>CREATE TABLE [playlist]\\n(\\n    [playlist_id] ...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>table</td>\n",
       "      <td>playlist_track</td>\n",
       "      <td>playlist_track</td>\n",
       "      <td>11</td>\n",
       "      <td>CREATE TABLE [playlist_track]\\n(\\n    [playlis...</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>table</td>\n",
       "      <td>track</td>\n",
       "      <td>track</td>\n",
       "      <td>13</td>\n",
       "      <td>CREATE TABLE [track]\\n(\\n    [track_id] INTEGE...</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     type            name        tbl_name  rootpage  \\\n",
       "0   table           album           album         2   \n",
       "1   table          artist          artist         3   \n",
       "2   table        customer        customer         4   \n",
       "3   table        employee        employee         5   \n",
       "4   table           genre           genre         6   \n",
       "5   table         invoice         invoice         7   \n",
       "6   table    invoice_line    invoice_line         8   \n",
       "7   table      media_type      media_type         9   \n",
       "8   table        playlist        playlist        10   \n",
       "9   table  playlist_track  playlist_track        11   \n",
       "10  table           track           track        13   \n",
       "\n",
       "                                                  sql  \n",
       "0   CREATE TABLE [album]\\n(\\n    [album_id] INTEGE...  \n",
       "1   CREATE TABLE [artist]\\n(\\n    [artist_id] INTE...  \n",
       "2   CREATE TABLE [customer]\\n(\\n    [customer_id] ...  \n",
       "3   CREATE TABLE [employee]\\n(\\n    [employee_id] ...  \n",
       "4   CREATE TABLE [genre]\\n(\\n    [genre_id] INTEGE...  \n",
       "5   CREATE TABLE [invoice]\\n(\\n    [invoice_id] IN...  \n",
       "6   CREATE TABLE [invoice_line]\\n(\\n    [invoice_l...  \n",
       "7   CREATE TABLE [media_type]\\n(\\n    [media_type_...  \n",
       "8   CREATE TABLE [playlist]\\n(\\n    [playlist_id] ...  \n",
       "9   CREATE TABLE [playlist_track]\\n(\\n    [playlis...  \n",
       "10  CREATE TABLE [track]\\n(\\n    [track_id] INTEGE...  "
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Build connection\n",
    "conn=sqlite3.connect('chinook.db')\n",
    "# view all tables\n",
    "tables = pd.read_sql(\"\"\"SELECT *\n",
    "                        FROM sqlite_master\n",
    "                        WHERE type='table';\"\"\", conn)\n",
    "tables"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>track_id</th>\n",
       "      <th>track_name</th>\n",
       "      <th>track_type</th>\n",
       "      <th>unit_price</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3448</td>\n",
       "      <td>Lamentations of Jeremiah, First Set \\ Incipit ...</td>\n",
       "      <td>Protected AAC audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2560</td>\n",
       "      <td>Violent Pornography</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3336</td>\n",
       "      <td>War Pigs</td>\n",
       "      <td>Purchased AAC audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>829</td>\n",
       "      <td>Let's Get Rocked</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1872</td>\n",
       "      <td>Attitude</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>748</td>\n",
       "      <td>Dealer</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1778</td>\n",
       "      <td>You're What's Happening (In The World Today)</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2514</td>\n",
       "      <td>Spoonman</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   track_id                                         track_name  \\\n",
       "0      3448  Lamentations of Jeremiah, First Set \\ Incipit ...   \n",
       "1      2560                                Violent Pornography   \n",
       "2      3336                                           War Pigs   \n",
       "3       829                                   Let's Get Rocked   \n",
       "4      1872                                           Attitude   \n",
       "5       748                                             Dealer   \n",
       "6      1778       You're What's Happening (In The World Today)   \n",
       "7      2514                                           Spoonman   \n",
       "\n",
       "                 track_type  unit_price  quantity  \n",
       "0  Protected AAC audio file        0.99         1  \n",
       "1           MPEG audio file        0.99         1  \n",
       "2  Purchased AAC audio file        0.99         1  \n",
       "3           MPEG audio file        0.99         1  \n",
       "4           MPEG audio file        0.99         1  \n",
       "5           MPEG audio file        0.99         1  \n",
       "6           MPEG audio file        0.99         1  \n",
       "7           MPEG audio file        0.99         1  "
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Join three tables\n",
    "q_1=\"\"\"\n",
    "SELECT t.track_id, t.name track_name, m.name track_type, il.unit_price, il.quantity\n",
    "FROM invoice_line il\n",
    "JOIN track t \n",
    "ON t.track_id = il.track_id\n",
    "JOIN media_type m \n",
    "ON m.media_type_id=t.media_type_id\n",
    "WHERE il.invoice_id = 4\"\"\"\n",
    "\n",
    "table_1=pd.read_sql(q_1, conn)\n",
    "table_1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>track_id</th>\n",
       "      <th>track_name</th>\n",
       "      <th>artist_name</th>\n",
       "      <th>track_type</th>\n",
       "      <th>unit_price</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3448</td>\n",
       "      <td>Lamentations of Jeremiah, First Set \\ Incipit ...</td>\n",
       "      <td>The King's Singers</td>\n",
       "      <td>Protected AAC audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2560</td>\n",
       "      <td>Violent Pornography</td>\n",
       "      <td>System Of A Down</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3336</td>\n",
       "      <td>War Pigs</td>\n",
       "      <td>Cake</td>\n",
       "      <td>Purchased AAC audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>829</td>\n",
       "      <td>Let's Get Rocked</td>\n",
       "      <td>Def Leppard</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1872</td>\n",
       "      <td>Attitude</td>\n",
       "      <td>Metallica</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>748</td>\n",
       "      <td>Dealer</td>\n",
       "      <td>Deep Purple</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>1778</td>\n",
       "      <td>You're What's Happening (In The World Today)</td>\n",
       "      <td>Marvin Gaye</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>2514</td>\n",
       "      <td>Spoonman</td>\n",
       "      <td>Soundgarden</td>\n",
       "      <td>MPEG audio file</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   track_id                                         track_name  \\\n",
       "0      3448  Lamentations of Jeremiah, First Set \\ Incipit ...   \n",
       "1      2560                                Violent Pornography   \n",
       "2      3336                                           War Pigs   \n",
       "3       829                                   Let's Get Rocked   \n",
       "4      1872                                           Attitude   \n",
       "5       748                                             Dealer   \n",
       "6      1778       You're What's Happening (In The World Today)   \n",
       "7      2514                                           Spoonman   \n",
       "\n",
       "          artist_name                track_type  unit_price  quantity  \n",
       "0  The King's Singers  Protected AAC audio file        0.99         1  \n",
       "1    System Of A Down           MPEG audio file        0.99         1  \n",
       "2                Cake  Purchased AAC audio file        0.99         1  \n",
       "3         Def Leppard           MPEG audio file        0.99         1  \n",
       "4           Metallica           MPEG audio file        0.99         1  \n",
       "5         Deep Purple           MPEG audio file        0.99         1  \n",
       "6         Marvin Gaye           MPEG audio file        0.99         1  \n",
       "7         Soundgarden           MPEG audio file        0.99         1  "
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Join five tables\n",
    "q_2= \"\"\"\n",
    "SELECT\n",
    "    il.track_id,\n",
    "    t.name track_name,\n",
    "    ar.name artist_name,\n",
    "    mt.name track_type,\n",
    "    il.unit_price,\n",
    "    il.quantity\n",
    "FROM invoice_line il\n",
    "INNER JOIN track t ON t.track_id = il.track_id\n",
    "INNER JOIN media_type mt ON mt.media_type_id = t.media_type_id\n",
    "INNER JOIN album al ON al.album_id = t.album_id\n",
    "INNER JOIN artist ar on ar.artist_id = al.artist_id\n",
    "WHERE il.invoice_id = 4;\"\"\"\n",
    "\n",
    "table_2=pd.read_sql(q_2, conn)\n",
    "table_2"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.1 Lists the top 10 artists, calculated by the number of times a track by that artist has been purchased."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "invoice_line table contains each individual song from each customer purchase, it contains information about which songs are purchased the most. We can use the table to find out which artists are purchased the most. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3503\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>track_id</th>\n",
       "      <th>artist_name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>AC/DC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>6</td>\n",
       "      <td>AC/DC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>7</td>\n",
       "      <td>AC/DC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8</td>\n",
       "      <td>AC/DC</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9</td>\n",
       "      <td>AC/DC</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   track_id artist_name\n",
       "0         1       AC/DC\n",
       "1         6       AC/DC\n",
       "2         7       AC/DC\n",
       "3         8       AC/DC\n",
       "4         9       AC/DC"
      ]
     },
     "execution_count": 13,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub_q = \"\"\"SELECT\n",
    "                t.track_id,\n",
    "                ar.name artist_name    \n",
    "            FROM track t\n",
    "            INNER JOIN album al ON al.album_id = t.album_id\n",
    "            INNER JOIN artist ar ON ar.artist_id = al.artist_id\"\"\"\n",
    "sub_table=pd.read_sql(sub_q, conn)\n",
    "print(len(sub_table))\n",
    "sub_table.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "4757\n",
      "130\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>artist</th>\n",
       "      <th>quantity</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Guns N' Roses</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Guns N' Roses</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Guns N' Roses</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Guns N' Roses</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Guns N' Roses</td>\n",
       "      <td>1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          artist  quantity\n",
       "0  Guns N' Roses         1\n",
       "1  Guns N' Roses         1\n",
       "2  Guns N' Roses         1\n",
       "3  Guns N' Roses         1\n",
       "4  Guns N' Roses         1"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q_3= \"\"\"\n",
    "SELECT\n",
    "    ta.artist_name artist, il.quantity\n",
    "FROM invoice_line il\n",
    "INNER JOIN (\n",
    "            SELECT\n",
    "                t.track_id,\n",
    "                ar.name artist_name\n",
    "            FROM track t\n",
    "            INNER JOIN album al ON al.album_id = t.album_id\n",
    "            INNER JOIN artist ar ON ar.artist_id = al.artist_id\n",
    "           ) ta\n",
    "ON ta.track_id = il.track_id;\"\"\"\n",
    "\n",
    "table_3=pd.read_sql(q_3, conn)\n",
    "print(len(table_3))\n",
    "print(table_3.artist.nunique())\n",
    "table_3.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "130\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>artist</th>\n",
       "      <th>tracks_purchased</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Queen</td>\n",
       "      <td>192</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Jimi Hendrix</td>\n",
       "      <td>187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Nirvana</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Red Hot Chili Peppers</td>\n",
       "      <td>130</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Pearl Jam</td>\n",
       "      <td>129</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>AC/DC</td>\n",
       "      <td>124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Guns N' Roses</td>\n",
       "      <td>124</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Foo Fighters</td>\n",
       "      <td>121</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>The Rolling Stones</td>\n",
       "      <td>117</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>Metallica</td>\n",
       "      <td>106</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  artist  tracks_purchased\n",
       "0                  Queen               192\n",
       "1           Jimi Hendrix               187\n",
       "2                Nirvana               130\n",
       "3  Red Hot Chili Peppers               130\n",
       "4              Pearl Jam               129\n",
       "5                  AC/DC               124\n",
       "6          Guns N' Roses               124\n",
       "7           Foo Fighters               121\n",
       "8     The Rolling Stones               117\n",
       "9              Metallica               106"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Join table with subquery and group by\n",
    "q_4=\"\"\"\n",
    "SELECT\n",
    "    ta.artist_name artist,\n",
    "    COUNT(*) tracks_purchased\n",
    "FROM invoice_line il\n",
    "INNER JOIN (\n",
    "            SELECT\n",
    "                t.track_id,\n",
    "                ar.name artist_name\n",
    "            FROM track t\n",
    "            INNER JOIN album al ON al.album_id = t.album_id\n",
    "            INNER JOIN artist ar ON ar.artist_id = al.artist_id\n",
    "           ) ta\n",
    "           ON ta.track_id = il.track_id\n",
    "GROUP BY 1\n",
    "ORDER BY 2 DESC;\"\"\"\n",
    "\n",
    "table_4 = pd.read_sql(q_4, conn)\n",
    "print(len(table_4))\n",
    "table_4.head(10)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.2 Write a query that returns the top 5 albums, as calculated by the number of times a track from that album has been purchased."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "251\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>album</th>\n",
       "      <th>artist</th>\n",
       "      <th>tracks_purchased</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Are You Experienced?</td>\n",
       "      <td>Jimi Hendrix</td>\n",
       "      <td>187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Faceless</td>\n",
       "      <td>Godsmack</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Mezmerize</td>\n",
       "      <td>System Of A Down</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Get Born</td>\n",
       "      <td>JET</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>The Doors</td>\n",
       "      <td>The Doors</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  album            artist  tracks_purchased\n",
       "0  Are You Experienced?      Jimi Hendrix               187\n",
       "1              Faceless          Godsmack                96\n",
       "2             Mezmerize  System Of A Down                93\n",
       "3              Get Born               JET                90\n",
       "4             The Doors         The Doors                83"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q_5=\"\"\"\n",
    "SELECT al.title album, ar.name artist, COUNT(*) tracks_purchased\n",
    "FROM invoice_line il\n",
    "JOIN track t ON t.track_id=il.track_id\n",
    "JOIN album al ON al.album_id= t.album_id\n",
    "JOIN artist ar ON ar.artist_id=al.artist_id\n",
    "GROUP BY 1\n",
    "ORDER BY 3 DESC;\"\"\"\n",
    "\n",
    "table_5 = pd.read_sql(q_5, conn)\n",
    "print(len(table_5))\n",
    "table_5.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "251\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>album</th>\n",
       "      <th>artist</th>\n",
       "      <th>tracks_purchased</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Are You Experienced?</td>\n",
       "      <td>Jimi Hendrix</td>\n",
       "      <td>187</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Faceless</td>\n",
       "      <td>Godsmack</td>\n",
       "      <td>96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Mezmerize</td>\n",
       "      <td>System Of A Down</td>\n",
       "      <td>93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Get Born</td>\n",
       "      <td>JET</td>\n",
       "      <td>90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>The Doors</td>\n",
       "      <td>The Doors</td>\n",
       "      <td>83</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "                  album            artist  tracks_purchased\n",
       "0  Are You Experienced?      Jimi Hendrix               187\n",
       "1              Faceless          Godsmack                96\n",
       "2             Mezmerize  System Of A Down                93\n",
       "3              Get Born               JET                90\n",
       "4             The Doors         The Doors                83"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# same as q_5\n",
    "q_6=\"\"\"\n",
    "SELECT\n",
    "    ta.album_title album,\n",
    "    ta.artist_name artist,\n",
    "    COUNT(*) tracks_purchased\n",
    "FROM invoice_line il\n",
    "INNER JOIN (\n",
    "            SELECT\n",
    "                t.track_id,\n",
    "                al.title album_title,\n",
    "                ar.name artist_name\n",
    "            FROM track t\n",
    "            INNER JOIN album al ON al.album_id = t.album_id\n",
    "            INNER JOIN artist ar ON ar.artist_id = al.artist_id\n",
    "           ) ta\n",
    "           ON ta.track_id = il.track_id\n",
    "GROUP BY 1, 2\n",
    "ORDER BY 3 DESC;\"\"\"\n",
    "\n",
    "table_6 = pd.read_sql(q_6, conn)\n",
    "print(len(table_6))\n",
    "table_6.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 2.3 Recursive Joins"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>employee_id</th>\n",
       "      <th>last_name</th>\n",
       "      <th>first_name</th>\n",
       "      <th>title</th>\n",
       "      <th>reports_to</th>\n",
       "      <th>birthdate</th>\n",
       "      <th>hire_date</th>\n",
       "      <th>address</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>country</th>\n",
       "      <th>postal_code</th>\n",
       "      <th>phone</th>\n",
       "      <th>fax</th>\n",
       "      <th>email</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Adams</td>\n",
       "      <td>Andrew</td>\n",
       "      <td>General Manager</td>\n",
       "      <td>NaN</td>\n",
       "      <td>1962-02-18 00:00:00</td>\n",
       "      <td>2016-08-14 00:00:00</td>\n",
       "      <td>11120 Jasper Ave NW</td>\n",
       "      <td>Edmonton</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T5K 2N1</td>\n",
       "      <td>+1 (780) 428-9482</td>\n",
       "      <td>+1 (780) 428-3457</td>\n",
       "      <td>andrew@chinookcorp.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Edwards</td>\n",
       "      <td>Nancy</td>\n",
       "      <td>Sales Manager</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1958-12-08 00:00:00</td>\n",
       "      <td>2016-05-01 00:00:00</td>\n",
       "      <td>825 8 Ave SW</td>\n",
       "      <td>Calgary</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T2P 2T3</td>\n",
       "      <td>+1 (403) 262-3443</td>\n",
       "      <td>+1 (403) 262-3322</td>\n",
       "      <td>nancy@chinookcorp.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>Peacock</td>\n",
       "      <td>Jane</td>\n",
       "      <td>Sales Support Agent</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1973-08-29 00:00:00</td>\n",
       "      <td>2017-04-01 00:00:00</td>\n",
       "      <td>1111 6 Ave SW</td>\n",
       "      <td>Calgary</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T2P 5M5</td>\n",
       "      <td>+1 (403) 262-3443</td>\n",
       "      <td>+1 (403) 262-6712</td>\n",
       "      <td>jane@chinookcorp.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Park</td>\n",
       "      <td>Margaret</td>\n",
       "      <td>Sales Support Agent</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1947-09-19 00:00:00</td>\n",
       "      <td>2017-05-03 00:00:00</td>\n",
       "      <td>683 10 Street SW</td>\n",
       "      <td>Calgary</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T2P 5G3</td>\n",
       "      <td>+1 (403) 263-4423</td>\n",
       "      <td>+1 (403) 263-4289</td>\n",
       "      <td>margaret@chinookcorp.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>Johnson</td>\n",
       "      <td>Steve</td>\n",
       "      <td>Sales Support Agent</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1965-03-03 00:00:00</td>\n",
       "      <td>2017-10-17 00:00:00</td>\n",
       "      <td>7727B 41 Ave</td>\n",
       "      <td>Calgary</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T3B 1Y7</td>\n",
       "      <td>1 (780) 836-9987</td>\n",
       "      <td>1 (780) 836-9543</td>\n",
       "      <td>steve@chinookcorp.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>Mitchell</td>\n",
       "      <td>Michael</td>\n",
       "      <td>IT Manager</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1973-07-01 00:00:00</td>\n",
       "      <td>2016-10-17 00:00:00</td>\n",
       "      <td>5827 Bowness Road NW</td>\n",
       "      <td>Calgary</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T3B 0C5</td>\n",
       "      <td>+1 (403) 246-9887</td>\n",
       "      <td>+1 (403) 246-9899</td>\n",
       "      <td>michael@chinookcorp.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>King</td>\n",
       "      <td>Robert</td>\n",
       "      <td>IT Staff</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1970-05-29 00:00:00</td>\n",
       "      <td>2017-01-02 00:00:00</td>\n",
       "      <td>590 Columbia Boulevard West</td>\n",
       "      <td>Lethbridge</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T1K 5N8</td>\n",
       "      <td>+1 (403) 456-9986</td>\n",
       "      <td>+1 (403) 456-8485</td>\n",
       "      <td>robert@chinookcorp.com</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>Callahan</td>\n",
       "      <td>Laura</td>\n",
       "      <td>IT Staff</td>\n",
       "      <td>6.0</td>\n",
       "      <td>1968-01-09 00:00:00</td>\n",
       "      <td>2017-03-04 00:00:00</td>\n",
       "      <td>923 7 ST NW</td>\n",
       "      <td>Lethbridge</td>\n",
       "      <td>AB</td>\n",
       "      <td>Canada</td>\n",
       "      <td>T1H 1Y8</td>\n",
       "      <td>+1 (403) 467-3351</td>\n",
       "      <td>+1 (403) 467-8772</td>\n",
       "      <td>laura@chinookcorp.com</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   employee_id last_name first_name                title  reports_to  \\\n",
       "0            1     Adams     Andrew      General Manager         NaN   \n",
       "1            2   Edwards      Nancy        Sales Manager         1.0   \n",
       "2            3   Peacock       Jane  Sales Support Agent         2.0   \n",
       "3            4      Park   Margaret  Sales Support Agent         2.0   \n",
       "4            5   Johnson      Steve  Sales Support Agent         2.0   \n",
       "5            6  Mitchell    Michael           IT Manager         1.0   \n",
       "6            7      King     Robert             IT Staff         6.0   \n",
       "7            8  Callahan      Laura             IT Staff         6.0   \n",
       "\n",
       "             birthdate            hire_date                      address  \\\n",
       "0  1962-02-18 00:00:00  2016-08-14 00:00:00          11120 Jasper Ave NW   \n",
       "1  1958-12-08 00:00:00  2016-05-01 00:00:00                 825 8 Ave SW   \n",
       "2  1973-08-29 00:00:00  2017-04-01 00:00:00                1111 6 Ave SW   \n",
       "3  1947-09-19 00:00:00  2017-05-03 00:00:00             683 10 Street SW   \n",
       "4  1965-03-03 00:00:00  2017-10-17 00:00:00                 7727B 41 Ave   \n",
       "5  1973-07-01 00:00:00  2016-10-17 00:00:00         5827 Bowness Road NW   \n",
       "6  1970-05-29 00:00:00  2017-01-02 00:00:00  590 Columbia Boulevard West   \n",
       "7  1968-01-09 00:00:00  2017-03-04 00:00:00                  923 7 ST NW   \n",
       "\n",
       "         city state country postal_code              phone                fax  \\\n",
       "0    Edmonton    AB  Canada     T5K 2N1  +1 (780) 428-9482  +1 (780) 428-3457   \n",
       "1     Calgary    AB  Canada     T2P 2T3  +1 (403) 262-3443  +1 (403) 262-3322   \n",
       "2     Calgary    AB  Canada     T2P 5M5  +1 (403) 262-3443  +1 (403) 262-6712   \n",
       "3     Calgary    AB  Canada     T2P 5G3  +1 (403) 263-4423  +1 (403) 263-4289   \n",
       "4     Calgary    AB  Canada     T3B 1Y7   1 (780) 836-9987   1 (780) 836-9543   \n",
       "5     Calgary    AB  Canada     T3B 0C5  +1 (403) 246-9887  +1 (403) 246-9899   \n",
       "6  Lethbridge    AB  Canada     T1K 5N8  +1 (403) 456-9986  +1 (403) 456-8485   \n",
       "7  Lethbridge    AB  Canada     T1H 1Y8  +1 (403) 467-3351  +1 (403) 467-8772   \n",
       "\n",
       "                      email  \n",
       "0    andrew@chinookcorp.com  \n",
       "1     nancy@chinookcorp.com  \n",
       "2      jane@chinookcorp.com  \n",
       "3  margaret@chinookcorp.com  \n",
       "4     steve@chinookcorp.com  \n",
       "5   michael@chinookcorp.com  \n",
       "6    robert@chinookcorp.com  \n",
       "7     laura@chinookcorp.com  "
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# view the table employee\n",
    "q_7=\"\"\"\n",
    "SELECT *\n",
    "FROM employee\"\"\"\n",
    "\n",
    "table_7=pd.read_sql(q_7, conn)\n",
    "table_7"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>employee_name</th>\n",
       "      <th>employee_title</th>\n",
       "      <th>supervisor_name</th>\n",
       "      <th>supervisor_title</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Andrew Adams</td>\n",
       "      <td>General Manager</td>\n",
       "      <td>None</td>\n",
       "      <td>None</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Jane Peacock</td>\n",
       "      <td>Sales Support Agent</td>\n",
       "      <td>Nancy Edwards</td>\n",
       "      <td>Sales Manager</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Laura Callahan</td>\n",
       "      <td>IT Staff</td>\n",
       "      <td>Michael Mitchell</td>\n",
       "      <td>IT Manager</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Margaret Park</td>\n",
       "      <td>Sales Support Agent</td>\n",
       "      <td>Nancy Edwards</td>\n",
       "      <td>Sales Manager</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Michael Mitchell</td>\n",
       "      <td>IT Manager</td>\n",
       "      <td>Andrew Adams</td>\n",
       "      <td>General Manager</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>Nancy Edwards</td>\n",
       "      <td>Sales Manager</td>\n",
       "      <td>Andrew Adams</td>\n",
       "      <td>General Manager</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>Robert King</td>\n",
       "      <td>IT Staff</td>\n",
       "      <td>Michael Mitchell</td>\n",
       "      <td>IT Manager</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>Steve Johnson</td>\n",
       "      <td>Sales Support Agent</td>\n",
       "      <td>Nancy Edwards</td>\n",
       "      <td>Sales Manager</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "      employee_name       employee_title   supervisor_name supervisor_title\n",
       "0      Andrew Adams      General Manager              None             None\n",
       "1      Jane Peacock  Sales Support Agent     Nancy Edwards    Sales Manager\n",
       "2    Laura Callahan             IT Staff  Michael Mitchell       IT Manager\n",
       "3     Margaret Park  Sales Support Agent     Nancy Edwards    Sales Manager\n",
       "4  Michael Mitchell           IT Manager      Andrew Adams  General Manager\n",
       "5     Nancy Edwards        Sales Manager      Andrew Adams  General Manager\n",
       "6       Robert King             IT Staff  Michael Mitchell       IT Manager\n",
       "7     Steve Johnson  Sales Support Agent     Nancy Edwards    Sales Manager"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# Recursive Joins\n",
    "q_8=\"\"\"\n",
    "SELECT\n",
    "    e1.first_name || \" \" || e1.last_name employee_name,\n",
    "    e1.title employee_title,\n",
    "    e2.first_name || \" \" || e2.last_name supervisor_name,\n",
    "    e2.title supervisor_title\n",
    "FROM employee e1\n",
    "LEFT JOIN employee e2 ON e1.reports_to = e2.employee_id\n",
    "ORDER BY 1;\"\"\"\n",
    "\n",
    "table_8=pd.read_sql(q_8, conn)\n",
    "table_8"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>invoice_id</th>\n",
       "      <th>customer_id</th>\n",
       "      <th>invoice_date</th>\n",
       "      <th>billing_address</th>\n",
       "      <th>billing_city</th>\n",
       "      <th>billing_state</th>\n",
       "      <th>billing_country</th>\n",
       "      <th>billing_postal_code</th>\n",
       "      <th>total</th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>country</th>\n",
       "      <th>postal_code</th>\n",
       "      <th>phone</th>\n",
       "      <th>fax</th>\n",
       "      <th>email</th>\n",
       "      <th>support_rep_id</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>18</td>\n",
       "      <td>2017-01-03 00:00:00</td>\n",
       "      <td>627 Broadway</td>\n",
       "      <td>New York</td>\n",
       "      <td>NY</td>\n",
       "      <td>USA</td>\n",
       "      <td>10012-2612</td>\n",
       "      <td>15.84</td>\n",
       "      <td>18</td>\n",
       "      <td>Michelle</td>\n",
       "      <td>Brooks</td>\n",
       "      <td>None</td>\n",
       "      <td>627 Broadway</td>\n",
       "      <td>New York</td>\n",
       "      <td>NY</td>\n",
       "      <td>USA</td>\n",
       "      <td>10012-2612</td>\n",
       "      <td>+1 (212) 221-3546</td>\n",
       "      <td>+1 (212) 221-4679</td>\n",
       "      <td>michelleb@aol.com</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>2017-01-03 00:00:00</td>\n",
       "      <td>230 Elgin Street</td>\n",
       "      <td>Ottawa</td>\n",
       "      <td>ON</td>\n",
       "      <td>Canada</td>\n",
       "      <td>K2P 1L7</td>\n",
       "      <td>9.90</td>\n",
       "      <td>30</td>\n",
       "      <td>Edward</td>\n",
       "      <td>Francis</td>\n",
       "      <td>None</td>\n",
       "      <td>230 Elgin Street</td>\n",
       "      <td>Ottawa</td>\n",
       "      <td>ON</td>\n",
       "      <td>Canada</td>\n",
       "      <td>K2P 1L7</td>\n",
       "      <td>+1 (613) 234-3322</td>\n",
       "      <td>None</td>\n",
       "      <td>edfrancis@yachoo.ca</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>40</td>\n",
       "      <td>2017-01-05 00:00:00</td>\n",
       "      <td>8, Rue Hanovre</td>\n",
       "      <td>Paris</td>\n",
       "      <td>None</td>\n",
       "      <td>France</td>\n",
       "      <td>75002</td>\n",
       "      <td>1.98</td>\n",
       "      <td>40</td>\n",
       "      <td>Dominique</td>\n",
       "      <td>Lefebvre</td>\n",
       "      <td>None</td>\n",
       "      <td>8, Rue Hanovre</td>\n",
       "      <td>Paris</td>\n",
       "      <td>None</td>\n",
       "      <td>France</td>\n",
       "      <td>75002</td>\n",
       "      <td>+33 01 47 42 71 71</td>\n",
       "      <td>None</td>\n",
       "      <td>dominiquelefebvre@gmail.com</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>18</td>\n",
       "      <td>2017-01-06 00:00:00</td>\n",
       "      <td>627 Broadway</td>\n",
       "      <td>New York</td>\n",
       "      <td>NY</td>\n",
       "      <td>USA</td>\n",
       "      <td>10012-2612</td>\n",
       "      <td>7.92</td>\n",
       "      <td>18</td>\n",
       "      <td>Michelle</td>\n",
       "      <td>Brooks</td>\n",
       "      <td>None</td>\n",
       "      <td>627 Broadway</td>\n",
       "      <td>New York</td>\n",
       "      <td>NY</td>\n",
       "      <td>USA</td>\n",
       "      <td>10012-2612</td>\n",
       "      <td>+1 (212) 221-3546</td>\n",
       "      <td>+1 (212) 221-4679</td>\n",
       "      <td>michelleb@aol.com</td>\n",
       "      <td>3</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>27</td>\n",
       "      <td>2017-01-07 00:00:00</td>\n",
       "      <td>1033 N Park Ave</td>\n",
       "      <td>Tucson</td>\n",
       "      <td>AZ</td>\n",
       "      <td>USA</td>\n",
       "      <td>85719</td>\n",
       "      <td>16.83</td>\n",
       "      <td>27</td>\n",
       "      <td>Patrick</td>\n",
       "      <td>Gray</td>\n",
       "      <td>None</td>\n",
       "      <td>1033 N Park Ave</td>\n",
       "      <td>Tucson</td>\n",
       "      <td>AZ</td>\n",
       "      <td>USA</td>\n",
       "      <td>85719</td>\n",
       "      <td>+1 (520) 622-4200</td>\n",
       "      <td>None</td>\n",
       "      <td>patrick.gray@aol.com</td>\n",
       "      <td>4</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   invoice_id  customer_id         invoice_date   billing_address  \\\n",
       "0           1           18  2017-01-03 00:00:00      627 Broadway   \n",
       "1           2           30  2017-01-03 00:00:00  230 Elgin Street   \n",
       "2           3           40  2017-01-05 00:00:00    8, Rue Hanovre   \n",
       "3           4           18  2017-01-06 00:00:00      627 Broadway   \n",
       "4           5           27  2017-01-07 00:00:00   1033 N Park Ave   \n",
       "\n",
       "  billing_city billing_state billing_country billing_postal_code  total  \\\n",
       "0     New York            NY             USA          10012-2612  15.84   \n",
       "1       Ottawa            ON          Canada             K2P 1L7   9.90   \n",
       "2        Paris          None          France               75002   1.98   \n",
       "3     New York            NY             USA          10012-2612   7.92   \n",
       "4       Tucson            AZ             USA               85719  16.83   \n",
       "\n",
       "   customer_id first_name last_name company           address      city state  \\\n",
       "0           18   Michelle    Brooks    None      627 Broadway  New York    NY   \n",
       "1           30     Edward   Francis    None  230 Elgin Street    Ottawa    ON   \n",
       "2           40  Dominique  Lefebvre    None    8, Rue Hanovre     Paris  None   \n",
       "3           18   Michelle    Brooks    None      627 Broadway  New York    NY   \n",
       "4           27    Patrick      Gray    None   1033 N Park Ave    Tucson    AZ   \n",
       "\n",
       "  country postal_code               phone                fax  \\\n",
       "0     USA  10012-2612   +1 (212) 221-3546  +1 (212) 221-4679   \n",
       "1  Canada     K2P 1L7   +1 (613) 234-3322               None   \n",
       "2  France       75002  +33 01 47 42 71 71               None   \n",
       "3     USA  10012-2612   +1 (212) 221-3546  +1 (212) 221-4679   \n",
       "4     USA       85719   +1 (520) 622-4200               None   \n",
       "\n",
       "                         email  support_rep_id  \n",
       "0            michelleb@aol.com               3  \n",
       "1          edfrancis@yachoo.ca               3  \n",
       "2  dominiquelefebvre@gmail.com               4  \n",
       "3            michelleb@aol.com               3  \n",
       "4         patrick.gray@aol.com               4  "
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub=\"\"\"\n",
    "SELECT *\n",
    "FROM invoice i\n",
    "JOIN customer c\n",
    "ON c.customer_id = i.customer_id;\"\"\"\n",
    "customer_invoice = pd.read_sql(sub, conn)\n",
    "# to avoid truncated output \n",
    "pd.options.display.max_columns = 150 \n",
    "customer_invoice.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_name</th>\n",
       "      <th>number_of_purchases</th>\n",
       "      <th>total_spent</th>\n",
       "      <th>customer_category</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>Aaron Mitchell</td>\n",
       "      <td>8</td>\n",
       "      <td>70.29</td>\n",
       "      <td>regular</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>Alexandre Rocha</td>\n",
       "      <td>10</td>\n",
       "      <td>69.30</td>\n",
       "      <td>regular</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>Astrid Gruber</td>\n",
       "      <td>9</td>\n",
       "      <td>69.30</td>\n",
       "      <td>regular</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>Bjørn Hansen</td>\n",
       "      <td>9</td>\n",
       "      <td>72.27</td>\n",
       "      <td>regular</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>Camille Bernard</td>\n",
       "      <td>9</td>\n",
       "      <td>79.20</td>\n",
       "      <td>regular</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     customer_name  number_of_purchases  total_spent customer_category\n",
       "0   Aaron Mitchell                    8        70.29           regular\n",
       "1  Alexandre Rocha                   10        69.30           regular\n",
       "2    Astrid Gruber                    9        69.30           regular\n",
       "3     Bjørn Hansen                    9        72.27           regular\n",
       "4  Camille Bernard                    9        79.20           regular"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# join customer and invoice tabel, groupby customer name and create a new column\n",
    "q_9=\"\"\"\n",
    "SELECT\n",
    "   c.first_name || \" \" || c.last_name customer_name,\n",
    "   COUNT(i.invoice_id) number_of_purchases,\n",
    "   SUM(i.total) total_spent,\n",
    "   CASE\n",
    "       WHEN sum(i.total) < 40 THEN 'small spender'\n",
    "       WHEN sum(i.total) > 100 THEN 'big spender'\n",
    "       ELSE 'regular'\n",
    "       END\n",
    "       AS customer_category\n",
    "FROM invoice i\n",
    "INNER JOIN customer c ON i.customer_id = c.customer_id\n",
    "GROUP BY 1 ORDER BY 1;\"\"\"\n",
    "\n",
    "table_9=pd.read_sql(q_9, conn)\n",
    "table_9.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## 3.Building and Organizing Complex Queries"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "3503\n",
      "14\n"
     ]
    }
   ],
   "source": [
    "# view the table playlist\n",
    "q=\"\"\"\n",
    "SELECT *\n",
    "FROM playlist_track\"\"\"\n",
    "playlist_track=pd.read_sql(q,conn)\n",
    "print(playlist_track.track_id.nunique())\n",
    "print(playlist_track.playlist_id.nunique())\n",
    "# total length of playlist is 18, means not all playlist is in playlist_track."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8715\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>track_id</th>\n",
       "      <th>name</th>\n",
       "      <th>album_id</th>\n",
       "      <th>media_type_id</th>\n",
       "      <th>genre_id</th>\n",
       "      <th>composer</th>\n",
       "      <th>milliseconds</th>\n",
       "      <th>bytes</th>\n",
       "      <th>unit_price</th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>track_id</th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>3402</td>\n",
       "      <td>Band Members Discuss Tracks from \"Revelations\"</td>\n",
       "      <td>271</td>\n",
       "      <td>3</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>294294</td>\n",
       "      <td>61118891</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "      <td>3402</td>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3389</td>\n",
       "      <td>Revelations</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>252376</td>\n",
       "      <td>4111051</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "      <td>3389</td>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3390</td>\n",
       "      <td>One and the Same</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>217732</td>\n",
       "      <td>3559040</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "      <td>3390</td>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>3391</td>\n",
       "      <td>Sound of a Gun</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>260154</td>\n",
       "      <td>4234990</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "      <td>3391</td>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>3392</td>\n",
       "      <td>Until We Fall</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>230758</td>\n",
       "      <td>3766605</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "      <td>3392</td>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   track_id                                            name  album_id  \\\n",
       "0      3402  Band Members Discuss Tracks from \"Revelations\"       271   \n",
       "1      3389                                     Revelations       271   \n",
       "2      3390                                One and the Same       271   \n",
       "3      3391                                  Sound of a Gun       271   \n",
       "4      3392                                   Until We Fall       271   \n",
       "\n",
       "   media_type_id  genre_id composer  milliseconds     bytes  unit_price  \\\n",
       "0              3        23     None        294294  61118891        0.99   \n",
       "1              2        23     None        252376   4111051        0.99   \n",
       "2              2        23     None        217732   3559040        0.99   \n",
       "3              2        23     None        260154   4234990        0.99   \n",
       "4              2        23     None        230758   3766605        0.99   \n",
       "\n",
       "   playlist_id  track_id  playlist_id   name  \n",
       "0            1      3402            1  Music  \n",
       "1            1      3389            1  Music  \n",
       "2            1      3390            1  Music  \n",
       "3            1      3391            1  Music  \n",
       "4            1      3392            1  Music  "
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# (inner) join three tables \n",
    "join_from_track=\"\"\"\n",
    "SELECT *  \n",
    " FROM track t\n",
    " JOIN playlist_track pt ON t.track_id=pt.track_id\n",
    " JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n",
    "\n",
    "join_from_track=pd.read_sql(join_from_track,conn)\n",
    "print(len(join_from_track))\n",
    "join_from_track.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8715\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>name</th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>track_id</th>\n",
       "      <th>track_id</th>\n",
       "      <th>name</th>\n",
       "      <th>album_id</th>\n",
       "      <th>media_type_id</th>\n",
       "      <th>genre_id</th>\n",
       "      <th>composer</th>\n",
       "      <th>milliseconds</th>\n",
       "      <th>bytes</th>\n",
       "      <th>unit_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1</td>\n",
       "      <td>3402</td>\n",
       "      <td>3402</td>\n",
       "      <td>Band Members Discuss Tracks from \"Revelations\"</td>\n",
       "      <td>271</td>\n",
       "      <td>3</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>294294</td>\n",
       "      <td>61118891</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1</td>\n",
       "      <td>3389</td>\n",
       "      <td>3389</td>\n",
       "      <td>Revelations</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>252376</td>\n",
       "      <td>4111051</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1</td>\n",
       "      <td>3390</td>\n",
       "      <td>3390</td>\n",
       "      <td>One and the Same</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>217732</td>\n",
       "      <td>3559040</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1</td>\n",
       "      <td>3391</td>\n",
       "      <td>3391</td>\n",
       "      <td>Sound of a Gun</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>260154</td>\n",
       "      <td>4234990</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1</td>\n",
       "      <td>3392</td>\n",
       "      <td>3392</td>\n",
       "      <td>Until We Fall</td>\n",
       "      <td>271</td>\n",
       "      <td>2</td>\n",
       "      <td>23</td>\n",
       "      <td>None</td>\n",
       "      <td>230758</td>\n",
       "      <td>3766605</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   playlist_id   name  playlist_id  track_id  track_id  \\\n",
       "0            1  Music            1      3402      3402   \n",
       "1            1  Music            1      3389      3389   \n",
       "2            1  Music            1      3390      3390   \n",
       "3            1  Music            1      3391      3391   \n",
       "4            1  Music            1      3392      3392   \n",
       "\n",
       "                                             name  album_id  media_type_id  \\\n",
       "0  Band Members Discuss Tracks from \"Revelations\"       271              3   \n",
       "1                                     Revelations       271              2   \n",
       "2                                One and the Same       271              2   \n",
       "3                                  Sound of a Gun       271              2   \n",
       "4                                   Until We Fall       271              2   \n",
       "\n",
       "   genre_id composer  milliseconds     bytes  unit_price  \n",
       "0        23     None        294294  61118891        0.99  \n",
       "1        23     None        252376   4111051        0.99  \n",
       "2        23     None        217732   3559040        0.99  \n",
       "3        23     None        260154   4234990        0.99  \n",
       "4        23     None        230758   3766605        0.99  "
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# same as above, this is to show, for (inner) join, table order does not matter\n",
    "join_from_playlist=\"\"\"\n",
    "SELECT *  \n",
    " FROM playlist p\n",
    " JOIN playlist_track pt ON p.playlist_id=pt.playlist_id\n",
    " JOIN track t ON t.track_id=pt.track_id;\"\"\"\n",
    "\n",
    "join_from_playlist=pd.read_sql(join_from_playlist,conn)\n",
    "print(len(join_from_playlist))\n",
    "join_from_playlist.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8715\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>track_id</th>\n",
       "      <th>name</th>\n",
       "      <th>album_id</th>\n",
       "      <th>media_type_id</th>\n",
       "      <th>genre_id</th>\n",
       "      <th>composer</th>\n",
       "      <th>milliseconds</th>\n",
       "      <th>bytes</th>\n",
       "      <th>unit_price</th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>track_id</th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>name</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Angus Young, Malcolm Young, Brian Johnson</td>\n",
       "      <td>343719</td>\n",
       "      <td>11170334</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Angus Young, Malcolm Young, Brian Johnson</td>\n",
       "      <td>343719</td>\n",
       "      <td>11170334</td>\n",
       "      <td>0.99</td>\n",
       "      <td>8</td>\n",
       "      <td>1</td>\n",
       "      <td>8</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>1</td>\n",
       "      <td>Angus Young, Malcolm Young, Brian Johnson</td>\n",
       "      <td>343719</td>\n",
       "      <td>11170334</td>\n",
       "      <td>0.99</td>\n",
       "      <td>17</td>\n",
       "      <td>1</td>\n",
       "      <td>17</td>\n",
       "      <td>Heavy Metal Classic</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>2</td>\n",
       "      <td>Balls to the Wall</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>None</td>\n",
       "      <td>342562</td>\n",
       "      <td>5510424</td>\n",
       "      <td>0.99</td>\n",
       "      <td>1</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>2</td>\n",
       "      <td>Balls to the Wall</td>\n",
       "      <td>2</td>\n",
       "      <td>2</td>\n",
       "      <td>1</td>\n",
       "      <td>None</td>\n",
       "      <td>342562</td>\n",
       "      <td>5510424</td>\n",
       "      <td>0.99</td>\n",
       "      <td>8</td>\n",
       "      <td>2</td>\n",
       "      <td>8</td>\n",
       "      <td>Music</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   track_id                                     name  album_id  media_type_id  \\\n",
       "0         1  For Those About To Rock (We Salute You)         1              1   \n",
       "1         1  For Those About To Rock (We Salute You)         1              1   \n",
       "2         1  For Those About To Rock (We Salute You)         1              1   \n",
       "3         2                        Balls to the Wall         2              2   \n",
       "4         2                        Balls to the Wall         2              2   \n",
       "\n",
       "   genre_id                                   composer  milliseconds  \\\n",
       "0         1  Angus Young, Malcolm Young, Brian Johnson        343719   \n",
       "1         1  Angus Young, Malcolm Young, Brian Johnson        343719   \n",
       "2         1  Angus Young, Malcolm Young, Brian Johnson        343719   \n",
       "3         1                                       None        342562   \n",
       "4         1                                       None        342562   \n",
       "\n",
       "      bytes  unit_price  playlist_id  track_id  playlist_id  \\\n",
       "0  11170334        0.99            1         1            1   \n",
       "1  11170334        0.99            8         1            8   \n",
       "2  11170334        0.99           17         1           17   \n",
       "3   5510424        0.99            1         2            1   \n",
       "4   5510424        0.99            8         2            8   \n",
       "\n",
       "                  name  \n",
       "0                Music  \n",
       "1                Music  \n",
       "2  Heavy Metal Classic  \n",
       "3                Music  \n",
       "4                Music  "
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# left join 3 tables\n",
    "LEFT_join_from_track=\"\"\"\n",
    "SELECT *  \n",
    " FROM track t\n",
    " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n",
    " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n",
    "\n",
    "LEFT_join_from_track=pd.read_sql(LEFT_join_from_track,conn)\n",
    "print(len(LEFT_join_from_track))\n",
    "LEFT_join_from_track.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8719\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>name</th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>track_id</th>\n",
       "      <th>track_id</th>\n",
       "      <th>name</th>\n",
       "      <th>album_id</th>\n",
       "      <th>media_type_id</th>\n",
       "      <th>genre_id</th>\n",
       "      <th>composer</th>\n",
       "      <th>milliseconds</th>\n",
       "      <th>bytes</th>\n",
       "      <th>unit_price</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Angus Young, Malcolm Young, Brian Johnson</td>\n",
       "      <td>343719.0</td>\n",
       "      <td>11170334.0</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>Balls to the Wall</td>\n",
       "      <td>2.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>None</td>\n",
       "      <td>342562.0</td>\n",
       "      <td>5510424.0</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>3.0</td>\n",
       "      <td>Fast As a Shark</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>F. Baltes, S. Kaufman, U. Dirkscneider &amp; W. Ho...</td>\n",
       "      <td>230619.0</td>\n",
       "      <td>3990994.0</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>4.0</td>\n",
       "      <td>Restless and Wild</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...</td>\n",
       "      <td>252051.0</td>\n",
       "      <td>4331779.0</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>1.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>5.0</td>\n",
       "      <td>Princess of the Dawn</td>\n",
       "      <td>3.0</td>\n",
       "      <td>2.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>Deaffy &amp; R.A. Smith-Diesel</td>\n",
       "      <td>375418.0</td>\n",
       "      <td>6290521.0</td>\n",
       "      <td>0.99</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   playlist_id   name  playlist_id  track_id  track_id  \\\n",
       "0            1  Music          1.0       1.0       1.0   \n",
       "1            1  Music          1.0       2.0       2.0   \n",
       "2            1  Music          1.0       3.0       3.0   \n",
       "3            1  Music          1.0       4.0       4.0   \n",
       "4            1  Music          1.0       5.0       5.0   \n",
       "\n",
       "                                      name  album_id  media_type_id  genre_id  \\\n",
       "0  For Those About To Rock (We Salute You)       1.0            1.0       1.0   \n",
       "1                        Balls to the Wall       2.0            2.0       1.0   \n",
       "2                          Fast As a Shark       3.0            2.0       1.0   \n",
       "3                        Restless and Wild       3.0            2.0       1.0   \n",
       "4                     Princess of the Dawn       3.0            2.0       1.0   \n",
       "\n",
       "                                            composer  milliseconds  \\\n",
       "0          Angus Young, Malcolm Young, Brian Johnson      343719.0   \n",
       "1                                               None      342562.0   \n",
       "2  F. Baltes, S. Kaufman, U. Dirkscneider & W. Ho...      230619.0   \n",
       "3  F. Baltes, R.A. Smith-Diesel, S. Kaufman, U. D...      252051.0   \n",
       "4                         Deaffy & R.A. Smith-Diesel      375418.0   \n",
       "\n",
       "        bytes  unit_price  \n",
       "0  11170334.0        0.99  \n",
       "1   5510424.0        0.99  \n",
       "2   3990994.0        0.99  \n",
       "3   4331779.0        0.99  \n",
       "4   6290521.0        0.99  "
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# this is to show different order in table can have different result for left join\n",
    "LEFT_join_from_playlist=\"\"\"\n",
    "SELECT *\n",
    "FROM playlist p\n",
    "LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n",
    "LEFT JOIN track t ON t.track_id = pt.track_id\"\"\"\n",
    "\n",
    "LEFT_join_from_playlist=pd.read_sql(LEFT_join_from_playlist,conn)\n",
    "print(len(LEFT_join_from_playlist))\n",
    "LEFT_join_from_playlist.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    ">- The result has 8719 rows, instead of 8715 rows for left_join_from_track"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**sub_join_from_track**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8715\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>playlist_name</th>\n",
       "      <th>track_name</th>\n",
       "      <th>length_seconds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Band Members Discuss Tracks from \"Revelations\"</td>\n",
       "      <td>294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Revelations</td>\n",
       "      <td>252</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>One and the Same</td>\n",
       "      <td>217</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Sound of a Gun</td>\n",
       "      <td>260</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Until We Fall</td>\n",
       "      <td>230</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   playlist_id playlist_name                                      track_name  \\\n",
       "0            1         Music  Band Members Discuss Tracks from \"Revelations\"   \n",
       "1            1         Music                                     Revelations   \n",
       "2            1         Music                                One and the Same   \n",
       "3            1         Music                                  Sound of a Gun   \n",
       "4            1         Music                                   Until We Fall   \n",
       "\n",
       "   length_seconds  \n",
       "0             294  \n",
       "1             252  \n",
       "2             217  \n",
       "3             260  \n",
       "4             230  "
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub=\"\"\"\n",
    "SELECT \n",
    "     p.playlist_id, \n",
    "     p.name AS playlist_name, \n",
    "     t.name AS track_name,\n",
    "     (t.milliseconds / 1000) length_seconds  \n",
    " FROM track t\n",
    " JOIN playlist_track pt ON t.track_id=pt.track_id\n",
    " JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n",
    "\n",
    "sub=pd.read_sql(sub,conn)\n",
    "print(len(sub))\n",
    "sub.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>playlist_name</th>\n",
       "      <th>number_of_tracks</th>\n",
       "      <th>length_seconds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>3290</td>\n",
       "      <td>876049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>TV Shows</td>\n",
       "      <td>213</td>\n",
       "      <td>500987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "      <td>90’s Music</td>\n",
       "      <td>1477</td>\n",
       "      <td>397970</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8</td>\n",
       "      <td>Music</td>\n",
       "      <td>3290</td>\n",
       "      <td>876049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9</td>\n",
       "      <td>Music Videos</td>\n",
       "      <td>1</td>\n",
       "      <td>294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10</td>\n",
       "      <td>TV Shows</td>\n",
       "      <td>213</td>\n",
       "      <td>500987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>11</td>\n",
       "      <td>Brazilian Music</td>\n",
       "      <td>39</td>\n",
       "      <td>9464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>12</td>\n",
       "      <td>Classical</td>\n",
       "      <td>75</td>\n",
       "      <td>21736</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>13</td>\n",
       "      <td>Classical 101 - Deep Cuts</td>\n",
       "      <td>25</td>\n",
       "      <td>6742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>14</td>\n",
       "      <td>Classical 101 - Next Steps</td>\n",
       "      <td>25</td>\n",
       "      <td>7565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>15</td>\n",
       "      <td>Classical 101 - The Basics</td>\n",
       "      <td>25</td>\n",
       "      <td>7429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>16</td>\n",
       "      <td>Grunge</td>\n",
       "      <td>15</td>\n",
       "      <td>4114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>17</td>\n",
       "      <td>Heavy Metal Classic</td>\n",
       "      <td>26</td>\n",
       "      <td>8189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>18</td>\n",
       "      <td>On-The-Go 1</td>\n",
       "      <td>1</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    playlist_id               playlist_name  number_of_tracks  length_seconds\n",
       "0             1                       Music              3290          876049\n",
       "1             3                    TV Shows               213          500987\n",
       "2             5                  90’s Music              1477          397970\n",
       "3             8                       Music              3290          876049\n",
       "4             9                Music Videos                 1             294\n",
       "5            10                    TV Shows               213          500987\n",
       "6            11             Brazilian Music                39            9464\n",
       "7            12                   Classical                75           21736\n",
       "8            13   Classical 101 - Deep Cuts                25            6742\n",
       "9            14  Classical 101 - Next Steps                25            7565\n",
       "10           15  Classical 101 - The Basics                25            7429\n",
       "11           16                      Grunge                15            4114\n",
       "12           17         Heavy Metal Classic                26            8189\n",
       "13           18                 On-The-Go 1                 1             197"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# name subquery using WITH and AS \n",
    "q=\"\"\"\n",
    "WITH sub AS\n",
    "(SELECT \n",
    "     p.playlist_id, \n",
    "     p.name AS playlist_name, \n",
    "     t.name AS track_name,\n",
    "     (t.milliseconds / 1000) length_seconds  \n",
    " FROM track t\n",
    " JOIN playlist_track pt ON t.track_id=pt.track_id\n",
    " JOIN playlist p ON p.playlist_id=pt.playlist_id\n",
    ")\n",
    "SELECT\n",
    "    playlist_id,\n",
    "    playlist_name,\n",
    "    COUNT(playlist_id) number_of_tracks,\n",
    "    SUM(length_seconds) length_seconds\n",
    "FROM sub\n",
    "GROUP BY 1\n",
    "ORDER BY 1\n",
    "\"\"\"\n",
    "table_sub=pd.read_sql(q,conn)\n",
    "print(len(table_sub))\n",
    "table_sub\n",
    "\n",
    "# GROUP BY 1 and GROUP BY 1,2 are the same\n",
    "# COUNT(playlist_id), same as COUNT(track_name)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**sub_LEFT_from_track**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8715\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>playlist_name</th>\n",
       "      <th>track_name</th>\n",
       "      <th>length_seconds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>8</td>\n",
       "      <td>Music</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>17</td>\n",
       "      <td>Heavy Metal Classic</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>343</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Balls to the Wall</td>\n",
       "      <td>342</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>8</td>\n",
       "      <td>Music</td>\n",
       "      <td>Balls to the Wall</td>\n",
       "      <td>342</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   playlist_id        playlist_name                               track_name  \\\n",
       "0            1                Music  For Those About To Rock (We Salute You)   \n",
       "1            8                Music  For Those About To Rock (We Salute You)   \n",
       "2           17  Heavy Metal Classic  For Those About To Rock (We Salute You)   \n",
       "3            1                Music                        Balls to the Wall   \n",
       "4            8                Music                        Balls to the Wall   \n",
       "\n",
       "   length_seconds  \n",
       "0             343  \n",
       "1             343  \n",
       "2             343  \n",
       "3             342  \n",
       "4             342  "
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub_LEFT_from_track=\"\"\"\n",
    "SELECT \n",
    "     p.playlist_id, \n",
    "     p.name AS playlist_name, \n",
    "     t.name AS track_name,\n",
    "     (t.milliseconds / 1000) length_seconds  \n",
    " FROM track t\n",
    " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n",
    " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id;\"\"\"\n",
    "\n",
    "sub_LEFT_from_track=pd.read_sql(sub_LEFT_from_track,conn)\n",
    "print(len(sub_LEFT_from_track))\n",
    "sub_LEFT_from_track.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "14\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>playlist_name</th>\n",
       "      <th>number_of_tracks</th>\n",
       "      <th>length_seconds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>3290</td>\n",
       "      <td>876049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>TV Shows</td>\n",
       "      <td>213</td>\n",
       "      <td>500987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "      <td>90’s Music</td>\n",
       "      <td>1477</td>\n",
       "      <td>397970</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>8</td>\n",
       "      <td>Music</td>\n",
       "      <td>3290</td>\n",
       "      <td>876049</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>9</td>\n",
       "      <td>Music Videos</td>\n",
       "      <td>1</td>\n",
       "      <td>294</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>10</td>\n",
       "      <td>TV Shows</td>\n",
       "      <td>213</td>\n",
       "      <td>500987</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>11</td>\n",
       "      <td>Brazilian Music</td>\n",
       "      <td>39</td>\n",
       "      <td>9464</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>12</td>\n",
       "      <td>Classical</td>\n",
       "      <td>75</td>\n",
       "      <td>21736</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>13</td>\n",
       "      <td>Classical 101 - Deep Cuts</td>\n",
       "      <td>25</td>\n",
       "      <td>6742</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>14</td>\n",
       "      <td>Classical 101 - Next Steps</td>\n",
       "      <td>25</td>\n",
       "      <td>7565</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>15</td>\n",
       "      <td>Classical 101 - The Basics</td>\n",
       "      <td>25</td>\n",
       "      <td>7429</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>16</td>\n",
       "      <td>Grunge</td>\n",
       "      <td>15</td>\n",
       "      <td>4114</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>17</td>\n",
       "      <td>Heavy Metal Classic</td>\n",
       "      <td>26</td>\n",
       "      <td>8189</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>18</td>\n",
       "      <td>On-The-Go 1</td>\n",
       "      <td>1</td>\n",
       "      <td>197</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    playlist_id               playlist_name  number_of_tracks  length_seconds\n",
       "0             1                       Music              3290          876049\n",
       "1             3                    TV Shows               213          500987\n",
       "2             5                  90’s Music              1477          397970\n",
       "3             8                       Music              3290          876049\n",
       "4             9                Music Videos                 1             294\n",
       "5            10                    TV Shows               213          500987\n",
       "6            11             Brazilian Music                39            9464\n",
       "7            12                   Classical                75           21736\n",
       "8            13   Classical 101 - Deep Cuts                25            6742\n",
       "9            14  Classical 101 - Next Steps                25            7565\n",
       "10           15  Classical 101 - The Basics                25            7429\n",
       "11           16                      Grunge                15            4114\n",
       "12           17         Heavy Metal Classic                26            8189\n",
       "13           18                 On-The-Go 1                 1             197"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q=\"\"\"\n",
    "WITH sub_LEFT AS\n",
    "(SELECT \n",
    "     p.playlist_id, \n",
    "     p.name AS playlist_name, \n",
    "     t.name AS track_name,\n",
    "     (t.milliseconds / 1000) length_seconds  \n",
    " FROM track t\n",
    " LEFT JOIN playlist_track pt ON t.track_id=pt.track_id\n",
    " LEFT JOIN playlist p ON p.playlist_id=pt.playlist_id\n",
    ")\n",
    "SELECT\n",
    "    playlist_id,\n",
    "    playlist_name,\n",
    "    COUNT(track_name) number_of_tracks,\n",
    "    SUM(length_seconds) length_seconds\n",
    "FROM sub_LEFT\n",
    "GROUP BY 1, 2\n",
    "ORDER BY 1\n",
    "\"\"\"\n",
    "table_sub_LEFT_from_track=pd.read_sql(q,conn)\n",
    "print(len(table_sub_LEFT_from_track))\n",
    "table_sub_LEFT_from_track"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**sub_LEFT_from_playlist**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "8719\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>playlist_name</th>\n",
       "      <th>track_name</th>\n",
       "      <th>length_seconds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>For Those About To Rock (We Salute You)</td>\n",
       "      <td>343.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Balls to the Wall</td>\n",
       "      <td>342.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Fast As a Shark</td>\n",
       "      <td>230.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Restless and Wild</td>\n",
       "      <td>252.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>Princess of the Dawn</td>\n",
       "      <td>375.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "   playlist_id playlist_name                               track_name  \\\n",
       "0            1         Music  For Those About To Rock (We Salute You)   \n",
       "1            1         Music                        Balls to the Wall   \n",
       "2            1         Music                          Fast As a Shark   \n",
       "3            1         Music                        Restless and Wild   \n",
       "4            1         Music                     Princess of the Dawn   \n",
       "\n",
       "   length_seconds  \n",
       "0           343.0  \n",
       "1           342.0  \n",
       "2           230.0  \n",
       "3           252.0  \n",
       "4           375.0  "
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "sub_LEFT_from_playlist=\"\"\"\n",
    "SELECT\n",
    "         p.playlist_id,\n",
    "         p.name playlist_name,\n",
    "         t.name track_name,\n",
    "         (t.milliseconds / 1000) length_seconds\n",
    "FROM playlist p\n",
    "LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n",
    "LEFT JOIN track t ON t.track_id = pt.track_id\"\"\"\n",
    "\n",
    "sub_LEFT_from_playlist=pd.read_sql(sub_LEFT_from_playlist, conn)\n",
    "print(len(sub_LEFT_from_playlist))\n",
    "sub_LEFT_from_playlist.head()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "18\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>playlist_id</th>\n",
       "      <th>playlist_name</th>\n",
       "      <th>number_of_tracks</th>\n",
       "      <th>length_seconds</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Music</td>\n",
       "      <td>3290</td>\n",
       "      <td>876049.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>2</td>\n",
       "      <td>Movies</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>3</td>\n",
       "      <td>TV Shows</td>\n",
       "      <td>213</td>\n",
       "      <td>500987.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>4</td>\n",
       "      <td>Audiobooks</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>5</td>\n",
       "      <td>90’s Music</td>\n",
       "      <td>1477</td>\n",
       "      <td>397970.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>6</td>\n",
       "      <td>Audiobooks</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>7</td>\n",
       "      <td>Movies</td>\n",
       "      <td>0</td>\n",
       "      <td>NaN</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>8</td>\n",
       "      <td>Music</td>\n",
       "      <td>3290</td>\n",
       "      <td>876049.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>9</td>\n",
       "      <td>Music Videos</td>\n",
       "      <td>1</td>\n",
       "      <td>294.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>10</td>\n",
       "      <td>TV Shows</td>\n",
       "      <td>213</td>\n",
       "      <td>500987.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>11</td>\n",
       "      <td>Brazilian Music</td>\n",
       "      <td>39</td>\n",
       "      <td>9464.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>12</td>\n",
       "      <td>Classical</td>\n",
       "      <td>75</td>\n",
       "      <td>21736.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>13</td>\n",
       "      <td>Classical 101 - Deep Cuts</td>\n",
       "      <td>25</td>\n",
       "      <td>6742.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>14</td>\n",
       "      <td>Classical 101 - Next Steps</td>\n",
       "      <td>25</td>\n",
       "      <td>7565.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>15</td>\n",
       "      <td>Classical 101 - The Basics</td>\n",
       "      <td>25</td>\n",
       "      <td>7429.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>16</td>\n",
       "      <td>Grunge</td>\n",
       "      <td>15</td>\n",
       "      <td>4114.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>17</td>\n",
       "      <td>Heavy Metal Classic</td>\n",
       "      <td>26</td>\n",
       "      <td>8189.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>18</td>\n",
       "      <td>On-The-Go 1</td>\n",
       "      <td>1</td>\n",
       "      <td>197.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    playlist_id               playlist_name  number_of_tracks  length_seconds\n",
       "0             1                       Music              3290        876049.0\n",
       "1             2                      Movies                 0             NaN\n",
       "2             3                    TV Shows               213        500987.0\n",
       "3             4                  Audiobooks                 0             NaN\n",
       "4             5                  90’s Music              1477        397970.0\n",
       "5             6                  Audiobooks                 0             NaN\n",
       "6             7                      Movies                 0             NaN\n",
       "7             8                       Music              3290        876049.0\n",
       "8             9                Music Videos                 1           294.0\n",
       "9            10                    TV Shows               213        500987.0\n",
       "10           11             Brazilian Music                39          9464.0\n",
       "11           12                   Classical                75         21736.0\n",
       "12           13   Classical 101 - Deep Cuts                25          6742.0\n",
       "13           14  Classical 101 - Next Steps                25          7565.0\n",
       "14           15  Classical 101 - The Basics                25          7429.0\n",
       "15           16                      Grunge                15          4114.0\n",
       "16           17         Heavy Metal Classic                26          8189.0\n",
       "17           18                 On-The-Go 1                 1           197.0"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q=\"\"\"\n",
    "WITH playlist_info AS\n",
    "    (\n",
    "     SELECT\n",
    "         p.playlist_id,\n",
    "         p.name playlist_name,\n",
    "         t.name track_name,\n",
    "         (t.milliseconds / 1000) length_seconds\n",
    "     FROM playlist p\n",
    "     LEFT JOIN playlist_track pt ON pt.playlist_id = p.playlist_id\n",
    "     LEFT JOIN track t ON t.track_id = pt.track_id\n",
    "    )\n",
    "\n",
    "SELECT\n",
    "    playlist_id,\n",
    "    playlist_name,\n",
    "    COUNT(track_name) number_of_tracks,\n",
    "    SUM(length_seconds) length_seconds\n",
    "FROM playlist_info\n",
    "GROUP BY 1,2\n",
    "ORDER BY 1;\"\"\"\n",
    "\n",
    "table_sub_LEFT_from_playlist=pd.read_sql(q, conn)\n",
    "print(len(table_sub_LEFT_from_playlist))\n",
    "table_sub_LEFT_from_playlist"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "**CREATE VIEW (create a table can be reused), view can not be run on Jupyter notebook**"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "18\n"
     ]
    },
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>customer_id</th>\n",
       "      <th>first_name</th>\n",
       "      <th>last_name</th>\n",
       "      <th>company</th>\n",
       "      <th>address</th>\n",
       "      <th>city</th>\n",
       "      <th>state</th>\n",
       "      <th>country</th>\n",
       "      <th>postal_code</th>\n",
       "      <th>phone</th>\n",
       "      <th>fax</th>\n",
       "      <th>email</th>\n",
       "      <th>support_rep_id</th>\n",
       "      <th>invoice_id</th>\n",
       "      <th>customer_id</th>\n",
       "      <th>invoice_date</th>\n",
       "      <th>billing_address</th>\n",
       "      <th>billing_city</th>\n",
       "      <th>billing_state</th>\n",
       "      <th>billing_country</th>\n",
       "      <th>billing_postal_code</th>\n",
       "      <th>total</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>1</td>\n",
       "      <td>Luís</td>\n",
       "      <td>Gonçalves</td>\n",
       "      <td>Embraer - Empresa Brasileira de Aeronáutica S.A.</td>\n",
       "      <td>Av. Brigadeiro Faria Lima, 2170</td>\n",
       "      <td>São José dos Campos</td>\n",
       "      <td>SP</td>\n",
       "      <td>Brazil</td>\n",
       "      <td>12227-000</td>\n",
       "      <td>+55 (12) 3923-5555</td>\n",
       "      <td>+55 (12) 3923-5566</td>\n",
       "      <td>luisg@embraer.com.br</td>\n",
       "      <td>3</td>\n",
       "      <td>16</td>\n",
       "      <td>1</td>\n",
       "      <td>2017-01-26 00:00:00</td>\n",
       "      <td>Av. Brigadeiro Faria Lima, 2170</td>\n",
       "      <td>São José dos Campos</td>\n",
       "      <td>SP</td>\n",
       "      <td>Brazil</td>\n",
       "      <td>12227-000</td>\n",
       "      <td>8.91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>3</td>\n",
       "      <td>François</td>\n",
       "      <td>Tremblay</td>\n",
       "      <td>None</td>\n",
       "      <td>1498 rue Bélanger</td>\n",
       "      <td>Montréal</td>\n",
       "      <td>QC</td>\n",
       "      <td>Canada</td>\n",
       "      <td>H2G 1A7</td>\n",
       "      <td>+1 (514) 721-4711</td>\n",
       "      <td>None</td>\n",
       "      <td>ftremblay@gmail.com</td>\n",
       "      <td>3</td>\n",
       "      <td>31</td>\n",
       "      <td>3</td>\n",
       "      <td>2017-02-21 00:00:00</td>\n",
       "      <td>1498 rue Bélanger</td>\n",
       "      <td>Montréal</td>\n",
       "      <td>QC</td>\n",
       "      <td>Canada</td>\n",
       "      <td>H2G 1A7</td>\n",
       "      <td>19.80</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>5</td>\n",
       "      <td>František</td>\n",
       "      <td>Wichterlová</td>\n",
       "      <td>JetBrains s.r.o.</td>\n",
       "      <td>Klanova 9/506</td>\n",
       "      <td>Prague</td>\n",
       "      <td>None</td>\n",
       "      <td>Czech Republic</td>\n",
       "      <td>14700</td>\n",
       "      <td>+420 2 4172 5555</td>\n",
       "      <td>+420 2 4172 5555</td>\n",
       "      <td>frantisekw@jetbrains.com</td>\n",
       "      <td>4</td>\n",
       "      <td>78</td>\n",
       "      <td>5</td>\n",
       "      <td>2017-05-29 00:00:00</td>\n",
       "      <td>Klanova 9/506</td>\n",
       "      <td>Prague</td>\n",
       "      <td>None</td>\n",
       "      <td>Czech Republic</td>\n",
       "      <td>14700</td>\n",
       "      <td>8.91</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>6</td>\n",
       "      <td>Helena</td>\n",
       "      <td>Holý</td>\n",
       "      <td>None</td>\n",
       "      <td>Rilská 3174/6</td>\n",
       "      <td>Prague</td>\n",
       "      <td>None</td>\n",
       "      <td>Czech Republic</td>\n",
       "      <td>14300</td>\n",
       "      <td>+420 2 4177 0449</td>\n",
       "      <td>None</td>\n",
       "      <td>hholy@gmail.com</td>\n",
       "      <td>5</td>\n",
       "      <td>112</td>\n",
       "      <td>6</td>\n",
       "      <td>2017-08-31 00:00:00</td>\n",
       "      <td>Rilská 3174/6</td>\n",
       "      <td>Prague</td>\n",
       "      <td>None</td>\n",
       "      <td>Czech Republic</td>\n",
       "      <td>14300</td>\n",
       "      <td>16.83</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>13</td>\n",
       "      <td>Fernanda</td>\n",
       "      <td>Ramos</td>\n",
       "      <td>None</td>\n",
       "      <td>Qe 7 Bloco G</td>\n",
       "      <td>Brasília</td>\n",
       "      <td>DF</td>\n",
       "      <td>Brazil</td>\n",
       "      <td>71020-677</td>\n",
       "      <td>+55 (61) 3363-5547</td>\n",
       "      <td>+55 (61) 3363-7855</td>\n",
       "      <td>fernadaramos4@uol.com.br</td>\n",
       "      <td>4</td>\n",
       "      <td>73</td>\n",
       "      <td>13</td>\n",
       "      <td>2017-05-18 00:00:00</td>\n",
       "      <td>Qe 7 Bloco G</td>\n",
       "      <td>Brasília</td>\n",
       "      <td>DF</td>\n",
       "      <td>Brazil</td>\n",
       "      <td>71020-677</td>\n",
       "      <td>14.85</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>5</th>\n",
       "      <td>17</td>\n",
       "      <td>Jack</td>\n",
       "      <td>Smith</td>\n",
       "      <td>Microsoft Corporation</td>\n",
       "      <td>1 Microsoft Way</td>\n",
       "      <td>Redmond</td>\n",
       "      <td>WA</td>\n",
       "      <td>USA</td>\n",
       "      <td>98052-8300</td>\n",
       "      <td>+1 (425) 882-8080</td>\n",
       "      <td>+1 (425) 882-8081</td>\n",
       "      <td>jacksmith@microsoft.com</td>\n",
       "      <td>5</td>\n",
       "      <td>98</td>\n",
       "      <td>17</td>\n",
       "      <td>2017-07-23 00:00:00</td>\n",
       "      <td>1 Microsoft Way</td>\n",
       "      <td>Redmond</td>\n",
       "      <td>WA</td>\n",
       "      <td>USA</td>\n",
       "      <td>98052-8300</td>\n",
       "      <td>9.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>6</th>\n",
       "      <td>20</td>\n",
       "      <td>Dan</td>\n",
       "      <td>Miller</td>\n",
       "      <td>None</td>\n",
       "      <td>541 Del Medio Avenue</td>\n",
       "      <td>Mountain View</td>\n",
       "      <td>CA</td>\n",
       "      <td>USA</td>\n",
       "      <td>94040-111</td>\n",
       "      <td>+1 (650) 644-3358</td>\n",
       "      <td>None</td>\n",
       "      <td>dmiller@comcast.com</td>\n",
       "      <td>4</td>\n",
       "      <td>17</td>\n",
       "      <td>20</td>\n",
       "      <td>2017-01-28 00:00:00</td>\n",
       "      <td>541 Del Medio Avenue</td>\n",
       "      <td>Mountain View</td>\n",
       "      <td>CA</td>\n",
       "      <td>USA</td>\n",
       "      <td>94040-111</td>\n",
       "      <td>10.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>7</th>\n",
       "      <td>21</td>\n",
       "      <td>Kathy</td>\n",
       "      <td>Chase</td>\n",
       "      <td>None</td>\n",
       "      <td>801 W 4th Street</td>\n",
       "      <td>Reno</td>\n",
       "      <td>NV</td>\n",
       "      <td>USA</td>\n",
       "      <td>89503</td>\n",
       "      <td>+1 (775) 223-7665</td>\n",
       "      <td>None</td>\n",
       "      <td>kachase@hotmail.com</td>\n",
       "      <td>5</td>\n",
       "      <td>44</td>\n",
       "      <td>21</td>\n",
       "      <td>2017-03-20 00:00:00</td>\n",
       "      <td>801 W 4th Street</td>\n",
       "      <td>Reno</td>\n",
       "      <td>NV</td>\n",
       "      <td>USA</td>\n",
       "      <td>89503</td>\n",
       "      <td>11.88</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>8</th>\n",
       "      <td>22</td>\n",
       "      <td>Heather</td>\n",
       "      <td>Leacock</td>\n",
       "      <td>None</td>\n",
       "      <td>120 S Orange Ave</td>\n",
       "      <td>Orlando</td>\n",
       "      <td>FL</td>\n",
       "      <td>USA</td>\n",
       "      <td>32801</td>\n",
       "      <td>+1 (407) 999-7788</td>\n",
       "      <td>None</td>\n",
       "      <td>hleacock@gmail.com</td>\n",
       "      <td>4</td>\n",
       "      <td>43</td>\n",
       "      <td>22</td>\n",
       "      <td>2017-03-19 00:00:00</td>\n",
       "      <td>120 S Orange Ave</td>\n",
       "      <td>Orlando</td>\n",
       "      <td>FL</td>\n",
       "      <td>USA</td>\n",
       "      <td>32801</td>\n",
       "      <td>6.93</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>9</th>\n",
       "      <td>30</td>\n",
       "      <td>Edward</td>\n",
       "      <td>Francis</td>\n",
       "      <td>None</td>\n",
       "      <td>230 Elgin Street</td>\n",
       "      <td>Ottawa</td>\n",
       "      <td>ON</td>\n",
       "      <td>Canada</td>\n",
       "      <td>K2P 1L7</td>\n",
       "      <td>+1 (613) 234-3322</td>\n",
       "      <td>None</td>\n",
       "      <td>edfrancis@yachoo.ca</td>\n",
       "      <td>3</td>\n",
       "      <td>2</td>\n",
       "      <td>30</td>\n",
       "      <td>2017-01-03 00:00:00</td>\n",
       "      <td>230 Elgin Street</td>\n",
       "      <td>Ottawa</td>\n",
       "      <td>ON</td>\n",
       "      <td>Canada</td>\n",
       "      <td>K2P 1L7</td>\n",
       "      <td>9.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>10</th>\n",
       "      <td>34</td>\n",
       "      <td>João</td>\n",
       "      <td>Fernandes</td>\n",
       "      <td>None</td>\n",
       "      <td>Rua da Assunção 53</td>\n",
       "      <td>Lisbon</td>\n",
       "      <td>None</td>\n",
       "      <td>Portugal</td>\n",
       "      <td>None</td>\n",
       "      <td>+351 (213) 466-111</td>\n",
       "      <td>None</td>\n",
       "      <td>jfernandes@yahoo.pt</td>\n",
       "      <td>4</td>\n",
       "      <td>35</td>\n",
       "      <td>34</td>\n",
       "      <td>2017-02-25 00:00:00</td>\n",
       "      <td>Rua da Assunção 53</td>\n",
       "      <td>Lisbon</td>\n",
       "      <td>None</td>\n",
       "      <td>Portugal</td>\n",
       "      <td>None</td>\n",
       "      <td>4.95</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>11</th>\n",
       "      <td>37</td>\n",
       "      <td>Fynn</td>\n",
       "      <td>Zimmermann</td>\n",
       "      <td>None</td>\n",
       "      <td>Berger Straße 10</td>\n",
       "      <td>Frankfurt</td>\n",
       "      <td>None</td>\n",
       "      <td>Germany</td>\n",
       "      <td>60316</td>\n",
       "      <td>+49 069 40598889</td>\n",
       "      <td>None</td>\n",
       "      <td>fzimmermann@yahoo.de</td>\n",
       "      <td>3</td>\n",
       "      <td>40</td>\n",
       "      <td>37</td>\n",
       "      <td>2017-03-13 00:00:00</td>\n",
       "      <td>Berger Straße 10</td>\n",
       "      <td>Frankfurt</td>\n",
       "      <td>None</td>\n",
       "      <td>Germany</td>\n",
       "      <td>60316</td>\n",
       "      <td>9.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>12</th>\n",
       "      <td>42</td>\n",
       "      <td>Wyatt</td>\n",
       "      <td>Girard</td>\n",
       "      <td>None</td>\n",
       "      <td>9, Place Louis Barthou</td>\n",
       "      <td>Bordeaux</td>\n",
       "      <td>None</td>\n",
       "      <td>France</td>\n",
       "      <td>33000</td>\n",
       "      <td>+33 05 56 96 96 96</td>\n",
       "      <td>None</td>\n",
       "      <td>wyatt.girard@yahoo.fr</td>\n",
       "      <td>3</td>\n",
       "      <td>12</td>\n",
       "      <td>42</td>\n",
       "      <td>2017-01-21 00:00:00</td>\n",
       "      <td>9, Place Louis Barthou</td>\n",
       "      <td>Bordeaux</td>\n",
       "      <td>None</td>\n",
       "      <td>France</td>\n",
       "      <td>33000</td>\n",
       "      <td>3.96</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>13</th>\n",
       "      <td>46</td>\n",
       "      <td>Hugh</td>\n",
       "      <td>O'Reilly</td>\n",
       "      <td>None</td>\n",
       "      <td>3 Chatham Street</td>\n",
       "      <td>Dublin</td>\n",
       "      <td>Dublin</td>\n",
       "      <td>Ireland</td>\n",
       "      <td>None</td>\n",
       "      <td>+353 01 6792424</td>\n",
       "      <td>None</td>\n",
       "      <td>hughoreilly@apple.ie</td>\n",
       "      <td>3</td>\n",
       "      <td>30</td>\n",
       "      <td>46</td>\n",
       "      <td>2017-02-18 00:00:00</td>\n",
       "      <td>3 Chatham Street</td>\n",
       "      <td>Dublin</td>\n",
       "      <td>Dublin</td>\n",
       "      <td>Ireland</td>\n",
       "      <td>None</td>\n",
       "      <td>10.89</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>14</th>\n",
       "      <td>50</td>\n",
       "      <td>Enrique</td>\n",
       "      <td>Muñoz</td>\n",
       "      <td>None</td>\n",
       "      <td>C/ San Bernardo 85</td>\n",
       "      <td>Madrid</td>\n",
       "      <td>None</td>\n",
       "      <td>Spain</td>\n",
       "      <td>28015</td>\n",
       "      <td>+34 914 454 454</td>\n",
       "      <td>None</td>\n",
       "      <td>enrique_munoz@yahoo.es</td>\n",
       "      <td>5</td>\n",
       "      <td>41</td>\n",
       "      <td>50</td>\n",
       "      <td>2017-03-17 00:00:00</td>\n",
       "      <td>C/ San Bernardo 85</td>\n",
       "      <td>Madrid</td>\n",
       "      <td>None</td>\n",
       "      <td>Spain</td>\n",
       "      <td>28015</td>\n",
       "      <td>7.92</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>15</th>\n",
       "      <td>53</td>\n",
       "      <td>Phil</td>\n",
       "      <td>Hughes</td>\n",
       "      <td>None</td>\n",
       "      <td>113 Lupus St</td>\n",
       "      <td>London</td>\n",
       "      <td>None</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>SW1V 3EN</td>\n",
       "      <td>+44 020 7976 5722</td>\n",
       "      <td>None</td>\n",
       "      <td>phil.hughes@gmail.com</td>\n",
       "      <td>3</td>\n",
       "      <td>23</td>\n",
       "      <td>53</td>\n",
       "      <td>2017-02-09 00:00:00</td>\n",
       "      <td>113 Lupus St</td>\n",
       "      <td>London</td>\n",
       "      <td>None</td>\n",
       "      <td>United Kingdom</td>\n",
       "      <td>SW1V 3EN</td>\n",
       "      <td>9.90</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>16</th>\n",
       "      <td>57</td>\n",
       "      <td>Luis</td>\n",
       "      <td>Rojas</td>\n",
       "      <td>None</td>\n",
       "      <td>Calle Lira, 198</td>\n",
       "      <td>Santiago</td>\n",
       "      <td>None</td>\n",
       "      <td>Chile</td>\n",
       "      <td>None</td>\n",
       "      <td>+56 (0)2 635 4444</td>\n",
       "      <td>None</td>\n",
       "      <td>luisrojas@yahoo.cl</td>\n",
       "      <td>5</td>\n",
       "      <td>47</td>\n",
       "      <td>57</td>\n",
       "      <td>2017-03-25 00:00:00</td>\n",
       "      <td>Calle Lira, 198</td>\n",
       "      <td>Santiago</td>\n",
       "      <td>None</td>\n",
       "      <td>Chile</td>\n",
       "      <td>None</td>\n",
       "      <td>5.94</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>17</th>\n",
       "      <td>58</td>\n",
       "      <td>Manoj</td>\n",
       "      <td>Pareek</td>\n",
       "      <td>None</td>\n",
       "      <td>12,Community Centre</td>\n",
       "      <td>Delhi</td>\n",
       "      <td>None</td>\n",
       "      <td>India</td>\n",
       "      <td>110017</td>\n",
       "      <td>+91 0124 39883988</td>\n",
       "      <td>None</td>\n",
       "      <td>manoj.pareek@rediff.com</td>\n",
       "      <td>3</td>\n",
       "      <td>33</td>\n",
       "      <td>58</td>\n",
       "      <td>2017-02-21 00:00:00</td>\n",
       "      <td>12,Community Centre</td>\n",
       "      <td>Delhi</td>\n",
       "      <td>None</td>\n",
       "      <td>India</td>\n",
       "      <td>110017</td>\n",
       "      <td>3.96</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "    customer_id first_name    last_name  \\\n",
       "0             1       Luís    Gonçalves   \n",
       "1             3   François     Tremblay   \n",
       "2             5  František  Wichterlová   \n",
       "3             6     Helena         Holý   \n",
       "4            13   Fernanda        Ramos   \n",
       "5            17       Jack        Smith   \n",
       "6            20        Dan       Miller   \n",
       "7            21      Kathy        Chase   \n",
       "8            22    Heather      Leacock   \n",
       "9            30     Edward      Francis   \n",
       "10           34       João    Fernandes   \n",
       "11           37       Fynn   Zimmermann   \n",
       "12           42      Wyatt       Girard   \n",
       "13           46       Hugh     O'Reilly   \n",
       "14           50    Enrique        Muñoz   \n",
       "15           53       Phil       Hughes   \n",
       "16           57       Luis        Rojas   \n",
       "17           58      Manoj       Pareek   \n",
       "\n",
       "                                             company  \\\n",
       "0   Embraer - Empresa Brasileira de Aeronáutica S.A.   \n",
       "1                                               None   \n",
       "2                                   JetBrains s.r.o.   \n",
       "3                                               None   \n",
       "4                                               None   \n",
       "5                              Microsoft Corporation   \n",
       "6                                               None   \n",
       "7                                               None   \n",
       "8                                               None   \n",
       "9                                               None   \n",
       "10                                              None   \n",
       "11                                              None   \n",
       "12                                              None   \n",
       "13                                              None   \n",
       "14                                              None   \n",
       "15                                              None   \n",
       "16                                              None   \n",
       "17                                              None   \n",
       "\n",
       "                            address                 city   state  \\\n",
       "0   Av. Brigadeiro Faria Lima, 2170  São José dos Campos      SP   \n",
       "1                 1498 rue Bélanger             Montréal      QC   \n",
       "2                     Klanova 9/506               Prague    None   \n",
       "3                     Rilská 3174/6               Prague    None   \n",
       "4                      Qe 7 Bloco G             Brasília      DF   \n",
       "5                   1 Microsoft Way              Redmond      WA   \n",
       "6              541 Del Medio Avenue        Mountain View      CA   \n",
       "7                  801 W 4th Street                 Reno      NV   \n",
       "8                  120 S Orange Ave              Orlando      FL   \n",
       "9                  230 Elgin Street               Ottawa      ON   \n",
       "10               Rua da Assunção 53               Lisbon    None   \n",
       "11                 Berger Straße 10            Frankfurt    None   \n",
       "12           9, Place Louis Barthou             Bordeaux    None   \n",
       "13                 3 Chatham Street               Dublin  Dublin   \n",
       "14               C/ San Bernardo 85               Madrid    None   \n",
       "15                     113 Lupus St               London    None   \n",
       "16                  Calle Lira, 198             Santiago    None   \n",
       "17              12,Community Centre                Delhi    None   \n",
       "\n",
       "           country postal_code               phone                 fax  \\\n",
       "0           Brazil   12227-000  +55 (12) 3923-5555  +55 (12) 3923-5566   \n",
       "1           Canada     H2G 1A7   +1 (514) 721-4711                None   \n",
       "2   Czech Republic       14700    +420 2 4172 5555    +420 2 4172 5555   \n",
       "3   Czech Republic       14300    +420 2 4177 0449                None   \n",
       "4           Brazil   71020-677  +55 (61) 3363-5547  +55 (61) 3363-7855   \n",
       "5              USA  98052-8300   +1 (425) 882-8080   +1 (425) 882-8081   \n",
       "6              USA   94040-111   +1 (650) 644-3358                None   \n",
       "7              USA       89503   +1 (775) 223-7665                None   \n",
       "8              USA       32801   +1 (407) 999-7788                None   \n",
       "9           Canada     K2P 1L7   +1 (613) 234-3322                None   \n",
       "10        Portugal        None  +351 (213) 466-111                None   \n",
       "11         Germany       60316    +49 069 40598889                None   \n",
       "12          France       33000  +33 05 56 96 96 96                None   \n",
       "13         Ireland        None     +353 01 6792424                None   \n",
       "14           Spain       28015     +34 914 454 454                None   \n",
       "15  United Kingdom    SW1V 3EN   +44 020 7976 5722                None   \n",
       "16           Chile        None   +56 (0)2 635 4444                None   \n",
       "17           India      110017   +91 0124 39883988                None   \n",
       "\n",
       "                       email  support_rep_id  invoice_id  customer_id  \\\n",
       "0       luisg@embraer.com.br               3          16            1   \n",
       "1        ftremblay@gmail.com               3          31            3   \n",
       "2   frantisekw@jetbrains.com               4          78            5   \n",
       "3            hholy@gmail.com               5         112            6   \n",
       "4   fernadaramos4@uol.com.br               4          73           13   \n",
       "5    jacksmith@microsoft.com               5          98           17   \n",
       "6        dmiller@comcast.com               4          17           20   \n",
       "7        kachase@hotmail.com               5          44           21   \n",
       "8         hleacock@gmail.com               4          43           22   \n",
       "9        edfrancis@yachoo.ca               3           2           30   \n",
       "10       jfernandes@yahoo.pt               4          35           34   \n",
       "11      fzimmermann@yahoo.de               3          40           37   \n",
       "12     wyatt.girard@yahoo.fr               3          12           42   \n",
       "13      hughoreilly@apple.ie               3          30           46   \n",
       "14    enrique_munoz@yahoo.es               5          41           50   \n",
       "15     phil.hughes@gmail.com               3          23           53   \n",
       "16        luisrojas@yahoo.cl               5          47           57   \n",
       "17   manoj.pareek@rediff.com               3          33           58   \n",
       "\n",
       "           invoice_date                  billing_address         billing_city  \\\n",
       "0   2017-01-26 00:00:00  Av. Brigadeiro Faria Lima, 2170  São José dos Campos   \n",
       "1   2017-02-21 00:00:00                1498 rue Bélanger             Montréal   \n",
       "2   2017-05-29 00:00:00                    Klanova 9/506               Prague   \n",
       "3   2017-08-31 00:00:00                    Rilská 3174/6               Prague   \n",
       "4   2017-05-18 00:00:00                     Qe 7 Bloco G             Brasília   \n",
       "5   2017-07-23 00:00:00                  1 Microsoft Way              Redmond   \n",
       "6   2017-01-28 00:00:00             541 Del Medio Avenue        Mountain View   \n",
       "7   2017-03-20 00:00:00                 801 W 4th Street                 Reno   \n",
       "8   2017-03-19 00:00:00                 120 S Orange Ave              Orlando   \n",
       "9   2017-01-03 00:00:00                 230 Elgin Street               Ottawa   \n",
       "10  2017-02-25 00:00:00               Rua da Assunção 53               Lisbon   \n",
       "11  2017-03-13 00:00:00                 Berger Straße 10            Frankfurt   \n",
       "12  2017-01-21 00:00:00           9, Place Louis Barthou             Bordeaux   \n",
       "13  2017-02-18 00:00:00                 3 Chatham Street               Dublin   \n",
       "14  2017-03-17 00:00:00               C/ San Bernardo 85               Madrid   \n",
       "15  2017-02-09 00:00:00                     113 Lupus St               London   \n",
       "16  2017-03-25 00:00:00                  Calle Lira, 198             Santiago   \n",
       "17  2017-02-21 00:00:00              12,Community Centre                Delhi   \n",
       "\n",
       "   billing_state billing_country billing_postal_code  total  \n",
       "0             SP          Brazil           12227-000   8.91  \n",
       "1             QC          Canada             H2G 1A7  19.80  \n",
       "2           None  Czech Republic               14700   8.91  \n",
       "3           None  Czech Republic               14300  16.83  \n",
       "4             DF          Brazil           71020-677  14.85  \n",
       "5             WA             USA          98052-8300   9.90  \n",
       "6             CA             USA           94040-111  10.89  \n",
       "7             NV             USA               89503  11.88  \n",
       "8             FL             USA               32801   6.93  \n",
       "9             ON          Canada             K2P 1L7   9.90  \n",
       "10          None        Portugal                None   4.95  \n",
       "11          None         Germany               60316   9.90  \n",
       "12          None          France               33000   3.96  \n",
       "13        Dublin         Ireland                None  10.89  \n",
       "14          None           Spain               28015   7.92  \n",
       "15          None  United Kingdom            SW1V 3EN   9.90  \n",
       "16          None           Chile                None   5.94  \n",
       "17          None           India              110017   3.96  "
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "q=\"\"\"\n",
    "SELECT c.*, i.*\n",
    "FROM invoice i\n",
    "INNER JOIN customer c ON i.customer_id = c.customer_id\n",
    "GROUP BY 1\n",
    "HAVING SUM(i.total) > 90\n",
    "\"\"\"\n",
    "i_c_90=pd.read_sql(q,conn)\n",
    "print(len(i_c_90))\n",
    "i_c_90"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}