{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Lesson 8 exercises\n",
    "\n",
    "**[Data set download](https://s3.amazonaws.com/bebi103.caltech.edu/data/penguins_subset.csv)**\n",
    "\n",
    "<hr />"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Colab setup ------------------\n",
    "import os, sys, subprocess\n",
    "if \"google.colab\" in sys.modules:\n",
    "    cmd = \"pip install --upgrade iqplot bebi103 watermark\"\n",
    "    process = subprocess.Popen(cmd.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)\n",
    "    stdout, stderr = process.communicate()\n",
    "    data_path = \"https://s3.amazonaws.com/bebi103.caltech.edu/data/\"\n",
    "else:\n",
    "    data_path = \"../data/\"\n",
    "# ------------------------------\n",
    "\n",
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<hr />\n",
    "\n",
    "## Exercise 8.1\n",
    "\n",
    "In the lesson exercise, we will again work with a subset of the Palmer penguin data set. I will load it and view it now."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead tr th {\n",
       "        text-align: left;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th colspan=\"4\" halign=\"left\">Gentoo</th>\n",
       "      <th colspan=\"4\" halign=\"left\">Adelie</th>\n",
       "      <th colspan=\"4\" halign=\"left\">Chinstrap</th>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th></th>\n",
       "      <th>bill_depth_mm</th>\n",
       "      <th>bill_length_mm</th>\n",
       "      <th>flipper_length_mm</th>\n",
       "      <th>body_mass_g</th>\n",
       "      <th>bill_depth_mm</th>\n",
       "      <th>bill_length_mm</th>\n",
       "      <th>flipper_length_mm</th>\n",
       "      <th>body_mass_g</th>\n",
       "      <th>bill_depth_mm</th>\n",
       "      <th>bill_length_mm</th>\n",
       "      <th>flipper_length_mm</th>\n",
       "      <th>body_mass_g</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <th>0</th>\n",
       "      <td>16.3</td>\n",
       "      <td>48.4</td>\n",
       "      <td>220.0</td>\n",
       "      <td>5400.0</td>\n",
       "      <td>18.5</td>\n",
       "      <td>36.8</td>\n",
       "      <td>193.0</td>\n",
       "      <td>3500.0</td>\n",
       "      <td>18.3</td>\n",
       "      <td>47.6</td>\n",
       "      <td>195.0</td>\n",
       "      <td>3850.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>1</th>\n",
       "      <td>15.8</td>\n",
       "      <td>46.3</td>\n",
       "      <td>215.0</td>\n",
       "      <td>5050.0</td>\n",
       "      <td>16.9</td>\n",
       "      <td>37.0</td>\n",
       "      <td>185.0</td>\n",
       "      <td>3000.0</td>\n",
       "      <td>16.7</td>\n",
       "      <td>42.5</td>\n",
       "      <td>187.0</td>\n",
       "      <td>3350.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>2</th>\n",
       "      <td>14.2</td>\n",
       "      <td>47.5</td>\n",
       "      <td>209.0</td>\n",
       "      <td>4600.0</td>\n",
       "      <td>19.5</td>\n",
       "      <td>42.0</td>\n",
       "      <td>200.0</td>\n",
       "      <td>4050.0</td>\n",
       "      <td>16.6</td>\n",
       "      <td>40.9</td>\n",
       "      <td>187.0</td>\n",
       "      <td>3200.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>3</th>\n",
       "      <td>15.7</td>\n",
       "      <td>48.7</td>\n",
       "      <td>208.0</td>\n",
       "      <td>5350.0</td>\n",
       "      <td>18.3</td>\n",
       "      <td>42.7</td>\n",
       "      <td>196.0</td>\n",
       "      <td>4075.0</td>\n",
       "      <td>20.0</td>\n",
       "      <td>52.8</td>\n",
       "      <td>205.0</td>\n",
       "      <td>4550.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <th>4</th>\n",
       "      <td>14.1</td>\n",
       "      <td>48.7</td>\n",
       "      <td>210.0</td>\n",
       "      <td>4450.0</td>\n",
       "      <td>18.0</td>\n",
       "      <td>35.7</td>\n",
       "      <td>202.0</td>\n",
       "      <td>3550.0</td>\n",
       "      <td>18.7</td>\n",
       "      <td>45.4</td>\n",
       "      <td>188.0</td>\n",
       "      <td>3525.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "         Gentoo                                                     Adelie  \\\n",
       "  bill_depth_mm bill_length_mm flipper_length_mm body_mass_g bill_depth_mm   \n",
       "0          16.3           48.4             220.0      5400.0          18.5   \n",
       "1          15.8           46.3             215.0      5050.0          16.9   \n",
       "2          14.2           47.5             209.0      4600.0          19.5   \n",
       "3          15.7           48.7             208.0      5350.0          18.3   \n",
       "4          14.1           48.7             210.0      4450.0          18.0   \n",
       "\n",
       "                                                   Chinstrap                 \\\n",
       "  bill_length_mm flipper_length_mm body_mass_g bill_depth_mm bill_length_mm   \n",
       "0           36.8             193.0      3500.0          18.3           47.6   \n",
       "1           37.0             185.0      3000.0          16.7           42.5   \n",
       "2           42.0             200.0      4050.0          16.6           40.9   \n",
       "3           42.7             196.0      4075.0          20.0           52.8   \n",
       "4           35.7             202.0      3550.0          18.7           45.4   \n",
       "\n",
       "                                 \n",
       "  flipper_length_mm body_mass_g  \n",
       "0             195.0      3850.0  \n",
       "1             187.0      3350.0  \n",
       "2             187.0      3200.0  \n",
       "3             205.0      4550.0  \n",
       "4             188.0      3525.0  "
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.read_csv(os.path.join(data_path, \"penguins_subset.csv\"), header=[0, 1])\n",
    "\n",
    "df.head()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Explain in words what each of the following code cells does as we work toward tidying this data frame."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "df.columns.names = ['species', None]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.stack(level='species')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.reset_index(level='species')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [],
   "source": [
    "df = df.reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br />"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercise 8.2\n",
    "\n",
    "What is the difference between merging and concatenating data frames?"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br />"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Exercise 8.3\n",
    "\n",
    "Write down any questions or points of confusion that you have."
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "<br />"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Computing environment"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "CPython 3.8.5\n",
      "IPython 7.18.1\n",
      "\n",
      "pandas 1.1.3\n",
      "jupyterlab 2.2.6\n"
     ]
    }
   ],
   "source": [
    "%load_ext watermark\n",
    "%watermark -v -p pandas,jupyterlab"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.8.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}