{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": {
      "name": "sample_datasets.ipynb",
      "provenance": [],
      "collapsed_sections": [],
      "authorship_tag": "ABX9TyMHJcfhFEEQm/LsB7gh7HZ0",
      "include_colab_link": true
    },
    "kernelspec": {
      "name": "python3",
      "display_name": "Python 3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "cells": [
    {
      "cell_type": "markdown",
      "metadata": {
        "id": "view-in-github",
        "colab_type": "text"
      },
      "source": [
        "<a href=\"https://colab.research.google.com/github/cyrus723/my-first-binder/blob/main/sample_datasets.ipynb\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>"
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "pip install pydataset"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "WHg_b6MTGuZu",
        "outputId": "ecbfd75a-bfbd-4967-e566-a5594e7456ad"
      },
      "execution_count": 7,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Collecting pydataset\n",
            "  Downloading pydataset-0.2.0.tar.gz (15.9 MB)\n",
            "\u001b[K     |████████████████████████████████| 15.9 MB 3.8 MB/s \n",
            "\u001b[?25hRequirement already satisfied: pandas in /usr/local/lib/python3.7/dist-packages (from pydataset) (1.3.5)\n",
            "Requirement already satisfied: numpy>=1.17.3 in /usr/local/lib/python3.7/dist-packages (from pandas->pydataset) (1.21.6)\n",
            "Requirement already satisfied: pytz>=2017.3 in /usr/local/lib/python3.7/dist-packages (from pandas->pydataset) (2022.1)\n",
            "Requirement already satisfied: python-dateutil>=2.7.3 in /usr/local/lib/python3.7/dist-packages (from pandas->pydataset) (2.8.2)\n",
            "Requirement already satisfied: six>=1.5 in /usr/local/lib/python3.7/dist-packages (from python-dateutil>=2.7.3->pandas->pydataset) (1.15.0)\n",
            "Building wheels for collected packages: pydataset\n",
            "  Building wheel for pydataset (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
            "  Created wheel for pydataset: filename=pydataset-0.2.0-py3-none-any.whl size=15939432 sha256=a806d6fc5bf803cbd7f0adf0dadb93635522366f99f7334a9dcb209087a69360\n",
            "  Stored in directory: /root/.cache/pip/wheels/32/26/30/d71562a19eed948eaada9a61b4d722fa358657a3bfb5d151e2\n",
            "Successfully built pydataset\n",
            "Installing collected packages: pydataset\n",
            "Successfully installed pydataset-0.2.0\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Import package\n",
        "from pydataset import data\n",
        "# Check out datasets\n",
        "print(data())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "As1heSJCGuWg",
        "outputId": "b0792618-1712-4115-c189-5404ebfffc8f"
      },
      "execution_count": 25,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "        dataset_id                                             title\n",
            "0    AirPassengers       Monthly Airline Passenger Numbers 1949-1960\n",
            "1          BJsales                 Sales Data with Leading Indicator\n",
            "2              BOD                         Biochemical Oxygen Demand\n",
            "3     Formaldehyde                     Determination of Formaldehyde\n",
            "4     HairEyeColor         Hair and Eye Color of Statistics Students\n",
            "..             ...                                               ...\n",
            "752        VerbAgg                  Verbal Aggression item responses\n",
            "753           cake                 Breakage Angle of Chocolate Cakes\n",
            "754           cbpp                 Contagious bovine pleuropneumonia\n",
            "755    grouseticks  Data on red grouse ticks from Elston et al. 2001\n",
            "756     sleepstudy       Reaction times in a sleep deprivation study\n",
            "\n",
            "[757 rows x 2 columns]\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Create a function to glimpse the data\n",
        "def glimpse(df):\n",
        "    print(f\"{df.shape[0]} rows and {df.shape[1]} columns\")\n",
        "    display(df.head())\n",
        "    display(df.tail())"
      ],
      "metadata": {
        "id": "S8ag0sluGuTZ"
      },
      "execution_count": 9,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "# Load as a dataframe\n",
        "df = data('cake')\n",
        "glimpse(df)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 413
        },
        "id": "Mab4X3gbGuRE",
        "outputId": "93a85f27-34e0-4927-a919-4cc9107a7009"
      },
      "execution_count": 12,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "270 rows and 5 columns\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "   replicate recipe  temperature  angle  temp\n",
              "1          1      A          175     42   175\n",
              "2          1      A          185     46   185\n",
              "3          1      A          195     47   195\n",
              "4          1      A          205     39   205\n",
              "5          1      A          215     53   215"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-6fd4b7d1-97b6-4c9e-8cee-67badb539c8f\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>replicate</th>\n",
              "      <th>recipe</th>\n",
              "      <th>temperature</th>\n",
              "      <th>angle</th>\n",
              "      <th>temp</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1</td>\n",
              "      <td>A</td>\n",
              "      <td>175</td>\n",
              "      <td>42</td>\n",
              "      <td>175</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1</td>\n",
              "      <td>A</td>\n",
              "      <td>185</td>\n",
              "      <td>46</td>\n",
              "      <td>185</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1</td>\n",
              "      <td>A</td>\n",
              "      <td>195</td>\n",
              "      <td>47</td>\n",
              "      <td>195</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1</td>\n",
              "      <td>A</td>\n",
              "      <td>205</td>\n",
              "      <td>39</td>\n",
              "      <td>205</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>5</th>\n",
              "      <td>1</td>\n",
              "      <td>A</td>\n",
              "      <td>215</td>\n",
              "      <td>53</td>\n",
              "      <td>215</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-6fd4b7d1-97b6-4c9e-8cee-67badb539c8f')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-6fd4b7d1-97b6-4c9e-8cee-67badb539c8f button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-6fd4b7d1-97b6-4c9e-8cee-67badb539c8f');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "     replicate recipe  temperature  angle  temp\n",
              "266         15      C          185     28   185\n",
              "267         15      C          195     25   195\n",
              "268         15      C          205     25   205\n",
              "269         15      C          215     31   215\n",
              "270         15      C          225     25   225"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-f91ed822-b885-4b2d-9cfe-358987da0bb5\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>replicate</th>\n",
              "      <th>recipe</th>\n",
              "      <th>temperature</th>\n",
              "      <th>angle</th>\n",
              "      <th>temp</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>266</th>\n",
              "      <td>15</td>\n",
              "      <td>C</td>\n",
              "      <td>185</td>\n",
              "      <td>28</td>\n",
              "      <td>185</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>267</th>\n",
              "      <td>15</td>\n",
              "      <td>C</td>\n",
              "      <td>195</td>\n",
              "      <td>25</td>\n",
              "      <td>195</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>268</th>\n",
              "      <td>15</td>\n",
              "      <td>C</td>\n",
              "      <td>205</td>\n",
              "      <td>25</td>\n",
              "      <td>205</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>269</th>\n",
              "      <td>15</td>\n",
              "      <td>C</td>\n",
              "      <td>215</td>\n",
              "      <td>31</td>\n",
              "      <td>215</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>270</th>\n",
              "      <td>15</td>\n",
              "      <td>C</td>\n",
              "      <td>225</td>\n",
              "      <td>25</td>\n",
              "      <td>225</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f91ed822-b885-4b2d-9cfe-358987da0bb5')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-f91ed822-b885-4b2d-9cfe-358987da0bb5 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-f91ed822-b885-4b2d-9cfe-358987da0bb5');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Import seaborn\n",
        "import seaborn as sns\n",
        "# Check out available datasets\n",
        "print(sns.get_dataset_names())"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "liCs1voBGuOx",
        "outputId": "b5222ee7-176f-4dc2-cad6-d5574cfd83ba"
      },
      "execution_count": 13,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "['anagrams', 'anscombe', 'attention', 'brain_networks', 'car_crashes', 'diamonds', 'dots', 'exercise', 'flights', 'fmri', 'gammas', 'geyser', 'iris', 'mpg', 'penguins', 'planets', 'taxis', 'tips', 'titanic']\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "df = sns.load_dataset('flights')\n",
        "glimpse(df)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 413
        },
        "id": "dmH_dMegKY-M",
        "outputId": "5ec16c95-3cc3-439e-94d5-f9ebc186ccd1"
      },
      "execution_count": 17,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "144 rows and 3 columns\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "   year month  passengers\n",
              "0  1949   Jan         112\n",
              "1  1949   Feb         118\n",
              "2  1949   Mar         132\n",
              "3  1949   Apr         129\n",
              "4  1949   May         121"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-82d7c4ab-5be2-4c16-b506-97eb6f6923c5\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>year</th>\n",
              "      <th>month</th>\n",
              "      <th>passengers</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1949</td>\n",
              "      <td>Jan</td>\n",
              "      <td>112</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1949</td>\n",
              "      <td>Feb</td>\n",
              "      <td>118</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1949</td>\n",
              "      <td>Mar</td>\n",
              "      <td>132</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1949</td>\n",
              "      <td>Apr</td>\n",
              "      <td>129</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1949</td>\n",
              "      <td>May</td>\n",
              "      <td>121</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-82d7c4ab-5be2-4c16-b506-97eb6f6923c5')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-82d7c4ab-5be2-4c16-b506-97eb6f6923c5 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-82d7c4ab-5be2-4c16-b506-97eb6f6923c5');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "     year month  passengers\n",
              "139  1960   Aug         606\n",
              "140  1960   Sep         508\n",
              "141  1960   Oct         461\n",
              "142  1960   Nov         390\n",
              "143  1960   Dec         432"
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-718a129a-270e-4006-8197-9b5026033c01\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>year</th>\n",
              "      <th>month</th>\n",
              "      <th>passengers</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>139</th>\n",
              "      <td>1960</td>\n",
              "      <td>Aug</td>\n",
              "      <td>606</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>140</th>\n",
              "      <td>1960</td>\n",
              "      <td>Sep</td>\n",
              "      <td>508</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>141</th>\n",
              "      <td>1960</td>\n",
              "      <td>Oct</td>\n",
              "      <td>461</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>142</th>\n",
              "      <td>1960</td>\n",
              "      <td>Nov</td>\n",
              "      <td>390</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>143</th>\n",
              "      <td>1960</td>\n",
              "      <td>Dec</td>\n",
              "      <td>432</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-718a129a-270e-4006-8197-9b5026033c01')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-718a129a-270e-4006-8197-9b5026033c01 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-718a129a-270e-4006-8197-9b5026033c01');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Import package\n",
        "from sklearn.datasets import fetch_california_housing\n",
        "# Load data (will download the data if it's the first time loading)\n",
        "housing = fetch_california_housing(as_frame=True)\n",
        "# Create a dataframe\n",
        "df = housing['data'].join(housing['target'])\n",
        "glimpse(df)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 413
        },
        "id": "HMEilRlhKY7i",
        "outputId": "392243f4-a55e-40ae-aa44-6947e672ebf8"
      },
      "execution_count": 18,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "20640 rows and 9 columns\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "   MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \\\n",
              "0  8.3252      41.0  6.984127   1.023810       322.0  2.555556     37.88   \n",
              "1  8.3014      21.0  6.238137   0.971880      2401.0  2.109842     37.86   \n",
              "2  7.2574      52.0  8.288136   1.073446       496.0  2.802260     37.85   \n",
              "3  5.6431      52.0  5.817352   1.073059       558.0  2.547945     37.85   \n",
              "4  3.8462      52.0  6.281853   1.081081       565.0  2.181467     37.85   \n",
              "\n",
              "   Longitude  MedHouseVal  \n",
              "0    -122.23        4.526  \n",
              "1    -122.22        3.585  \n",
              "2    -122.24        3.521  \n",
              "3    -122.25        3.413  \n",
              "4    -122.25        3.422  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-7e0b88d0-721c-4f11-9dde-1368b1ac1fee\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>MedInc</th>\n",
              "      <th>HouseAge</th>\n",
              "      <th>AveRooms</th>\n",
              "      <th>AveBedrms</th>\n",
              "      <th>Population</th>\n",
              "      <th>AveOccup</th>\n",
              "      <th>Latitude</th>\n",
              "      <th>Longitude</th>\n",
              "      <th>MedHouseVal</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>8.3252</td>\n",
              "      <td>41.0</td>\n",
              "      <td>6.984127</td>\n",
              "      <td>1.023810</td>\n",
              "      <td>322.0</td>\n",
              "      <td>2.555556</td>\n",
              "      <td>37.88</td>\n",
              "      <td>-122.23</td>\n",
              "      <td>4.526</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>8.3014</td>\n",
              "      <td>21.0</td>\n",
              "      <td>6.238137</td>\n",
              "      <td>0.971880</td>\n",
              "      <td>2401.0</td>\n",
              "      <td>2.109842</td>\n",
              "      <td>37.86</td>\n",
              "      <td>-122.22</td>\n",
              "      <td>3.585</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>7.2574</td>\n",
              "      <td>52.0</td>\n",
              "      <td>8.288136</td>\n",
              "      <td>1.073446</td>\n",
              "      <td>496.0</td>\n",
              "      <td>2.802260</td>\n",
              "      <td>37.85</td>\n",
              "      <td>-122.24</td>\n",
              "      <td>3.521</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>5.6431</td>\n",
              "      <td>52.0</td>\n",
              "      <td>5.817352</td>\n",
              "      <td>1.073059</td>\n",
              "      <td>558.0</td>\n",
              "      <td>2.547945</td>\n",
              "      <td>37.85</td>\n",
              "      <td>-122.25</td>\n",
              "      <td>3.413</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>3.8462</td>\n",
              "      <td>52.0</td>\n",
              "      <td>6.281853</td>\n",
              "      <td>1.081081</td>\n",
              "      <td>565.0</td>\n",
              "      <td>2.181467</td>\n",
              "      <td>37.85</td>\n",
              "      <td>-122.25</td>\n",
              "      <td>3.422</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-7e0b88d0-721c-4f11-9dde-1368b1ac1fee')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-7e0b88d0-721c-4f11-9dde-1368b1ac1fee button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-7e0b88d0-721c-4f11-9dde-1368b1ac1fee');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "       MedInc  HouseAge  AveRooms  AveBedrms  Population  AveOccup  Latitude  \\\n",
              "20635  1.5603      25.0  5.045455   1.133333       845.0  2.560606     39.48   \n",
              "20636  2.5568      18.0  6.114035   1.315789       356.0  3.122807     39.49   \n",
              "20637  1.7000      17.0  5.205543   1.120092      1007.0  2.325635     39.43   \n",
              "20638  1.8672      18.0  5.329513   1.171920       741.0  2.123209     39.43   \n",
              "20639  2.3886      16.0  5.254717   1.162264      1387.0  2.616981     39.37   \n",
              "\n",
              "       Longitude  MedHouseVal  \n",
              "20635    -121.09        0.781  \n",
              "20636    -121.21        0.771  \n",
              "20637    -121.22        0.923  \n",
              "20638    -121.32        0.847  \n",
              "20639    -121.24        0.894  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-5982b96a-e17c-48e4-93a1-9632132a0f13\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>MedInc</th>\n",
              "      <th>HouseAge</th>\n",
              "      <th>AveRooms</th>\n",
              "      <th>AveBedrms</th>\n",
              "      <th>Population</th>\n",
              "      <th>AveOccup</th>\n",
              "      <th>Latitude</th>\n",
              "      <th>Longitude</th>\n",
              "      <th>MedHouseVal</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>20635</th>\n",
              "      <td>1.5603</td>\n",
              "      <td>25.0</td>\n",
              "      <td>5.045455</td>\n",
              "      <td>1.133333</td>\n",
              "      <td>845.0</td>\n",
              "      <td>2.560606</td>\n",
              "      <td>39.48</td>\n",
              "      <td>-121.09</td>\n",
              "      <td>0.781</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20636</th>\n",
              "      <td>2.5568</td>\n",
              "      <td>18.0</td>\n",
              "      <td>6.114035</td>\n",
              "      <td>1.315789</td>\n",
              "      <td>356.0</td>\n",
              "      <td>3.122807</td>\n",
              "      <td>39.49</td>\n",
              "      <td>-121.21</td>\n",
              "      <td>0.771</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20637</th>\n",
              "      <td>1.7000</td>\n",
              "      <td>17.0</td>\n",
              "      <td>5.205543</td>\n",
              "      <td>1.120092</td>\n",
              "      <td>1007.0</td>\n",
              "      <td>2.325635</td>\n",
              "      <td>39.43</td>\n",
              "      <td>-121.22</td>\n",
              "      <td>0.923</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20638</th>\n",
              "      <td>1.8672</td>\n",
              "      <td>18.0</td>\n",
              "      <td>5.329513</td>\n",
              "      <td>1.171920</td>\n",
              "      <td>741.0</td>\n",
              "      <td>2.123209</td>\n",
              "      <td>39.43</td>\n",
              "      <td>-121.32</td>\n",
              "      <td>0.847</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>20639</th>\n",
              "      <td>2.3886</td>\n",
              "      <td>16.0</td>\n",
              "      <td>5.254717</td>\n",
              "      <td>1.162264</td>\n",
              "      <td>1387.0</td>\n",
              "      <td>2.616981</td>\n",
              "      <td>39.37</td>\n",
              "      <td>-121.24</td>\n",
              "      <td>0.894</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-5982b96a-e17c-48e4-93a1-9632132a0f13')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-5982b96a-e17c-48e4-93a1-9632132a0f13 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-5982b96a-e17c-48e4-93a1-9632132a0f13');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Import package\n",
        "import statsmodels.api as sm\n",
        "# Load data as a dataframe\n",
        "df = sm.datasets.macrodata.load_pandas()['data']\n",
        "glimpse(df)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 467
        },
        "id": "SRbMPdLKKY5J",
        "outputId": "36a9e4f2-d5a9-4a86-c6a1-9684afe58e71"
      },
      "execution_count": 19,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stderr",
          "text": [
            "/usr/local/lib/python3.7/dist-packages/statsmodels/tools/_testing.py:19: FutureWarning: pandas.util.testing is deprecated. Use the functions in the public API at pandas.testing instead.\n",
            "  import pandas.util.testing as tm\n"
          ]
        },
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "203 rows and 14 columns\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "     year  quarter   realgdp  realcons  realinv  realgovt  realdpi    cpi  \\\n",
              "0  1959.0      1.0  2710.349    1707.4  286.898   470.045   1886.9  28.98   \n",
              "1  1959.0      2.0  2778.801    1733.7  310.859   481.301   1919.7  29.15   \n",
              "2  1959.0      3.0  2775.488    1751.8  289.226   491.260   1916.4  29.35   \n",
              "3  1959.0      4.0  2785.204    1753.7  299.356   484.052   1931.3  29.37   \n",
              "4  1960.0      1.0  2847.699    1770.5  331.722   462.199   1955.5  29.54   \n",
              "\n",
              "      m1  tbilrate  unemp      pop  infl  realint  \n",
              "0  139.7      2.82    5.8  177.146  0.00     0.00  \n",
              "1  141.7      3.08    5.1  177.830  2.34     0.74  \n",
              "2  140.5      3.82    5.3  178.657  2.74     1.09  \n",
              "3  140.0      4.33    5.6  179.386  0.27     4.06  \n",
              "4  139.6      3.50    5.2  180.007  2.31     1.19  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-8bc825ae-688e-4180-9d62-15335fc3a00d\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>year</th>\n",
              "      <th>quarter</th>\n",
              "      <th>realgdp</th>\n",
              "      <th>realcons</th>\n",
              "      <th>realinv</th>\n",
              "      <th>realgovt</th>\n",
              "      <th>realdpi</th>\n",
              "      <th>cpi</th>\n",
              "      <th>m1</th>\n",
              "      <th>tbilrate</th>\n",
              "      <th>unemp</th>\n",
              "      <th>pop</th>\n",
              "      <th>infl</th>\n",
              "      <th>realint</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>1959.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>2710.349</td>\n",
              "      <td>1707.4</td>\n",
              "      <td>286.898</td>\n",
              "      <td>470.045</td>\n",
              "      <td>1886.9</td>\n",
              "      <td>28.98</td>\n",
              "      <td>139.7</td>\n",
              "      <td>2.82</td>\n",
              "      <td>5.8</td>\n",
              "      <td>177.146</td>\n",
              "      <td>0.00</td>\n",
              "      <td>0.00</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>1959.0</td>\n",
              "      <td>2.0</td>\n",
              "      <td>2778.801</td>\n",
              "      <td>1733.7</td>\n",
              "      <td>310.859</td>\n",
              "      <td>481.301</td>\n",
              "      <td>1919.7</td>\n",
              "      <td>29.15</td>\n",
              "      <td>141.7</td>\n",
              "      <td>3.08</td>\n",
              "      <td>5.1</td>\n",
              "      <td>177.830</td>\n",
              "      <td>2.34</td>\n",
              "      <td>0.74</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>1959.0</td>\n",
              "      <td>3.0</td>\n",
              "      <td>2775.488</td>\n",
              "      <td>1751.8</td>\n",
              "      <td>289.226</td>\n",
              "      <td>491.260</td>\n",
              "      <td>1916.4</td>\n",
              "      <td>29.35</td>\n",
              "      <td>140.5</td>\n",
              "      <td>3.82</td>\n",
              "      <td>5.3</td>\n",
              "      <td>178.657</td>\n",
              "      <td>2.74</td>\n",
              "      <td>1.09</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>1959.0</td>\n",
              "      <td>4.0</td>\n",
              "      <td>2785.204</td>\n",
              "      <td>1753.7</td>\n",
              "      <td>299.356</td>\n",
              "      <td>484.052</td>\n",
              "      <td>1931.3</td>\n",
              "      <td>29.37</td>\n",
              "      <td>140.0</td>\n",
              "      <td>4.33</td>\n",
              "      <td>5.6</td>\n",
              "      <td>179.386</td>\n",
              "      <td>0.27</td>\n",
              "      <td>4.06</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>1960.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>2847.699</td>\n",
              "      <td>1770.5</td>\n",
              "      <td>331.722</td>\n",
              "      <td>462.199</td>\n",
              "      <td>1955.5</td>\n",
              "      <td>29.54</td>\n",
              "      <td>139.6</td>\n",
              "      <td>3.50</td>\n",
              "      <td>5.2</td>\n",
              "      <td>180.007</td>\n",
              "      <td>2.31</td>\n",
              "      <td>1.19</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8bc825ae-688e-4180-9d62-15335fc3a00d')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-8bc825ae-688e-4180-9d62-15335fc3a00d button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-8bc825ae-688e-4180-9d62-15335fc3a00d');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "       year  quarter    realgdp  realcons   realinv  realgovt  realdpi  \\\n",
              "198  2008.0      3.0  13324.600    9267.7  1990.693   991.551   9838.3   \n",
              "199  2008.0      4.0  13141.920    9195.3  1857.661  1007.273   9920.4   \n",
              "200  2009.0      1.0  12925.410    9209.2  1558.494   996.287   9926.4   \n",
              "201  2009.0      2.0  12901.504    9189.0  1456.678  1023.528  10077.5   \n",
              "202  2009.0      3.0  12990.341    9256.0  1486.398  1044.088  10040.6   \n",
              "\n",
              "         cpi      m1  tbilrate  unemp      pop  infl  realint  \n",
              "198  216.889  1474.7      1.17    6.0  305.270 -3.16     4.33  \n",
              "199  212.174  1576.5      0.12    6.9  305.952 -8.79     8.91  \n",
              "200  212.671  1592.8      0.22    8.1  306.547  0.94    -0.71  \n",
              "201  214.469  1653.6      0.18    9.2  307.226  3.37    -3.19  \n",
              "202  216.385  1673.9      0.12    9.6  308.013  3.56    -3.44  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-eafef7df-5beb-45ea-ad5e-17a62ae7f971\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>year</th>\n",
              "      <th>quarter</th>\n",
              "      <th>realgdp</th>\n",
              "      <th>realcons</th>\n",
              "      <th>realinv</th>\n",
              "      <th>realgovt</th>\n",
              "      <th>realdpi</th>\n",
              "      <th>cpi</th>\n",
              "      <th>m1</th>\n",
              "      <th>tbilrate</th>\n",
              "      <th>unemp</th>\n",
              "      <th>pop</th>\n",
              "      <th>infl</th>\n",
              "      <th>realint</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>198</th>\n",
              "      <td>2008.0</td>\n",
              "      <td>3.0</td>\n",
              "      <td>13324.600</td>\n",
              "      <td>9267.7</td>\n",
              "      <td>1990.693</td>\n",
              "      <td>991.551</td>\n",
              "      <td>9838.3</td>\n",
              "      <td>216.889</td>\n",
              "      <td>1474.7</td>\n",
              "      <td>1.17</td>\n",
              "      <td>6.0</td>\n",
              "      <td>305.270</td>\n",
              "      <td>-3.16</td>\n",
              "      <td>4.33</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>199</th>\n",
              "      <td>2008.0</td>\n",
              "      <td>4.0</td>\n",
              "      <td>13141.920</td>\n",
              "      <td>9195.3</td>\n",
              "      <td>1857.661</td>\n",
              "      <td>1007.273</td>\n",
              "      <td>9920.4</td>\n",
              "      <td>212.174</td>\n",
              "      <td>1576.5</td>\n",
              "      <td>0.12</td>\n",
              "      <td>6.9</td>\n",
              "      <td>305.952</td>\n",
              "      <td>-8.79</td>\n",
              "      <td>8.91</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>200</th>\n",
              "      <td>2009.0</td>\n",
              "      <td>1.0</td>\n",
              "      <td>12925.410</td>\n",
              "      <td>9209.2</td>\n",
              "      <td>1558.494</td>\n",
              "      <td>996.287</td>\n",
              "      <td>9926.4</td>\n",
              "      <td>212.671</td>\n",
              "      <td>1592.8</td>\n",
              "      <td>0.22</td>\n",
              "      <td>8.1</td>\n",
              "      <td>306.547</td>\n",
              "      <td>0.94</td>\n",
              "      <td>-0.71</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>201</th>\n",
              "      <td>2009.0</td>\n",
              "      <td>2.0</td>\n",
              "      <td>12901.504</td>\n",
              "      <td>9189.0</td>\n",
              "      <td>1456.678</td>\n",
              "      <td>1023.528</td>\n",
              "      <td>10077.5</td>\n",
              "      <td>214.469</td>\n",
              "      <td>1653.6</td>\n",
              "      <td>0.18</td>\n",
              "      <td>9.2</td>\n",
              "      <td>307.226</td>\n",
              "      <td>3.37</td>\n",
              "      <td>-3.19</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>202</th>\n",
              "      <td>2009.0</td>\n",
              "      <td>3.0</td>\n",
              "      <td>12990.341</td>\n",
              "      <td>9256.0</td>\n",
              "      <td>1486.398</td>\n",
              "      <td>1044.088</td>\n",
              "      <td>10040.6</td>\n",
              "      <td>216.385</td>\n",
              "      <td>1673.9</td>\n",
              "      <td>0.12</td>\n",
              "      <td>9.6</td>\n",
              "      <td>308.013</td>\n",
              "      <td>3.56</td>\n",
              "      <td>-3.44</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-eafef7df-5beb-45ea-ad5e-17a62ae7f971')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-eafef7df-5beb-45ea-ad5e-17a62ae7f971 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-eafef7df-5beb-45ea-ad5e-17a62ae7f971');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "pip install nltk"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "V4i-MFxeKY2x",
        "outputId": "03ea599b-5ec9-433e-ecc1-7dea252b72e7"
      },
      "execution_count": 32,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "Requirement already satisfied: nltk in /usr/local/lib/python3.7/dist-packages (3.2.5)\n",
            "Requirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from nltk) (1.15.0)\n"
          ]
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Import package\n",
        "import nltk\n",
        "# Download the corpus (only need to do once)\n",
        "nltk.download('movie_reviews')"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gnQAwiSgKY0M",
        "outputId": "0d02eb88-0117-4eaa-b71d-1fb3acf3647c"
      },
      "execution_count": 33,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "[nltk_data] Downloading package movie_reviews to /root/nltk_data...\n",
            "[nltk_data]   Unzipping corpora/movie_reviews.zip.\n"
          ]
        },
        {
          "output_type": "execute_result",
          "data": {
            "text/plain": [
              "True"
            ]
          },
          "metadata": {},
          "execution_count": 33
        }
      ]
    },
    {
      "cell_type": "code",
      "source": [
        "# Import packages\n",
        "import pandas as pd\n",
        "from nltk.corpus import movie_reviews\n",
        "# Convert to dataframe\n",
        "documents = []\n",
        "for fileid in movie_reviews.fileids():\n",
        "    tag, filename = fileid.split('/')\n",
        "    documents.append((tag, movie_reviews.raw(fileid)))\n",
        "df = pd.DataFrame(documents, columns=['target', 'document'])\n",
        "glimpse(df)"
      ],
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 413
        },
        "id": "7d37DToOKYx7",
        "outputId": "db6112c3-9013-42ae-af8c-2bb0db2e93bd"
      },
      "execution_count": 34,
      "outputs": [
        {
          "output_type": "stream",
          "name": "stdout",
          "text": [
            "2000 rows and 2 columns\n"
          ]
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "  target  \\\n",
              "0    neg   \n",
              "1    neg   \n",
              "2    neg   \n",
              "3    neg   \n",
              "4    neg   \n",
              "\n",
              "                                                                                    document  \n",
              "0  plot : two teen couples go to a church party , drink and then drive . \\nthey get into ...  \n",
              "1  the happy bastard's quick movie review \\ndamn that y2k bug . \\nit's got a head start i...  \n",
              "2  it is movies like these that make a jaded movie viewer thankful for the invention of t...  \n",
              "3   \" quest for camelot \" is warner bros . ' first feature-length , fully-animated attemp...  \n",
              "4  synopsis : a mentally unstable man undergoing psychotherapy saves a boy from a potenti...  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-8d60e654-6970-4f5a-a608-023f1eedc315\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>target</th>\n",
              "      <th>document</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>neg</td>\n",
              "      <td>plot : two teen couples go to a church party , drink and then drive . \\nthey get into ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>neg</td>\n",
              "      <td>the happy bastard's quick movie review \\ndamn that y2k bug . \\nit's got a head start i...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>neg</td>\n",
              "      <td>it is movies like these that make a jaded movie viewer thankful for the invention of t...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>neg</td>\n",
              "      <td>\" quest for camelot \" is warner bros . ' first feature-length , fully-animated attemp...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>neg</td>\n",
              "      <td>synopsis : a mentally unstable man undergoing psychotherapy saves a boy from a potenti...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-8d60e654-6970-4f5a-a608-023f1eedc315')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-8d60e654-6970-4f5a-a608-023f1eedc315 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-8d60e654-6970-4f5a-a608-023f1eedc315');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        },
        {
          "output_type": "display_data",
          "data": {
            "text/plain": [
              "     target  \\\n",
              "1995    pos   \n",
              "1996    pos   \n",
              "1997    pos   \n",
              "1998    pos   \n",
              "1999    pos   \n",
              "\n",
              "                                                                                       document  \n",
              "1995  wow ! what a movie . \\nit's everything a movie can be : funny , dramatic , interesting...  \n",
              "1996  richard gere can be a commanding actor , but he's not always in great films . \\neveryt...  \n",
              "1997  glory--starring matthew broderick , denzel washington , and morgan freeman--is the tru...  \n",
              "1998  steven spielberg's second epic film on world war ii is an unquestioned masterpiece of ...  \n",
              "1999  truman ( \" true-man \" ) burbank is the perfect name for jim carrey's character in this...  "
            ],
            "text/html": [
              "\n",
              "  <div id=\"df-f8adb6ea-efb7-4f88-8b08-0e55a1a30087\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>target</th>\n",
              "      <th>document</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>1995</th>\n",
              "      <td>pos</td>\n",
              "      <td>wow ! what a movie . \\nit's everything a movie can be : funny , dramatic , interesting...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1996</th>\n",
              "      <td>pos</td>\n",
              "      <td>richard gere can be a commanding actor , but he's not always in great films . \\neveryt...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1997</th>\n",
              "      <td>pos</td>\n",
              "      <td>glory--starring matthew broderick , denzel washington , and morgan freeman--is the tru...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1998</th>\n",
              "      <td>pos</td>\n",
              "      <td>steven spielberg's second epic film on world war ii is an unquestioned masterpiece of ...</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1999</th>\n",
              "      <td>pos</td>\n",
              "      <td>truman ( \" true-man \" ) burbank is the perfect name for jim carrey's character in this...</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f8adb6ea-efb7-4f88-8b08-0e55a1a30087')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-f8adb6ea-efb7-4f88-8b08-0e55a1a30087 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-f8adb6ea-efb7-4f88-8b08-0e55a1a30087');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ]
          },
          "metadata": {}
        }
      ]
    }
  ]
}