{
  "cells": [
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {},
      "source": [
        "# ML/SGD Classifier: Weather Prediction\n",
        "[Click here to Interact with this code on nbViewer](https://nbviewer.org/github/ujwalnk/MachineLearning101/blob/main/docs/examples/Machine_Learning_01_Weather_Classfication.ipynb)\n",
        "## Data Preprocessing"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 1,
      "metadata": {
        "id": "5BZOlBpsIrkw"
      },
      "outputs": [],
      "source": [
        "import pandas as pd"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "k3cQdermJkB9"
      },
      "source": [
        "Get the `csv` from github"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 2,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 386
        },
        "id": "DuU9kyr-JlT6",
        "outputId": "12acf768-802d-40e2-a0a5-b2441f728318"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "\n",
              "  <div id=\"df-f967a217-3460-4920-8ea2-bfd85e2366b4\">\n",
              "    <div class=\"colab-df-container\">\n",
              "      <div>\n",
              "<style scoped>\n",
              "    .dataframe tbody tr th:only-of-type {\n",
              "        vertical-align: middle;\n",
              "    }\n",
              "\n",
              "    .dataframe tbody tr th {\n",
              "        vertical-align: top;\n",
              "    }\n",
              "\n",
              "    .dataframe thead th {\n",
              "        text-align: right;\n",
              "    }\n",
              "</style>\n",
              "<table border=\"1\" class=\"dataframe\">\n",
              "  <thead>\n",
              "    <tr style=\"text-align: right;\">\n",
              "      <th></th>\n",
              "      <th>Date</th>\n",
              "      <th>Location</th>\n",
              "      <th>MinTemp</th>\n",
              "      <th>MaxTemp</th>\n",
              "      <th>Rainfall</th>\n",
              "      <th>Evaporation</th>\n",
              "      <th>Sunshine</th>\n",
              "      <th>WindGustDir</th>\n",
              "      <th>WindGustSpeed</th>\n",
              "      <th>WindDir9am</th>\n",
              "      <th>...</th>\n",
              "      <th>Humidity9am</th>\n",
              "      <th>Humidity3pm</th>\n",
              "      <th>Pressure9am</th>\n",
              "      <th>Pressure3pm</th>\n",
              "      <th>Cloud9am</th>\n",
              "      <th>Cloud3pm</th>\n",
              "      <th>Temp9am</th>\n",
              "      <th>Temp3pm</th>\n",
              "      <th>RainToday</th>\n",
              "      <th>RainTomorrow</th>\n",
              "    </tr>\n",
              "  </thead>\n",
              "  <tbody>\n",
              "    <tr>\n",
              "      <th>0</th>\n",
              "      <td>2008-12-01</td>\n",
              "      <td>Albury</td>\n",
              "      <td>13.4</td>\n",
              "      <td>22.9</td>\n",
              "      <td>0.6</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>W</td>\n",
              "      <td>44.0</td>\n",
              "      <td>W</td>\n",
              "      <td>...</td>\n",
              "      <td>71.0</td>\n",
              "      <td>22.0</td>\n",
              "      <td>1007.7</td>\n",
              "      <td>1007.1</td>\n",
              "      <td>8.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>16.9</td>\n",
              "      <td>21.8</td>\n",
              "      <td>No</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>1</th>\n",
              "      <td>2008-12-02</td>\n",
              "      <td>Albury</td>\n",
              "      <td>7.4</td>\n",
              "      <td>25.1</td>\n",
              "      <td>0.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>WNW</td>\n",
              "      <td>44.0</td>\n",
              "      <td>NNW</td>\n",
              "      <td>...</td>\n",
              "      <td>44.0</td>\n",
              "      <td>25.0</td>\n",
              "      <td>1010.6</td>\n",
              "      <td>1007.8</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>17.2</td>\n",
              "      <td>24.3</td>\n",
              "      <td>No</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>2</th>\n",
              "      <td>2008-12-03</td>\n",
              "      <td>Albury</td>\n",
              "      <td>12.9</td>\n",
              "      <td>25.7</td>\n",
              "      <td>0.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>WSW</td>\n",
              "      <td>46.0</td>\n",
              "      <td>W</td>\n",
              "      <td>...</td>\n",
              "      <td>38.0</td>\n",
              "      <td>30.0</td>\n",
              "      <td>1007.6</td>\n",
              "      <td>1008.7</td>\n",
              "      <td>NaN</td>\n",
              "      <td>2.0</td>\n",
              "      <td>21.0</td>\n",
              "      <td>23.2</td>\n",
              "      <td>No</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>3</th>\n",
              "      <td>2008-12-04</td>\n",
              "      <td>Albury</td>\n",
              "      <td>9.2</td>\n",
              "      <td>28.0</td>\n",
              "      <td>0.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NE</td>\n",
              "      <td>24.0</td>\n",
              "      <td>SE</td>\n",
              "      <td>...</td>\n",
              "      <td>45.0</td>\n",
              "      <td>16.0</td>\n",
              "      <td>1017.6</td>\n",
              "      <td>1012.8</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>18.1</td>\n",
              "      <td>26.5</td>\n",
              "      <td>No</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "    <tr>\n",
              "      <th>4</th>\n",
              "      <td>2008-12-05</td>\n",
              "      <td>Albury</td>\n",
              "      <td>17.5</td>\n",
              "      <td>32.3</td>\n",
              "      <td>1.0</td>\n",
              "      <td>NaN</td>\n",
              "      <td>NaN</td>\n",
              "      <td>W</td>\n",
              "      <td>41.0</td>\n",
              "      <td>ENE</td>\n",
              "      <td>...</td>\n",
              "      <td>82.0</td>\n",
              "      <td>33.0</td>\n",
              "      <td>1010.8</td>\n",
              "      <td>1006.0</td>\n",
              "      <td>7.0</td>\n",
              "      <td>8.0</td>\n",
              "      <td>17.8</td>\n",
              "      <td>29.7</td>\n",
              "      <td>No</td>\n",
              "      <td>No</td>\n",
              "    </tr>\n",
              "  </tbody>\n",
              "</table>\n",
              "<p>5 rows × 23 columns</p>\n",
              "</div>\n",
              "      <button class=\"colab-df-convert\" onclick=\"convertToInteractive('df-f967a217-3460-4920-8ea2-bfd85e2366b4')\"\n",
              "              title=\"Convert this dataframe to an interactive table.\"\n",
              "              style=\"display:none;\">\n",
              "        \n",
              "  <svg xmlns=\"http://www.w3.org/2000/svg\" height=\"24px\"viewBox=\"0 0 24 24\"\n",
              "       width=\"24px\">\n",
              "    <path d=\"M0 0h24v24H0V0z\" fill=\"none\"/>\n",
              "    <path d=\"M18.56 5.44l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94zm-11 1L8.5 8.5l.94-2.06 2.06-.94-2.06-.94L8.5 2.5l-.94 2.06-2.06.94zm10 10l.94 2.06.94-2.06 2.06-.94-2.06-.94-.94-2.06-.94 2.06-2.06.94z\"/><path d=\"M17.41 7.96l-1.37-1.37c-.4-.4-.92-.59-1.43-.59-.52 0-1.04.2-1.43.59L10.3 9.45l-7.72 7.72c-.78.78-.78 2.05 0 2.83L4 21.41c.39.39.9.59 1.41.59.51 0 1.02-.2 1.41-.59l7.78-7.78 2.81-2.81c.8-.78.8-2.07 0-2.86zM5.41 20L4 18.59l7.72-7.72 1.47 1.35L5.41 20z\"/>\n",
              "  </svg>\n",
              "      </button>\n",
              "      \n",
              "  <style>\n",
              "    .colab-df-container {\n",
              "      display:flex;\n",
              "      flex-wrap:wrap;\n",
              "      gap: 12px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert {\n",
              "      background-color: #E8F0FE;\n",
              "      border: none;\n",
              "      border-radius: 50%;\n",
              "      cursor: pointer;\n",
              "      display: none;\n",
              "      fill: #1967D2;\n",
              "      height: 32px;\n",
              "      padding: 0 0 0 0;\n",
              "      width: 32px;\n",
              "    }\n",
              "\n",
              "    .colab-df-convert:hover {\n",
              "      background-color: #E2EBFA;\n",
              "      box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
              "      fill: #174EA6;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert {\n",
              "      background-color: #3B4455;\n",
              "      fill: #D2E3FC;\n",
              "    }\n",
              "\n",
              "    [theme=dark] .colab-df-convert:hover {\n",
              "      background-color: #434B5C;\n",
              "      box-shadow: 0px 1px 3px 1px rgba(0, 0, 0, 0.15);\n",
              "      filter: drop-shadow(0px 1px 2px rgba(0, 0, 0, 0.3));\n",
              "      fill: #FFFFFF;\n",
              "    }\n",
              "  </style>\n",
              "\n",
              "      <script>\n",
              "        const buttonEl =\n",
              "          document.querySelector('#df-f967a217-3460-4920-8ea2-bfd85e2366b4 button.colab-df-convert');\n",
              "        buttonEl.style.display =\n",
              "          google.colab.kernel.accessAllowed ? 'block' : 'none';\n",
              "\n",
              "        async function convertToInteractive(key) {\n",
              "          const element = document.querySelector('#df-f967a217-3460-4920-8ea2-bfd85e2366b4');\n",
              "          const dataTable =\n",
              "            await google.colab.kernel.invokeFunction('convertToInteractive',\n",
              "                                                     [key], {});\n",
              "          if (!dataTable) return;\n",
              "\n",
              "          const docLinkHtml = 'Like what you see? Visit the ' +\n",
              "            '<a target=\"_blank\" href=https://colab.research.google.com/notebooks/data_table.ipynb>data table notebook</a>'\n",
              "            + ' to learn more about interactive tables.';\n",
              "          element.innerHTML = '';\n",
              "          dataTable['output_type'] = 'display_data';\n",
              "          await google.colab.output.renderOutput(dataTable, element);\n",
              "          const docLink = document.createElement('div');\n",
              "          docLink.innerHTML = docLinkHtml;\n",
              "          element.appendChild(docLink);\n",
              "        }\n",
              "      </script>\n",
              "    </div>\n",
              "  </div>\n",
              "  "
            ],
            "text/plain": [
              "         Date Location  MinTemp  MaxTemp  Rainfall  Evaporation  Sunshine  \\\n",
              "0  2008-12-01   Albury     13.4     22.9       0.6          NaN       NaN   \n",
              "1  2008-12-02   Albury      7.4     25.1       0.0          NaN       NaN   \n",
              "2  2008-12-03   Albury     12.9     25.7       0.0          NaN       NaN   \n",
              "3  2008-12-04   Albury      9.2     28.0       0.0          NaN       NaN   \n",
              "4  2008-12-05   Albury     17.5     32.3       1.0          NaN       NaN   \n",
              "\n",
              "  WindGustDir  WindGustSpeed WindDir9am  ... Humidity9am  Humidity3pm  \\\n",
              "0           W           44.0          W  ...        71.0         22.0   \n",
              "1         WNW           44.0        NNW  ...        44.0         25.0   \n",
              "2         WSW           46.0          W  ...        38.0         30.0   \n",
              "3          NE           24.0         SE  ...        45.0         16.0   \n",
              "4           W           41.0        ENE  ...        82.0         33.0   \n",
              "\n",
              "   Pressure9am  Pressure3pm  Cloud9am  Cloud3pm  Temp9am  Temp3pm  RainToday  \\\n",
              "0       1007.7       1007.1       8.0       NaN     16.9     21.8         No   \n",
              "1       1010.6       1007.8       NaN       NaN     17.2     24.3         No   \n",
              "2       1007.6       1008.7       NaN       2.0     21.0     23.2         No   \n",
              "3       1017.6       1012.8       NaN       NaN     18.1     26.5         No   \n",
              "4       1010.8       1006.0       7.0       8.0     17.8     29.7         No   \n",
              "\n",
              "   RainTomorrow  \n",
              "0            No  \n",
              "1            No  \n",
              "2            No  \n",
              "3            No  \n",
              "4            No  \n",
              "\n",
              "[5 rows x 23 columns]"
            ]
          },
          "execution_count": 2,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "dataframe = pd.read_csv(\"https://raw.githubusercontent.com/ujwalnk/MachineLearning101/main/data/01%20Weather%20Data.csv\")\n",
        "dataframe.head()"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "fUJjOrOTJyX0"
      },
      "source": [
        "### Check for missing data & remove any na data"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 3,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "II0rlaRxJ7a7",
        "outputId": "1b7db804-1ce6-46d7-aae4-a867c9e4d854"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "(Date             0\n",
              " Location         0\n",
              " MinTemp          0\n",
              " MaxTemp          0\n",
              " Rainfall         0\n",
              " Evaporation      0\n",
              " Sunshine         0\n",
              " WindGustDir      0\n",
              " WindGustSpeed    0\n",
              " WindDir9am       0\n",
              " WindDir3pm       0\n",
              " WindSpeed9am     0\n",
              " WindSpeed3pm     0\n",
              " Humidity9am      0\n",
              " Humidity3pm      0\n",
              " Pressure9am      0\n",
              " Pressure3pm      0\n",
              " Cloud9am         0\n",
              " Cloud3pm         0\n",
              " Temp9am          0\n",
              " Temp3pm          0\n",
              " RainToday        0\n",
              " RainTomorrow     0\n",
              " dtype: int64,\n",
              " Date             56420\n",
              " Location         56420\n",
              " MinTemp          56420\n",
              " MaxTemp          56420\n",
              " Rainfall         56420\n",
              " Evaporation      56420\n",
              " Sunshine         56420\n",
              " WindGustDir      56420\n",
              " WindGustSpeed    56420\n",
              " WindDir9am       56420\n",
              " WindDir3pm       56420\n",
              " WindSpeed9am     56420\n",
              " WindSpeed3pm     56420\n",
              " Humidity9am      56420\n",
              " Humidity3pm      56420\n",
              " Pressure9am      56420\n",
              " Pressure3pm      56420\n",
              " Cloud9am         56420\n",
              " Cloud3pm         56420\n",
              " Temp9am          56420\n",
              " Temp3pm          56420\n",
              " RainToday        56420\n",
              " RainTomorrow     56420\n",
              " dtype: int64)"
            ]
          },
          "execution_count": 3,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "dataframe = dataframe.dropna()\n",
        "dataframe.isnull().sum(), dataframe.count()"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "K5jNK3leYp5u"
      },
      "source": [
        "Drop Unnecessary Columns"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 4,
      "metadata": {
        "id": "0X4WxiVEYyKy"
      },
      "outputs": [],
      "source": [
        "dataframe = dataframe.drop(\"Date\", axis=1)"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "DGuVKMCCTkvb"
      },
      "source": [
        "Sort and check for datapoints"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 5,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "gDaL9J2gO9J7",
        "outputId": "5bdce59a-c35b-4b02-d10c-b6267f887ad4"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "No     43993\n",
              "Yes    12427\n",
              "Name: RainTomorrow, dtype: int64"
            ]
          },
          "execution_count": 5,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "dataframe = dataframe.drop_duplicates()\n",
        "dataframe.sort_values(\"RainTomorrow\", axis=0, ascending=True, inplace=True)\n",
        "dataframe[\"RainTomorrow\"].value_counts()"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "R1psB7EhTxBI"
      },
      "source": [
        "### Data Splitting"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 6,
      "metadata": {
        "id": "yE0Jsc2OS6K0"
      },
      "outputs": [],
      "source": [
        "from sklearn.model_selection import train_test_split\n",
        "\n",
        "# Import label encoder\n",
        "from sklearn import preprocessing\n",
        "\n",
        "label_encoder = preprocessing.LabelEncoder()\n",
        "dataframe[\"RainTomorrow\"] = label_encoder.fit_transform(dataframe[\"RainTomorrow\"])\n",
        "\n",
        "y = dataframe[\"RainTomorrow\"]\n",
        "X = dataframe = pd.get_dummies(dataframe.drop(\"RainTomorrow\", axis=1))\n",
        "\n",
        "X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2)"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "_UCvF_MYUfhg"
      },
      "source": [
        "## Test for shape matching:\n",
        "$$\n",
        "\\begin{align}\n",
        "&(\\approx 70\\% \\times data , <\\# vars>) &&(\\approx 10\\% \\times data , <\\# vars>) &&&(\\approx 20\\% \\times data , <\\# vars>) \\\\ &(\\approx 70\\% \\times data,) &&(\\approx 10\\% \\times data,) &&&(\\approx 20\\% \\times data,) \\\\\n",
        "\\end{align}$$"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "JgEvDu2lU3Bt"
      },
      "source": [
        "Need to make a separate validation and test data as all data is labelled"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 7,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "rHJHAcJdUrig",
        "outputId": "8e2e3069-19f9-4b61-a351-e22324c2d293"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "((45136, 92), (11284, 92), (45136,), (11284,))"
            ]
          },
          "execution_count": 7,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "X_train.shape, X_test.shape, y_train.shape, y_test.shape"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "MmgWO7m1U_nq"
      },
      "source": [
        "The shapes match, so start training the model"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "JLwNKoCDUxEq"
      },
      "source": [
        "## Model Training"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 8,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/",
          "height": 74
        },
        "id": "Um9wSdvZVLY2",
        "outputId": "841bdb63-7b55-4b89-862e-9b8269a3cd2f"
      },
      "outputs": [
        {
          "data": {
            "text/html": [
              "<style>#sk-container-id-1 {color: black;background-color: white;}#sk-container-id-1 pre{padding: 0;}#sk-container-id-1 div.sk-toggleable {background-color: white;}#sk-container-id-1 label.sk-toggleable__label {cursor: pointer;display: block;width: 100%;margin-bottom: 0;padding: 0.3em;box-sizing: border-box;text-align: center;}#sk-container-id-1 label.sk-toggleable__label-arrow:before {content: \"▸\";float: left;margin-right: 0.25em;color: #696969;}#sk-container-id-1 label.sk-toggleable__label-arrow:hover:before {color: black;}#sk-container-id-1 div.sk-estimator:hover label.sk-toggleable__label-arrow:before {color: black;}#sk-container-id-1 div.sk-toggleable__content {max-height: 0;max-width: 0;overflow: hidden;text-align: left;background-color: #f0f8ff;}#sk-container-id-1 div.sk-toggleable__content pre {margin: 0.2em;color: black;border-radius: 0.25em;background-color: #f0f8ff;}#sk-container-id-1 input.sk-toggleable__control:checked~div.sk-toggleable__content {max-height: 200px;max-width: 100%;overflow: auto;}#sk-container-id-1 input.sk-toggleable__control:checked~label.sk-toggleable__label-arrow:before {content: \"▾\";}#sk-container-id-1 div.sk-estimator input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-label input.sk-toggleable__control:checked~label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 input.sk-hidden--visually {border: 0;clip: rect(1px 1px 1px 1px);clip: rect(1px, 1px, 1px, 1px);height: 1px;margin: -1px;overflow: hidden;padding: 0;position: absolute;width: 1px;}#sk-container-id-1 div.sk-estimator {font-family: monospace;background-color: #f0f8ff;border: 1px dotted black;border-radius: 0.25em;box-sizing: border-box;margin-bottom: 0.5em;}#sk-container-id-1 div.sk-estimator:hover {background-color: #d4ebff;}#sk-container-id-1 div.sk-parallel-item::after {content: \"\";width: 100%;border-bottom: 1px solid gray;flex-grow: 1;}#sk-container-id-1 div.sk-label:hover label.sk-toggleable__label {background-color: #d4ebff;}#sk-container-id-1 div.sk-serial::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: 0;}#sk-container-id-1 div.sk-serial {display: flex;flex-direction: column;align-items: center;background-color: white;padding-right: 0.2em;padding-left: 0.2em;position: relative;}#sk-container-id-1 div.sk-item {position: relative;z-index: 1;}#sk-container-id-1 div.sk-parallel {display: flex;align-items: stretch;justify-content: center;background-color: white;position: relative;}#sk-container-id-1 div.sk-item::before, #sk-container-id-1 div.sk-parallel-item::before {content: \"\";position: absolute;border-left: 1px solid gray;box-sizing: border-box;top: 0;bottom: 0;left: 50%;z-index: -1;}#sk-container-id-1 div.sk-parallel-item {display: flex;flex-direction: column;z-index: 1;position: relative;background-color: white;}#sk-container-id-1 div.sk-parallel-item:first-child::after {align-self: flex-end;width: 50%;}#sk-container-id-1 div.sk-parallel-item:last-child::after {align-self: flex-start;width: 50%;}#sk-container-id-1 div.sk-parallel-item:only-child::after {width: 0;}#sk-container-id-1 div.sk-dashed-wrapped {border: 1px dashed gray;margin: 0 0.4em 0.5em 0.4em;box-sizing: border-box;padding-bottom: 0.4em;background-color: white;}#sk-container-id-1 div.sk-label label {font-family: monospace;font-weight: bold;display: inline-block;line-height: 1.2em;}#sk-container-id-1 div.sk-label-container {text-align: center;}#sk-container-id-1 div.sk-container {/* jupyter's `normalize.less` sets `[hidden] { display: none; }` but bootstrap.min.css set `[hidden] { display: none !important; }` so we also need the `!important` here to be able to override the default hidden behavior on the sphinx rendered scikit-learn.org. See: https://github.com/scikit-learn/scikit-learn/issues/21755 */display: inline-block !important;position: relative;}#sk-container-id-1 div.sk-text-repr-fallback {display: none;}</style><div id=\"sk-container-id-1\" class=\"sk-top-container\"><div class=\"sk-text-repr-fallback\"><pre>SGDClassifier()</pre><b>In a Jupyter environment, please rerun this cell to show the HTML representation or trust the notebook. <br />On GitHub, the HTML representation is unable to render, please try loading this page with nbviewer.org.</b></div><div class=\"sk-container\" hidden><div class=\"sk-item\"><div class=\"sk-estimator sk-toggleable\"><input class=\"sk-toggleable__control sk-hidden--visually\" id=\"sk-estimator-id-1\" type=\"checkbox\" checked><label for=\"sk-estimator-id-1\" class=\"sk-toggleable__label sk-toggleable__label-arrow\">SGDClassifier</label><div class=\"sk-toggleable__content\"><pre>SGDClassifier()</pre></div></div></div></div></div>"
            ],
            "text/plain": [
              "SGDClassifier()"
            ]
          },
          "execution_count": 8,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "from sklearn.linear_model import SGDClassifier as clf\n",
        "\n",
        "sgd_model = clf()\n",
        "sgd_model.fit(X_train, y_train)"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "GF9ZZjfdVht6"
      },
      "source": [
        "### Testing accuracy score of model"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 9,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "czPBz9Hddkur",
        "outputId": "6a07b8f4-1d87-43b8-cdca-67c34c042085"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "0.8290499822757887"
            ]
          },
          "execution_count": 9,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "sgd_model.score(X_test, y_test)"
      ]
    },
    {
      "attachments": {},
      "cell_type": "markdown",
      "metadata": {
        "id": "LmllnC5SdpzV"
      },
      "source": [
        "### Weights of each data column"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": 10,
      "metadata": {
        "colab": {
          "base_uri": "https://localhost:8080/"
        },
        "id": "0iCP1h8idv-x",
        "outputId": "67b1c3e7-ee04-45bd-a162-6df11d743b96"
      },
      "outputs": [
        {
          "data": {
            "text/plain": [
              "array([[-13.94129179,  18.26175469,  10.5008666 ,  -1.86834394,\n",
              "        -66.24311611,  33.02988746,  -5.71274478, -13.38599022,\n",
              "          1.76674237,  26.90531948,  65.68816178, -69.91987087,\n",
              "         -7.36906574,  54.10162366,  14.16866264,  -1.30859774,\n",
              "         -0.95906887,  20.3071201 ,   5.75649665,  -0.0888927 ,\n",
              "          0.47154798,   2.907069  ,  -7.01696732,  -8.65384302,\n",
              "         -0.40279504, -21.44189095,   1.52645429,   2.55149821,\n",
              "          4.77312118, -14.39783913,   2.07092207,  28.33801976,\n",
              "         15.94026633,  10.62267737, -15.19578999,   2.63691854,\n",
              "         -7.81491818, -24.71842023,  10.43655829,  -6.39471844,\n",
              "          1.33894626,  -0.79239506, -13.03597522,  -7.77880552,\n",
              "        -11.29701182,  -2.94040377,  -3.40222911,   2.91470822,\n",
              "         11.67827816,  11.32826316, -13.05889287,  -2.50913529,\n",
              "         -6.4037466 ,  -5.88219648,   1.97439015,  11.5803573 ,\n",
              "         19.23832431,   9.39415251,  -7.21767037,   5.76899719,\n",
              "        -12.28872098,  11.44771272,  13.61377776,  28.21509783,\n",
              "          1.14866033,   2.78692497, -14.34783699, -16.69168741,\n",
              "        -14.76591046, -13.51794032,  -1.02573839,  11.31298473,\n",
              "          2.7778968 ,   4.58352972, -10.14904597,  -9.11775178,\n",
              "         -8.16076633,   8.22604691,  -6.20026566,   3.65918456,\n",
              "         14.85341421,  19.45013894, -15.02494933,  -3.1063831 ,\n",
              "        -11.05186243,  -7.18016876, -12.0081534 ,  12.90749749,\n",
              "         23.61767862,   1.08546318, -50.80912147,  52.6091986 ]])"
            ]
          },
          "execution_count": 10,
          "metadata": {},
          "output_type": "execute_result"
        }
      ],
      "source": [
        "sgd_model.coef_"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "authorship_tag": "ABX9TyMxZhaFaDHf2tb71IOWYbOG",
      "provenance": []
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}