{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "name": "Week7 Assignment", "provenance": [], "collapsed_sections": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" } }, "cells": [ { "cell_type": "code", "metadata": { "id": "N3hulcocaUJd", "colab": { "base_uri": "https://localhost:8080/", "height": 170 }, "outputId": "0e4b1c0f-403e-44df-af80-d7cbeca8a0c8" }, "source": [ "from google.colab import drive\n", "drive.mount('/data/')\n", "data_dir = '/data/My Drive/Colab Notebooks/Experiment'\n", "!ls '/data/My Drive/Colab Notebooks/Experiment'\n", "!pip install matplotlib" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "Mounted at /data/\n", "diamonds.csv Iris.csv\tm_data.csv w_data.csv\n", "Requirement already satisfied: matplotlib in /usr/local/lib/python3.6/dist-packages (3.2.2)\n", "Requirement already satisfied: kiwisolver>=1.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.2.0)\n", "Requirement already satisfied: numpy>=1.11 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (1.18.5)\n", "Requirement already satisfied: cycler>=0.10 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (0.10.0)\n", "Requirement already satisfied: python-dateutil>=2.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.8.1)\n", "Requirement already satisfied: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /usr/local/lib/python3.6/dist-packages (from matplotlib) (2.4.7)\n", "Requirement already satisfied: six in /usr/local/lib/python3.6/dist-packages (from cycler>=0.10->matplotlib) (1.15.0)\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "oLEtUkVMaXjq", "colab": { "base_uri": "https://localhost:8080/", "height": 204 }, "outputId": "3eb0cb78-416d-4d1a-d80b-1b07ab4bf68f" }, "source": [ "import pandas as pd\n", "\n", "df = pd.read_csv(data_dir+'/diamonds.csv')\n", "df.head()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Unnamed: 0</th>\n", " <th>carat</th>\n", " <th>cut</th>\n", " <th>color</th>\n", " <th>clarity</th>\n", " <th>depth</th>\n", " <th>table</th>\n", " <th>price</th>\n", " <th>x</th>\n", " <th>y</th>\n", " <th>z</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>0.23</td>\n", " <td>Ideal</td>\n", " <td>E</td>\n", " <td>SI2</td>\n", " <td>61.5</td>\n", " <td>55.0</td>\n", " <td>326</td>\n", " <td>3.95</td>\n", " <td>3.98</td>\n", " <td>2.43</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>0.21</td>\n", " <td>Premium</td>\n", " <td>E</td>\n", " <td>SI1</td>\n", " <td>59.8</td>\n", " <td>61.0</td>\n", " <td>326</td>\n", " <td>3.89</td>\n", " <td>3.84</td>\n", " <td>2.31</td>\n", " </tr>\n", " <tr>\n", " <th>2</th>\n", " <td>3</td>\n", " <td>0.23</td>\n", " <td>Good</td>\n", " <td>E</td>\n", " <td>VS1</td>\n", " <td>56.9</td>\n", " <td>65.0</td>\n", " <td>327</td>\n", " <td>4.05</td>\n", " <td>4.07</td>\n", " <td>2.31</td>\n", " </tr>\n", " <tr>\n", " <th>3</th>\n", " <td>4</td>\n", " <td>0.29</td>\n", " <td>Premium</td>\n", " <td>I</td>\n", " <td>VS2</td>\n", " <td>62.4</td>\n", " <td>58.0</td>\n", " <td>334</td>\n", " <td>4.20</td>\n", " <td>4.23</td>\n", " <td>2.63</td>\n", " </tr>\n", " <tr>\n", " <th>4</th>\n", " <td>5</td>\n", " <td>0.31</td>\n", " <td>Good</td>\n", " <td>J</td>\n", " <td>SI2</td>\n", " <td>63.3</td>\n", " <td>58.0</td>\n", " <td>335</td>\n", " <td>4.34</td>\n", " <td>4.35</td>\n", " <td>2.75</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Unnamed: 0 carat cut color clarity ... table price x y z\n", "0 1 0.23 Ideal E SI2 ... 55.0 326 3.95 3.98 2.43\n", "1 2 0.21 Premium E SI1 ... 61.0 326 3.89 3.84 2.31\n", "2 3 0.23 Good E VS1 ... 65.0 327 4.05 4.07 2.31\n", "3 4 0.29 Premium I VS2 ... 58.0 334 4.20 4.23 2.63\n", "4 5 0.31 Good J SI2 ... 58.0 335 4.34 4.35 2.75\n", "\n", "[5 rows x 11 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 2 } ] }, { "cell_type": "code", "metadata": { "id": "hQZ6U8YLawFh" }, "source": [ "import numpy as np \n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "from sklearn.linear_model import LogisticRegression \n", "from sklearn.model_selection import train_test_split \n", "from sklearn.neighbors import KNeighborsClassifier \n", "from sklearn import svm \n", "from sklearn import metrics \n", "from sklearn.tree import DecisionTreeClassifier\n", "\n", "df['color_int'] = df['color'].astype('category').cat.codes\n", "\n", "transform = {'Ideal':0, 'Premium':1, 'Good':2, 'Very Good':3, 'Fair':4}\n", "df['cut_int']=df['cut'].apply(lambda x: transform[x])\n", "\n", "transform_clarity ={'SI2':6, 'SI1':5, 'VS1':3, 'VS2':4, 'VVS2':2, 'VVS1':1, 'I1':7, 'IF':0}\n", "df['clarity_int']=df['clarity'].apply(lambda x: transform_clarity[x])" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "_ghgXydCLwro", "colab": { "base_uri": "https://localhost:8080/", "height": 111 }, "outputId": "75e950cb-1312-44b7-d6e9-266b8ede635f" }, "source": [ "df.head(2)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/html": [ "<div>\n", "<style scoped>\n", " .dataframe tbody tr th:only-of-type {\n", " vertical-align: middle;\n", " }\n", "\n", " .dataframe tbody tr th {\n", " vertical-align: top;\n", " }\n", "\n", " .dataframe thead th {\n", " text-align: right;\n", " }\n", "</style>\n", "<table border=\"1\" class=\"dataframe\">\n", " <thead>\n", " <tr style=\"text-align: right;\">\n", " <th></th>\n", " <th>Unnamed: 0</th>\n", " <th>carat</th>\n", " <th>cut</th>\n", " <th>color</th>\n", " <th>clarity</th>\n", " <th>depth</th>\n", " <th>table</th>\n", " <th>price</th>\n", " <th>x</th>\n", " <th>y</th>\n", " <th>z</th>\n", " <th>color_int</th>\n", " <th>cut_int</th>\n", " <th>clarity_int</th>\n", " </tr>\n", " </thead>\n", " <tbody>\n", " <tr>\n", " <th>0</th>\n", " <td>1</td>\n", " <td>0.23</td>\n", " <td>Ideal</td>\n", " <td>E</td>\n", " <td>SI2</td>\n", " <td>61.5</td>\n", " <td>55.0</td>\n", " <td>326</td>\n", " <td>3.95</td>\n", " <td>3.98</td>\n", " <td>2.43</td>\n", " <td>1</td>\n", " <td>0</td>\n", " <td>6</td>\n", " </tr>\n", " <tr>\n", " <th>1</th>\n", " <td>2</td>\n", " <td>0.21</td>\n", " <td>Premium</td>\n", " <td>E</td>\n", " <td>SI1</td>\n", " <td>59.8</td>\n", " <td>61.0</td>\n", " <td>326</td>\n", " <td>3.89</td>\n", " <td>3.84</td>\n", " <td>2.31</td>\n", " <td>1</td>\n", " <td>1</td>\n", " <td>5</td>\n", " </tr>\n", " </tbody>\n", "</table>\n", "</div>" ], "text/plain": [ " Unnamed: 0 carat cut color ... z color_int cut_int clarity_int\n", "0 1 0.23 Ideal E ... 2.43 1 0 6\n", "1 2 0.21 Premium E ... 2.31 1 1 5\n", "\n", "[2 rows x 14 columns]" ] }, "metadata": { "tags": [] }, "execution_count": 4 } ] }, { "cell_type": "code", "metadata": { "id": "TLBTmr-m9A_W" }, "source": [ "features = ['carat','color_int','cut_int','clarity_int','x','y','z','depth','table']\n", "X = df[features]\n", "y = df['price']\n", "\n", "train_X, test_X, train_y, test_y = train_test_split(X, y, test_size = 0.25, random_state=0)" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "_-vEZnuBB71Q" }, "source": [ "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.tree import DecisionTreeRegressor\n", "from sklearn.metrics import mean_absolute_error" ], "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "eA5hxJWOGHzz" }, "source": [ "Decision Tree Model " ] }, { "cell_type": "code", "metadata": { "id": "zUFFrRUsBJSJ", "colab": { "base_uri": "https://localhost:8080/", "height": 119 }, "outputId": "7ffa56d7-753d-4019-eb9c-20e65a24eaa5" }, "source": [ "tree = DecisionTreeRegressor(random_state=1)\n", "tree.fit(train_X, train_y)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "DecisionTreeRegressor(ccp_alpha=0.0, criterion='mse', max_depth=None,\n", " max_features=None, max_leaf_nodes=None,\n", " min_impurity_decrease=0.0, min_impurity_split=None,\n", " min_samples_leaf=1, min_samples_split=2,\n", " min_weight_fraction_leaf=0.0, presort='deprecated',\n", " random_state=1, splitter='best')" ] }, "metadata": { "tags": [] }, "execution_count": 7 } ] }, { "cell_type": "code", "metadata": { "id": "TdTTL6WHCQEY", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "c250ac9f-fe21-41c2-ee2e-a162dcfb013c" }, "source": [ "prediction = tree.predict(test_X)\n", "test_mse = mean_absolute_error(prediction, test_y)\n", "\n", "errors = prediction - test_y\n", "test_mse" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "351.4046718576196" ] }, "metadata": { "tags": [] }, "execution_count": 8 } ] }, { "cell_type": "code", "metadata": { "id": "cCp0xAgRC1y5", "colab": { "base_uri": "https://localhost:8080/", "height": 282 }, "outputId": "fee5cd55-ce8a-484e-a295-d4fcfaa59518" }, "source": [ "errors.hist()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "<matplotlib.axes._subplots.AxesSubplot at 0x7f00e6b5e160>" ] }, "metadata": { "tags": [] }, "execution_count": 9 }, { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYMAAAD4CAYAAAAO9oqkAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAWwUlEQVR4nO3df7DddZ3f8eerRJAlSoLoLSVpE8fUFklb4Q7QcbdzERciWkO76GAZCUqbaRe3bJudNZRuYVSm0i3LyKzipIUKLjWwrJaM6GIWud3ZTokYZQk/RC4Ya1KELkE0wrK99t0/zufiMd6b3HvOybn3rs/HzJn7Pe/vr/f5fk/O635/3JNUFZKkn29/Zb4bkCTNP8NAkmQYSJIMA0kShoEkCVgy3w306vjjj69Vq1YB8KMf/YhjjjlmfhuaBfscvMXSq30O3mLpdaH1uXPnzj+rqtf+zIiqWpSPU089tabce++9tRjY5+Atll7tc/AWS68LrU/gazXNZ6qniSRJhoEkyTCQJGEYSJIwDCRJGAaSJAwDSRKzCIMkNyV5JslDXbXfTvLNJA8m+XySZV3jLk8ykeSxJOd01de12kSSzV311Ul2tPptSY4c5AuUJB3abI4MPg2sO6C2HTi5qv4O8C3gcoAkJwEXAG9q83wyyRFJjgA+AbwdOAl4b5sW4Brguqp6A/AccElfr0iSNGeH/DqKqvrjJKsOqH256+l9wPlteD2wtapeAr6dZAI4rY2bqKonAZJsBdYneRR4K/BP2jQ3A1cBN/TyYqSfd6s239XX/JvWTnJxD8vY/bF39LVezb9BfDfRB4Db2vCJdMJhyp5WA/juAfXTgdcA36+qyWmm/xlJNgIbAUZGRhgfHwdg//79Lw8vZPY5eIul12H1uWnt5KEnOoiRo3tbxnzsA/f9YPUVBkmuACaBWwfTzsFV1RZgC8Do6GiNjY0BnTfi1PBCZp+Dt1h6HVafvfxW323T2kmu3TX3j4XdF471td5euO8Hq+cwSHIx8E7grPblRwB7gZVdk61oNWaoPwssS7KkHR10Ty9JGpKebi1Nsg74TeBdVfVC16htwAVJjkqyGlgDfBW4H1jT7hw6ks5F5m0tRO7lJ9ccNgB39vZSJEm9ms2tpZ8F/ifwxiR7klwC/C7wKmB7kgeSfAqgqh4GbgceAf4QuLSqftx+6/8gcDfwKHB7mxbgQ8C/bhebXwPcONBXKEk6pNncTfTeacozfmBX1dXA1dPUvwh8cZr6k/zkjiNJ0jzwL5AlSYaBJMkwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CSxCzCIMlNSZ5J8lBX7bgk25M83n4ub/UkuT7JRJIHk5zSNc+GNv3jSTZ01U9NsqvNc32SDPpFSpIObjZHBp8G1h1Q2wzcU1VrgHvac4C3A2vaYyNwA3TCA7gSOB04DbhyKkDaNP+sa74D1yVJOswOGQZV9cfAvgPK64Gb2/DNwHld9Vuq4z5gWZITgHOA7VW1r6qeA7YD69q4V1fVfVVVwC1dy5IkDcmSHucbqaqn2vD3gJE2fCLw3a7p9rTawep7pqlPK8lGOkccjIyMMD4+DsD+/ftfHl7I7HPwFkuvw+pz09rJvuYfObq3ZczHPnDfD1avYfCyqqokNYhmZrGuLcAWgNHR0RobGwM6b8Sp4YXMPgdvsfQ6rD4v3nxXX/NvWjvJtbvm/rGw+8KxvtbbC/f9YPV6N9HT7RQP7eczrb4XWNk13YpWO1h9xTR1SdIQ9RoG24CpO4I2AHd21S9qdxWdATzfTifdDZydZHm7cHw2cHcb94MkZ7S7iC7qWpYkaUgOeTyY5LPAGHB8kj107gr6GHB7kkuA7wDvaZN/ETgXmABeAN4PUFX7knwEuL9N9+Gqmroo/at07lg6GvhSe0iShuiQYVBV751h1FnTTFvApTMs5ybgpmnqXwNOPlQfkqTDx79AliQZBpIkw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSSJPsMgyb9K8nCSh5J8Nskrk6xOsiPJRJLbkhzZpj2qPZ9o41d1LefyVn8syTn9vSRJ0lz1HAZJTgT+JTBaVScDRwAXANcA11XVG4DngEvaLJcAz7X6dW06kpzU5nsTsA74ZJIjeu1LkjR3/Z4mWgIcnWQJ8AvAU8BbgTva+JuB89rw+vacNv6sJGn1rVX1UlV9G5gATuuzL0nSHKSqep85uQy4GngR+DJwGXBf++2fJCuBL1XVyUkeAtZV1Z427gngdOCqNs/vtfqNbZ47plnfRmAjwMjIyKlbt24FYP/+/SxdurTn1zEs9jl4i6XXYfW5a+/zfc0/cjQ8/eLc51t74rF9rbcX7vvenHnmmTuravTA+pJeF5hkOZ3f6lcD3wd+n85pnsOmqrYAWwBGR0drbGwMgPHxcaaGFzL7HLzF0uuw+rx48119zb9p7STX7pr7x8LuC8f6Wm8v3PeD1c9porcB366q/1NV/xf4HPAWYFk7bQSwAtjbhvcCKwHa+GOBZ7vr08wjSRqCfsLgfwFnJPmFdu7/LOAR4F7g/DbNBuDONrytPaeN/0p1zlFtAy5odxutBtYAX+2jL0nSHPV8mqiqdiS5A/g6MAl8g84pnLuArUk+2mo3tlluBD6TZALYR+cOIqrq4SS30wmSSeDSqvpxr31Jkuau5zAAqKorgSsPKD/JNHcDVdWfA++eYTlX07kQLUmaB/4FsiTJMJAkGQaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJLoMwySLEtyR5JvJnk0yd9PclyS7Ukebz+Xt2mT5PokE0keTHJK13I2tOkfT7Kh3xclSZqbfo8MPg78YVX9LeDvAo8Cm4F7qmoNcE97DvB2YE17bARuAEhyHHAlcDpwGnDlVIBIkoaj5zBIcizwD4AbAarqL6rq+8B64OY22c3AeW14PXBLddwHLEtyAnAOsL2q9lXVc8B2YF2vfUmS5i5V1duMyd8DtgCP0Dkq2AlcBuytqmVtmgDPVdWyJF8APlZVf9LG3QN8CBgDXllVH2313wJerKr/OM06N9I5qmBkZOTUrVu3ArB//36WLl3a0+sYJvscvMXS67D63LX3+b7mHzkann5x7vOtPfHYvtbbC/d9b84888ydVTV6YH1JH8tcApwC/FpV7UjycX5ySgiAqqokvaXNNKpqC50AYnR0tMbGxgAYHx9nanghs8/BWyy9DqvPizff1df8m9ZOcu2uuX8s7L5wrK/19sJ9P1j9XDPYA+ypqh3t+R10wuHpdvqH9vOZNn4vsLJr/hWtNlNdkjQkPYdBVX0P+G6SN7bSWXROGW0Dpu4I2gDc2Ya3ARe1u4rOAJ6vqqeAu4GzkyxvF47PbjVJ0pD0c5oI4NeAW5McCTwJvJ9OwNye5BLgO8B72rRfBM4FJoAX2rRU1b4kHwHub9N9uKr29dmXJGkO+gqDqnoA+JkLEXSOEg6ctoBLZ1jOTcBN/fQiSeqdf4EsSTIMJEmGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJAnDQJKEYSBJwjCQJGEYSJIwDCRJGAaSJAYQBkmOSPKNJF9oz1cn2ZFkIsltSY5s9aPa84k2flXXMi5v9ceSnNNvT5KkuRnEkcFlwKNdz68BrquqNwDPAZe0+iXAc61+XZuOJCcBFwBvAtYBn0xyxAD6kiTNUl9hkGQF8A7gP7fnAd4K3NEmuRk4rw2vb89p489q068HtlbVS1X1bWACOK2fviRJc5Oq6n3m5A7g3wOvAn4DuBi4r/32T5KVwJeq6uQkDwHrqmpPG/cEcDpwVZvn91r9xjbPHQesjiQbgY0AIyMjp27duhWA/fv3s3Tp0p5fx7DY5+Atll6H1eeuvc/3Nf/I0fD0i3Ofb+2Jx/a13l6473tz5pln7qyq0QPrS3pdYJJ3As9U1c4kY/00N1tVtQXYAjA6OlpjY53Vjo+PMzW8kNnn4C2WXofV58Wb7+pr/k1rJ7l219w/FnZfONbXenvhvh+snsMAeAvwriTnAq8EXg18HFiWZElVTQIrgL1t+r3ASmBPkiXAscCzXfUp3fNIkoag52sGVXV5Va2oqlV0LgB/paouBO4Fzm+TbQDubMPb2nPa+K9U5xzVNuCCdrfRamAN8NVe+5IkzV0/RwYz+RCwNclHgW8AN7b6jcBnkkwA++gECFX1cJLbgUeASeDSqvrxYehLkjSDgYRBVY0D4234Saa5G6iq/hx49wzzXw1cPYheJElz518gS5IMA0mSYSBJwjCQJGEYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEnCMJAkYRhIkjAMJEkYBpIkDANJEoaBJIk+wiDJyiT3JnkkycNJLmv145JsT/J4+7m81ZPk+iQTSR5MckrXsja06R9PsqH/lyVJmot+jgwmgU1VdRJwBnBpkpOAzcA9VbUGuKc9B3g7sKY9NgI3QCc8gCuB04HTgCunAkSSNBw9h0FVPVVVX2/DPwQeBU4E1gM3t8luBs5rw+uBW6rjPmBZkhOAc4DtVbWvqp4DtgPreu1LkjR3A7lmkGQV8GZgBzBSVU+1Ud8DRtrwicB3u2bb02oz1SVJQ7Kk3wUkWQr8AfDrVfWDJC+Pq6pKUv2uo2tdG+mcYmJkZITx8XEA9u/f//LwQmafg7dYeh1Wn5vWTvY1/8jRvS1jPvaB+36w+gqDJK+gEwS3VtXnWvnpJCdU1VPtNNAzrb4XWNk1+4pW2wuMHVAfn259VbUF2AIwOjpaY2Od2cbHx5kaXsjsc/AWS6/D6vPizXf1Nf+mtZNcu2vuHwu7Lxzra729cN8PVj93EwW4EXi0qn6na9Q2YOqOoA3AnV31i9pdRWcAz7fTSXcDZydZ3i4cn91qkqQh6efI4C3A+4BdSR5otX8DfAy4PcklwHeA97RxXwTOBSaAF4D3A1TVviQfAe5v0324qvb10ZckaY56DoOq+hMgM4w+a5rpC7h0hmXdBNzUay+SpP74F8iSJMNAkmQYSJIwDCRJGAaSJAwDSRKGgSQJw0CShGEgScIwkCRhGEiSMAwkSRgGkiQMA0kShoEkCcNAkoRhIEmiv//2UpIAWLX5rqGvc9PaScaGvta/vDwykCQZBpIkTxNJAzfdKZNNaye5eB5OpUiz5ZGBJMkwkCQZBpIkDANJEoaBJIkFFAZJ1iV5LMlEks3z3Y8k/TxZELeWJjkC+ATwy8Ae4P4k26rqkfntTIvZfPxVrIZrPvfx7o+9Y97WfTgsiDAATgMmqupJgCRbgfWAYSBpQZptEA36b0wOVwilqg7LgufURHI+sK6q/ml7/j7g9Kr64AHTbQQ2tqdvBB5rw8cDfzakdvthn4O3WHq1z8FbLL0utD7/RlW99sDiQjkymJWq2gJsObCe5GtVNToPLc2JfQ7eYunVPgdvsfS6WPpcKBeQ9wIru56vaDVJ0hAslDC4H1iTZHWSI4ELgG3z3JMk/dxYEKeJqmoyyQeBu4EjgJuq6uE5LOJnTh0tUPY5eIulV/scvMXS66Loc0FcQJYkza+FcppIkjSPDANJ0sIMgyTvTvJwkv+XZLSr/stJdibZ1X6+tWvcePs6iwfa43WtflSS29rXXOxIsqprnstb/bEk5wyqz4Mte6av3WgXz3e0+m3tQvpB++9VW97Udtqd5IFWX5Xkxa5xn+qa59S23SeSXJ8krX5cku1JHm8/l/fbX9c6r0qyt6ufc7vGDWT7DqjP307yzSQPJvl8kmWtvqC25yxex7x+JUySlUnuTfJI+3d1WasP7H0wwF53t/33QJKvtdq0+y4d17deHkxyStdyNrTpH0+yYdB9zklVLbgH8Lfp/FHZODDaVX8z8Nfa8MnA3q5xPzVtV/1XgU+14QuA29rwScCfAkcBq4EngCMG1Oe0y26PJ4DXA0e2aU5q89wOXNCGPwX8i4P1P8BtfS3w79rwKuChGab7KnAGEOBLwNtb/T8Am9vwZuCaAfZ2FfAb09QHtn0H1OfZwJI2fM3UNlho2/MQr2HGbTesB3ACcEobfhXwrbavB/Y+GGCvu4HjD6hNu++Ac9s+TtvnO1r9OODJ9nN5G14+zG3e/ViQRwZV9WhVPTZN/RtV9b/b04eBo5McdYjFrQdubsN3AGe138LWA1ur6qWq+jYwQedrMfru8yDLfvlrN6rqL4CtwPrWz1tbf7R+zztE/31ry3kP8NlDTHcC8Oqquq867+JbZuivu+/DaZDbt29V9eWqmmxP76PzdzIzWoDbE2bYdkNaNwBV9VRVfb0N/xB4FDjxILPM6X1weLt/uZ/p9t164JbquA9Y1t4D5wDbq2pfVT0HbAfWDaHPaS3IMJilXwG+XlUvddX+Szts+62uD8wTge9C5xZW4HngNd31Zg8Hf+PNxUzLnqn+GuD7XR8o3b3M1P8g/BLwdFU93lVbneQbSf57kl/q6mHPNH0DjFTVU234e8DIgHqb8sF2aH1T1ymTQW7fQfsAnd8Cpyy07TmTw/nvYc7SOR36ZmBHKw3ifTBIBXw5ndPVU1+RM9O+m88+Z23e/s4gyR8Bf3WaUVdU1Z2HmPdNdA7Hz+4qX1hVe5O8CvgD4H10fuOatz7n0yz7fi8/fVTwFPDXq+rZJKcC/61t61mpqkoyp3uVD9YncAPwETr/8D5C55TWB+ay/EGZzfZMcgUwCdzaxg19e/5lkGQpnX/Dv15VP0iyYN4HXX6xfd68Dtie5JvdIxfjvpu3MKiqt/UyX5IVwOeBi6rqia7l7W0/f5jkv9I5VLyFn3zVxZ4kS4BjgWeZ5Vdg9NjnwZY9Xf1ZOoeOS9pvr93Tz9T/QR2q77asfwyc2jXPS8BLbXhnkieAv9l66D710d3f00lOqKqn2qHvM4fqbS59dvX7n4AvtKeD3L4D6TPJxcA7gbPaqZ952Z59WBBfCZPkFXSC4Naq+hxAVT3dNb6f98HAdH3ePJPk83Q+b2badzP1uRcYO6A+Psg+52JRnSZK5y6Nu+hcpPkfXfUlSY5vw6+g84/yoTZ6GzB1lf584CvtH+s24IJ07tZZDayhc1FvEGZa9rRfu9H6ubf1R+v3zq5lTdd/v94GfLOqXj5dkeS16fzfEiR5fev7yXbo+4MkZ7TTbxfN0F93331r/6Cm/CN+ep8OavsOos91wG8C76qqF7rqC2p7HsK8fyVM2xY3Ao9W1e901QfyPhhgn8e0MxAkOYbOGYqHmHnfbQMuSscZwPPtPXA3cHaS5e3U19mtNj+GdaV6Lg86O3wPnd+qngbubvV/C/wIeKDr8TrgGGAn8CCdC8sfp90ZBLwS+H06F5e+Cry+az1X0Lnr4DHa3RyD6PNgy6ZzZ8G32rgruuqvb/1NtH6POlT/fW7jTwP//IDar7Tt9wDwdeAfdo0bpfOGfwL4XX7y1+uvAe4BHgf+CDhugO+DzwC72n7dBpww6O07oD4n6Jz7nXpPTt39taC25yxex7Tbbojr/0U6p4Ie7NqW5w7yfTCgPl9P5w6lP23794qD7Ts6dxF9ovWyi5++8/AD7f0zAbx/2Nu8++HXUUiSFtdpIknS4WEYSJIMA0mSYSBJwjCQJGEYSJIwDCRJwP8HBtNUCCih+z8AAAAASUVORK5CYII=\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "markdown", "metadata": { "id": "T-4aLGNLGLVO" }, "source": [ "Random Forest Model" ] }, { "cell_type": "code", "metadata": { "id": "ESPyVz_XDFUB", "colab": { "base_uri": "https://localhost:8080/", "height": 136 }, "outputId": "425eb38a-5006-414d-fa4a-18ab3b8d6ffd" }, "source": [ "rf = RandomForestRegressor(random_state=1)\n", "rf.fit(train_X, train_y)" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "RandomForestRegressor(bootstrap=True, ccp_alpha=0.0, criterion='mse',\n", " max_depth=None, max_features='auto', max_leaf_nodes=None,\n", " max_samples=None, min_impurity_decrease=0.0,\n", " min_impurity_split=None, min_samples_leaf=1,\n", " min_samples_split=2, min_weight_fraction_leaf=0.0,\n", " n_estimators=100, n_jobs=None, oob_score=False,\n", " random_state=1, verbose=0, warm_start=False)" ] }, "metadata": { "tags": [] }, "execution_count": 10 } ] }, { "cell_type": "code", "metadata": { "id": "1uYCHh_oGZoI", "colab": { "base_uri": "https://localhost:8080/", "height": 34 }, "outputId": "4aac1255-023f-40b5-c2a7-d19f7dd8c884" }, "source": [ "prediction = rf.predict(test_X)\n", "test_mse_rf = mean_absolute_error(prediction, test_y)\n", "\n", "errors = prediction - test_y\n", "test_mse_rf" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "269.7082693527788" ] }, "metadata": { "tags": [] }, "execution_count": 11 } ] }, { "cell_type": "code", "metadata": { "id": "HKLg41jVGx1x", "colab": { "base_uri": "https://localhost:8080/", "height": 282 }, "outputId": "35c7fc4f-a0af-44ee-bbb2-fea053b86ab1" }, "source": [ "errors.hist()" ], "execution_count": null, "outputs": [ { "output_type": "execute_result", "data": { "text/plain": [ "<matplotlib.axes._subplots.AxesSubplot at 0x7f00e6ad8f98>" ] }, "metadata": { "tags": [] }, "execution_count": 12 }, { "output_type": "display_data", "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYQAAAD4CAYAAADsKpHdAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAXaklEQVR4nO3df7DddZ3f8eerRJAlSoLUW0rSSRxTWyRtl9wBOu52bsSFgNbQrjo4zBKUaaZd3GVbdtZQanFUptBd1pFZf0xaqOBaA8tqyYhuzCK3znYKKsoSfohcENekGFoS0SiLjX33j/OJHuM5yc25596co8/HzJn7Pe/v5/v9vs/3nHte+X7P956kqpAk6W8c7QYkSaPBQJAkAQaCJKkxECRJgIEgSWoWHe0GBnXyySfXihUrZjX2+9//PieccML8NjRE49Yv2PNCGLd+Yfx6Hrd+4ch6Pvnkk9m2bdu2qlrXc0BVjeVtzZo1NVv33HPPrMeOgnHrt8qeF8K49Vs1fj2PW79VR94z8OXq87562FNGSW5O8kySh7pqv5/ka0keTPKpJEu65l2VZCbJY0nO66qva7WZJJu66iuT3NfqtyU5dlZRJ0kaqtl8hvBR4ODDi+3A6VX1D4CvA1cBJDkNuAh4dVvmQ0mOSXIM8EHgfOA04K1tLMD1wPur6pXAXuCyOT0iSdJADhsIVfUFYM9Btc9V1f52915gWZteD2ypqheq6hvADHBmu81U1ZNV9UNgC7A+SYDXAne05W8BLpzjY5IkDWAYHyq/HbitTZ9KJyAO2NlqAN86qH4W8DLgO13h0j3+ZyTZCGwEmJiYYHp6elYN7tu3b9ZjR8G49Qv2vBDGrV8Yv57HrV8Ybs9zCoQkVwP7gY8PpZvDqKrNwGaAycnJmpqamtVy09PTzHbsKBi3fsGeF8K49Qvj1/O49QvD7XngQEhyKfAG4Jz2yTXALmB517BlrUaf+rPAkiSL2lFC93hJ0gIa6A/TkqwDfg94Y1X9oGvWVuCiJMclWQmsAr4IfAlY1a4oOpbOB89bW5DcA7ypLb8BuHOwhyJJmovZXHb6CeB/Aq9KsjPJZcAfAS8Btid5IMlHAKrqYeB24BHgz4DLq+pH7V//7wC2AY8Ct7exAO8E/k2SGTqfKdw01EcoSZqVw54yqqq39ij3fdOuqmuBa3vUPwN8pkf9STpXIUmSjqKx/eoKSaNjxaa7etavXL2fS/vMG4anrnv9vK37F5FfbidJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAmYRCEluTvJMkoe6aicl2Z7k8fZzaasnyY1JZpI8mOSMrmU2tPGPJ9nQVV+TZEdb5sYkGfaDlCQd3myOED4KrDuotgm4u6pWAXe3+wDnA6vabSPwYegECHANcBZwJnDNgRBpY/5F13IHb0uStAAOGwhV9QVgz0Hl9cAtbfoW4MKu+q3VcS+wJMkpwHnA9qraU1V7ge3AujbvpVV1b1UVcGvXuiRJC2jRgMtNVNXTbfrbwESbPhX4Vte4na12qPrOHvWekmykc+TBxMQE09PTs2p23759sx47CsatX7DnhTDK/V65en/P+sTx/ecNw7D3xyjv436G2fOggfBjVVVJahjNzGJbm4HNAJOTkzU1NTWr5aanp5nt2FEwbv2CPS+EUe730k139axfuXo/N+yY89tMX09dPDXU9Y3yPu5nmD0PepXR7na6h/bzmVbfBSzvGres1Q5VX9ajLklaYIMGwlbgwJVCG4A7u+qXtKuNzgaea6eWtgHnJlnaPkw+F9jW5n03ydnt6qJLutYlSVpAhz2WS/IJYAo4OclOOlcLXQfcnuQy4JvAW9rwzwAXADPAD4C3AVTVniTvBb7Uxr2nqg58UP2bdK5kOh74bLtJkhbYYQOhqt7aZ9Y5PcYWcHmf9dwM3Nyj/mXg9MP1IUmaX/6lsiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkC5hgISf51koeTPJTkE0lenGRlkvuSzCS5Lcmxbexx7f5Mm7+iaz1XtfpjSc6b20OSJA1i4EBIcirw28BkVZ0OHANcBFwPvL+qXgnsBS5ri1wG7G3197dxJDmtLfdqYB3woSTHDNqXJGkwcz1ltAg4Pski4JeAp4HXAne0+bcAF7bp9e0+bf45SdLqW6rqhar6BjADnDnHviRJRyhVNfjCyRXAtcDzwOeAK4B721EASZYDn62q05M8BKyrqp1t3hPAWcC72zJ/3Oo3tWXu6LG9jcBGgImJiTVbtmyZVZ/79u1j8eLFAz/OhTZu/YI9L4RR7nfHrud61ieOh93Pz992V5964lDXN8r7uJ8j7Xnt2rX3V9Vkr3mLBm0iyVI6/7pfCXwH+BM6p3zmTVVtBjYDTE5O1tTU1KyWm56eZrZjR8G49Qv2vBBGud9LN93Vs37l6v3csGPgt5nDeuriqaGub5T3cT/D7Hkup4xeB3yjqv53Vf1f4JPAa4Al7RQSwDJgV5veBSwHaPNPBJ7trvdYRpK0QOYSCH8FnJ3kl9pnAecAjwD3AG9qYzYAd7bpre0+bf7nq3O+aitwUbsKaSWwCvjiHPqSJA1g4GO5qrovyR3AV4D9wFfpnM65C9iS5H2tdlNb5CbgY0lmgD10riyiqh5OcjudMNkPXF5VPxq0L0nSYOZ0cq+qrgGuOaj8JD2uEqqqvwbe3Gc919L5cFqSdJT4l8qSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1cwqEJEuS3JHka0keTfKPk5yUZHuSx9vPpW1sktyYZCbJg0nO6FrPhjb+8SQb5vqgJElHbq5HCB8A/qyq/h7wD4FHgU3A3VW1Cri73Qc4H1jVbhuBDwMkOQm4BjgLOBO45kCISJIWzsCBkORE4J8ANwFU1Q+r6jvAeuCWNuwW4MI2vR64tTruBZYkOQU4D9heVXuqai+wHVg3aF+SpMGkqgZbMPlHwGbgETpHB/cDVwC7qmpJGxNgb1UtSfJp4Lqq+os2727gncAU8OKqel+rvwt4vqr+oMc2N9I5umBiYmLNli1bZtXrvn37WLx48UCP82gYt37BnhfCKPe7Y9dzPesTx8Pu5+dvu6tPPXGo6xvlfdzPkfa8du3a+6tqste8RXPoYxFwBvBbVXVfkg/wk9NDAFRVJRkscXqoqs10QojJycmampqa1XLT09PMduwoGLd+wZ4Xwij3e+mmu3rWr1y9nxt2zOVt5tCeunhqqOsb5X3czzB7nstnCDuBnVV1X7t/B52A2N1OBdF+PtPm7wKWdy2/rNX61SVJC2jgQKiqbwPfSvKqVjqHzumjrcCBK4U2AHe26a3AJe1qo7OB56rqaWAbcG6Spe3D5HNbTZK0gOZ6LPdbwMeTHAs8CbyNTsjcnuQy4JvAW9rYzwAXADPAD9pYqmpPkvcCX2rj3lNVe+bYlyTpCM0pEKrqAaDXhxPn9BhbwOV91nMzcPNcepEkzY1/qSxJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSgCEEQpJjknw1yafb/ZVJ7ksyk+S2JMe2+nHt/kybv6JrHVe1+mNJzptrT5KkIzeMI4QrgEe77l8PvL+qXgnsBS5r9cuAva3+/jaOJKcBFwGvBtYBH0pyzBD6kiQdgTkFQpJlwOuB/9zuB3gtcEcbcgtwYZte3+7T5p/Txq8HtlTVC1X1DWAGOHMufUmSjlyqavCFkzuA/wC8BPhd4FLg3nYUQJLlwGer6vQkDwHrqmpnm/cEcBbw7rbMH7f6TW2ZOw7aHEk2AhsBJiYm1mzZsmVWfe7bt4/FixcP/DgX2rj1C/a8EEa53x27nutZnzgedj8/f9tdfeqJQ13fKO/jfo6057Vr195fVZO95i0atIkkbwCeqar7k0wNup4jUVWbgc0Ak5OTNTU1u81OT08z27GjYNz6BXteCKPc76Wb7upZv3L1fm7YMfDbzGE9dfHUUNc3yvu4n2H2PJdn6jXAG5NcALwYeCnwAWBJkkVVtR9YBuxq43cBy4GdSRYBJwLPdtUP6F5GkrRABv4MoaquqqplVbWCzofCn6+qi4F7gDe1YRuAO9v01nafNv/z1TlftRW4qF2FtBJYBXxx0L4kSYOZj2O5dwJbkrwP+CpwU6vfBHwsyQywh06IUFUPJ7kdeATYD1xeVT+ah74kSYcwlECoqmlguk0/SY+rhKrqr4E391n+WuDaYfQiSRqMf6ksSQIMBElSYyBIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJjYEgSQIMBElSM3AgJFme5J4kjyR5OMkVrX5Sku1JHm8/l7Z6ktyYZCbJg0nO6FrXhjb+8SQb5v6wJElHai5HCPuBK6vqNOBs4PIkpwGbgLurahVwd7sPcD6wqt02Ah+GToAA1wBnAWcC1xwIEUnSwhk4EKrq6ar6Spv+HvAocCqwHrilDbsFuLBNrwdurY57gSVJTgHOA7ZX1Z6q2gtsB9YN2pckaTCpqrmvJFkBfAE4HfirqlrS6gH2VtWSJJ8Grquqv2jz7gbeCUwBL66q97X6u4Dnq+oPemxnI52jCyYmJtZs2bJlVv3t27ePxYsXz+UhLqhx6xfseSGMcr87dj3Xsz5xPOx+fv62u/rUE4e6vlHex/0cac9r1669v6ome81bNNdmkiwG/hT4nar6bicDOqqqksw9cX6yvs3AZoDJycmampqa1XLT09PMduwoGLd+wZ4Xwij3e+mmu3rWr1y9nxt2zPltpq+nLp4a6vpGeR/3M8ye53SVUZIX0QmDj1fVJ1t5dzsVRPv5TKvvApZ3Lb6s1frVJUkLaC5XGQW4CXi0qv6wa9ZW4MCVQhuAO7vql7Srjc4Gnquqp4FtwLlJlrYPk89tNUnSAprLsdxrgN8AdiR5oNX+LXAdcHuSy4BvAm9p8z4DXADMAD8A3gZQVXuSvBf4Uhv3nqraM4e+JEkDGDgQ2ofD6TP7nB7jC7i8z7puBm4etBdJ0tz5l8qSJMBAkCQ1BoIkCRjC3yFIGh0r+vw9gDQbHiFIkgADQZLUGAiSJMBAkCQ1BoIkCTAQJEmNgSBJAgwESVJjIEiSAANBktQYCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUmMgSJIAA0GS1BgIkiTAQJAkNQaCJAkwECRJzaKj3YAkDWrFpruGur4rV+/n0lmu86nrXj/UbY+CkTlCSLIuyWNJZpJsOtr9SNIvmpEIhCTHAB8EzgdOA96a5LSj25Uk/WIZlVNGZwIzVfUkQJItwHrgkaPalTSAYZ/GOOBITmdo/s3X83w483mqalQC4VTgW133dwJnHTwoyUZgY7u7L8ljs1z/ycD/mVOHC2vc+gV7nne/PWb9wvj1PA795vqfKR1Jz4ccNyqBMCtVtRnYfKTLJflyVU3OQ0vzYtz6BXteCOPWL4xfz+PWLwy355H4DAHYBSzvur+s1SRJC2RUAuFLwKokK5McC1wEbD3KPUnSL5SROGVUVfuTvAPYBhwD3FxVDw9xE0d8mukoG7d+wZ4Xwrj1C+PX87j1C0PsOVU1rHVJksbYqJwykiQdZQaCJAkY00BI8uYkDyf5f0kmu+q/luT+JDvaz9d2zZtuX43xQLu9vNWPS3Jb+8qM+5Ks6FrmqlZ/LMl589HzobbT7+s82ofv97X6be2D+EM+ljn2flvXfnsqyQOtviLJ813zPtK1zJr2PMwkuTFJWv2kJNuTPN5+Lh1Gjz16fneSXV29XdA1byj7ex56/v0kX0vyYJJPJVnS6iO7nw/xWEbmq2iSLE9yT5JH2u/gFa0+tNfIPPT8VHteH0jy5Vbr+Zym48bW04NJzuhaz4Y2/vEkGw674aoauxvw94FXAdPAZFf9l4G/3aZPB3Z1zfupsV313wQ+0qYvAm5r06cBfwkcB6wEngCOmYeee26n3Z4AXgEc28ac1pa5HbioTX8E+FeHeixD3vc3AP++Ta8AHuoz7ovA2UCAzwLnt/p/BDa16U3A9fP0Gnk38Ls96kPb3/PQ87nAojZ9/YF9M8r7uU9Pfffl0bgBpwBntOmXAF9vr4OhvUbmoeengJMPqvV8ToEL2nOf9lq4r9VPAp5sP5e26aWH2u5YHiFU1aNV9TN/pVxVX62q/9XuPgwcn+S4w6xuPXBLm74DOKf9K2s9sKWqXqiqbwAzdL5iY6g9H2I7P/46j6r6IbAFWN96e23rldb7hYd5LEPR1vUW4BOHGXcK8NKqurc6r8xb+/TY3ftCGeb+Hqqq+lxV7W9376Xz9zh9jfB+7rkvF3D7P6Wqnq6qr7Tp7wGP0vl2hH6O6DUyv93/TF+9ntP1wK3VcS+wpL02zgO2V9WeqtoLbAfWHWoDYxkIs/TrwFeq6oWu2n9ph2Dv6nqj/PHXZrRfxueAl9H76zQO9SIaVL/t9Ku/DPhO1xtHd1/9Hsuw/Cqwu6oe76qtTPLVJP89ya929bGzR+8AE1X1dJv+NjAxxP4O9o52CH1z1ymTYe7v+fR2Ov/qO2CU9/PBFup354ilcxr1l4H7WmkYr5H5UMDn0jn1feDrevo9p0PrdyT+DqGXJH8O/K0es66uqjsPs+yr6Rxyn9tVvriqdiV5CfCnwG/Q+RfV0Myl56Ntlr2/lZ8+Onga+DtV9WySNcB/a/t+Vqqqkgx83fOhegY+DLyXzi/We+mc6nr7oNsaltns5yRXA/uBj7d5R3U//7xIspjO7/7vVNV3k4zka6T5lfZ+9XJge5Kvdc+cr+d0ZAOhql43yHJJlgGfAi6pqie61rer/fxekv9K5/DvVn7ytRk7kywCTgSeZYCv0xiw50Ntp1f9WTqHhIvav1q7x/d7LId1uN7b+v45sKZrmReAF9r0/UmeAP5u66P7dEd3j7uTnFJVT7fD2mdm098gPXf1/p+AT7e7w9zfQ+85yaXAG4Bz2mmgo76fBzByX0WT5EV0wuDjVfVJgKra3TV/Lq+Roet6v3omyafovF/1e0779bsLmDqoPn2o7f5cnTJK56qMu+h88PI/uuqLkpzcpl9E5xfuoTZ7K3Dg0/c3AZ9vv4hbgYvSuXJnJbCKzgd4w9ZvOz2/zqP1dk/rldb7nV3r6vVYhuF1wNeq6senKJL8zXT+LwuSvKL1/mQ7rP1ukrPbqblL+vTY3ftQtV+YA/4ZP/18D2t/D7vndcDvAW+sqh901Ud2P/cxUl9F0/bNTcCjVfWHXfWhvEbmod8T2pkMkpxA50zHQ/R/TrcCl6TjbOC59trYBpybZGk7HXZuq/U3yCfgR/tG58nbSedfTbuBba3+74DvAw903V4OnADcDzxI58PmD9CuGAJeDPwJnQ+Ovgi8oms7V9O5quAx2tUbw+75UNuhc/XA19u8q7vqr2i9zrTejzvcYxnCPv8o8C8Pqv16258PAF8B/mnXvMn2In4C+CN+8lfxLwPuBh4H/hw4aZ5eIx8DdrTnfCtwyrD39zz0PEPnnO+B1+6BK8ZGdj8f4rH03JdH4wb8Cp3TQg927dsLhvkaGXK/r6BzBdNftuf96kM9p3SuLvpg62kHP30V49vb62oGeNvhtu1XV0iSgJ+zU0aSpMEZCJIkwECQJDUGgiQJMBAkSY2BIEkCDARJUvP/Ad4UVQdWg5ckAAAAAElFTkSuQmCC\n", "text/plain": [ "<Figure size 432x288 with 1 Axes>" ] }, "metadata": { "tags": [], "needs_background": "light" } } ] }, { "cell_type": "code", "metadata": { "id": "VaGDNAi3G1NO" }, "source": [ "init_features = ['carat','color_int','cut_int','clarity_int','x','y','z','depth','table']\n", "from random import random, randint, choice\n", "\n", "def diff(li1, li2):\n", " return (list(list(set(li1)-set(li2)) + list(set(li2)-set(li1))))\n", "\n", "def step(features):\n", "\n", " if random()<0.5: \n", " indx = randint(0,len(features)-1)\n", " print(\"removing\",features[indx])\n", " features.remove(features[indx])\n", " return(features)\n", " else:\n", " d = diff(features,init_features)\n", " if not d:\n", " print(\"do nothing\")\n", " return(features)\n", " else:\n", " f = choice(d)\n", " print(\"adding\",f)\n", " features.append(f)\n", " return(features)\n", "\n", "def evaluate(features):\n", "\n", " X = df[features]\n", " y = df['price']\n", " train_X, test_X, train_y, test_y = train_test_split(X, y, random_state=1)\n", " rf.fit(train_X,train_y)\n", " predictions = rf.predict(test_X)\n", " val_mae = mean_absolute_error(predictions, test_y)\n", " return(val_mae)\n", "\n", "def optmize():\n", "\n", " print(\"###\")\n", " start_features = ['carat','color_int','cut_int','clarity_int','x','y','z','depth','table']\n", " ff=start_features\n", " old_mae = 10000000\n", "\n", " for i in range(10):\n", " new_features = step(ff)\n", " mae = evaluate(new_features)\n", " print(i,mae,features)\n", " if mae < old_mae:\n", " ff = new_features\n", " old_mae = mae\n", " print(\"accepting result\")\n", " else:\n", " print(\"rejecting result\")\n", "\n" ], "execution_count": null, "outputs": [] }, { "cell_type": "code", "metadata": { "id": "5t2LSW31_cmU", "colab": { "base_uri": "https://localhost:8080/", "height": 544 }, "outputId": "43397d99-69d0-448d-9e6c-3cd344a91b58" }, "source": [ "optmize()" ], "execution_count": null, "outputs": [ { "output_type": "stream", "text": [ "###\n", "removing y\n", "0 272.65941055964595 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "accepting result\n", "removing z\n", "1 275.2602746508466 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "rejecting result\n", "adding z\n", "2 272.6264143169071 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "accepting result\n", "adding y\n", "3 264.38214012924414 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "accepting result\n", "removing y\n", "4 272.6264143169071 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "rejecting result\n", "removing carat\n", "5 289.8830542295673 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "rejecting result\n", "removing z\n", "6 306.39629205907795 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "rejecting result\n", "adding z\n", "7 289.8830542295673 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "rejecting result\n", "adding carat\n", "8 272.62460767990297 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "rejecting result\n", "adding y\n", "9 264.36568138496034 ['carat', 'color_int', 'cut_int', 'clarity_int', 'x', 'y', 'z', 'depth', 'table']\n", "accepting result\n" ], "name": "stdout" } ] }, { "cell_type": "code", "metadata": { "id": "9hcB-Z-_BzrG" }, "source": [ "" ], "execution_count": null, "outputs": [] } ] }