{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# Load the oil-analysis data into a pandas DataFrame.\n", "# DATA_SOURCE is handed straight to pd.read_excel; swap the two assignments below to read a local copy instead.\n", "\n", "DATA_SOURCE = \"https://raw.githubusercontent.com/chrisrijsdijk/RAMS/master/data/Oilanalysis.xlsx\"\n", "#DATA_SOURCE = \"C:/Users/Admin/Pythonprojects/RAMS/data/Oilanalysis.xlsx\" #for those who would like to work from a local drive \n", "\n", "df = pd.read_excel(DATA_SOURCE)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# check the datatypes in the dataframe to verify that all columns except for \"Age\" are numerical\n", "\n", "#df.dtypes" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# get a preview on the data\n", "\n", "#df.head(3)\n", "#df.describe()\n", "#len(df[\"BRSTVD\"])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# plot histograms of the data in the dataframe\n", "# causal effects of the columns that just contain one value remain invisible \"ceteris paribus\"\n", "# check for outliers and explain them eventually\n", "\n", "#for col in df.columns: \n", "# try: \n", "# df[col] = pd.to_numeric(df[col]) \n", "# df.hist(column=col)\n", "# except ValueError:\n", "# print(\"The column \"+col+' can not be represented as a histogram')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "#create a correlation matrix to check for pairwise linear dependencies among the columns\n", "\n", "#dummy=df.iloc[:,1:] #remove the \"Age\" column that is not numerical\n", "#dummy.corr(min_periods=15)\n", "#plt.matshow(dummy.corr(min_periods=15))\n", "#plt.show()\n", "#print(dummy.columns)\n", "\n", "#del dummy" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": 
[], "source": [ "# convert categorical variables into indicator functions\n", "# (guarded: once \"Age\" has been encoded, re-running this cell is a no-op instead of an error)\n", "\n", "if \"Age\" in df.columns:\n", "    df = pd.get_dummies(df,columns=[\"Age\"])\n", "#df\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "# define the response variable and convert it into an np.array\n", "\n", "y=np.array(df[\"Age_New\"])\n" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [], "source": [ "#perform RF-C\n", "\n", "from sklearn.impute import KNNImputer\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn import tree\n", "#from sklearn.tree import DecisionTreeClassifier\n", "\n", "from sklearn.inspection import permutation_importance\n" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [], "source": [ "# define the explanatory variables: every column except the \"Age_*\" indicator columns\n", "# (selecting by name instead of by a hard-coded column count keeps the response out of X\n", "# even if the number of measured variables changes)\n", "\n", "age_columns = [col for col in df.columns if str(col).startswith(\"Age_\")]\n", "X=df.drop(columns=age_columns)\n", "X_names=X.columns\n" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "# create a training set and a validation set\n", "# (the split is done BEFORE imputing so that the imputer never sees validation rows)\n", "\n", "RANDOM_STATE = 42 # fixed seed so that the split is the same on every run\n", "\n", "X_train, X_test, y_train, y_test, idx_train, idx_test = train_test_split(X, y, np.arange(len(X)), test_size = 0.25, random_state = RANDOM_STATE, stratify=y)\n", "\n", "#print('X_train Shape:', X_train.shape)\n", "#print('y_train Shape:', y_train.shape)\n", "#print('X_test Shape:', X_test.shape)\n", "#print('Y_test Shape:', y_test.shape)\n" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# impute data in case of NaN's by using K nearest neighbour\n", "# the imputer is fitted on the training set only and then applied to both sets\n", "\n", "imputer = KNNImputer(n_neighbors=20, weights=\"distance\")\n", "X_train=imputer.fit_transform(X_train)\n", "X_test=imputer.transform(X_test)\n", "\n", "# KNNImputer leaves out columns that are entirely NaN in the data it was fitted on;\n", "# fail loudly if that happened, because X_names would no longer line up with the columns\n", "assert X_train.shape[1] == len(X_names), \"imputation changed the number of features\"\n" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(n_estimators=1000)" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Instantiate model with 1000 decision 
trees\n", "\n", "rf = RandomForestClassifier(n_estimators = 1000, criterion=\"gini\",random_state = None)\n", "\n", "# Train the model on training data\n", "\n", "rf.fit(X_train, y_train)\n" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "# validate the random forest using the test set\n", "\n", "predictions = rf.predict(X_test)\n" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "image/png": "iVBORw0KGgoAAAANSUhEUgAAA7EAAADgCAYAAADVGeGWAAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjMuNCwgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy8QVMy6AAAACXBIWXMAAAsTAAALEwEAmpwYAAA2Z0lEQVR4nO3dd5xU5dn/8c9XxKACFsQ8KiJo0AgKCAtqRHA1AooFG9i7BoyR+NPEFhNLTJTHRGOD+CRGTYiIscduWESDSlFUEFFEESJRFEGKhXL9/jhncRhmd2ZxZ+v3/Xrta+e0+7rOmePItfd97lFEYGZmZmZmZlYfbFDbCZiZmZmZmZkVykWsmZmZmZmZ1RsuYs3MzMzMzKzecBFrZmZmZmZm9YaLWDMzMzMzM6s3XMSamZmZmZlZveEi1szMrACSdpH0qqQlks4r8JiQ9L0i53WnpF8Xsf2lknZMX28s6VFJiyXdJ+kESU8XIea+kmZWd7tmZtYwuIg1M6ujJI2T9Jmk79R2LgbAz4FxEdEiIm7K3pi+X2fWQl5FFRHNI2J2ung08F2gVUQcExGjIqLvt42RXexHxPMRscu3bbfA2F0lTZG0PP3dtZJ9t5P0sKSFkuZJGpK1vYmkX0v6MP1jx6uSNk+3Kd32n/SPAOMkdco4dmnWzypJN6fb2qXXKHP75cW5ImZmdZ+LWDOzOkhSO2BfIIDDajj2hjUZr1iKcB47ANOruc36Zgfg7YhYWduJVAdJGwEPA38DtgDuAh5O1+fyN+A9kkJ+APAbSaUZ268EfgDsDbQETgK+TLcdA5xO8t/1lsCLwF/LD0z/WNA8Ipqn7X8B3JcVf/OM/a5ev7M2M6v/XMSamdVNJwMvAXcCp2RukLS9pAckLZD0qaRbMradJWlG2gv0pqRu6fq1eroyh6BK2i/tVbpI0n+Bv0jaQtI/0xifpa/bZBy/paS/pD1On0l6KF0/TdKhGfs1lfRJrt4tSVul7S5Ke7ael7RBZecoaQNJv5A0R9LHku6WtFm6rby36gxJHwBj0/Wnp9fkM0lPSdqhoosu6TBJ09OcxknaNV0/FigFbkl7wXbOOu4akuKkfPstGZt/KOmdNP6tkpRxXFVy6yVpQprbXEmn5tgn3/t2qqTZ6f3xnqQT0vXfk/Rc2kP4iaR7M46JdPuVwC+Bwek5npG290LGvp0kPZO+nx9JujRd31PSi2nu8yXdUl4oShqfHv5a2u7g8nsyo91d0/djUfr+HJax7c70uj6WntfLknaq6Dpm2Q/YELgxIr5Ke9gF7J/j2jZP978mIlZExGvAP0gKUyRtAfwUOCsi5kRiWkSUF7HtgRciYnZErCIpiDtWkNfRwMfA8wWeh5lZo+Ii1sysbjoZGJX+9JP0XUiGKwL/BOYA7YDtgNHptmOAK9JjW5L04H5aYLz/Iekd2gE4m+T/D39Jl9uS9AplFmZ/BTYBOgFbAzek6+8GTszY72BgfkRMzRHzAmAe0Jqk5+lSICo7R+DU9
KcU2BFonpUXQB9gV5LrNjBt98g0zvPAPbkuQFqY3kNSiLQGHgcelbRRROyfHntu2gv2duaxEXFZ1vZzMzYfAvQAugCDgH5pvKrk1hZ4Arg53bcrMDXHrhW+b5I2BW4CDoqIFiQ9huVtXA08TdIb2SaNs5aI+BXwG+De9Bz/nJVjC+BZ4ElgW+B7wL/SzauA84GtSHopDwDOSdvtne7TJW333qx2mwKPpvltDfwEGCUpc7jxcSS9oFsAs4BrMo7/p6SLc1wrSO7f1yMiMta9nq7Ppqzf5a93S1/vDqwEjpb0X0lvS/pxxr6jge9J2jk9p1NIrlUupwB3Z+UFMEfJH5z+ImmrCo41M2vwXMSamdUxknqRFCFjImIK8C5wfLq5J0mB8LOIWBYRX0ZEeU/YmcDwiJiU9gLNiog5BYZdDfwq7Y36IiI+jYj7I2J5RCwhKQr6pPltAxwEDImIz9JeqefSdv4GHCypZbp8EhlDJrOsALYBdkjbeD79R3tl53gC8Pu0N2spcAlwrNYeOnxFetwXwI+A30bEjHQI7G+ArhX0eA4GHouIZyJiBXA9sDFJsfdtXBsRiyLiA6CMpAClirmdADwbEfek1+rTXH8YqOx9S60GdpO0cUTMj4jy4dErSO65bbOud1UcAvw3In6XtrEkIl5O85oSES9FxMqIeB/4Y1ZeldmL5I8V10bE1xExluSPHMdl7PNARExMr+MovrnGRMQhEXFtBW03BxZnrVsMtMjeMb2e/wYul9RMySiHo0j+mANJ8b8ZsDNJr+vRwBWSDky3zyf5Q8VMkj8uHENS2K8l/YNFH5KhzeU+IflDyA5A9zS/URWck5lZg+ci1sys7jkFeDoiPkmX/843Q4q3B+ZU8Ezi9iQF7/pYkDHsEUmbSPqjkmG7nwPjgc3TXtLtgYUR8Vl2IxHxIck/9I9SMqHNQVT8j+3/Jek1ezod4lreW1bZOW5L0kNbbg7JcNDvZqybm/F6B+AP6TDURcBCkt6z7fK1HRGr07Zy7VsV/814vZykcKpqbgW9t5W9bxGxjKRQHwLMT4fffj899Odp7InpcN3Tq36aFeeY9j7+M+2h/JykYC+0J3FbYG76fpSbw9rXqaJrnM9SklELmVoCSyrY/wSSAnUuMILk3i4f9vxF+vuq9A9Br5P0vh6crv8VSSG6PdCMpOd4rKTyIrjcySTDjt8rXxERSyNicvpHgI+Ac4G+GX8sMjNrVFzEmpnVIZI2Jhly2if9B/9/SXprukjqQvKP57bKPWnRXKCiZwGX802PESTDhzNlD1u8ANgF2DMiWgLlQz6VxtkyLVJzuYtkSPExwIsR8Z9cO6U9dRdExI7AocD/k3QAlZ/jhyTFX7m2JEM4P6rgXOYCP4qIzTN+No6ICfnaliSSgiNn/rlOqcD91ie3yt7bTJW9b0TEUxFxIEkP+FvA/6Xr/xsRZ0XEtiQ9xLep6l8NVFmOI9J4HdK8LmXtYbmV+RDYXunz0qm2FP6+VGY60Dl9r8t1poIJvCJ51vWQiGgdEXsCrYCJ6ebXy3erIFYXkqHY89Ji9E6S4c/Zz8WezNq9sDlTSX8Xeg3NzBoUF7FmZnXLQJLnBzuSDInsSvJ85/Mk/7idSDIs8VpJm6bDGvdJj/0TcKGk7kp8L2No6lTgeCVfAdKf/EM5W5D0LC2StCVJLxIAETGf5PnM25RMJNRUUu+MYx8CugHDSJ6RzUnSIWmOAj5Pz3tVnnO8BzhfUvt0op3yZzQrmi13JHCJ0q8ykbRZ+uxwLmOAAZIOSJ9ZvAD4CshVVObyEclzuoWqSm6jSCaIGiRpQ0mtlPurYCp83yR9V8nEVZuSnNdSkuuNpGP0zQRQn5EUSauqcC6QDPH9H0k/lfQdSS0k7ZmR1+fA0rT3d2jWsZVdu5eBZcDP03ttP5I/eoyuYP+qGEdynuelOZc/yzw2185KJphqIWkjSScCfYHfA0TEuyT/nV6WtrUrSc/3P9PDJwHHpO/DB
pJOApqSjEYob/8HJD3Ma81KLGlPJd9TvIGkViTPNo+LiOyh0GZmjYKLWDOzuuUU4C8R8UHaO/bfiPgvyeQ8J5D0vBxKMmnOByRDGQcDRMR9JM9A/p1kOORDJJM1QVJQHgosStt5KE8eN5I8D/oJySzJ2RPQnETyHOVbJLOo/rR8Q/os6v0kwy4fqCRGB5KJgJaSfN3IbRExLpKZW3OeI3AHyTO240m+6uRLkol+coqIB4HrgNHpMNZpJEOcc+07k6QH+eb0vA8FDo2Irys5h0x/IJnU5zNJ63yP7LfM7QOSYakXkAw7nkrSs5ftRip+3zZIj/8wbaMP6eRKJMNcX5a0FHgEGJY5nLUQ6TOjB5Jct/8C75BMwAVwIclz3UtIen/vzTr8CuCudGj1oKx2vyaZpOyg9LxuA06OiLcKyUvSE0pnSc6R89ckfzg6meS/jdOBgeXvuaQTJGX2yvYDZpMU+kOA/hGxIGP7cSS9+Z8CjwGXR0T55FbXAa+RvHeLSEZYHBURizKOP4Xk+d7s4cw7kryXS0juk69Y+5lgM7NGRbHOxHdmZmbfjqRfAjtHxIl5dzYzMzOrggbxhfZmZlZ3pMNYzyDprTUzMzOrVh5ObGZm1UbSWSQT/DwREeNrOx8zMzNreDyc2MzMzMzMzOoN98SamZmZmZlZveEi1szMzMzMzOqNejmx01ZbbRXt2rWr7TTMzMzMzMysCKZMmfJJRLTOta1eFrHt2rVj8uTJtZ2GmZmZmZmZFYGkORVt83BiMzMzMzMzqzdcxJqZmZmZmVm9UdQiVtIdkj6WNK2C7ZJ0k6RZkl6X1K2Y+ZiZNVTDhw+nrKxsrXVlZWUMHz68ljIyMzMzK45iPxN7J3ALcHcF2w8COqQ/ewIj0t9mZlYFPXr0YNCgQYwZM4bS0lLKysrWLJuZmTUkK1asYN68eXz55Ze1nYpVg2bNmtGmTRuaNm1a8DFFLWIjYrykdpXscjhwd0QE8JKkzSVtExHzi5mXmVlDU1paypgxYxg0aBBDhw5lxIgRawpaMzOzhmTevHm0aNGCdu3aIam207FvISL49NNPmTdvHu3bty/4uNp+JnY7YG7G8rx03ToknS1psqTJCxYsqJHkzMzqk9LSUoYOHcrVV1/N0KFDXcCamVmD9OWXX9KqVSsXsA2AJFq1alXlXvXaLmJz3XmRa8eIuD0iSiKipHXrnF8XZGbWqJWVlTFixAguv/xyRowYsc4zsmZmZg2FC9iGY33ey9ouYucB22cstwE+rKVczMzqrcxnYK+66qo1Q4tdyJqZmVW/Jk2a0LVrV3bbbTeOOeYYli9fvt5tnXrqqfzjH/8A4Mwzz+TNN9+scN9x48YxYcKEKsdo164dn3zyScHrMzVv3rxKsa644gquv/76Kh1TVbVdxD4CnJzOUrwXsNjPw5qZVd2kSZPWega2/BnZSZMm1XJmZmZmDc/GG2/M1KlTmTZtGhtttBEjR45ca/uqVavWq90//elPdOzYscLt61vENjTF/oqde4AXgV0kzZN0hqQhkoakuzwOzAZmAf8HnFPMfMzMGqqf//zn6zwDW1pays9//vNaysjMzKxx2HfffZk1axbjxo2jtLSU448/nt13351Vq1bxs5/9jB49etC5c2f++Mc/AslkRueeey4dO3ZkwIABfPzxx2va2m+//Zg8eTIATz75JN26daNLly4ccMABvP/++4wcOZIbbriBrl278vzzz7NgwQKOOuooevToQY8ePfj3v/8NwKeffkrfvn3ZY489+NGPfkQyj27lBg4cSPfu3enUqRO33377WtsuuOACunXrxgEHHED5/ETvvvsu/fv3p3v37uy777689dZb67R500030bFjRzp37syxxx67fhc4h2LPTnxcnu0B/LiYOZiZmZmZWQP105/C1KnV22bXrnDjjQXtunLlSp544gn69+8PwMSJE5k2bRrt27fn9ttvZ7PNNmPSpEl89dVX7LPPPvTt25dXX
32VmTNn8sYbb/DRRx/RsWNHTj/99LXaXbBgAWeddRbjx4+nffv2LFy4kC233JIhQ4bQvHlzLrzwQgCOP/54zj//fHr16sUHH3xAv379mDFjBldeeSW9evXil7/8JY899tg6RWkud9xxB1tuuSVffPEFPXr04KijjqJVq1YsW7aMbt268bvf/Y6rrrqKK6+8kltuuYWzzz6bkSNH0qFDB15++WXOOeccxo4du1ab1157Le+99x7f+c53WLRoUUHXtBDF/p5YMzMzMzOzBuWLL76ga9euQNITe8YZZzBhwgR69uy55qtinn76aV5//fU1z7suXryYd955h/Hjx3PcccfRpEkTtt12W/bff/912n/ppZfo3bv3mra23HLLnHk8++yzaz1D+/nnn7NkyRLGjx/PAw88AMCAAQPYYost8p7TTTfdxIMPPgjA3Llzeeedd2jVqhUbbLABgwcPBuDEE0/kyCOPZOnSpUyYMIFjjjlmzfFfffXVOm127tyZE044gYEDBzJw4MC8ORTKRayZmZmZmdVPBfaYVrfyZ2KzbbrppmteRwQ333wz/fr1W2ufxx9/PO+MvBFR0Ky9q1ev5sUXX2TjjTdeZ1tVZv0dN24czz77LC+++CKbbLIJ++23X4VfeyOJ1atXs/nmm+e8Bpkee+wxxo8fzyOPPMLVV1/N9OnT2XDDb1+C1vbETmZmZmZmZg1Ov379GDFiBCtWrADg7bffZtmyZfTu3ZvRo0ezatUq5s+fn/ObBPbee2+ee+453nvvPQAWLlwIQIsWLViyZMma/fr27cstt9yyZrm8qOzduzejRo0C4IknnuCzzz6rNNfFixezxRZbsMkmm/DWW2/x0ksvrdm2evXqNb3Jf//73+nVqxctW7akffv23HfffUBSdL/22mtrtbl69Wrmzp1LaWkpw4cPZ9GiRSxdujT/hSuAe2LNzMzMzMyq2Zlnnsn7779Pt27diAhat27NQw89xBFHHMHYsWPZfffd2XnnnenTp886x7Zu3Zrbb7+dI488ktWrV7P11lvzzDPPcOihh3L00Ufz8MMPc/PNN3PTTTfx4x//mM6dO7Ny5Up69+7NyJEj+dWvfsVxxx1Ht27d6NOnD23btq001/79+zNy5Eg6d+7MLrvswl577bVm26abbsr06dPp3r07m222Gffeey8Ao0aNYujQofz6179mxYoVHHvssXTp0mXNcatWreLEE09k8eLFRATnn38+m2++ebVcWxUyU1VdU1JSEuWzdpmZmZmZWeMxY8YMdt1119pOw6pRrvdU0pSIKMm1v4cTm5mZmZmZWb3hItbMzMzMzMzqDRexZmZmZmZmVm+4iDUzMzMzM7N6w0WsmZmZmZmZ1RsuYs3MzMzMzKzecBFrZmZmZmZWBfPmzePwww+nQ4cO7LTTTgwbNoyvv/4agDvvvJNzzz23ljNcV/Pmzau0vtz777/PbrvtVqVYp556Kv/4xz+qdExVuIg1MzMzM7MGafjw4ZSVla21rqysjOHDh693mxHBkUceycCBA3nnnXd4++23Wbp0KZdddtm3TbdCK1euLFrb9VGlRaykv0i6Q9INNZWQmZmZmZlZdejRoweDBg1aU8iWlZUxaNAgevTosd5tjh07lmbNmnHaaacB0KRJE2644QbuuOMOli9fDsDcuXPp378/u+yyC1deeSUAy5YtY8CAAXTp0oXddtuNe++9F4ApU6bQp08funfvTr9+/Zg/fz4A++23H5deeil9+vThmmuuoV27dqxevRqA5cuXs/3227NixQreffdd+vfvT/fu3dl333156623AHjvvffYe++96dGjB5dffnne81q6dCkHHHAA3bp1Y/fdd+fhhx9es23lypWccsopdO7cmaOPPnrNeVaUe6aLL76Yjh070rlzZy688ML1uubZNsyz/U4ggK+rJZqZmZmZmVkNKS0tZcyYMQwaNIihQ4cyYsQIxowZQ2lp6Xq3OX36dLp3777WupYtW
9K2bVtmzZoFwMSJE5k2bRqbbLIJPXr0YMCAAcyZM4dtt92Wxx57DIDFixezYsUKfvKTn/Dwww/TunVr7r33Xi677DLuuOMOABYtWsRzzz0HwCuvvMJzzz1HaWkpjz76KP369aNp06acffbZjBw5kg4dOvDyyy9zzjnnMHbsWIYNG8bQoUM5+eSTufXWW/OeV7NmzXjwwQdp2bIln3zyCXvttReHHXYYADNnzuTPf/4z++yzD6effjq33XYbw4YNqzR3gIULF/Lggw/y1ltvIYlFixat93XPlK+IvYKkiF0IHF0tEc3MzMzMzGpIaWkpQ4cO5eqrr+byyy//VgUsJMOJJVW6/sADD6RVq1YAHHnkkbzwwgscfPDBXHjhhVx00UUccsgh7LvvvkybNo1p06Zx4IEHArBq1Sq22WabNW0OHjx4rdf33nsvpaWljB49mnPOOYelS5cyYcIEjjnmmDX7ffXVVwD8+9//5v777wfgpJNO4qKLLsp7Xpdeeinjx49ngw024D//+Q8fffQRANtvvz377LMPACeeeCI33XQT/fv3rzR3SIr7Zs2aceaZZzJgwAAOOeSQfJe3IPmK2FPT36uqJZqZmZmZmVkNKisrY8SIEVx++eWMGDGC0tLSb1XIdurUaU1xWO7zzz9n7ty57LTTTkyZMmWdIlcSO++8M1OmTOHxxx/nkksuoW/fvhxxxBF06tSJF198MWesTTfddM3rww47jEsuuYSFCxcyZcoU9t9/f5YtW8bmm2/O1KlTcx6fq9iuyKhRo1iwYAFTpkyhadOmtGvXji+//DJnO5KIiEpzB9hwww2ZOHEi//rXvxg9ejS33HILY8eOLTinilT6TGxEzImIOUBfSR2+dTQzMzMzM7MaUv4M7JgxY7jqqqvWDC3OnuypKg444ACWL1/O3XffDSQ9kBdccAGnnnoqm2yyCQDPPPMMCxcu5IsvvuChhx5in3324cMPP2STTTbhxBNP5MILL+SVV15hl112YcGCBWsKwRUrVjB9+vSccZs3b07Pnj0ZNmwYhxxyCE2aNKFly5a0b9+e++67D0h6U1977TUA9tlnH0aPHg0kBWo+ixcvZuutt6Zp06aUlZUxZ86cNds++OCDNTnec8899OrVq6Dcly5dyuLFizn44IO58cYbKyy2q6rQ2YnbAX+U9K6kMZJ+IqlrtWRgZmZmZmZWBJMmTVrrGdjyZ2QnTZq03m1K4sEHH+S+++6jQ4cO7LzzzjRr1ozf/OY3a/bp1asXJ510El27duWoo46ipKSEN954g549e9K1a1euueYafvGLX7DRRhvxj3/8g4suuoguXbrQtWtXJkyYUGHswYMH87e//W2tYcajRo3iz3/+M126dKFTp05rJmT6wx/+wK233kqPHj1YvHhx3vM64YQTmDx5MiUlJYwaNYrvf//7a7btuuuu3HXXXXTu3JmFCxcydOjQgnJfsmQJhxxyCJ07d6ZPnz7ccEP1zBesiCh8Z2lj4CzgQmC7iGhSLVlUUUlJSUyePLk2QpuZmZmZWS2aMWMGu+66a22nYdUo13sqaUpElOTav6CeWEm/kPQE8DTwPZIitk2Bx/aXNFPSLEkX59i+maRHJb0mabqk0wpp18zMzMzMzBqffBM7lTsSWAk8BjwHvBQRX+Y7SFIT4FbgQGAeMEnSIxHxZsZuPwbejIhDJbUGZkoaFRH+Wh8zMzMzMzNbS0E9sRHRDTgAmEhSkL4h6YUCDu0JzIqI2WlROho4PLt5oIWSKa+ak3ydz8oC8zczMzMzM7NGpKCeWEm7AfsCfYASYC7wfAGHbpfuW24esGfWPrcAjwAfAi2AwRGxupC8zMzMzMys8anou1qt/qnKHE3lCh1OfB0wHrgJmBQRKwo8LtedlZ1lP2AqsD+wE/CMpOcj4vO1GpLOBs4GaNu2bYHhzczMzMysIWnWrBmffvoprVq1ciFbz0UEn376Kc2aNavScQUVsRExIJ2ZuG0VClhIe
l63z1huQ9Ljmuk04NpISvBZkt4Dvk8ydDkzh9uB2yGZnbgKOZiZmZmZWQPRpk0b5s2bx4IFC2o7FasGzZo1o02bguYMXqPQ4cSHAtcDGwHt0++IvSoiDstz6CSgg6T2wH+AY4Hjs/b5gOR52+clfRfYBZhd8BmYmZmZmVmj0bRpU9q3b1/baVgtKmhiJ+AKkkmaFgFExFSgXb6DImIlcC7wFDADGBMR0yUNkTQk3e1q4AeS3gD+BVwUEZ8UfgpmZmZmZmbWWBT6TOzKiFi8PmPOI+Jx4PGsdSMzXn8I9K1yw2ZmZmZmZtboFFrETpN0PNBEUgfgPGBC8dIyMzMzMzMzW1ehw4l/AnQCvgLuAT4HflqknMzMzMzMzMxyKnR24uXAZemPmZmZmZmZWa2otIiV9BfW/V7XchERZ1R/SmZmZmZmZma55euJ/WeOdW1JhhI3qfZszMzMzMzMzCpRaREbEfeXv5a0I3Ap0Bu4FvhzcVMzMzMzMzMzW1veiZ0k7Srpb8CjwAtAx4gYERFfFz07MzMzMzMzswz5nom9DygBrgfOB1YBLcu/LzYiFhY7QTMzMzMzM7Ny+Z6J7UEysdOFwAXpOqW/A9ixSHmZmZmZmZmZrSPfM7HtCmlEUqeImF4tGZmZmZmZmZlVIO8zsQX6azW1Y2ZmZmZmZlah6ipilX8XMzMzMzMzs2+nuorYqKZ2zMzMzMzMzCpUXUWsmZmZmZmZWdFVVxHr74w1MzMzMzOzoiuoiFXiREm/TJfbSupZvj0i9ipWgmZmZmZmZmblCu2JvQ3YGzguXV4C3FqUjMzMzMzMzMwqUOn3xGbYMyK6SXoVICI+k7RREfMyMzMzMzMzW0ehPbErJDUhnYVYUmtgddGyMjMzMzMzM8uh0CL2JuBBYGtJ1wAvAL8pWlZmZmZmZmZmORQ0nDgiRkmaAhwACBgYETOKmpmZmZmZmZlZlkJnJ94S+Bi4B/g78JGkpgUe21/STEmzJF1cwT77SZoqabqk5wpN3szMzMzMzBqXQid2egXYHviMpCd2c2C+pI+BsyJiSq6D0udobwUOBOYBkyQ9EhFvZuyzOcnsx/0j4gNJW6/nuZiZmZmZmVkDV+gzsU8CB0fEVhHRCjgIGAOcQ1KAVqQnMCsiZkfE18Bo4PCsfY4HHoiIDwAi4uOqnICZmZmZmZk1HoUWsSUR8VT5QkQ8DfSOiJeA71Ry3HbA3Izleem6TDsDW0gaJ2mKpJNzNSTpbEmTJU1esGBBgWmbmZmZmZlZQ1LocOKFki4i6UkFGAwsSocLV/ZVO8qxLnLk0J1k0qiNgRclvRQRb691UMTtwO0AJSUl2W2YmZmZmZlZI1BoT+zxQBvgIeBhoC1wLNAEGFTJcfNInqUt1wb4MMc+T0bEsoj4BBgPdCkwLzMzMzMzM2tECipiI+KTiPhJROwREV2BC4HSiPg6ImZVcugkoIOk9pI2Iil8H8na52FgX0kbStoE2BPw1/eYmZmZmZnZOgrtiUVSE0kHSbobeJ9kSHGlImIlcC7wFElhOiYipksaImlIus8MkomjXgcmAn+KiGlVPhMzMzMzMzNr8BRR+eOlknqTDCceQFJk7gPsGBHLi59ebiUlJTF58uTaCm9mZmZmZmZFJGlKRJTk2lbpxE6S5gEfACOAn0XEEknv1WYBa2ZmZmZmZo1XvuHE95N8Jc5g4FBJm7Lu7MJmZmZmZmZmNaLSIjYihgHtgN8DpcDbQGtJgyQ1L356ZmZmZmZmZt/IO7FTJMZGxFkkBe3xwECSyZ3MzMzMzMzMakylz8Rmi4gVwKPAo5I2Lk5KZmZmZmZmZrnlm9ipjOQZ2IURcXTmtoj4opiJmZmZmZmZmWXL1xN7KkkRu6r4qZiZmZmZmZlVLl8RO46kiF0A7Fn0bMzMzMzMzMwqUWkRGxHtayoRMzMzMzMzs3zyzk5sZmZmZmZmVle4iDUzMzMzM7N6o9IiVtJ3aioRMzMzMzMzs3zy9cS+C
CDprzWQi5mZmZmZmVml8s1OvJGkU4AfSDoye2NEPFCctMzMzMzMzMzWla+IHQKcAGwOHJq1LQAXsWZmZmZmZlZj8n3FzgvAC5ImR8SfaygnMzMzMzMzs5zy9cSW+6uk84De6fJzwMiIWFGctMzMzMzMzMzWVWgRexvQNP0NcBIwAjizGEmZmZmZmZmZ5VJoEdsjIrpkLI+V9FoxEjIzMzMzMzOrSL6v2Cm3StJO5QuSdgRWFSclMzMzMzMzs9wK7Yn9GVAmaTYgYAfgtKJlZWZmZmZmZpZDQT2xEfEvoANwXvqzS0SUFXKspP6SZkqaJeniSvbrIWmVpKMLadfMzMzMzMwan0J7YomIr4DXq9K4pCbArcCBwDxgkqRHIuLNHPtdBzxVlfbNzMzMzMyscSn0mdj11ROYFRGzI+JrYDRweI79fgLcD3xc5HzMzMzMzMysHstbxCqx/Xq2vx0wN2N5Xrous/3tgCOAkXnyOFvSZEmTFyxYsJ7pmJmZmZmZWX2Wt4iNiAAeWs/2lavJrOUbgYsiotLZjiPi9ogoiYiS1q1br2c6ZmZmZmZmVp8V+kzsS5J6RMSkKrY/D8jsxW0DfJi1TwkwWhLAVsDBklZGxENVjGVmZmZmZmYNXKFFbCnwI0lzgGUkPawREZ3zHDcJ6CCpPfAf4Fjg+MwdIqJ9+WtJdwL/dAFrZmZmZmZmuRRaxB60Po1HxEpJ55LMOtwEuCMipksakm6v9DlYMzMzMzMzs0wFFbERMUdSF2DfdNXzEfFagcc+DjyetS5n8RoRpxbSppmZmZmZmTVOBX3FjqRhwChg6/Tnb5J+UszEzMzMzMzMzLIVOpz4DGDPiFgGIOk64EXg5mIlZmZmZmZmZpatoJ5YkomcMr8CZxW5vz7HzMzMzMzMrGgK7Yn9C/CypAfT5YHAHUXJyMzMzMzMzKwChU7s9HtJ44BeJD2wp0XEq8VMzMzMzMzMzCxbQUWspL9GxEnAKznWmZmZmZmZmdWIQp+J7ZS5IKkJ0L360zEzMzMzMzOrWKVFrKRLJC0BOkv6XNKSdPlj4OEaydDMzMzMzMwsVWkRGxG/jYgWwP9GRMuIaJH+tIqIS2ooRzMzMzMzMzOg8ImdLpG0BdABaJaxfnyxEjMzMzMzMzPLVujETmcCw4A2wFRgL+BFYP+iZWZmZmZmZmaWpdCJnYYBPYA5EVEK7AEsKFpWZmZmZmZmZjkUWsR+GRFfAkj6TkS8BexSvLTMzMzMzMzM1lXQcGJgnqTNgYeAZyR9BnxYrKTMzMzMzMzMcil0Yqcj0pdXSCoDNgOeLFpWZmZmZmZmZjkUNJxY0l6SWgBExHNAGclzsWZmZmZmZmY1ptBnYkcASzOWl6XrzMzMzMzMzGpMoUWsIiLKFyJiNYU/T2tmZmZmZmZWLQotYmdLOk9S0/RnGDC7mImZmZmZmZmZZSu0iB0C/AD4DzAP2BM4u1hJmZmZmZmZmeVSUBEbER9HxLERsXVEfDcijo+Ijws5VlJ/STMlzZJ0cY7tJ0h6Pf2ZIKlLVU/CzMzMzMzMGodKn2uV9POIGC7pZiCyt0fEeXmObwLcChxI0oM7SdIjEfFmxm7vAX0i4jNJBwG3k/T0mpmZmZmZma0l3+RMM9Lfk9ez/Z7ArIiYDSBpNHA4sKaIjYgJGfu/BLRZz1hmZmZmZmbWwFVaxEbEo+nvu9az/e2AuRnL5c/TVuQM4In1jGVmZmZmZmYNXL7hxI+SYxhxuYg4LE/7ynVYBbFKSYrYXhVsP5t0Mqm2bdvmCWtmZmZmZmYNUb7hxNenv48E/gf4W7p8HPB+Ae3PA7bPWG4DfJi9k6TOwJ+AgyLi01wNRcTtJM/LUlJSUmFhbWZmZmZmZg1XvuHEzwFIujoiemdselTS+ALanwR0kNSe5Ot5jgWOz9xBUlvgAeCkiHi7KsmbmZmZmZlZ45KvJ7Zca0k7ZkzQ1B5one+giFgp6VzgKaAJcEdETJc0JN0+Evgl0
Aq4TRLAyogoqfqpmJmZmZmZWUNXaBF7PjBO0ux0uR3wo0IOjIjHgcez1o3MeH0mcGaBeZiZmZmZmVkjVlARGxFPSuoAfD9d9VZEfFW8tMzMzMzMzMzWVWhPLEB3kh7YDYEukoiIu4uSlZmZmZmZmVkOBRWxkv4K7ARMBValqwNwEWtmZmZmZmY1ptCe2BKgY0T4q23MzMzMzMys1mxQ4H7TSL4n1szMzMzMzKzWFNoTuxXwpqSJwJoJnSLisKJkZWZmZmZmZpZDoUXsFcVMwszMzMzMzKwQhX7FznPFTsTMzMzMzMwsn0qLWElLSGYhXmcTEBHRsihZmZmZmZmZmeVQaREbES1qKhEzMzMzMzOzfAqdndjMzMzMzMys1rmINTMzMzMzs3rDRayZmZmZmZnVGy5izczMzMzMrN5wEWtmZmZmZmb1hotYMzMzMzMzqzdcxJqZmZmZmVm94SLWzMzMzMzM6g0XsWZmZmZmZlZvuIg1MzMzMzOzesNFrJmZmZmZmdUbRS9iJfWXNFPSLEkX59guSTel21+X1K3YORXL8OHDKSsrW2tdWVkZw4cPd5xGFKcmYzlO3Y5T07FqQkN7jxpanJqM5Th1O05NxnIcx6npWI5Tt+PUiIgo2g/QBHgX2BHYCHgN6Ji1z8HAE4CAvYCX87XbvXv3qIvGjh0bW221VYwdOzbnsuM0jjg1Gctx6nacmo5VExrae9TQ4tRkLMep23FqMpbjOE5Nx3Kcuh2nugCTo4J6UMn24pC0N3BFRPRLly9JC+ffZuzzR2BcRNyTLs8E9ouI+RW1W1JSEpMnTy5a3t9GWVkZgwYMYGjr1oyYP58xu+5K6RZbVH+czz5j0IwZDN1mG8epg3FqMpbj1O04NR2rJjS096ihxanJWI5Tt+PUZCzHcZyajuU43zLOHnswYvZsxowZQ2lpabXHqQ6SpkRESa5txR5OvB0wN2N5Xrquqvsg6WxJkyVNXrBgQbUnWl1KS0sZuvvuXP3BBwzdZpuifUCUbrEFQ7fZxnHqaJyajOU4dTtOTceqCQ3tPWpocWoyluPU7Tg1GctxHKemYznOt4wzcSJDhw6tswVsXhV10VbHD3AM8KeM5ZOAm7P2eQzolbH8L6B7Ze3W1eHEEd90y19++eVF7Z53nLodpyZjOU7djlPTsWpCQ3uPGlqcmozlOHU7Tk3GchzHqelYjlO341QHKhlOXOwidm/gqYzlS4BLsvb5I3BcxvJMYJvK2q2rRWxDG8/uOHU/luPU7Tg1HasmNLT3qKHFqclYjlO349RkLMdxnJqO5Th1O051qc0idkNgNtCebyZ26pS1zwDWnthpYr5262oRe911161zE4wdOzauu+46x2lEcWoyluPU7Tg1HasmNLT3qKHFqclYjlO349RkLMdxnJqO5Th1O051qayILerETgCSDgZuJJmp+I6IuEbSEICIGClJwC1Af2A5cFpEVDprU12e2MnMzMzMzMy+ncomdtqw2MEj4nHg8ax1IzNeB/DjYudhZmZmZmZm9V+xZyc2MzMzMzMzqzYuYs3MzMzMzKzeKPozscUgaQEwp7bzyGMr4JPaTsLqBN8LVs73goHvA/uG7wUr53vByvle+MYOEdE614Z6WcTWB5ImV/QgsjUuvhesnO8FA98H9g3fC1bO94KV871QGA8nNjMzMzMzs3rDRayZmZmZmZnVGy5ii+f22k7A6gzfC1bO94KB7wP7hu8FK+d7wcr5XiiAn4k1MzMzMzOzesM9sWZmZmZmZlZvuIitZpL6S5opaZaki2s7H6s9kt6X9IakqZIm13Y+VnMk3SHpY0nTMtZtKekZSe+kv7eozRytZlRwL1wh6T/pZ8NUSQfXZo5WMyRtL6lM0gxJ0yUNS9f7s6GRqeRe8GdDIyOpmaSJkl5L74Ur0/X+XMjDw4mrkaQmwNvAgcA8YBJwXES8WauJWa2Q9D5QEhH+rq9GRlJvYClwd0Tslq4bDiyMiGvTP3BtE
REX1WaeVnwV3AtXAEsj4vrazM1qlqRtgG0i4hVJLYApwEDgVPzZ0KhUci8Mwp8NjYokAZtGxFJJTYEXgGHAkfhzoVLuia1ePYFZETE7Ir4GRgOH13JOZlbDImI8sDBr9eHAXenru0j+wWINXAX3gjVCETE/Il5JXy8BZgDb4c+GRqeSe8EamUgsTRebpj+BPxfychFbvbYD5mYsz8MfSo1ZAE9LmiLp7NpOxmrddyNiPiT/gAG2ruV8rHadK+n1dLixh4k1MpLaAXsAL+PPhkYt614AfzY0OpKaSJoKfAw8ExH+XCiAi9jqpRzrPF678donIroBBwE/TocVmpmNAHYCugLzgd/VajZWoyQ1B+4HfhoRn9d2PlZ7ctwL/mxohCJiVUR0BdoAPSXtVssp1QsuYqvXPGD7jOU2wIe1lIvVsoj4MP39MfAgyXBza7w+Sp+DKn8e6uNazsdqSUR8lP6jZTXwf/izodFIn3m7HxgVEQ+kq/3Z0Ajluhf82dC4RcQiYBzQH38u5OUitnpNAjpIai9pI+BY4JFazslqgaRN08kakLQp0BeYVvlR1sA9ApySvj4FeLgWc7FaVP4Pk9QR+LOhUUgncPkzMCMifp+xyZ8NjUxF94I/GxofSa0lbZ6+3hj4IfAW/lzIy7MTV7N0OvQbgSbAHRFxTe1mZLVB0o4kva8AGwJ/973QeEi6B9gP2Ar4CPgV8BAwBmgLfAAcExGe8KeBq+Be2I9kuGAA7wM/Kn/2yRouSb2A54E3gNXp6ktJnoX0Z0MjUsm9cBz+bGhUJHUmmbipCUnn4piIuEpSK/y5UCkXsWZmZmZmZlZveDixmZmZmZmZ1RsuYs3MzMzMzKzecBFrZmZmZmZm9YaLWDMzMzMzM6s3XMSamZmZmZlZveEi1szMapykcZJKaiDOeZJmSBpV7Fh58lhaDW1cJemHVdi/a/q1b+XLV0i68NvmUYX4a95jSY+Xfxdi1j55c5I0UFLHjOUqXYfqIunSmo5pZma5uYg1M7N6RdKGVdj9HODgiDihWPnUlIj4ZUQ8W4VDugIH59upJkTEwRGxaD0PHwisKWLX4zpUFxexZmZ1hItYMzPLSVK7tBfz/yRNl/S0pI3TbZm9bFtJej99faqkhyQ9Kuk9SedK+n+SXpX0kqQtM0KcKGmCpGmSeqbHbyrpDkmT0mMOz2j3PkmPAk/nyPX/pe1Mk/TTdN1IYEfgEUnnZ+3fSdJESVMlvS6pQ7r+IUlT0vM9O2P/pZKuS7c9K6lneg1mSzosI8eHJT0paaakX1VwXX+Wnt/rkq7MOO/HJL2WnsPgHMfdKeno9PX7kq6U9IqkNyR9P2vfjYCrgMHpOZa31zEj7/My9j8x43r8UVKTrPYOkjQmY3m/9L1A0ghJk9NrdmUF5/y+pK3S15el1+dZYJeMfc5Kr8trku6XtImkHwCHAf+b5rZT1nU4IL1P3kjvm+8Ucn3SfSq6B9a5FpKuBTZO19Vqr76ZmbmINTOzynUAbo2ITsAi4KgCjtkNOB7oCVwDLI+IPYAXgZMz9ts0In5A0lt6R7ruMmBsRPQASkmKl03TbXsDp0TE/pnBJHUHTgP2BPYCzpK0R0QMAT4ESiPihqwchwB/iIiuQAkwL11/ekR0T9edJ6lVea7AuHTbEuDXwIHAESTFYrmewAkkvaDHKGvItKS+JNe0Z7pPd0m9gf7AhxHRJSJ2A55c56qu65OI6AaMANYakhsRXwO/BO6NiK4RcW+66ftAvzT+ryQ1lbQrMBjYJ70eq9JzyPQMsFfGezEYKG/zsogoAToDfSR1rijh9L06FtgDOBLokbH5gYjoERFdgBnAGRExAXgE+Fl6Hu9mtNUMuBMYHBG7AxsCQwu5Pql17oGKrkVEXAx8keZQ73v1zczqOxexZmZWmfciYmr6egrQroBjyiJiSUQsABYDj6br38g6/h6AiBgPtFTyzGRf4GJJU4FxQDOgbbr/MxGxMEe8XsCDEbEsI
pYCDwD75snxReBSSRcBO0TEF+n68yS9BrwEbE9ScAJ8zTeF5RvAcxGxIsc5PRMRn6btPZDmlqlv+vMq8ApJUdkhbeeHaW/vvhGxOE/+pO1D4e8LwGMR8VVEfAJ8DHwXOADoDkxKr/sBJD3Ya0TESpLzP1TJcO4BwMPp5kGSXknPqRMZQ39z2JfkvVoeEZ+TFKjldpP0vKQ3SIroTnnOZReS+/PtdPkuoHfG9nzXJ9c9kPdamJlZ7avKc0VmZtb4fJXxehWwcfp6Jd/8IbRZJceszlhezdr/34ms4wIQcFREzMzcIGlPYFkFOaqi5CsSEX+X9DJJMfaUpDPT/H4I7B0RyyWN45tzWxER5fmuOaeIWK21n9HNdU7Zuf42Iv64zkkkvZQHA7+V9HREXJW9T5by67qKwv9/nv1+bpjmdFdEXJLn2HuBHwMLgUkRsURSe5Jezh4R8ZmkO1n3fsiWfU3K3QkMjIjXJJ0K7JennXzve6XXp4J7oNBrYWZmtcg9sWZmtj7eJ+mxAjh6PdsYDCCpF7A47X18CviJJKXb9iignfHAwPQZyk1Jhvg+X9kBknYEZkfETSS9gZ2BzYDP0gL2+yRDk6vqQElbKnl2eCDw76ztTwGnS2qe5rGdpK0lbUsy7PpvwPVAt/WInW0J0KKA/f4FHC1p6zSnLSXtkGO/cWleZ/HNUOKWJH9cWCzpu8BBeWKNB46QtLGkFsChGdtaAPMlNWXt4cwVncdbQDtJ30uXTwKeyxN/jQrugcquxYo0NzMzq2XuiTUzs/VxPTBG0knA2PVs4zNJE0gKodPTdVcDNwKvp4Xs+8AhlTUSEa+kPYAT01V/iohX88QeTDKx1ArgvyTPtS4Dhkh6HZhJMqS4ql4A/gp8D/h7REzOyvXp9LnLF9M6fSlwYrr//0paDaxg7Wc711cZ3wzN/m1FO0XEm5J+ATwtaYM0/o+BOVn7rZL0T+BU4JR03WuSXgWmA7NZt2jPjvWKpHuBqWn7mX9suBx4OV3/Bt8UrqOB/1MyEdXRGW19Kek04L60N3wSMLKy+FnWuQciYmEl1+J2kvvyFT8Xa2ZWu/TN6CgzMzNbX+kQ2JKIOLe2czEzM2vIPJzYzMzMzMzM6g33xJqZmZmZmVm94Z5YMzMzMzMzqzdcxJqZmZmZmVm94SLWzMzMzMzM6g0XsWZmZmZmZlZvuIg1MzMzMzOzesNFrJmZmZmZmdUb/x9XWWdcgKT1gwAAAABJRU5ErkJggg==\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "# visualise the validation: predicted labels (red line) against observed labels (black crosses)\n", "\n", "accuracy = rf.score(X_test, y_test)\n", "sample_positions = np.arange(len(y_test))\n", "\n", "fig, ax = plt.subplots(figsize=(16,3))\n", "ax.plot(sample_positions, predictions, '-', label=\"Predicted labels\", color=\"red\")\n", "ax.plot(sample_positions, y_test, 'x', label=\"Observed labels\",color=\"black\")\n", "ax.set_title(\"Accuracy score of the classification: \"+str(accuracy))\n", "ax.set_xlabel('number of samples in the validation set')\n", "ax.set_ylabel('Indicator of \"Age_New\"')\n", "ax.legend()\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "# depict a tree from the forest\n", "\n", "#plt.figure(figsize=(10,10))\n", "#clf = rf.estimators_[5]\n", "#tree.plot_tree(clf,feature_names=X_names, filled=True)\n" ] }, { "cell_type": "code", "execution_count": 43, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
CU0.121197
VIS990.090836
ISO 4406 large0.090261
VIS400.090237
WATER0.083972
TBN0.080551
VLAMCC0.060583
FE0.046925
ISO 4406 small0.042201
LNF-ROET0.040431
TAN0.036852
BRSTVD0.033758
SI0.029886
LNF-NMW0.027464
ISO 4406 medium0.027061
P0.023858
CA0.016000
MG0.013672
LNF-SSW0.010024
LNF-UNC0.009453
ZN0.007079
LNF-FW0.006504
LNF-CUT0.006081
LNF-FIB0.004499
PB0.000259
NA0.000177
BA0.000146
CR0.000023
SN0.000011
MN0.000000
NI0.000000
LI0.000000
AL0.000000
V0.000000
\n", "
" ], "text/plain": [ " 0\n", "CU 0.121197\n", "VIS99 0.090836\n", "ISO 4406 large 0.090261\n", "VIS40 0.090237\n", "WATER 0.083972\n", "TBN 0.080551\n", "VLAMCC 0.060583\n", "FE 0.046925\n", "ISO 4406 small 0.042201\n", "LNF-ROET 0.040431\n", "TAN 0.036852\n", "BRSTVD 0.033758\n", "SI 0.029886\n", "LNF-NMW 0.027464\n", "ISO 4406 medium 0.027061\n", "P 0.023858\n", "CA 0.016000\n", "MG 0.013672\n", "LNF-SSW 0.010024\n", "LNF-UNC 0.009453\n", "ZN 0.007079\n", "LNF-FW 0.006504\n", "LNF-CUT 0.006081\n", "LNF-FIB 0.004499\n", "PB 0.000259\n", "NA 0.000177\n", "BA 0.000146\n", "CR 0.000023\n", "SN 0.000011\n", "MN 0.000000\n", "NI 0.000000\n", "LI 0.000000\n", "AL 0.000000\n", "V 0.000000" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# tabulate the Gini importance of the explanatory variables, most important first\n", "\n", "gini_importance = pd.DataFrame(rf.feature_importances_, index=X_names)\n", "gini_importance.sort_values(by=0, ascending=False)\n" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "tags": [] }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0
BRSTVD0.0
SN0.0
MG0.0
MN0.0
NA0.0
NI0.0
PB0.0
SI0.0
ZN0.0
ISO 4406 large0.0
LI0.0
TAN0.0
TBN0.0
VIS400.0
VIS990.0
VLAMCC0.0
FE0.0
CU0.0
CR0.0
CA0.0
BA0.0
AL0.0
V0.0
P0.0
LNF-UNC0.0
LNF-SSW0.0
LNF-NMW0.0
LNF-FW0.0
LNF-FIB0.0
LNF-CUT0.0
LNF-ROET0.0
ISO 4406 small0.0
ISO 4406 medium0.0
WATER0.0
\n", "
" ], "text/plain": [ " 0\n", "BRSTVD 0.0\n", "SN 0.0\n", "MG 0.0\n", "MN 0.0\n", "NA 0.0\n", "NI 0.0\n", "PB 0.0\n", "SI 0.0\n", "ZN 0.0\n", "ISO 4406 large 0.0\n", "LI 0.0\n", "TAN 0.0\n", "TBN 0.0\n", "VIS40 0.0\n", "VIS99 0.0\n", "VLAMCC 0.0\n", "FE 0.0\n", "CU 0.0\n", "CR 0.0\n", "CA 0.0\n", "BA 0.0\n", "AL 0.0\n", "V 0.0\n", "P 0.0\n", "LNF-UNC 0.0\n", "LNF-SSW 0.0\n", "LNF-NMW 0.0\n", "LNF-FW 0.0\n", "LNF-FIB 0.0\n", "LNF-CUT 0.0\n", "LNF-ROET 0.0\n", "ISO 4406 small 0.0\n", "ISO 4406 medium 0.0\n", "WATER 0.0" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# tabulate the importance of the various explanatory variables using permutation importance\n", "# (each column of the validation set is shuffled 30 times; the seed is fixed so the table is repeatable)\n", "result = permutation_importance(rf, X_test, y_test, n_repeats=30, random_state=42, n_jobs=2)\n", "\n", "pd.DataFrame(result.importances_mean, index=X_names).sort_values(0,ascending=False)\n" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "# save the validation rows together with the model's predictions in an Excel file\n", "\n", "OUTPUT_PATH = \"outputRF_testset.xlsx\" # relative path: written to the current working directory\n", "\n", "test_results = df.iloc[idx_test,:].copy()\n", "# store the PREDICTED labels; the observed labels are already present in the \"Age_New\" column\n", "test_results[\"Prediction of -Age_New-\"] = predictions\n", "test_results.to_excel(OUTPUT_PATH)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.8" }, "toc-autonumbering": false, "toc-showcode": true, "toc-showmarkdowntxt": true }, "nbformat": 4, "nbformat_minor": 4 }