{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Regression baselines on `hour.csv`\n",
    "\n",
    "Fits several scikit-learn regressors to predict the `total` column from the remaining columns and compares them with `score()` (R^2, the coefficient of determination) on a 20% hold-out split.\n",
    "\n",
    "Scores recorded in the saved outputs of this notebook:\n",
    "\n",
    "| Model | Validation R^2 |\n",
    "|---|---|\n",
    "| LinearRegression | 0.418 |\n",
    "| RandomForestRegressor | 0.948 |\n",
    "| ElasticNet | 0.273 |\n",
    "| Lasso | 0.415 |\n",
    "| Ridge | 0.418 |\n",
    "| SVR | 0.782 |\n",
    "\n",
    "**Note:** the saved execution counts are not sequential, so the stored outputs come from out-of-order runs. Use *Restart Kernel -> Run All* to regenerate them."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import matplotlib.pyplot as plt\n",
    "import numpy as np\n",
    "import scipy\n",
    "import sklearn\n",
    "\n",
    "#feature engineering\n",
    "from sklearn.preprocessing import OneHotEncoder\n",
    "\n",
    "#models\n",
    "from sklearn import model_selection\n",
    "from sklearn.linear_model import LinearRegression\n",
    "from sklearn.ensemble import RandomForestRegressor\n",
    "from sklearn.linear_model import ElasticNet\n",
    "from sklearn.linear_model import Lasso\n",
    "from sklearn.linear_model import Ridge\n",
    "from sklearn.svm import SVR\n",
    "\n",
    "#metrics (classification metrics; not used by the regression cells below)\n",
    "from sklearn.metrics import classification_report\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.metrics import accuracy_score\n",
    "\n",
    "#ignore some warnings we don't care about\n",
    "import warnings\n",
    "warnings.filterwarnings(\"ignore\", category=FutureWarning)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " season year month hour holiday weekday workingday weathersit temp \\\n",
      "0 1 0 1 0 0 6 0 1 0.24 \n",
      "1 1 0 1 1 0 6 0 1 0.22 \n",
      "2 1 0 1 2 0 6 0 1 0.22 \n",
      "3 1 0 1 3 0 6 0 1 0.24 \n",
      "4 1 0 1 4 0 6 0 1 0.24 \n",
      "\n",
      " atemp humidity windspeed casual registered total \n",
      "0 0.2879 0.81 0.0 3 13 16 \n",
      "1 0.2727 0.80 0.0 8 32 40 \n",
      "2 0.2727 0.80 0.0 5 27 32 \n",
      "3 0.2879 0.75 0.0 3 10 13 \n",
      "4 0.2879 0.75 0.0 0 1 1 \n"
     ]
    }
   ],
   "source": [
    "#our own names for the 17 columns of hour.csv; header=0 makes them replace the file's header row\n",
    "names = ['instant','date','season','year','month','hour','holiday','weekday','workingday','weathersit','temp','atemp','humidity','windspeed','casual','registered','total']\n",
    "#usecols 2..16 skips the first two columns ('instant' and 'date')\n",
    "dataset = pd.read_csv('hour.csv',names=names,header=0,usecols=[*range(2,17)])\n",
    "\n",
    "print(dataset.head(5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## One-hot encode `weathersit`\n",
    "\n",
    "This cell overwrites `dataset` and drops `weathersit`, so it cannot be executed twice in a row: re-run the load cell above first."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      " season year month hour holiday weekday workingday temp atemp \\\n",
      "0 1 0 1 0 0 6 0 0.24 0.2879 \n",
      "1 1 0 1 1 0 6 0 0.22 0.2727 \n",
      "2 1 0 1 2 0 6 0 0.22 0.2727 \n",
      "3 1 0 1 3 0 6 0 0.24 0.2879 \n",
      "4 1 0 1 4 0 6 0 0.24 0.2879 \n",
      "\n",
      " humidity windspeed weather1 weather2 weather3 weather4 total \n",
      "0 0.81 0.0 1.0 0.0 0.0 0.0 16 \n",
      "1 0.80 0.0 1.0 0.0 0.0 0.0 40 \n",
      "2 0.80 0.0 1.0 0.0 0.0 0.0 32 \n",
      "3 0.75 0.0 1.0 0.0 0.0 0.0 13 \n",
      "4 0.75 0.0 1.0 0.0 0.0 0.0 1 \n"
     ]
    }
   ],
   "source": [
    "#keep the values we still need before their columns are dropped\n",
    "weatherSits = dataset['weathersit'].values\n",
    "total = dataset['total'].values\n",
    "\n",
    "#One-Hot Encoding\n",
    "#NOTE: scikit-learn >= 1.2 renames the `sparse` argument to `sparse_output`\n",
    "OHEr = OneHotEncoder(sparse=False)\n",
    "#the encoder expects a 2-D array, so turn the 1-D values into a single column\n",
    "weatherSits = weatherSits.reshape(-1, 1)\n",
    "OHEd = OHEr.fit_transform(weatherSits)\n",
    "\n",
    "#remove unwanted columns\n",
    "#(in the rows printed above casual + registered equals total, so keeping them would leak the target)\n",
    "dataset = dataset.drop(columns=['weathersit','casual','registered','total'])\n",
    "\n",
    "#add new OHE columns and put total back at the end\n",
    "dataset['weather1'] = OHEd[:,0]\n",
    "dataset['weather2'] = OHEd[:,1]\n",
    "dataset['weather3'] = OHEd[:,2]\n",
    "dataset['weather4'] = OHEd[:,3]\n",
    "dataset['total'] = total\n",
    "\n",
    "print(dataset.head(5))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Train / validation split"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "array = dataset.values\n",
    "#total is the last column (the target); every column before it is a feature\n",
    "X = array[:,:-1]\n",
    "Y = array[:,-1]\n",
    "validation_size = 0.20\n",
    "seed = 11\n",
    "\n",
    "X_train, X_validation, Y_train, Y_validation = model_selection.train_test_split(X,Y,test_size=validation_size,random_state=seed)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Models\n",
    "\n",
    "Each cell below rebinds `reg_model`, fits it on the training split and ends with `score()` on the validation split (R^2 for scikit-learn regressors)."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.41800686189447844"
      ]
     },
     "execution_count": 14,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#ordinary least squares baseline\n",
    "reg_model = LinearRegression()\n",
    "reg_model.fit(X_train,Y_train)\n",
    "reg_model.score(X_validation,Y_validation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 33,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9478203512307453"
      ]
     },
     "execution_count": 33,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#random forest: 500 trees, depth capped at 60, fixed random_state so reruns match\n",
    "reg_model = RandomForestRegressor(max_depth=60,random_state=0,n_estimators=500)\n",
    "reg_model.fit(X_train,Y_train)\n",
    "reg_model.score(X_validation,Y_validation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.2727850252848387"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#elastic net; only random_state is set, everything else is left at the library default\n",
    "reg_model = ElasticNet(random_state=0)\n",
    "reg_model.fit(X_train,Y_train)\n",
    "reg_model.score(X_validation,Y_validation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 19,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.4147073584579234"
      ]
     },
     "execution_count": 19,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#lasso; only random_state is set, everything else is left at the library default\n",
    "reg_model = Lasso(random_state=0)\n",
    "reg_model.fit(X_train,Y_train)\n",
    "reg_model.score(X_validation,Y_validation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.41804293724431285"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#ridge; only random_state is set, everything else is left at the library default\n",
    "reg_model = Ridge(random_state=0)\n",
    "reg_model.fit(X_train,Y_train)\n",
    "reg_model.score(X_validation,Y_validation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 32,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7816133895418931"
      ]
     },
     "execution_count": 32,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "#support vector regression with explicitly chosen C and epsilon\n",
    "reg_model = SVR(gamma='scale', C=400.0, epsilon=0.2)\n",
    "reg_model.fit(X_train,Y_train)\n",
    "reg_model.score(X_validation,Y_validation)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}