{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "**We're going to start by importing the necessary libraries**" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import seaborn as sns\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.tree import DecisionTreeClassifier, export_graphviz\n", "from sklearn.metrics import confusion_matrix, classification_report, mean_squared_error\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.model_selection import GridSearchCV\n", "from sklearn.svm import SVR\n", "from sklearn.datasets import load_iris\n", "import pydot\n", "from IPython.display import Image\n", "%matplotlib inline" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**1-Exploration de la base de données**" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "house = 'C:/Users/HP/Anaconda3/Lib/site-packages/notebook/train.csv'" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "df_train = pd.read_csv(house, sep = ',')" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | Id | \n", "MSSubClass | \n", "LotFrontage | \n", "LotArea | \n", "OverallQual | \n", "OverallCond | \n", "YearBuilt | \n", "YearRemodAdd | \n", "MasVnrArea | \n", "BsmtFinSF1 | \n", "... | \n", "WoodDeckSF | \n", "OpenPorchSF | \n", "EnclosedPorch | \n", "3SsnPorch | \n", "ScreenPorch | \n", "PoolArea | \n", "MiscVal | \n", "MoSold | \n", "YrSold | \n", "SalePrice | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
count | \n", "1460.000000 | \n", "1460.000000 | \n", "1201.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1452.000000 | \n", "1460.000000 | \n", "... | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "1460.000000 | \n", "
mean | \n", "730.500000 | \n", "56.897260 | \n", "70.049958 | \n", "10516.828082 | \n", "6.099315 | \n", "5.575342 | \n", "1971.267808 | \n", "1984.865753 | \n", "103.685262 | \n", "443.639726 | \n", "... | \n", "94.244521 | \n", "46.660274 | \n", "21.954110 | \n", "3.409589 | \n", "15.060959 | \n", "2.758904 | \n", "43.489041 | \n", "6.321918 | \n", "2007.815753 | \n", "180921.195890 | \n", "
std | \n", "421.610009 | \n", "42.300571 | \n", "24.284752 | \n", "9981.264932 | \n", "1.382997 | \n", "1.112799 | \n", "30.202904 | \n", "20.645407 | \n", "181.066207 | \n", "456.098091 | \n", "... | \n", "125.338794 | \n", "66.256028 | \n", "61.119149 | \n", "29.317331 | \n", "55.757415 | \n", "40.177307 | \n", "496.123024 | \n", "2.703626 | \n", "1.328095 | \n", "79442.502883 | \n", "
min | \n", "1.000000 | \n", "20.000000 | \n", "21.000000 | \n", "1300.000000 | \n", "1.000000 | \n", "1.000000 | \n", "1872.000000 | \n", "1950.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "1.000000 | \n", "2006.000000 | \n", "34900.000000 | \n", "
25% | \n", "365.750000 | \n", "20.000000 | \n", "59.000000 | \n", "7553.500000 | \n", "5.000000 | \n", "5.000000 | \n", "1954.000000 | \n", "1967.000000 | \n", "0.000000 | \n", "0.000000 | \n", "... | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "5.000000 | \n", "2007.000000 | \n", "129975.000000 | \n", "
50% | \n", "730.500000 | \n", "50.000000 | \n", "69.000000 | \n", "9478.500000 | \n", "6.000000 | \n", "5.000000 | \n", "1973.000000 | \n", "1994.000000 | \n", "0.000000 | \n", "383.500000 | \n", "... | \n", "0.000000 | \n", "25.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "6.000000 | \n", "2008.000000 | \n", "163000.000000 | \n", "
75% | \n", "1095.250000 | \n", "70.000000 | \n", "80.000000 | \n", "11601.500000 | \n", "7.000000 | \n", "6.000000 | \n", "2000.000000 | \n", "2004.000000 | \n", "166.000000 | \n", "712.250000 | \n", "... | \n", "168.000000 | \n", "68.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "0.000000 | \n", "8.000000 | \n", "2009.000000 | \n", "214000.000000 | \n", "
max | \n", "1460.000000 | \n", "190.000000 | \n", "313.000000 | \n", "215245.000000 | \n", "10.000000 | \n", "9.000000 | \n", "2010.000000 | \n", "2010.000000 | \n", "1600.000000 | \n", "5644.000000 | \n", "... | \n", "857.000000 | \n", "547.000000 | \n", "552.000000 | \n", "508.000000 | \n", "480.000000 | \n", "738.000000 | \n", "15500.000000 | \n", "12.000000 | \n", "2010.000000 | \n", "755000.000000 | \n", "
8 rows × 38 columns
\n", "\n", " | Id | \n", "MSSubClass | \n", "MSZoning | \n", "LotFrontage | \n", "LotArea | \n", "Street | \n", "Alley | \n", "LotShape | \n", "LandContour | \n", "Utilities | \n", "... | \n", "PoolArea | \n", "PoolQC | \n", "Fence | \n", "MiscFeature | \n", "MiscVal | \n", "MoSold | \n", "YrSold | \n", "SaleType | \n", "SaleCondition | \n", "SalePrice | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "60 | \n", "RL | \n", "65.0 | \n", "8450 | \n", "Pave | \n", "NaN | \n", "Reg | \n", "Lvl | \n", "AllPub | \n", "... | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "2 | \n", "2008 | \n", "WD | \n", "Normal | \n", "208500 | \n", "
1 | \n", "2 | \n", "20 | \n", "RL | \n", "80.0 | \n", "9600 | \n", "Pave | \n", "NaN | \n", "Reg | \n", "Lvl | \n", "AllPub | \n", "... | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "5 | \n", "2007 | \n", "WD | \n", "Normal | \n", "181500 | \n", "
2 | \n", "3 | \n", "60 | \n", "RL | \n", "68.0 | \n", "11250 | \n", "Pave | \n", "NaN | \n", "IR1 | \n", "Lvl | \n", "AllPub | \n", "... | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "9 | \n", "2008 | \n", "WD | \n", "Normal | \n", "223500 | \n", "
3 | \n", "4 | \n", "70 | \n", "RL | \n", "60.0 | \n", "9550 | \n", "Pave | \n", "NaN | \n", "IR1 | \n", "Lvl | \n", "AllPub | \n", "... | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "2 | \n", "2006 | \n", "WD | \n", "Abnorml | \n", "140000 | \n", "
4 | \n", "5 | \n", "60 | \n", "RL | \n", "84.0 | \n", "14260 | \n", "Pave | \n", "NaN | \n", "IR1 | \n", "Lvl | \n", "AllPub | \n", "... | \n", "0 | \n", "NaN | \n", "NaN | \n", "NaN | \n", "0 | \n", "12 | \n", "2008 | \n", "WD | \n", "Normal | \n", "250000 | \n", "
5 rows × 81 columns
\n", "\n", " | Total | \n", "Percent | \n", "
---|---|---|
PoolQC | \n", "1453 | \n", "99.520548 | \n", "
MiscFeature | \n", "1406 | \n", "96.301370 | \n", "
Alley | \n", "1369 | \n", "93.767123 | \n", "
Fence | \n", "1179 | \n", "80.753425 | \n", "
FireplaceQu | \n", "690 | \n", "47.260274 | \n", "
LotFrontage | \n", "259 | \n", "17.739726 | \n", "
GarageCond | \n", "81 | \n", "5.547945 | \n", "
GarageType | \n", "81 | \n", "5.547945 | \n", "
GarageYrBlt | \n", "81 | \n", "5.547945 | \n", "
GarageFinish | \n", "81 | \n", "5.547945 | \n", "
GarageQual | \n", "81 | \n", "5.547945 | \n", "
BsmtFinType2 | \n", "38 | \n", "2.602740 | \n", "
BsmtExposure | \n", "38 | \n", "2.602740 | \n", "
BsmtCond | \n", "37 | \n", "2.534247 | \n", "
BsmtQual | \n", "37 | \n", "2.534247 | \n", "
BsmtFinType1 | \n", "37 | \n", "2.534247 | \n", "
MasVnrArea | \n", "8 | \n", "0.547945 | \n", "
MasVnrType | \n", "8 | \n", "0.547945 | \n", "
Electrical | \n", "1 | \n", "0.068493 | \n", "
LandSlope | \n", "0 | \n", "0.000000 | \n", "