{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import warnings\n", "import numpy as np\n", "import pandas as pd\n", "warnings.filterwarnings('ignore')\n", "\n", "# data visualization\n", "import seaborn as sns\n", "%matplotlib inline\n", "from matplotlib import pyplot as plt\n", "from matplotlib import style\n", "\n", "# Algorithms\n", "from sklearn import linear_model\n", "from sklearn.linear_model import LogisticRegression\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.linear_model import Perceptron\n", "from sklearn.linear_model import SGDClassifier\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.svm import LinearSVC\n", "from sklearn.naive_bayes import GaussianNB" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "test_df = pd.read_csv(\"input/test.csv\")\n", "train_df = pd.read_csv(\"input/train.csv\")" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 891 entries, 0 to 890\n", "Data columns (total 12 columns):\n", "PassengerId 891 non-null int64\n", "Survived 891 non-null int64\n", "Pclass 891 non-null int64\n", "Name 891 non-null object\n", "Sex 891 non-null object\n", "Age 714 non-null float64\n", "SibSp 891 non-null int64\n", "Parch 891 non-null int64\n", "Ticket 891 non-null object\n", "Fare 891 non-null float64\n", "Cabin 204 non-null object\n", "Embarked 889 non-null object\n", "dtypes: float64(2), int64(5), object(5)\n", "memory usage: 83.6+ KB\n", "--------------------------------------------------\n", "\n", "RangeIndex: 418 entries, 0 to 417\n", "Data columns (total 11 columns):\n", "PassengerId 418 non-null int64\n", "Pclass 418 non-null int64\n", "Name 418 non-null object\n", "Sex 418 non-null object\n", "Age 332 non-null float64\n", "SibSp 418 
non-null int64\n", "Parch 418 non-null int64\n", "Ticket 418 non-null object\n", "Fare 417 non-null float64\n", "Cabin 91 non-null object\n", "Embarked 418 non-null object\n", "dtypes: float64(2), int64(4), object(5)\n", "memory usage: 36.0+ KB\n" ] } ], "source": [ "train_df.info()\n", "print('-'*50)\n", "test_df.info()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
0103Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
2313Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
4503Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 22.0 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n", "2 Heikkinen, Miss. Laina female 26.0 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n", "4 Allen, Mr. William Henry male 35.0 0 \n", "\n", " Parch Ticket Fare Cabin Embarked \n", "0 0 A/5 21171 7.2500 NaN S \n", "1 0 PC 17599 71.2833 C85 C \n", "2 0 STON/O2. 3101282 7.9250 NaN S \n", "3 0 113803 53.1000 C123 S \n", "4 0 373450 8.0500 NaN S " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df.head()" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
08923Kelly, Mr. Jamesmale34.5003309117.8292NaNQ
18933Wilkes, Mrs. James (Ellen Needs)female47.0103632727.0000NaNS
28942Myles, Mr. Thomas Francismale62.0002402769.6875NaNQ
38953Wirz, Mr. Albertmale27.0003151548.6625NaNS
48963Hirvonen, Mrs. Alexander (Helga E Lindqvist)female22.011310129812.2875NaNS
\n", "
" ], "text/plain": [ " PassengerId Pclass Name Sex \\\n", "0 892 3 Kelly, Mr. James male \n", "1 893 3 Wilkes, Mrs. James (Ellen Needs) female \n", "2 894 2 Myles, Mr. Thomas Francis male \n", "3 895 3 Wirz, Mr. Albert male \n", "4 896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) female \n", "\n", " Age SibSp Parch Ticket Fare Cabin Embarked \n", "0 34.5 0 0 330911 7.8292 NaN Q \n", "1 47.0 1 0 363272 7.0000 NaN S \n", "2 62.0 0 0 240276 9.6875 NaN Q \n", "3 27.0 0 0 315154 8.6625 NaN S \n", "4 22.0 1 1 3101298 12.2875 NaN S " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Combine the train and test dataset into one for processing" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "data = [train_df, test_df]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Embarked" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Embarked\n", "C 168\n", "Q 77\n", "S 644\n", "Name: Survived, dtype: int64" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train_df.groupby('Embarked')['Survived'].count()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Fill missing value with 'S'\n", "ports = {\"S\": 0, \"C\": 1, \"Q\": 2}\n", "for dataset in data:\n", " dataset[\"Embarked\"] = dataset[\"Embarked\"].fillna(\"S\")\n", " dataset['Embarked'] = dataset['Embarked'].map(ports)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Fare" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "for dataset in data:\n", " dataset['Fare'].fillna(dataset['Fare'].dropna().median(), inplace=True)\n", " dataset['Fare'] = dataset['Fare'].astype(int)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Age" 
] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Fill each missing age with a random integer between (mean - std) and (mean + std) of the known ages." ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "for dataset in data:\n", "    average_age_titanic = dataset['Age'].mean()\n", "    std_age_titanic = dataset['Age'].std()\n", "    count_nan_age_titanic = dataset['Age'].isnull().sum()\n", "    # One random integer per missing age, drawn from the band mean +/- one std.\n", "    rand_ = np.random.randint(\n", "        average_age_titanic - std_age_titanic,\n", "        average_age_titanic + std_age_titanic,\n", "        size=count_nan_age_titanic\n", "    )\n", "    # Assign through .loc on the frame itself. The chained form\n", "    # dataset['Age'][mask] = ... writes into an intermediate Series and is not\n", "    # guaranteed to update the frame, which would leave NaN for astype(int).\n", "    dataset.loc[dataset['Age'].isnull(), 'Age'] = rand_\n", "    dataset['Age'] = dataset['Age'].astype(int)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Convert the `Age` feature into `Age Group`" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6 157\n", "4 157\n", "3 141\n", "5 139\n", "2 126\n", "1 103\n", "0 68\n", "Name: Age, dtype: int64" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Replace each age with the code of its age band, overwriting the 'Age' column.\n", "# The rules run in ascending order and every code (0-6) is <= 11, so a value\n", "# that has already been coded is never matched by a later rule.\n", "# NOTE: this cell is not re-runnable. On a second run every code falls into the\n", "# first band and becomes 0; reload the data before running it again.\n", "for dataset in data:\n", "    dataset.loc[dataset['Age'] <= 11, 'Age'] = 0\n", "    dataset.loc[(dataset['Age'] > 11) & (dataset['Age'] <= 18), 'Age'] = 1\n", "    dataset.loc[(dataset['Age'] > 18) & (dataset['Age'] <= 22), 'Age'] = 2\n", "    dataset.loc[(dataset['Age'] > 22) & (dataset['Age'] <= 27), 'Age'] = 3\n", "    dataset.loc[(dataset['Age'] > 27) & (dataset['Age'] <= 33), 'Age'] = 4\n", "    dataset.loc[(dataset['Age'] > 33) & (dataset['Age'] <= 40), 'Age'] = 5\n", "    dataset.loc[(dataset['Age'] > 40) & (dataset['Age'] <= 66), 'Age'] = 6\n", "    # NOTE(review): ages above 66 get the same code as the 41-66 band;\n", "    # confirm that a separate group was not intended.\n", "    dataset.loc[dataset['Age'] > 66, 'Age'] = 6\n", "\n", "train_df['Age'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Name" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "From `Name` I create another feature called `Title`" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { 
"text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitle
0103Braund, Mr. Owen Harrismale210A/5 211717NaN01
1211Cumings, Mrs. John Bradley (Florence Briggs Th...female510PC 1759971C8513
2313Heikkinen, Miss. Lainafemale300STON/O2. 31012827NaN02
3411Futrelle, Mrs. Jacques Heath (Lily May Peel)female51011380353C12303
4503Allen, Mr. William Henrymale5003734508NaN01
\n", "
" ], "text/plain": [ " PassengerId Survived Pclass \\\n", "0 1 0 3 \n", "1 2 1 1 \n", "2 3 1 3 \n", "3 4 1 1 \n", "4 5 0 3 \n", "\n", " Name Sex Age SibSp \\\n", "0 Braund, Mr. Owen Harris male 2 1 \n", "1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 5 1 \n", "2 Heikkinen, Miss. Laina female 3 0 \n", "3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 5 1 \n", "4 Allen, Mr. William Henry male 5 0 \n", "\n", " Parch Ticket Fare Cabin Embarked Title \n", "0 0 A/5 21171 7 NaN 0 1 \n", "1 0 PC 17599 71 C85 1 3 \n", "2 0 STON/O2. 3101282 7 NaN 0 2 \n", "3 0 113803 53 C123 0 3 \n", "4 0 373450 8 NaN 0 1 " ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "title_mapping = {\"Mr\": 1, \"Miss\": 2, \"Mrs\": 3, \"Master\": 4, \"Rare\": 5}\n", "\n", "for dataset in data:\n", " dataset['Title'] = dataset.Name.str.extract(' ([A-Za-z]+)\\.', expand=False)\n", " dataset['Title'] = dataset['Title'].replace(['Lady', 'Countess','Capt', 'Col',\\\n", " 'Don', 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer', 'Dona'], 'Rare')\n", " dataset['Title'] = dataset['Title'].replace('Mlle', 'Miss')\n", " dataset['Title'] = dataset['Title'].replace('Ms', 'Miss')\n", " dataset['Title'] = dataset['Title'].replace('Mme', 'Mrs')\n", " \n", " dataset['Title'] = dataset['Title'].map(title_mapping)\n", " dataset['Title'] = dataset['Title'].fillna(0)\n", "\n", "train_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Family size" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
FamilySizeSurvived
340.724138
230.578431
120.552795
670.333333
010.303538
450.200000
560.136364
780.000000
8110.000000
\n", "
" ], "text/plain": [ " FamilySize Survived\n", "3 4 0.724138\n", "2 3 0.578431\n", "1 2 0.552795\n", "6 7 0.333333\n", "0 1 0.303538\n", "4 5 0.200000\n", "5 6 0.136364\n", "7 8 0.000000\n", "8 11 0.000000" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "for dataset in data:\n", " dataset['FamilySize'] = dataset['SibSp'] + dataset['Parch'] + 1\n", " \n", "train_df[['FamilySize', 'Survived']].groupby(['FamilySize'], as_index=False).mean().sort_values(by='Survived', ascending=False)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### IsAlone" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IsAloneSurvived
000.505650
110.303538
\n", "
" ], "text/plain": [ " IsAlone Survived\n", "0 0 0.505650\n", "1 1 0.303538" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "for dataset in data:\n", " dataset['IsAlone'] = 0\n", " dataset.loc[dataset['FamilySize'] == 1, 'IsAlone'] = 1\n", " \n", "train_df[['IsAlone', 'Survived']].groupby('IsAlone', as_index=False).mean()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Sex" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "genders = {\"male\": 0, \"female\": 1}\n", "for dataset in data:\n", " dataset['Sex'] = dataset['Sex'].map(genders)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Age*Times" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "for dataset in data:\n", " dataset['Age_Class'] = dataset['Age']*dataset['Pclass']" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Fare per Person" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "for dataset in data:\n", " dataset['FarePerPerson'] = dataset['Fare'] / dataset['FamilySize']\n", " dataset['FarePerPerson'] = dataset['FarePerPerson'].astype(int)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Clean up before training the data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "I decided to drop `Cabin` (many missing data) and `Ticket` (doesn't make any impacts) columns." ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "X_train = train_df.drop(['Name','Ticket', 'Cabin', 'SibSp', 'Parch', 'PassengerId', 'Survived'], axis=1)\n", "Y_train = train_df['Survived']\n", "X_test = test_df.drop(['Name','Ticket', 'Cabin', 'SibSp', 'Parch', 'PassengerId'], axis=1).copy()" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PclassSexAgeFareEmbarkedTitleFamilySizeIsAloneAge_ClassFarePerPerson
03027012063
1115711320535
23137021197
3115530320526
430580111158
\n", "
" ], "text/plain": [ " Pclass Sex Age Fare Embarked Title FamilySize IsAlone Age_Class \\\n", "0 3 0 2 7 0 1 2 0 6 \n", "1 1 1 5 71 1 3 2 0 5 \n", "2 3 1 3 7 0 2 1 1 9 \n", "3 1 1 5 53 0 3 2 0 5 \n", "4 3 0 5 8 0 1 1 1 15 \n", "\n", " FarePerPerson \n", "0 3 \n", "1 35 \n", "2 7 \n", "3 26 \n", "4 8 " ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "((891, 10), (891,), (418, 10))" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.shape, Y_train.shape, X_test.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Logistic Regression" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "logreg = LogisticRegression()\n", "logreg.fit(X_train, Y_train)\n", "Y_pred = logreg.predict(X_test)\n", "acc_log = round(logreg.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Random Forest" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "random_forest = RandomForestClassifier(n_estimators=100)\n", "random_forest.fit(X_train, Y_train)\n", "Y_prediction = random_forest.predict(X_test)\n", "random_forest.score(X_train, Y_train)\n", "acc_random_forest = round(random_forest.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Perceptron" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "perceptron = Perceptron(max_iter=5)\n", "perceptron.fit(X_train, Y_train)\n", "Y_pred = perceptron.predict(X_test)\n", "acc_perceptron = round(perceptron.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### SGDClassifier" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, 
"outputs": [], "source": [ "sgd = SGDClassifier(max_iter=5, tol=None)\n", "sgd.fit(X_train, Y_train)\n", "Y_pred = sgd.predict(X_test)\n", "sgd.score(X_train, Y_train)\n", "acc_sgd = round(sgd.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Decision Tree" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "decision_tree = DecisionTreeClassifier() \n", "decision_tree.fit(X_train, Y_train) \n", "Y_pred = decision_tree.predict(X_test) \n", "acc_decision_tree = round(decision_tree.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Support Vector Machine" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/calvin.tran/anaconda2/lib/python2.7/site-packages/sklearn/svm/base.py:931: ConvergenceWarning: Liblinear failed to converge, increase the number of iterations.\n", " \"the number of iterations.\", ConvergenceWarning)\n" ] } ], "source": [ "linear_svc = LinearSVC()\n", "linear_svc.fit(X_train, Y_train)\n", "Y_pred = linear_svc.predict(X_test)\n", "acc_linear_svc = round(linear_svc.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### K Nearest Neighbor" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "knn = KNeighborsClassifier(n_neighbors = 3) \n", "knn.fit(X_train, Y_train) \n", "Y_pred = knn.predict(X_test) \n", "acc_knn = round(knn.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "#### Gaussian Naive Bayes" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "gaussian = GaussianNB() \n", "gaussian.fit(X_train, Y_train) \n", "Y_pred = gaussian.predict(X_test) \n", "acc_gaussian = round(gaussian.score(X_train, Y_train) * 100, 2)" ] }, { "cell_type": "code", 
"execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Model
Score
93.60Random Forest
93.60Decision Tree
84.74KNN
81.48Logistic Regression
78.56Support Vector Machines
78.45Naive Bayes
66.11Stochastic Gradient Decent
64.98Perceptron
\n", "
" ], "text/plain": [ " Model\n", "Score \n", "93.60 Random Forest\n", "93.60 Decision Tree\n", "84.74 KNN\n", "81.48 Logistic Regression\n", "78.56 Support Vector Machines\n", "78.45 Naive Bayes\n", "66.11 Stochastic Gradient Decent\n", "64.98 Perceptron" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "results = pd.DataFrame({\n", " 'Model': ['Support Vector Machines', 'KNN', 'Logistic Regression', \n", " 'Random Forest', 'Naive Bayes', 'Perceptron', \n", " 'Stochastic Gradient Decent', \n", " 'Decision Tree'],\n", " 'Score': [acc_linear_svc, acc_knn, acc_log, \n", " acc_random_forest, acc_gaussian, acc_perceptron, \n", " acc_sgd, acc_decision_tree]})\n", "result_df = results.sort_values(by='Mod', ascending=False)\n", "result_df = result_df.set_index('Score')\n", "result_df.head(10)" ] }, { "cell_type": "code", "execution_count": 30, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" }, { "data": { "image/png": 
"iVBORw0KGgoAAAANSUhEUgAAA4UAAAGDCAYAAACcFrWUAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMi4zLCBodHRwOi8vbWF0cGxvdGxpYi5vcmcvIxREBQAAIABJREFUeJzs3XeUZVWdt/HnKw2DZEkKKDQqogLSQINiAETGHEcUEQTGgDpmRcc0ioERxxxfREeCIiLmDAaCIqkbmigmUEFwEMkZmt/7x9kll6Kqq7qp6lvteT5r9apz99nn7N+5dVfBd+19zk1VIUmSJEnqp3sNuwBJkiRJ0vAYCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknrMUChJ0hJKMjtJJZk1ib77JPnl0qhL0yfJj5LsPew6JGkqGQolSb2Q5I9Jbk2y9qj2BS3YzR5OZXepZeUk1yf54bBrmU5JVkvy8SR/btf7+/Z67YmPHq6qekpVHTbsOiRpKhkKJUl9chGw+8iLJFsA9x5eOXezK3AL8MQk6y3NgScz2zlF46wA/AzYDHgysBrwaODvwHZLo4YlkY7/3yTpn5J/3CRJffIlYK+B13sDhw92SLJ6ksOT/C3Jn5K8cyQMJFkuyYeTXJHkQuBpYxz7v0kuS/KXJO9Pstxi1Lc3cBBwNrDHqHM/IMk3W11/T/LpgX0vS/LrJNclOT/J1q29kjx4oN+hSd7ftndKckmS/0zyV+CQJPdJ8v02xlVt+/4Dx6+Z5JAkl7b9327t5yZ5xkC/5dt7NGeMa9wL2BB4TlWdX1V3VNXlVfW+qvphO/5hSY5PcnWS85I8c9Q1fLYt47w+yUlJ7tdmGq9KckGSrQb6/zHJ29r7clWrf8W2b6LrPT7JAUlOAm4EHtjaXtr2PzjJCUmuadd71MCxj05yett3epJHjzrv+1rt1yU5dlmYJZX0z8tQKEnqk1OA1VroWA7YDfjyqD6fAlYHHgjsSBdi/r3texnwdGArYC7dzN6gw4DbgQe3Pk8EXjqZwpJsCOwEHNH+7TWwbzng+8CfgNnABsBX277nAfu3/qsBz6SbdZuM+wFrAhsB+9L9f8Eh7fWGwE3Apwf6fwlYiW6Wb13gY639cGDPgX5PBS6rqgVjjLkL8OOqun6sgpIsD3wPOLaN8RrgiCSbDnR7PvBOYG26mdWTgTPa668DHx112j2AJwEPAh7SjmUS1wvwIrr3ZlW693/Q+1qd9wHuT/fZIcmawA+ATwJrtXp+kGStgWNfSPe5WhdYAdhvrPdDkpYGQ6EkqW9GZgv/FbgA+MvIjoGg+Laquq6q/gh8hC4YQBdGPl5VF1fVlcAHBo69L/AU4PVVdUNVXU4Xml4wybr2As6uqvOBI4HNBma8tgPWB97czn1zVY08tOalwP9U1enV+X1VjQ4v47kDeHdV3VJVN1XV36vqG1V1Y1VdBxxAF4xpy1mfAryiqq6qqtuq6oR2ni8DT02yWnv9Irr3eSxrAZctoqZHAasAB1bVrVX1c7pAvPtAn29V1fyquhn4FnBzVR1eVQuBo+gC+aBPD/zODhg516Kud8ChVXVeVd1eVbeN2ncbXaBcf9Tv5GnA76rqS+24I+k+a88YOPaQqvptVd0EfA0Ya1ZVkpYKQ6EkqW++RDdLsw+jlo7SzTStwF1nhP5ENzMHXTC7eNS+ERsBywOXtWWPVwOfo5sJmoy96GYIqapLgRPolpMCPAD4U1XdPsZxDwD+MMkxRvtbC1YAJFkpyefastlrgROBNVpYfgBwZVVdNfokrd6TgOcmWYMuPB4xzph/BxZ1v+T6wMVVdcdA2+DvAOD/BrZvGuP1KqPOOfp3tj5MeL1jHTvaW4AAp7Vlri8euIbRwXz0Nfx1YPvGMWqWpKXGUChJ6pU2i3YR3RLHb47afQV3zv6M2JA7ZxMvowtHg/tGXEy3lHHtqlqj/Vutqja
bqKZ2v9kmwNuS/LXd4/dIYPd0D4C5GNgwYz8M5mK6ZZFjuZFuueeI+43aX6NevwnYFHhkVa0G7DBSYhtnzRb6xnIY3RLS5wEnV9Vfxun3U+BJSVYeZ/+lwANy14e6DP4OlsTo39mlbXtR1zti9Ht0546qv1bVy6pqfeDlwGfbPZyXctfP0Mi49+QaJGnaGAolSX30EmDnqrphsLEtP/wacECSVZNsBLyRO+87/Brw2iT3T3If4K0Dx15Gd3/ZR9J95cK9kjwoyejliGPZG/gJ8HC6ZYRzgM3pAt1TgNPoAumB6b62YsUkj2nHfgHYL8k26Ty41Q2wAHhhugfkPJm7L40cbVW6mbar231x7x51fT+iCz73aQ+T2WHg2G8DWwOv4+4zsIO+RBcwv5Hkoe19WivJ25M8FTgVuAF4SxtjJ7pll1+doPZFeVX7na0JvJ1uiekir3cykjxv4ME0V9EFyIXAD4GHJHlhkllJdqP73X7/HlyDJE0bQ6EkqXeq6g9VNW+c3a+hCyUXAr8EvgJ8se37PHAMcBbdg01GzzTuRbf89Hy6kPB1Fr1UkvYkzOcDn2ozTyP/LqILUHu3sPoMugfY/Bm4hO7eR6rqaLp74b4CXEcXztZsp39dO+5quoetfHtRtQAfp/uKjivoHsrz41H7X0Q3k3oBcDnw+pEd7d64bwAbj/G+MNDvFrqHzVxAF4SvpQu9awOnVtWtdA/LeUqr47PAXlV1wQS1L8pX6AL7he3f+1v7RNc7kW2BU5NcD3wXeF1VXVRVf6d7INGb6JbLvgV4elVdcQ+uQZKmTarGXRUhSZI0aUneBTykqvacsPNSkuSPwEur6qfDrkWSZqql8kW1kiTpn1tbfvkS7nxSqyRpGeHyUUmSdI8keRndfYI/qqoTh12PJGnxuHxUkiRJknrMmUJJkiRJ6jFDoSRJkiT1mA+a0Yyz9tpr1+zZs4ddhiRJkrRMmz9//hVVtc5E/QyFmnFmz57NvHnjfX2YJEmSpMlI8qfJ9HP5qCRJkiT1mKFQkiRJknrM5aOacX59yd/Z5s2HD7sMSZKkoZn/ob2GXYJ6xJlCSZIkSeoxQ6EkSZIk9ZihUJIkSZJ6zFAoSZIkST1mKJQkSZKkHjMUSpIkSVKPGQolSZIkqccMhZIkSZLUY4ZCSZIkSeoxQ6EkSZIk9ZihUJIkSZJ6zFAoSZIkST02Y0JhknckOS/J2UkWJHnkEGt5fZKVxmjfP8kHRrXNSfLrJRhjTpKn3sM6ZyepJO8baFs7yW1JPr2E57x+nPZXJNlrSWuVJEmSNDPNiFCYZHvg6cDWVfUIYBfg4iHVshzweuBuoRA4EthtVNsLgK8swVBzgMUKhUlmjdF8Id17N+J5wHlLUM8iVdVBVXX4VJ9XkiRJ0nDNiFAIrAdcUVW3AFTVFVV1KUCSPyZZu23PTXJ8294/yZeS/DzJ75K8rLXvlOTEJN9Kcn6Sg5Lcq+3bPck5Sc5N8sGRwZNcn+S9SU4F3gGsDxyX5LjBIqvqN8DVo2Yxnw98tZ3niUlOTnJGkqOTrNLat03yqyRnJTktyerAe4Hd2qzobknWTPLtNlN6SpJHDFznwUmOBcYKZTcBv04yt73eDfjawLU9I8mpSc5M8tMk923tqyQ5pL0fZyd57sAxB7RaTxnov3+S/dr28Uk+2K7lt0ke19qXS/KhJKe3c768ta/XficL2nv/uAk+D5IkSZKWkpkSCo8FHtACxmeT7DjJ4x4BPA3YHnhXkvVb+3bAm4AtgAcB/9b2fRDYmW6Wbtskz279VwbOrapHVtV7gUuBx1fV48cY80i62UGSPAr4e1X9rgXXdwK7VNXWwDzgjUlWAI4CXldVW9LNgt4AvAs4qqrmVNVRwHuAM9tM6du5awDcBnhWVb1wnPfhq8ALktwfWNjqH/FL4FFVtVXr95bW/l/ANVW1RRvz5wPvxSmt1hOBl40z5qyq2o5uVvXdre0l7ZzbAtsCL0uyMfBC4Ji
qmgNsCSwYfbIk+yaZl2Te7TdeN86QkiRJkqbaWMsRl7qquj7JNsDjgMcDRyV5a1UdOsGh36mqm4Cb2qzedsDVwGlVdSFAkiOBxwK3AcdX1d9a+xHADsC36YLUNyZZ7leBXyV5E104PLK1Pwp4OHBSEoAVgJOBTYHLqur0dq3XtvFHn/exwHNbn58nWavNKAJ8t13neH4MvA/4P7oAOuj+dO/neq2mi1r7Lq1+2phXtc1bge+37fnAv44z5jcH+sxu208EHpFk1/Z6dWAT4HTgi0mWB75dVXcLhVV1MHAwwMr327gWca2SJEmSptCMCIUAVbUQOB44Psk5wN7AocDt3DmjueLow8Z5PVb73VLYgJvb+JOp8+IkfwR2pAtx27ddAX5SVbsP9m/LQCcTcsaqb+S4Gyao6dYk8+lmRzcDnjGw+1PAR6vqu0l2AvYfGG+sum6rqpH2hYz/GblljD4BXlNVx4zunGQHulndLyX5kPcnSpIkSTPDjFg+mmTTJJsMNM0B/tS2/0i3fBLaTNqAZyVZMclawE50M1IA2yXZuN1LuBvdEspTgR3b0zmXA3YHThinpOuAVRdR8pHAx4A/VNUlre0U4DFJHtyuaaUkDwEuANZPsm1rX7U9MGb0GCcCe7Q+O9HdY3ntImoY7SPAf1bV30e1rw78pW3vPdB+LPDqkRdJ7rMYY43nGOCVbUaQJA9JsnKSjYDLq+rzwP8CW0/BWJIkSZKmwIwIhcAqwGHtwTBn0y3D3L/tew/wiSS/oJuVGnQa8AO6QPa+kYfT0C3bPBA4l2655Leq6jLgbcBxwFnAGVX1nXHqORj40egHzQw4mm5G7qsjDW1Z6j7Ake0aTgEeWlW30gXTTyU5C/gJ3YznccDDRx400653bjv2QO4a4CZUVedV1WFj7NofOLq9f1cMtL8fuE978MtZdMt276kvAOcDZyQ5F/gc3SziTsCCJGfSBftPTMFYkiRJkqZA7lwpuGxJsj9wfVV9eFT7TsB+VfX0sY7TzLfy/Tauh77oPcMuQ5IkaWjmf8ivh9Y9l2R+Vc2dqN9MmSmUJEmSJA3BjHnQzOKqqv3HaT+e7oE1kiRJkqQJOFMoSZIkST1mKJQkSZKkHjMUSpIkSVKPGQolSZIkqccMhZIkSZLUY4ZCSZIkSeoxQ6EkSZIk9ZihUJIkSZJ6bJn98nr983rY/ddi3of2GnYZkiRJUi84UyhJkiRJPWYolCRJkqQeMxRKkiRJUo8ZCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mN9TqBnn1svO48/v3WLYZUiSJGkxbfiuc4ZdgpaAM4WSJEmS1GOGQkmSJEnqMUOhJEmSJPWYoVCSJEmSesxQKEmSJEk9ZiiUJEmSpB4zFEqSJElSjxkKJUmSJKnHDIWSJEmS1GOGQkmSJEnqMUOhJEmSJPWYoVCSJEmSesxQqElJcv3A9lOT/C7Jhkn2T3JjknXH6VtJPjLwer8k+y+1wiVJkiQtkqFQiyXJE4BPAU+uqj+35iuAN41zyC3AvyVZe2nUJ0mSJGnxGAo1aUkeB3weeFpV/WFg1xeB3ZKsOcZhtwMHA29YCiVKkiRJWkyGQk3WvwDfAZ5dVReM2nc9XTB83TjHfgbYI8nq01ifJEmSpCVgKNRk3Qb8CnjJOPs/CeydZLXRO6rqWuBw4LXjnTzJvknmJZl35Q0Lp6JeSZIkSZNgKNRk3QE8H9g2ydtH76yqq4GvAP8xzvEfpwuUK4+1s6oOrqq5VTV3zZWXm6KSJUmSJE3EUKhJq6obgafTLQUda8bwo8DLgVljHHsl8DXGn2mUJEmSNASGQi2WFu6eDLwzybNG7bsC+Bbd/Ydj+QjgU0glSZKkGeRuMzrSWKpqlYHti4GN28vvjOr3RuCN4xz3f8BK01upJEmSpMXhTKEkSZIk9ZihUJIkSZJ6zFAoSZIkST1mKJQkSZKkHjMUSpIkSVKPGQolSZIkqccMhZIkSZL
UY4ZCSZIkSeoxQ6EkSZIk9ZihUJIkSZJ6zFAoSZIkST02a9gFSKOtsN5mbPiuecMuQ5IkSeoFZwolSZIkqccMhZIkSZLUY4ZCSZIkSeoxQ6EkSZIk9ZihUJIkSZJ6zFAoSZIkST1mKJQkSZKkHvN7CjXjXHD5BTzmU48ZdhmSJEn/dE56zUnDLkEzkDOFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknrMUChJkiRJPWYolCRJkqQeMxRKkiRJUo8ZCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknqs16EwyfVTcI71k3x9EfvXSPIfk+0/xvGHJrkoyYIkZyV5wj2teSoleUWSvYZdhyRJkqQl0+tQOBWq6tKq2nURXdYA/mMx+o/lzVU1B3g9cNASlHk3SWZNxXmq6qCqOnwqziVJkiRp6TMUjpJkoyQ/S3J2+7lha39QklOSnJ7kvSOzjElmJzm3bW+W5LQ2q3d2kk2AA4EHtbYPjeq/XJIPJzmn9X/NBOWdDGwwUOs2SU5IMj/JMUnWa+3btvOd3MYcGW+fJEcn+R5wbGt7c7ums5O8p7WtnOQHbWby3CS7tfYDk5zf+n64te2fZL+2Pae9R2cn+VaS+7T245N8sL03v03yuCn4VUmSJEmaAobCu/s0cHhVPQI4Avhka/8E8Imq2ha4dJxjX9H6zAHmApcAbwX+UFVzqurNo/rvC2wMbDUw3qI8Gfg2QJLlgU8Bu1bVNsAXgQNav0OAV1TV9sDCUefYHti7qnZO8kRgE2A7YA6wTZId2jiXVtWWVbU58OMkawLPATZrtb5/jPoOB/6z7T8HePfAvllVtR3dbOe7xzhWkiRJ0hAYCu9ue+ArbftLwGMH2o9u218ZfVBzMvD2JP8JbFRVN00w1i7AQVV1O0BVXTlOvw8luRD4MvDfrW1TYHPgJ0kWAO8E7p9kDWDVqvrVOLX+ZGCcJ7Z/ZwJnAA+lC4nnALu02b3HVdU1wLXAzcAXkvwbcOPgSZOsDqxRVSe0psOAHQa6fLP9nA/MHn2BSfZNMi/JvNuuv22ct0GSJEnSVDMUTqwm3bHqK8AzgZuAY5LsPMEhmeT53ww8mC74HTZw7HltBnJOVW1RVU9s7Ytyw6jxPzBwjgdX1f9W1W+BbejC4QeSvKsF1+2AbwDPBn48iboH3dJ+LgTudj9jVR1cVXOrau7yqyy/mKeWJEmStKQMhXf3K+AFbXsP4Jdt+xTguW37BaMPAkjyQODCqvok8F3gEcB1wKrjjHUs8IqRh760JZpjqqo76Jaw3ivJk4DfAOsk2b4du3ySzarqKuC6JI9aVK3NMcCLk6zSzrFBknWTrA/cWFVfBj4MbN36rF5VP6RbAjpnVH3XAFcN3C/4IuAEJEmSJM1oU/IEymXYSkkuGXj9UeC1wBeTvBn4G/Dvbd/rgS8neRPwA+CaMc63G7BnktuAvwLvraork5zUHvbyI+AzA/2/ADwEOLsd83m6exrHVFWV5P3AW6rqmCS7Ap9sSzdnAR8HzgNeAnw+yQ3A8ePUSlUdm+RhwMlJAK4H9qSblfxQkjuA24BX0gXb7yRZkW6G8Q1jnHJv4KAkKwEXDrx3kiRJkmaoVE16dWSvtaBzUwtmLwB2r6pnDbuusSRZpapGno76VmC9qnrdkMuatFU2XKW2fPOWwy5DkiTpn85Jrzlp2CVoKUoyv6rmTtSv7zOFi2Mb4NPpptSuBl485HoW5WlJ3kb3+/0TsM9wy5EkSZI0UxkKJ6mqfgEsE9NXVXUUcNSw65AkSZI08/mgGUmSJEnqMUOhJEmSJPWYoVCSJEmSesxQKEmSJEk9ZiiUJEmSpB4zFEqSJElSjxkKJUmSJKnHDIWSJEmS1GOGQkmSJEnqsVnDLkAa7aHrPpSTXnPSsMuQJEmSesGZQkmSJEnqMUOhJEmSJPWYoVCSJEmSesxQKEmSJEk9ZiiUJEmSpB4zFEqSJElSjxk
KJUmSJKnH/J5CzTjX/eY3nLDDjsMuQ5IkSVpsO554wrBLWGzOFEqSJElSjxkKJUmSJKnHDIWSJEmS1GOGQkmSJEnqMUOhJEmSJPWYoVCSJEmSesxQKEmSJEk9ZiiUJEmSpB4zFEqSJElSjxkKJUmSJKnHDIWSJEmS1GOGQkmSJEnqMUPhFEuyMMmCJOcm+V6SNabovLOTnDsV5xp13v2T/KXVvCDJgVM9xsBYc5I8dbrOL0mSJGnxGQqn3k1VNaeqNgeuBF417IIm4WOt5jlV9dbJHpRkucUcZw5gKJQkSZJmEEPh9DoZ2AAgySpJfpbkjCTnJHlWa5+d5NdJPp/kvCTHJrl327dNkrOSnMxAuEyyYpJD2nnOTPL41r5Pkm+3GcqLkrw6yRtbn1OSrDnZwpM8oR13TpIvJvmX1v7HJO9K8kvgeUkelOTHSeYn+UWSh7Z+z2uzpWclOTHJCsB7gd3ajORuU/IOS5IkSbpHDIXTpM2iPQH4bmu6GXhOVW0NPB74SJK0fZsAn6mqzYCrgee29kOA11bV9qNO/yqAqtoC2B04LMmKbd/mwAuB7YADgBuraiu6gLrXOOW+YWD56JPauQ4FdmtjzAJeOdD/5qp6bFV9FTgYeE1VbQPsB3y29XkX8KSq2hJ4ZlXd2tqOajOSR416v/ZNMi/JvGtuu22cMiVJkiRNNUPh1Lt3kgXA34E1gZ+09gD/neRs4Kd0M4j3bfsuqqoFbXs+MDvJ6sAaVXVCa//SwBiPHXldVRcAfwIe0vYdV1XXVdXfgGuA77X2c4DZ49Q8uHz0GGDTVtNv2/7DgB0G+h8F3ewn8Gjg6HbNnwPWa31OAg5N8jJgwmWmVXVwVc2tqrmrL7/8RN0lSZIkTRFD4dS7qarmABsBK3Dnss89gHWAbdr+/wNGZvduGTh+Id3MXIAaZ4yM0z76XHcMvL6jnXcyFnV+gBvaz3sBVw8EyjlV9TCAqnoF8E7gAcCCJGtNcmxJkiRJS5GhcJpU1TXAa4H9kiwPrA5cXlW3tXsAN5rg+KuBa5I8tjXtMbD7xJHXSR4CbAj8ZgrLv4ButvLB7fWLgBNGd6qqa4GLkjyv1ZIkW7btB1XVqVX1LuAKunB4HbDqFNYpSZIk6R4yFE6jqjoTOAt4AXAEMDfJPLpAd8EkTvHvwGfag2ZuGmj/LLBcknPolnLuU1W3jHWCJaz75jb20W2MO4CDxum+B/CSJGcB5wHPau0fag+pOZcuxJ4FHAc83AfNSJIkSTNHqsZboSgNx6arrloHb7X1sMuQJEmSFtuOJ95tgd3QJJlfVXMn6udMoSRJkiT1mKFQkiRJknpskU+jnOjLzqvqyqktR5IkSZK0NE30FQXz6b4WYayvKCjggVNekSRJkiRpqVlkKKyqjZdWIZIkSZKkpW9S9xS275/bM8l/tdcbJtluekuTJEmSJE23yT5o5rPA9sAL2+vrgM9MS0WSJEmSpKVmonsKRzyyqrZOciZAVV2VZIVprEuSJEmStBRMdqbwtiTL0T1chiTrAHdMW1WSJEmSpKVisqHwk8C3gHWTHAD8EvjvaatKkiRJkrRUTGr5aFUdkWQ+8AS6r6d4dlX9elorkyRJkiRNu8X58vrLgSMH9/nl9ZoOq266KTueeMKwy5AkSZJ6YXG+vH5D4Kq2vQbwZ8DvMZQkSZKkZdgi7ymsqo2r6oHAMcAzqmrtqloLeDrwzaVRoCRJkiRp+kz2QTPbVtUPR15U1Y+AHaenJEmSJEnS0jLZ7ym8Isk7gS/TLSfdE/j7tFUlSZIkSVoqJjtTuDuwDt3XUnwbWLe1SZIkSZKWYZP9SoorgdclWQ24o6qun96yJEmSJElLw6RmCpNskeRM4BzgvCTzk2w+vaVJkiRJkqbbZO8p/Bzwxqo6DiDJTsDBwKOnqS712OWXXMOn3/S9YZchSZI0FK/+yDOGXYJ6ZrL3FK48EggBqup4YOVpqUiSJEmStNRMdqbwwiT/BXypvd4
TuGh6SpIkSZIkLS2TnSl8Md3TR79J9wTSdYB/n66iJEmSJElLx2SfPnoV8NpprkWSJEmStJQtMhQm+e6i9lfVM6e2HEmSJEnS0jTRTOH2wMXAkcCpQKa9IkmSJEnSUjNRKLwf8K/A7sALgR8AR1bVedNdmCRJkiRp+i3yQTNVtbCqflxVewOPAn4PHJ/kNUulOkmSJEnStJrwQTNJ/gV4Gt1s4Wzgk3RPIZUkSZIkLeMmetDMYcDmwI+A91TVuUulKkmSJEnSUjHRTOGLgBuAhwCvTf7xnJkAVVWrTWNtkiRJkqRptshQWFWT/XJ7SZIkSdIyyNA3QySpJB8ZeL1fkv0nOOaZSd46BWPvk+RvSRYkOS/J15OsdE/PK0mSJGnmMxTOHLcA/5Zk7ckeUFXfraoDp2j8o6pqTlVtBtwK7DZF55UkSZI0gxkKZ47bgYOBN4zekeQZSU5NcmaSnya5b2vfJ8mnk6ye5I9J7tXaV0pycZLlkzwoyY+TzE/yiyQPXVQRSWYBKwNXjTd2knsl+V2SdVqfeyX5fZK1k6yT5BtJTm//HtP67NhmIhe0c606lW+eJEmSpCVjKJxZPgPskWT1Ue2/BB5VVVsBXwXeMrizqq4BzgJ2bE3PAI6pqtvoguZrqmobYD/gs+OMvVuSBcBfgDWB7403dlXdAXwZ2KP12QU4q6quAD4BfKyqtgWeC3yh9dkPeFVVzQEeB9w0OHiSfZPMSzLv+huvWeSbJEmSJGnqTPg9hVp6quraJIcDr+Wuoen+wFFJ1gNWAC4a4/Cj6JZ8Hge8APhsklWARwNHDzw59l/GGf6oqnp1uo6fAd4MHLiIsb8IfAf4OPBi4JDWvgvw8IHxVmuzgicBH01yBPDNqrpk1LUfTBdg2fB+m9Q4NUqSJEmaYs4UzjwfB15Ct4RzxKeAT1fVFsDLgRXHOO67wFOSrAlsA/yc7vd7dbtXcOTfwxY1eFUV3SzhDosau6ouBv4vyc7AI+m+y5I25vYD421QVde1ex9fCtwbOGWiZaySJEmSlg5D4QxTVVcCX6MLhiNWp1vWCbD3OMddD5xGt3zz+1W1sKquBS5K8jyAdLacRBmPBf4wibG/QLeM9GtVtbBrAYJ9AAAWPUlEQVS1HQu8eqRDkjnt54Oq6pyq+iAwDzAUSpIkSTOAoXBm+ggw+BTS/emWgP4CuGIRxx0F7Nl+jtgDeEmSs4DzgGeNc+xu7SEwZwNbAe+bxNjfBVbhzqWj0C19nZvk7CTnA69o7a9Pcm6r4ybunFmUJEmSNETpVgtKiy/JXLqHyjxuKs+74f02qbfs8dGpPKUkSdIy49UfecawS9A/iSTzq2ruRP180IyWSJK3Aq/kzieQSpIkSVoGuXxUS6SqDqyqjarql8OuRZIkSdKSMxRKkiRJUo8ZCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknrMUChJkiRJPWYolCRJkqQemzXsAqTR1r3/6rz6I88YdhmSJElSLzhTKEmSJEk9ZiiUJEmSpB4zFEqSJElSjxkKJUmSJKnHDIWSJEmS1GOGQkmSJEnqMUOhJEmSJPWYoVCSJEmSeswvr9eMc9lFf+CAPXcddhmSJPXKO7789WGXIGlInCmUJEmSpB4zFEqSJElSjxkKJUmSJKnHDIWSJEmS1GOGQkmSJEnqMUOhJEmSJPWYoVCSJEmSesxQKEmSJEk9ZiiUJEmSpB4zFEqSJElSjxkKJUmSJKnHDIXLgCQLkyxIcm6So5OsNKQ63j6McSVJkiRNH0PhsuGmqppTVZsDtwKvmOyBSZabwjrGDIXp+FmSJEmSlkH+j/yy5xfAgwGS7JnktDaL+LmRAJjk+iTvTXIqsH2SbZP8KslZrf+qSZZL8qEkpyc5O8nL27E7JTkxybeSnJ/koCT3SnIgcO821hFJZif5dZLPAmcAD0iye5Jz2ozmB0cKbvUc0MY/Jcl9l/q7JkmSJGlMhsJlSJJ
ZwFOAc5I8DNgNeExVzQEWAnu0risD51bVI4HTgKOA11XVlsAuwE3AS4BrqmpbYFvgZUk2bsdvB7wJ2AJ4EPBvVfVW7pyxHBlnU+DwqtoKuA34ILAzMAfYNsmzB+o5pY1/IvCyqX5vJEmSJC0ZQ+Gy4d5JFgDzgD8D/ws8AdgGOL3tewLwwNZ/IfCNtr0pcFlVnQ5QVddW1e3AE4G92rGnAmsBm7RjTquqC6tqIXAk8Nhx6vpTVZ3StrcFjq+qv7XzHwHs0PbdCny/bc8HZo8+UZJ9k8xLMu+Gm2+Z7PsiSZIk6R6aNewCNCk3tdnAf0gS4LCqetsY/W9ugQ4gQI3RJ8BrquqYUefdaYz+Yx0PcMOo843ntqoaOcdCxvjcVdXBwMEAG6x1n/HGkyRJkjTFnClcdv0M2DXJugBJ1kyy0Rj9LgDWT7Jt67dqW4Z6DPDKJMu39ockWbkds12SjdvDY3YDftnabxvpP4ZTgR2TrN3ubdwdOGEKrlOSJEnSNDIULqOq6nzgncCxSc4GfgKsN0a/W+mC3aeSnNX6rQh8ATgfOCPJucDnuHMG72TgQOBc4CLgW639YODsJEeMMc5lwNuA44CzgDOq6jtTc7WSJEmSpkvuXNUn/WP56H5V9fRh1bDBWvep/3jKE4Y1vCRJvfSOL3992CVImmJJ5lfV3In6OVMoSZIkST3mg2Z0F1V1PHD8kMuQJEmStJQ4UyhJkiRJPWYolCRJkqQeMxRKkiRJUo8ZCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknrMUChJkiRJPTZr2AVIo6238YN4x5e/PuwyJEmSpF5wplCSJEmSesxQKEmSJEk9ZiiUJEmSpB4zFEqSJElSjxkKJUmSJKnHDIWSJEmS1GOGQkmSJEnqMb+nUDPOzZddx68P+Pmwy5AkSQMe9o6dh12CpGniTKEkSZIk9ZihUJIkSZJ6zFAoSZIkST1mKJQkSZKkHjMUSpIkSVKPGQolSZIkqccMhZIkSZLUY4ZCSZIkSeoxQ6EkSZIk9ZihUJIkSZJ6zFAoSZIkST1mKJQkSZKkHpu2UJjkHUnOS3J2kgVJHtnaX59kpSU85/5J9puC2vZJsv7A6y8kefhiHL9dkuOT/C7JGUl+kGSLe1jT8Unmtu0fJlljCc/z7PGupb1/f2m/j98l+ebiXPc9Nfp9lyRJkjR80xIKk2wPPB3YuqoeAewCXNx2vx5YolA4hfYB/hFOquqlVXX+ZA5Mcl/ga8Dbq2qTqtoa+ADwoDH6zlqS4qrqqVV19ZIcCzwbWFTQ+1hVzamqTYCjgJ8nWWcJx1pc+zDwvkuSJEkavumaKVwPuKKqbgGoqiuq6tIkr6ULBcclOQ4gye5JzklybpIPjpwgyZPbLNxZSX42cO6Ht1m1C9v5Rvp/O8n8Nju5b2tbLsmh7dznJHlDkl2BucARbcbs3qNm6cYbd8SrgcOq6lcjDVX1y6r6djv+0CQfbdf3wTar+KskZ7afm7Z+907y1TaTehRw74Fr+WOStdv2nklOa7V+Lslyrf36JAe0Ok9Jct8kjwaeCXyo9b9bUB1UVUcBxwIvbOfcJskJ7X08Jsl6rf3BSX7axjpj5LxJ3pzk9HYN72lts5P8Osnn2+/i2Hatd3vfF1WbJEmSpKVjukLhscADkvw2yWeT7AhQVZ8ELgUeX1WPb0sJPwjsDMwBtm3LH9cBPg88t6q2BJ43cO6HAk8CtgPenWT51v7iqtqGLni8Nsla7ZwbVNXmVbUFcEhVfR2YB+zRZsxuGjnxBOOO2Aw4Y4LrfwiwS1W9CbgA2KGqtgLeBfx36/NK4MY2k3oAsM3okyR5GLAb8JiqmgMsBPZou1cGTml1ngi8rAXV7wJvbtf2hwnqpF3LQ9v7+Clg1/Y+frHVBXAE8Jk21qOBy5I8EdiE7vcwB9gmyQ6t/yat/2bA1XTv57jvuyRJkqThWaLljROpquuTbAM8Dng8cFS
St1bVoaO6bgscX1V/A0hyBLADXfg5saouaue7cuCYH7QZyFuSXA7cF7iELgg+p/V5AF0w+Q3wwCSfAn5AF1YX5VGLGHdMSU4FVgOOrarXteajq2ph214dOCzJJkABIyF2B+CTbZyzk5w9xumfQBcWT08C3Wzi5W3frcD32/Z84F8nqnW8S2g/NwU2B37SxlqOLvytShesv9Vqvbld9xOBJwJntuNXoXvP/wxcVFULBmqbPWER3ezuvgDrrb7uEl6KJEmSpMU1LaEQoIWi44Hjk5wD7A0cOqpbGFvoAtRYbhnYXgjMSrIT3X2L21fVjUmOB1asqquSbEk3s/gq4PnAixdR9qLGHXEesDXwHYCqemRbGvn0gT43DGy/Dziuqp6TZDbdezJiorFCt1T1bWPsu62qRo5fyJL/Lreim8ELcF5VbX+XApLVFlHbB6rqc6P6z+buv6MJl4pW1cHAwQCbb7DpRO+LJEmSpCkyXQ+a2bTNjI2YA/ypbV8HrNq2TwV2TLJ2u1dud+AE4OTWvnE735oTDLk6cFULhA+lm/Gj3Zd3r6r6BvBfdGFudA2DJjPuZ4B92v17Ixb14JzVgb+07X0G2k+kLQVNsjnwiDGO/Rmwa5J1R+pJstEixoLxr+1ukjyXbrbvSLpZ1XXSPSSIJMsn2ayqrgUuSfLs1v4v6Z4eewzw4iSrtPYNRuqcitokSZIkLR3TNVO4CvCpdF+rcDvwe9rSQLrZoB8luazdV/g24Di6macfVtV34B/LCb+Z5F50SyYXtTzyx8Ar2hLM3wCntPYNgEPaOQBGZtwOBQ5KchPwj5mxqvrbRONW1V+T7Eb3EJkNWp8rgPeOU9v/0C0ffSPw84H2/9dqOxtYAJw2+sCqOj/JO4FjWz230c14/ml03wFfBT6f7iE8u45xX+EbkuxJd0/iucDOA8t3dwU+mWR1us/Gx+lmRl8EfC7Je1sNz6uqY9s9jye35abXA3vSzQyO51AG3nfvK5QkSZKGL3euQJRmhs032LSO/o//N+wyJEnSgIe9Y+dhlyBpMSWZX1VzJ+o3bV9eL0mSJEma+QyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknrMUChJkiRJPWYolCRJkqQeMxRKkiRJUo8ZCiVJkiSpxwyFkiRJktRjhkJJkiRJ6rFZwy5AGm3F9VblYe/YedhlSJIkSb3gTKEkSZIk9ZihUJIkSZJ6zFAoSZIkST1mKJQkSZKkHjMUSpIkSVKPGQolSZIkqccMhZIkSZLUY35PoWacSy+9lP3333/YZUiSJEmLbVn8/1hnCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknrMUChJkiRJPWYolCRJkqQeMxRKkiRJUo8ZCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFwKUqyMMmCJOclOSvJG5Ms0e8gyXuT7LKI/a9IsteSVwtJtmj1LkhyZZKL2vZP78l5JUmSJM0cs4ZdQM/cVFVzAJKsC3wFWB149+KeqKreNcH+g5aowrue4xxgpN5Dge9X1ddH90syq6puv6fjSZIkSVr6nCkckqq6HNgXeHU6yyX5UJLTk5yd5OUjfZO8Jck5bXbxwNZ2aJJd2/aBSc5vx324te2fZL+2PSfJKW3/t5Lcp7Ufn+SDSU5L8tskj5ts/Ul2SfLTJF8Fzmxte7dzLUjy2ZFZ0CRPSXJykjOSHJVk5Sl5EyVJkiTdY4bCIaqqC+l+B+sCLwGuqaptgW2BlyXZOMlTgGcDj6yqLYH/GTxHkjWB5wCbVdUjgPePMdThwH+2/edw15nJWVW1HfB6Fn/G8lHAW6pqiySbtzoe3WZDZwEvaDOibwWeUFVbA2cDrxt9oiT7JpmXZN6NN964mGVIkiRJWlIuHx2+tJ9PBB4xMvtHt6x0E2AX4JCquhGgqq4cdfy1wM3AF5L8APj+XU6erA6sUVUntKbDgKMHunyz/Zw
PzF7M2k+uqj+37V3owuy8JAD3Bi4GbgQeDvyqta8A/HL0iarqYOBggPXXX78Wsw5JkiRJS8hQOERJHggsBC6nC4evqapjRvV5MjBuSKqq25NsBzwBeAHwamDnxSjjlvZzIYv/ebhhsFTgi1X1X4MdkjwH+HFVvWgxzy1JkiRpKXD56JAkWQc4CPh0VRVwDPDKJMu3/Q9p994dC7w4yUqtfc1R51kFWL2qfki3BHTO4P6quga4auB+wRcBJzD1fgo8P8nara61kmwI/ArYsQVgkqycZJNpGF+SJEnSEnCmcOm6d5IFwPLA7cCXgI+2fV+gW755Rrp1ln8Dnl1VP04yh25Z5q3AD4G3D5xzVeA7SVakm617wxjj7g0c1ILlhcC/T/WFVdU5Sd4D/LQ9YOY24BVVdXqSlwBHJVmhdX878LuprkGSJEnS4ks3SSXNHOuvv37tu+++wy5DkiRJWmz777//sEv4hyTzq2ruRP1cPipJkiRJPWYolCRJkqQeMxRKkiRJUo8ZCiVJkiSpxwyFkiRJktRjhkJJkiRJ6jFDoSRJkiT1mKFQkiRJknrMUChJkiRJPWYolCRJkqQeMxRKkiRJUo+lqoZdg3QXc+fOrXnz5g27DEmSJGmZlmR+Vc2dqJ8zhZIkSZLUY4ZCSZIkSeoxQ6EkSZIk9Zj3FGrGSXId8Jth16F/GmsDVwy7CP3T8POkqeJnSVPJz5PGs1FVrTNRp1lLoxJpMf1mMjfESpORZJ6fJ00VP0+aKn6WNJX8POmecvmoJEmSJPWYoVCSJEmSesxQqJno4GEXoH8qfp40lfw8aar4WdJU8vOke8QHzUiSJElSjzlTKEmSJEk9ZijUjJLkyUl+k+T3Sd467Hq0bEnygCTHJfl1kvOSvK61r5nkJ0l+137eZ9i1atmQZLkkZyb5fnu9cZJT22fpqCQrDLtGLRuSrJHk60kuaH+jtvdvk5ZEkje0/8adm+TIJCv6t0n3lKFQM0aS5YDPAE8BHg7snuThw61Ky5jbgTdV1cOARwGvap+htwI/q6pNgJ+119JkvA749cDrDwIfa5+lq4CXDKUqLYs+Afy4qh4KbEn3ufJvkxZLkg2A1wJzq2pzYDngBfi3SfeQoVAzyXbA76vqwqq6Ffgq8Kwh16RlSFVdVlVntO3r6P6nawO6z9FhrdthwLOHU6GWJUnuDzwN+EJ7HWBn4Outi58lTUqS1YAdgP8FqKpbq+pq/NukJTMLuHeSWcBKwGX4t0n3kKFQM8kGwMUDry9pbdJiSzIb2Ao4FbhvVV0GXXAE1h1eZVqGfBx4C3BHe70WcHVV3d5e+zdKk/VA4G/AIW058heSrIx/m7SYquovwIeBP9OFwWuA+fi3SfeQoVAzScZo8/G4WmxJVgG+Aby+qq4ddj1a9iR5OnB5Vc0fbB6jq3+jNBmzgK2B/1dVWwE34FJRLYF23+mzgI2B9YGV6W67Gc2/TVoshkLNJJcADxh4fX/g0iHVomVUkuXpAuERVfXN1vx/SdZr+9cDLh9WfVpmPAZ4ZpI/0i1l35lu5nCNtmQL/BulybsEuKSqTm2vv04XEv3bpMW1C3BRVf2tqm4Dvgk8Gv82/f/27h7EriKMw/jzxyWoJCIigh8EEfJhEQlKNIEgipJCUiopEkgU0cLCRgsjCCnEzkaQWGgKK7VyrVxFQwQDLrjqolERQQUjGraQiARWXoszi1GWmGyye/cyzw8u9zB3GN4Dhzm89505RxfJpFCryTSwoT1Baw3DxunJEcekMdL2fL0KnKiqF8/6aRLY3473A2+vdGwaL1X1TFXdVFU3M8xFH1TVXuBD4MHWzWtJ56WqfgF+SrKpNd0HfIVzky7cj8D2JFe2e97CteTcpIviy+u1qiR5gOHf+MuA16rq+RGHpDGSZCfwETDLP/vADjLsK3wTWM9wQ32oquZGEqTGTpJ7gKeqaneSWxgqh9cAM8C+qjozyvg0HpJsZXho0Rrge+Bhhj/nnZt
0QZIcAvYwPHF7BniUYQ+hc5OWzKRQkiRJkjrm8lFJkiRJ6phJoSRJkiR1zKRQkiRJkjpmUihJkiRJHTMplCRJkqSOmRRKkiSSPJvkyyRfJPksyV2jjkmStDImRh2AJEkarSQ7gN3A7VV1Jsm1DO/TW+p4E1U1f8kClCQtKyuFkiTpeuDUwsuuq+pUVf2cZFuSj5N8nuSTJOuSXJ7kSJLZJDNJ7gVIciDJW0neAaZa29NJplv18dDoTk+SdC5WCiVJ0hTwXJJvgfeBN4Dj7XtPVU0nuQr4E3gSoKq2JNkMTCXZ2MbZAdxWVXNJdgEbgDuBAJNJ7q6qYyt6ZpKk/2WlUJKkzlXVaeAO4DHgN4Zk8HHgZFVNtz6/tyWhO4HXW9vXwA/AQlL4XlXNteNd7TMDfApsZkgSJUmrjJVCSZJEVf0FHAWOJpkFngBqka45xzB//KffC1X1yiULUpK0LKwUSpLUuSSbkpxdxdsKnABuSLKt9VmXZAI4BuxtbRuB9cA3iwz7LvBIkrWt741JrlvG05AkLZGVQkmStBZ4KcnVwDzwHcNS0iOt/QqG/YT3Ay8Dh1s1cR440J5Y+q8Bq2oqya3A8fbbaWAf8OvKnJIk6XylarGVIZIkSZKkHrh8VJIkSZI6ZlIoSZIkSR0zKZQkSZKkjpkUSpIkSVLHTAolSZIkqWMmhZIkSZLUMZNCSZIkSeqYSaEkSZIkdexvGWY5BX78x80AAAAASUVORK5CYII=\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "plt.figure(figsize=(13,6))\n", "plt.title(\"Model Accuracy Comparison\")\n", "sns.barplot(x=\"Score\", y=\"Model\", data=results)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Export `Random Forest` model to deploy production" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "import pickle as pkl" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "filename = 'model.pkl'\n", "pkl.dump(random_forest, open(filename, 'wb'))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.16" } }, "nbformat": 4, "nbformat_minor": 2 }