{"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"KaggleTitanicKoban.ipynb","provenance":[]},"kernelspec":{"name":"python3","display_name":"Python 3"}},"cells":[{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"ohs-Fg2Y0vls","executionInfo":{"status":"ok","timestamp":1613092594716,"user_tz":300,"elapsed":56292,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"eae4ad35-236d-4ea4-8d9f-cdf08070e98b"},"source":["# Mount data drive\n","from google.colab import drive\n","drive.mount('/data/')\n","data_dir = '/data/My Drive/EMSE 6575/LogisticRegressionHomework'"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Mounted at /data/\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"wRJkv9QU1Jo-"},"source":["# Load libraries\n","import pandas as pd\n","import numpy as np\n","from sklearn import preprocessing\n","import matplotlib.pyplot as plt \n","plt.rc(\"font\", size=14)\n","from sklearn.linear_model import LogisticRegression\n","from sklearn.model_selection import train_test_split\n","import seaborn as sns\n","sns.set(style=\"white\")\n","sns.set(style=\"whitegrid\", color_codes=True)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":306},"id":"apheM3cS1LLa","executionInfo":{"status":"ok","timestamp":1613092598910,"user_tz":300,"elapsed":60474,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"08e2635e-29d7-43ae-c79f-ab531f0d9d85"},"source":["# Read the data\n","# Data is the Kaggle Titanic passenger data, where the goal is to predict whether a passenger survived.\n","train = pd.read_excel(data_dir + '/train.xlsx', header=0)\n","test = 
pd.read_excel(data_dir + '/test.xlsx', header=0)\n","train['train_test'] = 'train'\n","test['train_test'] = 'test'\n","data = pd.concat([train, test]).reset_index()\n","print(\"Train: \" + str(train.shape))\n","print(\"Test: \" + str(test.shape))\n","print(\"Total: \" + str(data.shape))\n","data.head()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Train: (891, 13)\n","Test: (418, 12)\n","Total: (1309, 14)\n"],"name":"stdout"},{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>index</th>\n"," <th>PassengerId</th>\n"," <th>Survived</th>\n"," <th>Pclass</th>\n"," <th>Name</th>\n"," <th>Sex</th>\n"," <th>Age</th>\n"," <th>SibSp</th>\n"," <th>Parch</th>\n"," <th>Ticket</th>\n"," <th>Fare</th>\n"," <th>Cabin</th>\n"," <th>Embarked</th>\n"," <th>train_test</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0.0</td>\n"," <td>3</td>\n"," <td>Braund, Mr. Owen Harris</td>\n"," <td>male</td>\n"," <td>22.0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>A/5 21171</td>\n"," <td>7.2500</td>\n"," <td>NaN</td>\n"," <td>S</td>\n"," <td>train</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>1</td>\n"," <td>2</td>\n"," <td>1.0</td>\n"," <td>1</td>\n"," <td>Cumings, Mrs. John Bradley (Florence Briggs Th...</td>\n"," <td>female</td>\n"," <td>38.0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>PC 17599</td>\n"," <td>71.2833</td>\n"," <td>C85</td>\n"," <td>C</td>\n"," <td>train</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>2</td>\n"," <td>3</td>\n"," <td>1.0</td>\n"," <td>3</td>\n"," <td>Heikkinen, Miss. 
Laina</td>\n"," <td>female</td>\n"," <td>26.0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>STON/O2. 3101282</td>\n"," <td>7.9250</td>\n"," <td>NaN</td>\n"," <td>S</td>\n"," <td>train</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>3</td>\n"," <td>4</td>\n"," <td>1.0</td>\n"," <td>1</td>\n"," <td>Futrelle, Mrs. Jacques Heath (Lily May Peel)</td>\n"," <td>female</td>\n"," <td>35.0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>113803</td>\n"," <td>53.1000</td>\n"," <td>C123</td>\n"," <td>S</td>\n"," <td>train</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>4</td>\n"," <td>5</td>\n"," <td>0.0</td>\n"," <td>3</td>\n"," <td>Allen, Mr. William Henry</td>\n"," <td>male</td>\n"," <td>35.0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>373450</td>\n"," <td>8.0500</td>\n"," <td>NaN</td>\n"," <td>S</td>\n"," <td>train</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" index PassengerId Survived Pclass ... Fare Cabin Embarked train_test\n","0 0 1 0.0 3 ... 7.2500 NaN S train\n","1 1 2 1.0 1 ... 71.2833 C85 C train\n","2 2 3 1.0 3 ... 7.9250 NaN S train\n","3 3 4 1.0 1 ... 53.1000 C123 S train\n","4 4 5 0.0 3 ... 
8.0500 NaN S train\n","\n","[5 rows x 14 columns]"]},"metadata":{"tags":[]},"execution_count":3}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"8OtgE_1sVEp7","executionInfo":{"status":"ok","timestamp":1613092598911,"user_tz":300,"elapsed":60468,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"481186db-724d-4f30-cfc8-3389be05c584"},"source":["data.isnull().sum()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["index 0\n","PassengerId 0\n","Survived 418\n","Pclass 0\n","Name 0\n","Sex 0\n","Age 263\n","SibSp 0\n","Parch 0\n","Ticket 0\n","Fare 1\n","Cabin 1014\n","Embarked 2\n","train_test 0\n","dtype: int64"]},"metadata":{"tags":[]},"execution_count":4}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":142},"id":"1wfxoBVtVWvy","executionInfo":{"status":"ok","timestamp":1613092598912,"user_tz":300,"elapsed":60460,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"9691ff9a-de5f-4374-9046-bbe8ae0094d7"},"source":["(data[['Pclass', 'Survived']]\n"," .groupby(['Pclass'], as_index=False)\n"," .mean()\n"," .sort_values(by='Survived', ascending=False))"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Pclass</th>\n"," <th>Survived</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," 
<th>0</th>\n"," <td>1</td>\n"," <td>0.629630</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>2</td>\n"," <td>0.472826</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>3</td>\n"," <td>0.242363</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Pclass Survived\n","0 1 0.629630\n","1 2 0.472826\n","2 3 0.242363"]},"metadata":{"tags":[]},"execution_count":5}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":555},"id":"0NlFoQ9feWDt","executionInfo":{"status":"ok","timestamp":1613092600517,"user_tz":300,"elapsed":62057,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"45a03816-3195-4cd4-b8c3-717ce32d54eb"},"source":["# Treat a missing cabin as a blank string so the parser below always receives text.\n","# fillna replaces the chained-indexing assignment, which triggers SettingWithCopyWarning.\n","data['Cabin'] = data['Cabin'].fillna(\" \")\n","\n","def parse_cabin_letter(txt):\n","    \"\"\"Return the deck letter ('a'-'g') found in a cabin string, else \"unknown\".\n","\n","    The match is case-insensitive. If several deck letters occur, the one\n","    latest in the alphabet wins (e.g. 'F G73' gives 'g').\n","    \"\"\"\n","    cabin_letter = \"unknown\"\n","    lowered = txt.lower()\n","    # Later letters overwrite earlier ones, so the last match in a-g order is kept.\n","    for letter in 'abcdefg':\n","        if letter in lowered:\n","            cabin_letter = letter\n","    return cabin_letter\n","\n","data['cabin_letter'] = data['Cabin'].apply(parse_cabin_letter)\n","# catplot replaces the deprecated factorplot; kind='point' keeps factorplot's default plot type.\n","sns.catplot(x='cabin_letter', y='Fare', col='Pclass', kind='point', data=data)\n","plt.show()"],"execution_count":null,"outputs":[{"output_type":"stream","text":["/usr/local/lib/python3.6/dist-packages/ipykernel_launcher.py:1: SettingWithCopyWarning: \n","A value is trying to be set on a copy of a slice from a DataFrame\n","\n","See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n"," \"\"\"Entry point for launching an IPython 
kernel.\n","/usr/local/lib/python3.6/dist-packages/seaborn/categorical.py:3714: UserWarning: The `factorplot` function has been renamed to `catplot`. The original name will be removed in a future release. Please update your code. Note that the default `kind` in `factorplot` (`'point'`) has changed `'strip'` in `catplot`.\n"," warnings.warn(msg)\n","/usr/local/lib/python3.6/dist-packages/seaborn/_decorators.py:43: FutureWarning: Pass the following variables as keyword args: x, y. From version 0.12, the only valid positional argument will be `data`, and passing other arguments without an explicit keyword will result in an error or misinterpretation.\n"," FutureWarning\n"],"name":"stderr"},{"output_type":"display_data","data":{"image/png":"iVBORw0KGgoAAAANSUhEUgAABD0AAAFtCAYAAAAEQhEoAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeXxU9aH///fMJJkkkBAChISwyb6EPYgoAgUxFiFal0LV2isut1dty/WntSoCWq8taL3Sr1K1rXpV6lo3QAkoKqKobAFCgEDYIQvZIOsks/z+CAxEIAkwM2dy5vV8PHzMzDkzZ95Ecsi88zmfj8Xj8XgEAAAAAABgMlajAwAAAAAAAPgDpQcAAAAAADAlSg8AAAAAAGBKlB4AAAAAAMCUKD0AAAAAAIApUXoAAAAAAABTovQAmuH777/X2LFjjY4BAKbGuRYA/I9zLUJNmNEBgECbMGGCioqKZLPZFBUVpbFjx+rRRx9Vq1atjI52wd544w29//77ysnJ0ZQpU/TnP//Z6EgAQpRZz7W1tbWaO3eu1qxZo7KyMnXt2lX33Xefxo0bZ3Q0ACHIrOdaSbr//vv13XffqaqqSh06dNAdd9yhG2+80ehYaIEY6YGQ9MILL2jjxo364IMPlJWVpb/97W9GR/KJhIQE3X333br++uuNjgIApjzXOp1OJSUl6fXXX9f69es1c+ZMzZw5UwcPHjQ6GoAQZcZzrST953/+p1auXKkNGzZo4cKFevbZZ5WVlWV0LLRAlB4IaR07dtTll1+unTt3SpLKysr00EMPacyYMRo5cqTuvvvuM77upZde0hVXXKFhw4Zp8uTJWrFihXffvn37dMstt2jEiBEaNWqUZs6cKUnyeDx68sknNXr0aA0fPlxTp05VTk6OT/88V155pa644grFxcX59LgAcCHMdK6Njo7Wb37zG3Xu3FlWq1U/+clP1LlzZ23dutVn7wEA58NM51pJ6t27tyIiIiRJFotFFotF+/fv9+l7IDRweQtCWl5enlatWqVJkyZJkn7/+98rOjpaS5cuVXR0tDZu3HjG13Xp0kWLFi1Shw4dtGzZMj3wwANavny5EhIStGDBAl122WV67bXXVFdXpy1btkiSVq9erXXr1ikjI0MxMTHavXu3YmJiznj8uXPnasmSJWfcl5SUpMWLF/vgTw8AgWHmc21RUZH27t2rXr16NedLAQB+Y8Zz7dy5c/XBBx+opqZGA
wYM4FJCnBdKD4Ske+65RzabTTExMRo3bpx+/etfq7CwUKtWrdL333+vNm3aSJIuvvjiM77+pz/9qff+5MmT9eKLL2rz5s264oorFBYWpsOHD6uwsFCJiYlKTU2VJIWFhamyslK7d+/W4MGD1bNnz7Pmmzt3rubOneu7PzAAGMDs59q6ujrdf//9+tnPftbo+wCAP5n5XDt37lw9+uij2rhxo3744QfvyA/gXFB6ICQ9//zzuvTSSxtsy8nJUZs2bbz/MDTmww8/1CuvvKJDhw5JkqqqqlRaWipJeuCBB7RgwQLdcMMNatOmjW677TbdcMMNGj16tG6++WY9/vjjOnTokK688ko9+OCDat26te//gAAQBMx8rnW73fr973+v8PBwPfrooz49NgCcCzOfayXJZrMpNTVVH3/8sd58803deuutPn8PmBulB3BcYmKijh49qmPHjik2Nvaszzt06JBmzZqlV199VcOGDZPNZtM111zj3d+hQwc98cQTkqR169bptttu08iRI9WtWzfdeuutuvXWW1VcXKyZM2fqH//4h/fayFPNnj37rEP9OnXqpKVLl17gnxYAjGGGc63H49EjjzyioqIi/f3vf1d4ePi5fAkAwO/McK79MZfLxZweOC+UHsBxCQkJGjt2rB577DHNnj1b0dHRyszM1MiRIxs8r7q6WhaLRfHx8ZKkf//7394JoyTp008/1bBhw5SYmKg2bdrIYrHIarVq8+bN8ng8GjBggKKiohQRESGr9cxzCT/++ON6/PHHz/nP4HQ65XK55Ha75XK55HA4ZLPZFBbGtzqA4GCGc+2cOXOUm5urV155RZGRkef8egDwt5Z+ri0uLtZ3332n8ePHKzIyUt9++62WLl2qv/zlL+f4lQAoPYAG5s+frz/96U/66U9/qrq6Oo0aNeq0fxx69eqlGTNmaPr06bJYLLr22ms1fPhw7/4tW7boySefVEVFhdq1a6dHHnlEXbp00cGDB/Xkk0/q4MGDioiI0JgxY3T77bf7NP/f/vY3Pffcc97HH3/8se6991795je/8en7AMCFaMnn2kOHDuntt9/2HvuExx57TOnp6T57HwC4UC35XGuxWPTmm29qzpw5crvdSk5O1sMPP6yJEyf67D0QOiwej8djdAgAAAAAAABfO/MYJAAAAAAAgBaO0gMAAAAAAJgSpQcAAAAAADAlSg8AAAAAAGBKpi09PB6PHA6HmKcVAPyHcy0A+B/nWgA4f6YtPWpra5WVlaXa2lqjowCAaXGuBQD/41wLAOfPtKUHAAAAAAAIbZQeAAAAAADAlCg9AAAAAACAKVF6AAAAAAAAU6L0AAAAAAAApkTpAQAAAAAATInSAwAAAAAAmBKlBwAAAAAAMCVKDwAAAAAAYEqUHgAAAAAAwJQoPQAAAAAAgClRegAIaWuz8/XQwtVam51vdBQAAAAAPhZmdAAAMNKijO3KPXhU1Q6nRg5INDoOAAAAAB9ipAeAkFZd42xwCwAAAMA8KD0AAAAAAIApUXoAAAAAAABTovQAAAAAAACmROkBAAAAAABMidIDAAAAAACYEqUHAAAAAAAwJUoPAAAAAABgSpQeAAAAAADAlCg9AAAAAACAKVF6AAAAAAAAU6L0AAAAAAAApkTpAQAAAAAATInSAwAAAAAAmBKlBwAAAAAAMCVKDwAAAAAAYEoBKz3mzZunCRMmqG/fvsrJyTlt/3PPPXfavszMTKWnpystLU0zZsxQcXFxoOICAAAAAIAWLmClx8SJE7Vo0SIlJyeftm/r1q3KzMxssM/tduuBBx7Q7NmzlZGRodTUVD399NOBigsAAAAAAFq4gJUeqampSkpKOm17bW2tHn/8cc2dO7fB9qysLNntdqWmpkqSpk+frmXLlgUiKgAAAAAAMIEwowMsWLBA6enp6ty5c4PteXl56tSpk/dxfHy83G63ysrKFBcX1+zjZ2Vl+SwrAPOpcTi8t+vXrzc4TWCNGDHCZ8fiX
AsAZ8a5FgD8r7FzraGlx8aNG5WVlaX777/fb++RkpIiu93ut+MDaNkil38mlTsVabf79AfTUMO5FgD8j3MtAJw7Q0uPtWvXKjc3VxMnTpQk5efn6/bbb9ef/vQnJSUl6fDhw97nlpSUyGq1ntMoDwAAAAAAELoMLT3uuusu3XXXXd7HEyZM0AsvvKA+ffrI7XarpqZG69atU2pqqt566y1dddVVBqYFAAAAAAAtScBKjyeeeELLly9XUVGRbrvtNsXFxWnp0qVnfb7VatX8+fM1Z84cORwOJScn66mnngpUXAAAAAAA0MIFrPSYNWuWZs2a1ehzVq5c2eDx8OHDtXjxYn/GAgAAAAAAJhWwJWsBAAAAAAACidIDAAAAAACYEqUHAAAAAAAwJUoPAAAAAABgSpQeAAAAAADAlCg9AAAAAACAKVF6AAAAAAAAU6L0AAAAAAAApkTpAQAAAAAATInSAwAAAAAAmBKlBwAAAAAAMCVKDwAAAAAAYEqUHgAAAAAAwJQoPQAAAAAAgClRegAAAAAAAFOi9AAAAAAAAKZE6QEAAAAAAEyJ0gMAAAAAAJgSpQcAAAAAADAlSg8AAAAAAGBKlB4AAAAAAMCUKD0AAAAAAIApUXoAAAAAAABTovQAEJKOVjj0xqfbVFhaJUkqLXcoZ3+pwakAAAAA+BKlB4CQU1BSpZnPfKm3P8uR0+WRJFU7nLr/r6v05foDBqcDAAAA4CuUHgBCzgvvb1bR0ZrTtns80v97J1NHKxwGpAIAAADga5QeJrI2O18PLVyttdn5RkcBglbx0Wqt21Zw1v21Tre+2ngwgIkAAAAA+EuY0QHgO4sytiv34FFVO5waOSDR6DhAUDpSVt30c0qbfg4AAACA4MdIDxOprnE2uAVwunaxUU0+p31c088BAAAAEPwCVnrMmzdPEyZMUN++fZWTkyNJKi0t1Z133qm0tDRNnTpV9957r0pKSryvyczMVHp6utLS0jRjxgwVFxcHKi4Ak+rQNkrD+nQ46/7wMKvGDescwEQAAAAA/CVgpcfEiRO1aNEiJScne7dZLBbdcccdysjI0OLFi9WlSxc9/fTTkiS3260HHnhAs2fPVkZGhlJTU737AOBC/Nf1Q9Q2JuK07RZJd18/WHEx9sCHAgAAAOBzASs9UlNTlZSU1GBbXFycRo0a5X08dOhQHT58WJKUlZUlu92u1NRUSdL06dO1bNmyQMUFYGJJ7VvpV1cPbLAtMsKmefderisu7mZQKgAAAAC+FjQTmbrdbr355puaMGGCJCkvL0+dOnXy7o+Pj5fb7VZZWZni4uKafdysrCyfZw1WNQ6H93b9+vUGpwGC25oNZQ0eR9stqirZo/UlewxKFHgjRozw2bFC6VwLAOeCcy0A+F9j59qgKT3++Mc/Kjo6WrfccotPj5uSkiK7PTSGqkcu/0wqdyrSbvfpP7CAGf3z888lSRaL5PGI75sLFErnWgAwCudaADh3QVF6zJs3T/v27dMLL7wgq7X+ipukpCTvpS6SVFJSIqvVek6jPADgTIqPVutAQYUkKSLcJkety+BEAAAAAPzB8CVrn3nmGWVlZen5559XRMTJiQVTUlJUU1OjdevWSZLeeustXXXVVUbFBGAimTlHvPft4TYDkwAAAADwp4CN9HjiiSe0fPlyFRUV6bbbblNcXJyeffZZvfjii+revbumT58uSercubOef/55Wa1WzZ8/X3PmzJHD4VBycrKeeuqpQMUFYGKZO0+WHpERNh2rNDAMAAAAAL8JWOkxa9YszZo167TtO3bsOOtrhg8frsWLF/szFoAQ4/F4tOn4SI/42EiF2Qwf8AYAAADAT/hpH0BI2Z9frtLy+pWOhvbpYHAaAAAAAP5E6QEgpGw8ZT4PSg8AAADA3Cg9AISUTafM5zG0N6UHAAAAYGaUHgBCRp3TrazcIklS96RYtY2NNDgRAAAAAH+i9AAQMnbsK1FNrUuSNIRRHgAAAIDpUXoAC
BmZzOcBAAAAhBRKDwAhI/P4fB5hNotSerQzOA0AAAAAf6P0ABASKqrrtHN/qSSpX/d4RdrDDE4EAAAAwN8oPQCEhC27iuT21N9n1RYAAAAgNFB6AAgJmTmF3vvM5wEAAACEBkoPACHhxCSmrSLD1KtznMFpAAAAAAQCpQcA0yssqdLhokpJ0uDeHWSzceoDAAAAQgE/+ZtEZXWdqmqckqRqh1O1dS6DEwHB48SqLZI0hPk8AAAAgJBB6WECK9cd0H88nqGyCockqbTcodufWKHNu4408UogNGzKOfm9MIz5PAAAAICQQenRwm3JLdKzb25QTW3DkR1lFQ798Z/fK7+40qBkQHBwuz3ekR4d2kYpqX0rgxMBAAAACBRKjxbu/S92yXOWfTW1Li39Zk9A8wDBZm/eMR2rrJVUv1StxWIxOBEAAACAQKH0aOG27i5qdH/W7uIAJQGCE0vVAgAAAKGL0qOFs1kb/18YZuW32ghtmTlMYgoAAACEKkqPFm7kgI5N7E8MUBIg+NTWubT1+GinHp3aqE1ru8GJAAAAAAQSpUcLd8OE3rKdZTRHXOsIXTW6e2ADAUFk294S1Trdkri0BQAAAAhFlB4tnNPlkdt95qlML+rURrGtIgKcCAgeDS5tofQAAAAAQg6lRwvm8Xj0j4+yvKu3xESHS5J35MfGnCPam3fMoHSA8U4sVRseZtXAHu0MTgMAAAAg0Cg9WrDvsvK1Jbd+9ZbeXeIUE10/quPU0R3vfJZjSDbAaMcqa5V7sEyS1L97vOzhNoMTAQAAAAg0So8Wqs7p0iuLt3of33nNIO/9KHuYOrVvJUlavemQDhSUBzwfYLQtu4rkOT4Mivk8AAAAgNBE6dFCLVm9R3nFlZKky4cmq/9F8Q323zixjyTJ45He/ZzRHgg9G3MKvfcpPQAAAIDQROnRAh2tcOitFTsk1c9V8B9XDzjtOeNHdFZCfLQk6auNh5RXVBnQjIDRNh2fzyMmOlw9kuMMTgMAAADACJQeLdCiZdtVVeOUJF07rqe33DhVmM2qGyf0liS53R5GeyCk5BdXKr+4SpI0uFeHsy7rDAAAAMDcKD1amH15x5Tx3V5JUtsYu244XmycycSRXdS+TaQkaeW6AyosqQpERMBwpy5Vy6UtAAAAQOgKSOkxb948TZgwQX379lVOzskRB3v27NG0adOUlpamadOmae/evc3aF6pOLFHrPj45462T+ys6Mvyszw8Ps+n646WIy+3Re1/sDERMwHCUHgAAAACkAJUeEydO1KJFi5ScnNxg+5w5c3TTTTcpIyNDN910k2bPnt2sfaFq7bYCZR6fp6BHchtNSO3a5GsmjeqmtjF2SdKK7/er+Gi1XzMCRnO5Pdq8q/77JLFdtBLbtTI4EQAAAACjBKT0SE1NVVJSUoNtxcXFys7O1pQpUyRJU6ZMUXZ2tkpKShrdF6qcLrde/jjL+/iOa1JkbcY8BfZwm677SS/vMd7/YpffMgLBYPehMpVX1UmShvRmlAcAAAAQygyb0yMvL08dO3aUzWaTJNlsNiUkJCgvL6/RfaHqk2/26NCR+hVYLh2cpEE92zf7tVdd0l2xrSIkScvW7FVpeY0/IgJB4dRLW4b1STAwCQAAAACjhRkdwN+ysrKaflKQq3K49Pqn+ZIkm1VK7e7R+vXrT3tejcPhvf3x/pG9IvX5plrVOt164e1vdOUwlvCEOX29/mTp4a48qPXrGy9LG/u+MbsRI0b47FhmONcCgD9wrgUA/2vsXGtY6ZGUlKSCggK5XC7ZbDa5XC4VFhYqKSlJHo/nrPvOVUpKiux2ux/+BIHz4gebVVNbP3vpteN6adK4gWd8XuTyz6RypyLt9tP+p/cfWKfvc1aoorpOG3Krdff0MWrTumV/XYAfq6l16sDbn0qSenWJ0+WXXtzkaxr7vkHzmeFcCwDBjnMtAJw7wy5vadeunfr3768lS5ZIkpYsWaL+/fsrPj6+0X2h5kBBuT75d
q8kKa61XT+/os95HSc6MlzpY3tKkmpqXfr4692+iggEjew9JXK63JKkocznAQAAAIS8gJQeTzzxhMaOHav8/HzddtttuvrqqyVJc+fO1RtvvKG0tDS98cYbeuyxx7yvaWxfKHl58Va5j69Re/NV/RpdorYpUy/voejI+sE9S1bvVkV1nU8yAsFiE0vVAgAAADhFQC5vmTVrlmbNmnXa9p49e+rdd98942sa2xcq1m8v0LptBZKk7kmxmjSq2wUdr3VUuKaM6aF3PstRVY1Ti7/erV9c2dcXUYGgcGIS04gwq/p3b97IsKjjReCJWwAAAADmYdjlLWicy+XWPz/e6n18R3qKbM1YorYp6Zf3UGRE/ao4H63KVVUNoz1gDkcrHNp9+KgkaWCPdooItzXrdTen9dOgnu11c1o/f8YDAAAAYABKjyC17Lt9OlBQLkkaNTBRQ3w0VL9Na7smX3qRJKmyuk5Lv9njk+MCRtu08/wubRk5IFFP3n2ZRg5I9EcsAAAAAAai9AhCFVW1WrRsuyQpzGbRjKlnXq3lfF07vqf3t+AffpWrGofTp8cHjJDZYD6PBAOTAAAAAAgWlB5B6K0VOSqvqpUkXX1ZD3Xq0Nqnx28bE6mrRtfPD3Ksslafrtnr0+MDgebxeJR5fKRHm9YR6p4Ua3AiAAAAAMGA0iPIHDpSoSWr65eTjYmO0PRJ57dEbVOuG99L4WH1//vf/3KXHHUuv7wPEAh5RZU6UlotSRrSq4OsPpj/BgAAAEDLR+kRZF5ZvFWuU5aobR0d4Zf3adcmSpMu7ipJKit3aPl3+/zyPkAgbDzl0hZfzX8DAAAAoOWj9Agim3KO6Put+ZKkLh1jdNUlF7ZEbVOun9BbYbb634j/+4udqnMy2gMt0/lOYgoAAADA3Cg9goTL7dE/Ps7yPr4jPUU2m3//9yS0jdaE1PrRHsVHa/TZ2gN+fT/AH1wutzYfLz2SO7RSQttogxMBAAAACBaUHkFixff7tDfvmCRpRL8EDe8XmNUnbpzY2zv/wXsrd8rpcgfkfQFf2XWwTJU19SsQDenNKA8AAAAAJ1F6BIHK6jq9sWybJMlqtej29JTzOk5UZFiD2+ZIbNdK44d3liQVllTpy/WM9kDLwlK1AAAAAM6G0iMIvPt5jo5W1C9RO/nS7urSMea8jnNzWj8N6tleN6f1O6fX3TixtyzHF7t45/OdcjHaAy3IiaVqrRZpUK/2BqcBAAAAEEwoPZppbXa+Hlq4Wmuz83163PziSn20qn6J2tZR4frFledWWJxq5IBEPXn3ZRo5IPGcXtc5IUaXD0mWVL/059eZh847AxBI1Q6ntu8tkST17tpWraPCDU4EAAAAIJhQejTTooztysot1qKM7T497itLtnrn0fjFlX0V28o/S9Q25edX9PHef+fzHLmPL5sLBLOtu4vldNX/XR3KfB4AAAAAfoTSo5mqj0+UeOLWF7bkFunbzXmSpOQOrTX5sot8duxz1S0pVqMHJUmSDhRU6Nsthw3LAjRXw/k8KD0AAAAANETpYRCX26N/fHRyidoZ6QMV5uclapsy7ZTRHm+vYLQHgt+m4/N5REbY1LdbvMFpAAAAAAQbSg+DfLFuv3YfOiqp/jfUI/t3NDiR1LNznEYOqM+xN++YfvDx/CWAL5Ueq/Eu85zSs73CwzidAQAAAGiITwkGqKqp02ufHF+i1iLdkZ4iy4nlUww2fVJf7/23V+yQx8NoDwSnE6M8JGkI83kAAAAAOANKDwO8t3KnSssdkqS0S7qrW1KswYlO6tO1rYYdnxth18GjWr+90OBEwJltPGU+j2HM5wEAAADgDCg9AqywpEoffpUrSYqODNPNV53/ErX+Mo3RHghyHo/HO9KjbYxdXRNjDE4EAAAAIBhRegTYq0uzVeesX6J22hV91aa13eBEpxvYo50G9WwvSdq+r1SbdxYZnAho6GBhhYqP1kiShvTpEDSXhwEAAAAILpQeAZS9p1hfZ
x6SJCW1a6Wplxu3RG1Tpk06uZLLW5/tMDAJcLqNOScvuxrKfB4AAAAAzoLSI0DcP1qi9rapAxUeZjMwUeMG92qv/t3rlwDNyi3W1t3FBicCTtqUc3L00VDm8wAAAABwFpQeAfLlhoPaeaBMkjSoZ3tdkpJocKLGWSyWhqM9VjDaA8HB6XJrS2596dGlY2u1axNlcCIAAAAAwYrSIwBqHE699km2JMlike64JniWqG3M8L4J6tUlTpKUmXNEO/aVGJwIkHL2l6ra4ZQkDe2TYHAaAAAAAMGM0iMA3v9yl3fSxUkXd1OP5DYGJ2oei8Wi6VecOtojx8A0QL3MU5aqZT4PAAAAAI2h9PCzorJq/fuLXZKkKLtNtwThErWNuXhgoi7qFCtJWretQLsOlhmcCKHuROlhtVqU0rOdwWkAAAAABLNzKj1yc3P1/PPP67HHHvM+3r59u1+CmcX/fZKt2jqXJOnGiX3UNjbS4ETnxmKxaNoVfb2P3/mM0R4wTlVNnXbsL5Uk9e3aVtGR4QYnAgAAABDMml16fPrpp7rllltUUFCgjz76SJJUVVWlP//5z34L19Lt2FeiL9cflCQlxEfrmrE9DU50fkYPSlKXjjGSpDVb8rQ375jBiRCqsnKL5XZ7JEnDWLUFAAAAQBOaXXr89a9/1SuvvKLHH39cNlv9Uqv9+vVjpMdZeDw/WqJ2ygBFhAfvErWNsVot+vkpc3sw2gNG2ZhT6L0/hNIDAAAAQBOaXXqUlJSob9/6yxxOrDxisVh8sgrJF198oWuvvVbXXHON0tPTtXz5cknSnj17NG3aNKWlpWnatGnau3fvBb9XoHydeUjb99UPwx9wUbwuG9zJ4EQX5vKhyerUvpUkafWmQzpQUG5wIoSiTTvr5/OIsoepT9e2BqcBAAAAEOyaXXoMHDjQe1nLCUuXLtXgwYMvKIDH49Hvf/97zZ8/Xx999JHmz5+vBx98UG63W3PmzNFNN92kjIwM3XTTTZo9e/YFvVegOOpcemVJtvdxS1mitjE2q0U3Tqwf7eHxSO9+zmgPBFZRWbUOFFRIkgb1bK8wG/MwAwAAAGhcsz81PPLII3r22Wd1yy23qKqqSrfffrsWLFighx566MJDWK0qL68fOVBeXq6EhASVlpYqOztbU6ZMkSRNmTJF2dnZKikpueD387cPv9ylorJqSdKE1C7q3cUcv5EeP6KzEuKjJUlfbTykvKJKgxMhlJwY5SFJQ7m0BQAAAEAzhDXnSR6PRxEREVqyZIlWrVql8ePHKykpSePHj1erVq0uKIDFYtGzzz6ru+++W9HR0aqsrNRLL72kvLw8dezY0Tt/iM1mU0JCgvLy8hQfH39B7+lPxUer9d7KnZIke4RNt07ub3Ai3wmzWXXjhN56/r1Ncrs9evfzHP122jCjYyFEnFiqVqL0AAAAANA8zSo9LBaLpk6dqg0bNmjy5Mk+DeB0OvXiiy9q4cKFGjFihNavX6+ZM2dq/vz5Pjl+VlZW009qhhqHw3u7fv36sz7vw+9KVFNbv0TtpX1bae+ubO31SYLg0NbmUWy0TceqXPp87X4NTKpVXKtm/TUCzpvH49Ha7DxJUkyUTQUHdqjwYMu+ZCwYjBgxwmfH8tW5FgDMhnMtAPhfY+faZn9a7d+/v/bs2aOePX277Oq2bdtUWFjoDTlixAhFRUXJbreroKBALpdLNptNLpdLhYWFSkpKOqfjp6SkyG63X3DOyOWfSeVORdrtZ/2C7jpQpszd9UvUto+L0t03Xa7ICPMVAr+o3a0XP9git0facSRSd48dYnQkmNzevGOqrDkkSRo5sJNSU4cbnAg/5qtzLQDg7DjXAsC5a/Yn8osvvlh33qQ4BGEAACAASURBVHmnfvaznykxMbHBxJw33HDDeQdITExUfn6+du/erR49eig3N1fFxcXq1q2b+vfvryVLluiaa67RkiVL1L9//6C9tMXj8egfH59s33919QBTFh6SNGlUN73zW
Y5Kyx1a8f1+Tbuij9q1iTI6Fkzs1EtbhnFpCwAAAIBmavan8g0bNig5OVk//PBDg+0Wi+WCSo8OHTpo7ty5+t3vfuctUp588knFxcVp7ty5+sMf/qCFCxcqNjZW8+bNO+/38bdvN+dp6+5iSVLfbm01bliywYn8xx5u03U/6aV/frxVTpdb73+xS3deO8joWDCxzJxC7/0hvSk9AAAAADRPs0uP119/3W8h0tPTlZ6eftr2nj176t133/Xb+/pKbZ1LryzZ6n1shiVqm3LVJd317uc7dayyVsvW7NUNE3qrbWyk0bFgQnVOt7KOF4rdk2L5ewYAAACg2Zq9ZO2pPB6P3G63979Q9/HXu1VQUiVJGjess/p1C85LcHwp0h6mn43vJUmqdbr1wVe5BieCWW3fVyLH8cmBGeUBAAAA4Fw0u/QoKCjQPffco1GjRmnAgAEaOHCg979QVlpeo3c+y5EkRYTb9KurBxicKHAmX9pdMdHhkqRPv92joxUOgxPBjDaxVC0AAACA89Ts0mPOnDkKDw/Xq6++qujoaH3wwQeaMGGCHnvsMX/mC3qLlm1XtcMpSbpufC91aBs6E3pGR4YrfWz9aj41tS59tKp5oz3WZufroYWrtTY735/xYBInJjENs1mU0qOdwWkAAAAAtCTNLj02btyoJ598Uv3795fFYlG/fv30P//zP3r55Zf9mS+o7T50VMu/3ydJio+N1PU/6WVwosCbMqaHoiPrp4ZZsnqPKqpqm3zNooztysot1qKM7f6OhxauorpOOw+USpL6dY9XpN2cKyIBAAAA8I9mlx5Wq1VhYfUfOGJjY1VSUqLo6GgVFBT4LVww83g8+ufHWfJ46h//6ur+IfmBrHVUuKaO6SFJqnY4tfjr3U2+prrG2eAWOJstu47Iffx7bCjzeQAAAAA4R02WHkeO1A8tHzJkiL766itJ0pgxYzRz5kzde++9SklJ8W/CIPX91nxt3lUkSerVJU7jh3cxOJFx0sf2VJTdJkn66OvdqqqpMzgRzCKT+TwAAAAAXIAmS4+0tDRJ0vz58zVy5Ejde++9evjhhzVq1Cj17t1bf/nLX/weMtjUOV16+eNTlqhNT5HVau4lahsT2ypCky+9SJJUWV2npd/sMTgRzOJE6dEqMky9OscZnAYAAABAS9Pk9Rie49dvxMbGSpJ++OEHRUZG6p577vFvsiC2ZPUe5RVXSpLGDOmkgUyuqGvH9dLi1XtUW+fSh1/lauqYHiF5uQ98p7CkSoeL6r/PBvfuIJvtvFbYBgAAABDCmvwUYbGE7giGM3F7PHp7xQ5JUniYVf8xJbSX7D0hLsauq0Z3kyQdq6zVp2v2GpoHLV/mzpOXtgxhPg8AAAAA56HJX8W7XC5999133hEfTqezwWNJGj16tP8SBgn38dkUj1XWqur4BJzXjuupjvHRRsYKKteN76VPv92rOqdb73+5S5Mvu0j2cJvRsdBCbTplPo9hzOcBAAAA4Dw0WXq0a9dODz/8sPdxXFxcg8cWi0Wff/65f9IFgV0HyvTKkq3KL6mSJG/hEdfarhsm9DYyWtBp1yZKky7uqk++3auycoeWf7dPUy/vYXQstEBut8c70iOhbZSS2rcyOBEAAACAlqjJ0mPlypWByBGUcg+W6Q/Pr5ajznXavraxdkUxZ8Vprp/QW8u/3yeny6N/f7FTV43upvAwRnvg3OzNO6ZjlbWS6i9t4TI7AAAAAOeDmQEb8don285YeEjSnsPHtOmUOQdQL6FttCakdpUkFR+t0WdrDxicCC1RZk6h9/6wPgkGJgEAAADQklF6nEVldZ027Chs9DmrNx0OUJqW5caJvb1L+L63cqecLrfBidDSZJ4yn8fg3u0NTAIAAACgJaP0OIuaWmeTz6muafo5oSixXSuNH95ZUv2yo1+uZ7QHmq+2zqWtu4slST2S26hNa7vBiQAAAAC0VJQeZxEXE6n42MY/bPVIbhOgNC3PjRN768Q0DO98v
lMuRnugmbbtLVGts/7vy1CWqgUAAABwASg9zsJmtWjKmLOvPBJtD9MVF3cNYKKWpXNCjC4fkixJyiuq1NeZhwxOhJbi1EtbhrJULQAAAIALQOnRiOt+0ls/GdH5tO3RkWF6ZMbFDLtvws+v6OO9/87nOXK7PQamQUtxYqna8DCrBvRoZ3AaAAAAAC0ZpUcjbFaL/vsXw/X0by9Xq6hwSVJsqwj945FJGtyL30A3pVtSrEYPSpIkHSio0LdbmPgVjTtWWavcg2WSpAEXxcseznLHAAAAAM4fpUcTLBaL+naLV5tWEZKk1lHhiomOMDhVyzHtlNEeb69gtIcvrc3O10MLV2ttdr7RUXxmy64ieY7/FRnCfB4AAAAALhClB/yqZ+c4jRzQUZK0N++YfjDRB3SjLcrYrqzcYi3K2G50FJ/ZmHNymehhfRIMTAIAAADADCg94HfTJ/X13l+0bLtq61wGpjGPE0smm2np5E3H5/OIiQ7XRayOBAAAAOACUXrA7/p0bauU4xNS7s07pqKjNZKkoqPV2pd/zMhoCCL5xZXKL66SJA3u3UE2q8XgRAAAAABaOkoP+F1ldZ3yiitP215b59ZDz69W/hn2IfQ0WKqW+TwAAAAA+AClB/wu47t9Kj4+uuPHyqvq9O8vdgU4EYJRg9KjD6UHAAAAgAtH6QG/+y4rr/H9WxrfD/NzuT3avKu+9EhsF63Edq0MTgQAAADADCg94HeOJiYubWo/zG/3oTKVV9VJkoayagsAAAAAH6H0gN/17db2gvbD/JjPAwAAAIA/UHrA76aO6aEw29n/qv1sXK8ApkEwOlF6WCzS4N7tDU4DAAAAwCyCovRwOByaM2eOrrzySk2dOlWPPvqoJGnPnj2aNm2a0tLSNG3aNO3du9fYoDgvXTrG6MFbUxUZYTtt33/+bJCG9+NyhlBWU+tU9p4SSVLPznGKiY4wOBEAAAAAswgzOoAkPfXUU7Lb7crIyJDFYlFRUZEkac6cObrpppt0zTXX6KOPPtLs2bP12muvGZwW5+OSlCS9MjtNqzYe1GufbFNldZ06xkdrypgeRkdrcRx1Lq3OPKSyCof3scfjkcViMTjZ+cneUyKnyy1JGsaqLQAAAAB8yPCRHpWVlfrwww/1u9/9zvuhrX379iouLlZ2dramTJkiSZoyZYqys7NVUlJiZFxcgNZR4Zp86UVq06r+N/k2a8v8kG6kAwXlunve53r2rY2qqnFKkoqP1mj2S2tU43AanO78nDqfxxDm8wAAAADgQ4aXHgcOHFBcXJyee+45XXfddfrlL3+pdevWKS8vTx07dpTNVn9JhM1mU0JCgvLyWN4UocnlcuuPL3+vwtLq0/Zl5hzRSx9uMSDVhdt0vPSICLepf/d4g9MAAAAAMBPDL29xuVw6cOCABgwYoAcffFCbNm3Sr3/9ay1YsMAnx8/KyvLJcWocDu/t+vXrfXLMUMXX8vxsO1CtvKLKs+5fuW6/hnauU6vI0+dOCVYVNS7tPnxUktSlXZi2bM40OFFoGTFihM+O5atzLQCYDedaAPC/xs61hpceSUlJCgsL817GMmTIELVt21aRkZEqKCiQy+WSzWaTy+VSYWGhkpKSzun4KSkpstvtF5wzcvlnUrlTkXa7T//xCkV8Lc/PtiPbJBWfdb/LLbVN7KFBPVvO6ierNh6UVD96a2xqL40YwUo+LZWvzrUAgLPjXAsA587wy1vi4+M1atQoffPNN5LqV2wpLi5W9+7d1b9/fy1ZskSStGTJEvXv31/x8Qx/R2iKimi6o2zOc4LJqfN5DGUSUwAAAAA+FhSfkB577DE9/PDDmjdvnsLCwjR//nzFxsZq7ty5+sMf/qCFCxcqNjZW8+bNMzoqYJjRg5P06tLss+63WqTaOlcAE10Yj8ejjcdLjzatI9Q9KdbgRAAAAADMJihKjy5duuj1118/bXvPnj317rvvGpAICD6d2rfW1Mt7aPHXu8+43+2RHnnhW919/
WBNGtUtwOnO3eGiShWV1U/KOqRXB1lZzQcAAACAjxl+eQuA5rsjPUX/cfUAtY05eT1vmM2iARfVX/bldLn113cy9dKHW+R0uY2K2Sxc2gIAAADA3yg9gBbEarXo+gm99fKjVyqhbZQkKaFttP58zxjdOrm/LMcHSyz+erfmvLRGxyprDUzbuMycQu/9IZQeAAAAAPyA0gNogcJsVoXZTn77WiwW3Tixj2bdNkpR9vqr1jbvKtJ9z36lvXnHjIp5Vi6XW1t2FUmSkju0UkLbaIMTAQAAADAjSg/ARC4emKi//G6sktq3kiQVlFTpgb+u0pothw1O1tDOg2WqrHFKkob0ZpQHAAAAAP+g9ABMpkvHGD3zu7EadvySkZpal558da3ezNgut9tjcLp6mxrM55FgYBIAAAAAZkbpAZhQ6+gIzbnjEv1sfC/vtn8t36E/v7ZW1Q6ngcnqnViq1mqRBvVqb3AaAAAAAGZF6QGYlM1m1YypA/Xfvxiu8LD6b/U1W/L0wF9XKb+40rBc1Q6nduwrkST17tpWraPCDcsCAAAAwNwoPQCTm5DaRX++Z4ziYyMlSfvyy3Xfs181uMQkkLbuLpbTVX+ZzVDm8wAAAADgR5QeQAjo07Wt/ve/x6lft7aSpPKqOs3++xp9/HWuPJ7AzvOR2WA+D0oPAAAAAP5D6QGEiPjYSD1592WadHFXSZLb7dHfP8zS/3snU3VOV8ByZOYUSpIiI2zq2y0+YO8LAAAAIPRQejRTVGRYg1ucP76WvnE+X8fwMJt+8/OhuuvaQbJaLZKkFT/s10MLv1HJsRq/5DxV6bEa7csvlySl9GzvnWsEAAAAAPyBTxzNdHNaPw3q2V43p/UzOkqLx9fSN87362ixWDT18h56/M7Riomun0R0x75S3ffsV8rZX+qPqF6ZO09e2jKE+TwAAAAA+Bm/am+mkQMSNXJAotExTIGvpW9c6NdxSJ8OembmOD3x8vfal1+u4qM1+sPzq/Wbnw/VT0Z08WHSk06dz2MY83kAAAAA8DNGegAhLLFdKz3127EaPShJklTndOuZf23Qy4u3yuX27QSnHo/HW3q0jbGra2KMT48PAAAAAD9G6QGEuCh7mP5w60jddGVf77YPvtylx/6+RhVVtT57n4OFFd55Q4b06SCLxeKzYwMAAADAmVB6AJDVatEv0vrpoV+NVGSETZK0MeeI7luwSgcKys/7uGuz8/XQwtVam52vjcdXbZG4tAUAAABAYFB6APC6dHAnPfXbseoYHy1Jyiuq1P+3YJV+yM4/r+MtytiurNxiLcrYrk05Rd7tTGIKAAAAIBAoPQA00D0pVs/MHKfBvdpLkqodTj3x8vd657MceTznNs9HdY1TklRV49SW3PrSo0vHGLVrE+Xb0AAAAABwBpQeAE4T2ypCj901WlMv7yFJ8nik1z/dpvmvr1ONw3nOx6urc6n6+OuGcmkLAAAAgACh9ABwRmE2q+66dpB++/OhCrPVTzq6etNhPfjcahWWVJ3TsRx1Lu99Sg8AAAAAgULpAaBRk0Z105P/NUZxMXZJ0u7DR3Xfgq+UlVvUxCtPctTWlx42q0UpPdr5JScAAAAA/BilB4Am9b8oXv87c5x6dYmTJB2tqNWsF77Vp9/uadbra51uSVLfbm0VHRnut5wAAAAAcCpKDwDN0j4uSn++Z4zGj+gsSXK5PVr47816/r1NqjteajRlKKu2AAAAAAggSg8AzWYPt+m+XwzXbVMGylo/zYeWrdmrR1/8VmXljiZfP7RPgn8DAgAAAMApKD0AnBOLxaLrftJLs++4RK0iwyRJW3cX674FXyn3YNlZXxdlD1PvrnGBigkAAAAAlB4Azs+Ifh31l5nj1DmhtSTpSGm1fv/can298ZAkyePxNLjsZXCv9gqzccoBAAAAEDhhRgcA0HIld2itp387Vk8vWq912wpUW+fS/DfW6etNh3T4SIWOlFV7n9s6iglMAQAAAAQWv3YFcEFaR
YVr1oxRunFib++2NVvytC+/vMHzPl93QEu/ad5qLwAAAADgC5QeAC6YzWrRrZMHaOYvhjX6vFeXblVVTV2AUgEAAAAIdUFVejz33HPq27evcnJyJEmZmZlKT09XWlqaZsyYoeLiYoMTAmhM68jGL2Gpcbi0flthgNIAAAAACHVBU3ps3bpVmZmZSk5OliS53W498MADmj17tjIyMpSamqqnn37a4JQAGlNZ42zGcxjpAQAAACAwgqL0qK2t1eOPP665c+d6t2VlZclutys1NVWSNH36dC1btsyghACa46JOsU0+p0dymwAkAQAAAIAgKT0WLFig9PR0de7c2bstLy9PnTp18j6Oj4+X2+1WWVmZEREBNMNFndpoUM/2Z93ft2tb9e4SF8BEAAAAAEKZ4UvWbty4UVlZWbr//vv9cvysrCy/HBfAmU0aFK78ojAdOdrwUpf4mDD9dJhdGzZsMCgZfmzEiBE+OxbnWgA4M861AOB/jZ1rDS891q5dq9zcXE2cOFGSlJ+fr9tvv12//OUvdfjwYe/zSkpKZLVaFRd3br8lTklJkd1u92lmAI0bM9qlbzYd1sJ/b1a1w6m4GLv+/sgkRYTbjI4GP+FcCwD+x7kWAM6d4Ze33HXXXVq9erVWrlyplStXKjExUf/85z91xx13qKamRuvWrZMkvfXWW7rqqqsMTgugOcLDbBo/oovaxtT/YBZtD6PwAAAAABBwho/0OBur1ar58+drzpw5cjgcSk5O1lNPPWV0LAAAAAAA0EIEXemxcuVK7/3hw4dr8eLFBqYBAAAAAAAtleGXtwAAAAAAAPgDpQcAAAAAADAlSg8AAAAAAGBKlB4AAAAAAMCUKD0AAAAAAIApUXoAAAAAAABTovQAAAAAAACmROkBwG+iIsMa3AIAAABAIFF6APCbm9P6aVDP9ro5rZ/RUQAAAACEIH79CsBvRg5I1MgBiUbHAAAAABCiGOkBAAAAAABMidIDAAAAAACYEqUHAAAAAAAwJUoPAAAAQNLa7Hw9tHC11mbnGx0FAOAjTGQKAAAASFqUsV25B4+q2uFkIm4AMAlGegAAAACSqmucDW4BAC0fpQcAAAAAADAlSg8AAAAAAGBKlB4AAAAAAMCUKD0AAAAAAIApUXoAAAAAAABTYslaAAAAhDSPx6Md+0pV7WDVFgAwG0oPAAAAhKydB0r1v29u1IGCcu+2wtIq7dhXor7d4g1MBgDwBS5vAQAAQEgqKKnSrBe+bVB4SJLT5dGjL67R4aIKg5IBAHyF0gMAAAAh6aNVuaqqOfMlLdUOpz78KjfAiQAAvsblLQCAoFBRXadvNh1W8dFqJbSN1mVDOinKzj9TAPxnw/aCJvYXBigJAMBf+GkSAGC4rzYc1HPvZqqm1uXd9o+Ptui+m0bo4oGJBiYDYGZuT+P7PZ4mngAACHpc3gIAMNT2fSX6y7/WNyg8JKmyxqk//d9a7cs7ZlAyAGY3uFf7JvZ3CFASAIC/UHoAAAz14Ze5OtsvU50utz7+endgAwEIGdeM7amI8DP/OBweZtW143oGOBEAwNcMLz1KS0t15513Ki0tTVOnTtW9996rkpISSVJmZqbS09OVlpamGTNmqLi42OC0AABfy97T+Lm9qf0AcL66dIzR7NsvUXxsZIPtVotFs2aMUrekWIOSAQB8xfDSw2Kx6I477lBGRoYWL16sLl266Omnn5bb7dYDDzyg2bNnKyMjQ6mpqXr66aeNjgsA8LGwsMb/KQqzGf5PFQATG9K7g/45a5Jm3Xax2rSKkCR1bBet4X0TDE4GAPAFw3+SjIuL06hRo7yPhw4dqsOHDysrK0t2u12pqamSpOnTp2vZsmVGxQQA+MmoAY1PVDoqhYlMAfhXmM2qUSlJahUVLkmyGJwHAOA7hpcep3K73XrzzTc1YcIE5eXlqVOnTt598fHxcrvdKisrMzAhAMDXrh3fSzHR4WfcFx9r19WXXRTgRAAAADCLoFqy9o9//
KOio6N1yy23aMWKFT45ZlZWlk+OAwBmM2LECJ8d60LPtbeMj9fH35fqUHGtd1u3hAilXxyn3TlbLzQeADRLjcPhvV2/fr1PjhlM51oAMKvGzrVBU3rMmzdP+/bt0wsvvCCr1aqkpCQdPnzYu7+kpERWq1VxcXHndNyUlBTZ7XZfxwUAnMIX59rJE6VPvt2jZWv26upLL1La6O4+yQYAzRW5/DOp3KlIu92nZYWv8HMtADNYm52v97/cpevG99LIJi5z9oWgKD2eeeYZZWVl6aWXXlJERP0EUikpKaqpqdG6deuUmpqqt956S1dddZXBSQEA/jT50os0+VIuZwEAADCrRRnblXvwqKodztAoPXbu3KkXX3xR3bt31/Tp0yVJnTt31vPPP6/58+drzpw5cjgcSk5O1lNPPWVwWgAAAAAAcL6qa5wNbv3N8NKjd+/e2rFjxxn3DR8+XIsXLw5wIgAAAAAAYAZBtXoLAAAAYJSoyLAGtwCAlo/SAwAAAJB0c1o/DerZXjen9TM6CgDAR6ixAQAAfqTG4dQXGw5q/bYCudweDerZXpNGdVVMdITR0eBHIwckBmRSPaMVH63WwcIKxbaKUPekWFksFqMjAYDfUHoAAACcoqisWo/87RsdLqr0blu3rUDvf7lTf/zPS3VRpzYGpgPO39EKh55/b5O+y8qTx1O/rWtijH593WAN6tne2HAA4Cdc3gIAAHCK/31zQ4PC44SjFbX60/+tlcvtMSAVcGFq61ya9cK3WrPlZOEhSfvzyzXnpTXK2V9qXDgA8CNKDwAAgOMOFJRr866is+7PK6pUZk5hABMBvrFq4yHtzTt2xn11TrfeWnHm1RTR8q3NztdDC1drbXa+0VEAQ3B5CwAAJrJhe6E+/jpXuQePKsoepsuGdNK143qqTWu70dFahIOF5U0+50BBuUb06xiANIDv/NDEB9712wpU53QrPIzfiZpJRXWdXvpgi/JLqlReWRsSc9YguHk8Hrk9gR0xSekBAIBJvP/FLr2yZKv3cVmFQ++t3KlVGw9q3r2Xq31clIHpWobmTFTKZKZoieqc7kb3uz2Sy+1WOAPBm6WorFpLv9mjLbuKZLFIw/sm6KeXXqS4mOAomD0ej95akaN/r9wpR51LkrQvv1x/WbRe99wwRJF2PgYisFwut97/cpc++WaPio7WSJJKy2t0uKhCndq39ut7c1YDAMAEDhdV6NWlW8+4r7C0Wv/8OCvAiVqm/he1U4e2Zy+H7OE2jUpJCmAiwDf6d49vdH+P5DaKjOCDcHPk7C/VvU9/ofdW7tSO/aXavq9U/1q+Q795+gvtzz/zJUSB9t7KnfpXxnZv4XHClxsO6ulF6w1KhVDl8Xj0zL826LVPtnkLD0mqdrh0/4KvdfhIhV/fn9IDAAATWLnugBobLbpmS54qqusCF6iFslkt+q/rBstmPfMSnrenD1TrqPAApwIu3JWjuqlVI393rxvfK4BpWi6X26N5r69T5RnOp2UVDj3z5gZ5zmPovtvtkdPlVm2dS9UOpyqr63SsslZl5Q4VH63WkdJqFZRU6XBRhQ4Wlmtf/jHtOXxUuw6WKWd/qbbvLdHW3cXakluktdn5evuznLO+1/db85V7sOycMwLna/OuIq3KPHTGfeVVtXrtk21+fX/qXAAATKC4rKbR/S63R0crHHxgb4aRAxL1P/91md5asUObdh6RxyP17dpWN0zsrUsY5YEWKi7Grrl3XqI/vfqDSo45vNutFumXkwdo3PD/v717D4q6/vc4/trlpoRKoJaXtKP9MDmeBOVWP/GkdFy0QETL0cDGZqwc7TY6XabmaGqdLmQzmQ1ank41WZlNWabicczjNKlH/KmY6E+HAtQQCm8/cFxg+Zw/zD2SICJ748vz8c/ud7+39+66r8U3n++H/n6sruPYd6RKVafOt7i+5PhZPfIfWxRkt6vRGLkajRobjRobG9XYePESosbGyx7/YxsfT3Ggv/29SoP7R
/r2pOi0/udvx6+6fudPFbpQ1+C10WY0PQAAsIDeUeFXXR8cZNeNAXKteUfwz4OitfjRu1Tf4FKjuXhZC9DR3T4wSu+/8G/acaBC5ZX/UPfwUI2K66eo7l38XVqHUdHMn7P+s5PVLTdFAoWvmyzo3FobaepqNLrgdNH0AAAALUtLuEWf//ff5Wps/ifZ1Li+Cu/CKI+2Cgmm2QFrCQkO0uh4RnVcrxu7td4gurFbmLqEBstut8lutynoT7d2m01BQX/cutfZZbfrj9s/bfOnbZscy25TkM0mu3sbu1yNjVq9+bAaGlrubAz/S09PvizAVQ28ubt2HKhocX1ktzB1u8F7k4TT9AAAwAJ6R4Xrsew7tHzt/ivW9e8doYczhvmhKgCwloTYm9QtPET/ON/8b657R4Vr5fP3tDgvkK8461wtzusR95deihlwo48rQmc2Lnmg1m49qgZX839FasKdt3r1M8NEpgAAWET6nbfqjcdTNTq+n6K7h6lrWJDSEm5R3hOjA+bPKAJARxYWEqS598fJ3sx/0EKC7Xri/ji/NzwkaZrjdmX96+ArakmMvUnPPpQom83/NaLz6HVjV83PGangoCvbDynDbtaUtBivnt9mrmd64Q7A6XTqp59+0rBhwxQWxg96AOANZC0AeB9ZG3gO/XJKX35/VP9bfFLGSF1Cg/Ta3FQN6tfD36U1cercBT355jadqXGq141d9Z8vjvN3SejEqk6dV8GuMq3bXiJnnUtR3bvov/59nNebcIz0AAAAAIA2GPpPUXrx4WT1ib5BkhTVvUvANTykaei4WgAADctJREFUi3VFR16ch6S7F+dMAK5F76hw5Y4fqug/Jk/uEhrkk1FHND0AAAAA4Dp07RLc5DYQPei4Xf8yuKcedNzu71IAvwjcTycAAAAABLAHHbfrq20lmnT3YH+X0qLE2JuVGHuzv8sA/IamBwAAAABcBxoKQNv5eoQUl7cAAAAAAACf8PUlV4z0AAAAAAAAPuHrEVKM9AAAAAAAAJZE0wMAAAAAAFgSTQ8AAAAAAGBJND0AAAAAAIAl0fQAAAAAAACWRNMDAAAAAABYEk0PAAAAAABgSTQ9AAAAAACAJdH0AAAAAAAAlhTs7wK8xRgjSaqrq/NzJQAQuEJDQ2Wz2a57f7IWAFpH1gKA97WUtZZtetTX10uSjhw54udKACBwDRs2TGFhYde9P1kLAK0jawHA+1rKWpu51Dq2mMbGRtXW1iokJKRdnXUAsLL2/vaRrAWA1pG1AOB9LWWtZZseAAAAAACgc2MiUwAAAAAAYEk0PQAAAAAAgCXR9AAAAAAAAJZE0wMAAAAAAFgSTQ8AAAAAAGBJND0AAAAAAIAl0fQAAAAAAACW1OmbHsePH1dycrK/ywDaZciQIaqtrfV3GR1OR3rdtmzZovHjxysrK0s///yzv8tpM7IWVtCRMiOQdKTXraNnrUTeouPrSJkRSDrS6+brrA32+hkAAO322Wef6YknntD48eP9XQoAWBZZCwDe5+ustdRIjz93ti8tX7p96623lJWVJYfDocLCwiv2r6ur01NPPaVXX31Vxhjl5ubqtdde07Rp05SWlqa8vDz3tmVlZXrooYeUkZGhSZMmafv27ZIuvoEvvfSSJKmoqEhDhgxRUVGRJGnhwoX6/PPPJV3sxOXn52vy5MlKS0tTQUFBu5//3r17NW3aNGVmZiozM1M//PBDu4/pafv371dubq6ys7OVnZ2tbdu2+bukKxQUFCg9PV1ZWVnKz88PyK7p5s2blZ6erokTJ2r58uX+LqdZ8+bNU3Z2tjIyMjRnzhydPXvW3yU1a9WqVZo4caIcDodHPofe8Morr2jPnj3Ky8tTbm6uv8sha8lajyBrPYOs9ZxAy1qpc+ctWesZZK3ndIS8JWtbYCzk2LFjJikp6YrlY8eOmZiYGLN161ZjjDHr1q0zU6dObbLN6dOnTU5Ojvnwww/d++fk5Jgnn3zSuFwuc+7cOZOUl
GR++eUXY4wxU6ZMMWvWrDHGGHP06FGTlJRkqqurTWlpqXE4HMYYY/Lz883UqVPNihUrjDHGjBs3zpSVlRljjImJiTEff/yxMcaYwsJCM2rUqHY999OnT5u77rrL7NmzxxhjTENDgzlz5ky7julpZ8+eNRMnTjSVlZXGGGMqKytNamqqOXv2rJ8r+3+//fZbk/f5gw8+MDExMaampsa/hV3mUo0lJSXGGGNWrlwZcDUaY0x1dbX7/tKlS80bb7zhx2qaFxMTY5YtW2aMMaakpMQkJSWZ33//3c9VNS8nJ8edYf5G1pK17UXWeg5Z61mBlLXGdN68JWs9g6z1rEDPW7K2ZZYa6XE14eHhGjNmjCQpLi5Ox44dc6+rq6vT9OnTNX36dM2YMaPJfunp6bLb7erWrZsGDx6s8vJy1dTU6NChQ5o8ebIk6bbbbtPQoUO1b98+DRw4UE6nUydPntSOHTv09NNPa8eOHaqoqFB9fb0GDBjgPvaECRPc9VRVVcnpdF7389u3b58GDx6sESNGSJKCgoLUo0eP6z6eN+zdu1fHjx/XrFmzNHHiRM2aNUs2m01lZWX+Ls1t//79io2N1a233ipJ7vc4kFyqcdCgQZKkqVOn+rmi5q1bt87dDV+/fr0OHTrk75Kadf/990uSBg0apNjYWO3bt8/PFXVsZK3/kbWeQdZ6FlnreVbOW7LWM8haz+oIeUvWNs9Sc3oEBwfLGONevjxoQ0ND3fftdrsaGhrcyyEhIRo+fLi2bt2qcePGKSgoyL0uLCzMfT8oKEgul6vVOlJSUvT999+rurpaycnJWrx4sbZt23bFpFKXjn3pfA0NDU3OZzXGGA0ZMkSffPKJv0uBlxUWFurTTz/VZ599pqioKH377bdas2aNv8uCh5C1gY2s7TzIWusjbwMXWdu5kLcdm6VGevTs2VP19fXuDuv69euvaT+bzaZXXnlFERERevrpp1VfX3/V7SMiIjR06FB99dVXkqSSkhIdPnxYcXFxki5+Mbz33nuKj4+XJI0YMULvvfee7rzzzut9aq2Ki4tTSUmJ9u7dK0lyuVwBd51ZfHy8ysrKtHPnTvdjRUVFTb7M/W348OEqLi5WeXm5JLnf40ASFxen4uJilZaWSpK++OIL/xbUjHPnzikiIkKRkZGqq6vTl19+6e+SWnSpttLSUhUXF7s/x2gZWUvWthdZ6xlkrfV11rwlaz2DrPWcjpK3ZG3zLNX0CA4O1gsvvKCZM2dqypQpTbrarbHZbFqwYIH69eunOXPmtDocLy8vT998840yMjI0f/58vf7664qKipJ08YvhxIkT7i+CS8spKSnX/+RaERkZqWXLlunVV19VRkaGsrOzdfDgQa+d73r06NFD7777rpYvX67MzEyNHz9e77zzTkB9OfTs2VMLFy7UrFmzlJWVpVOnTikkJERdu3b1d2lu0dHRWrx4sR577DFlZWW1a6i+t6SmpmrAgAFyOBzKyclRbGysv0tqkcvlUlZWlh599FEtWrRI0dHR/i4p4JG1ZG17kbWeQdZaX2fNW7LWM8haz+koeUvWNs9mAumTCQSAmpoaRURESLrYLV27dq0+/fRTP1cFANZC1gKA95G1gMXm9AA84eOPP9amTZvkcrnUo0cPLVmyxN8lAYDlkLUA4H1kLcBIDwAAAAAAYFGWmtMDAAAAAADgEpoeAAAAAADAkmh6AAAAAAAAS6LpAQAAAAAALImmB9CMsWPH6scff2x2XWFhoRwOR7uOv2vXLo0ePbpdxwCAjo6sBQDfIG/RmdH0ANooISFBBQUFPjvfsmXLNH/+/CaP5ebm6osvvvBZDQDga2QtAPgGeQuro+kBdEIul8vfJQCA5ZG1AOAb5C2uhqYHLK+iokJz585VSkqKkpOTtWjRIpWXl2vGjBlKTk5WcnKy5s2bp3PnzjXZ78CBA5owYYISExP1/PPPy+l0Srpy+N7YsWO1atUqZWRkaOTIkXrqqafc216ryspKP
f7440pJSdHYsWP10UcfSZK2b9+uFStWaOPGjYqPj1dmZqbeeustFRYWatGiRYqPj9eiRYskSSUlJZo5c6aSkpLkcDi0YcMG9/Gfe+45LViwQLNmzVJcXJx27dp1Xa8lALSErCVrAfgGeUveoo0MYGENDQ0mIyPDvPzyy6a2ttZcuHDB7N6925SWlpoffvjBOJ1OU11dbaZPn26WLFni3m/MmDHm3nvvNb/++qs5ffq0mTp1qlm6dKkxxpidO3ea1NTUJttOnjzZnDx50pw+fdqkp6eb1atXX7Wuy4/hcrnMpEmTzLJly4zT6TTl5eVm7NixZvv27cYYY95++20zb968Jvvn5OSYNWvWuJdra2vN6NGjzdq1a019fb05ePCgSUpKMkePHjXGGPPss8+aESNGmMLCQuNyucyFCxfa8aoCQFNkLVkLwDfIW/IWbcdID1haUVGRqqqq9Mwzzyg8PFxhYWFKSEjQwIED9de//lWhoaGKiorSzJkztXv37ib7Pvjgg+rTp48iIyM1e/Zsfffddy2eJzc3VzfddJMiIyM1ZswYHTp06JprPHDggE6dOqW5c+cqNDRUt9xyix544IEm3ezWbNu2Tf369dPkyZMVHBys2NhYORwObdq0yb1NWlqaRo4cKbvdrrCwsGs+NgC0hqwlawH4BnlL3qLtgv1dAOBNFRUV6tu3r4KDm/5T//333/Xyyy+rsLBQtbW1Msaoe/fuTbbp06eP+37fvn1VVVXV4nl69erlvt+1a9erbvtnJ06cUFVVlRISEtyPuVyuJsvXcoyioqIrjpGZmelevvz5AIAnkbVkLQDfIG/JW7QdTQ9YWp8+fVRRUaGGhoYmXw5Lly6VzWbTt99+q8jISG3ZssV9/eAlFRUV7vu//vqrevfu7bUa+/fvr82bNze73mazXdMxEhMT9cEHH3i6PABoFVkLAL5B3gJtx+UtsLQ77rhDvXr10ptvvqnz58/L6XRqz549qq2tVXh4uLp166bKykq9//77V+y7evVqnTx5UmfOnFF+fr4mTJjgtRpvuOEGrVy5UhcuXJDL5dKRI0dUVFQkSYqOjtaJEyfU2Njo3qdnz546duyYe/nuu+9WaWmpvv76a9XX16u+vl5FRUUqKSnxSs0AcDmylqwF4BvkLXmLtqPpAUsLCgpSfn6+ysrKNGbMGI0ePVobN27U3LlzVVxcrISEBD3yyCMaN27cFfved999evjhh3XPPfdowIABmj17tldrPHz4sNLS0pSSkqIXX3xRNTU1kqT09HRJUnJysiZNmiRJmjFjhgoKCpSYmKglS5YoIiJCq1at0oYNG5SamqpRo0YpLy9PdXV1XqkZAC5H1pK1AHyDvCVv0XY2Y4zxdxEAAAAAAACexkgPAAAAAABgSUxkCnhJfn6+VqxYccXjI0eObPY6SwBA25G1AOAb5C06Ki5vAQAAAAAAlsTlLQAAAAAAwJJoegAAAAAAAEui6QEAAAAAACyJpgcAAAAAALAkmh4AAAAAAMCS/g9PVxJwqFN9UAAAAABJRU5ErkJggg==\n","text/plain":["<Figure size 1080x360 with 3 Axes>"]},"metadata":{"tags":[]}}]},{"cell_type":"markdown","metadata":{"id":"i8mLg6-V26ac"},"source":["## Parse out additional information"]},{"cell_type":"markdown","metadata":{"id":"dwAXlONO-qOk"},"source":["Marital 
status"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3PPoVQBh35iz","executionInfo":{"status":"ok","timestamp":1613092600517,"user_tz":300,"elapsed":62047,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"f5b0582d-58aa-42ea-c6fd-0c27ef3d5230"},"source":["import re\n","\n","\n","def parse_surname(txt):\n","    \"\"\"Return the title found in a passenger name, or 'unknown'.\n","\n","    The name is lower-cased and searched for 'mrs.', 'mr.', 'miss.' and\n","    'master.' as substrings; if several occur, the one tested last wins.\n","    \"\"\"\n","    lowered = txt.lower()\n","    marital_status = \"unknown\"\n","    for title in ('mrs', 'mr', 'miss', 'master'):\n","        if title + '.' in lowered:\n","            marital_status = title\n","    return marital_status\n","\n","\n","def train_death_stats(column, value):\n","    \"\"\"Return (died, total, death_rate) for training rows where data[column] == value.\n","\n","    Only rows with train_test == 'train' are counted; a row counts as died\n","    when Survived == 0. Raises ZeroDivisionError if no training row matches.\n","    \"\"\"\n","    in_group = (data[column] == value) & (data['train_test'] == 'train')\n","    total = int(in_group.sum())\n","    died = int((in_group & (data['Survived'] == 0)).sum())\n","    return died, total, died / total\n","\n","\n","data['surname'] = data['Name'].apply(parse_surname)\n","\n","died_mrs, mrs, death_rate_mrs = train_death_stats('surname', 'mrs')\n","print(\"Total Mrs. Surname: \" + str(mrs) + \"\\n\" + \"Mrs. Death Rate: \" + str(death_rate_mrs) + \"\\n\")\n","\n","died_mr, mr, death_rate_mr = train_death_stats('surname', 'mr')\n","print(\"Total Mr. Surname: \" + str(mr) + \"\\n\" + \"Mr. Death Rate: \" + str(death_rate_mr) + \"\\n\")\n","\n","died_miss, miss, death_rate_miss = train_death_stats('surname', 'miss')\n","print(\"Total Miss.
Surname: \" + str(miss) + \"\\n\" + \"Miss Death Rate: \" + str(death_rate_miss) + \"\\n\")\n","\n","died_master, master, death_rate_master = train_death_stats('surname', 'master')\n","print(\"Total Master Surname: \" + str(master) + \"\\n\" + \"Master Death Rate: \" + str(death_rate_master))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Total Mrs. Surname: 125\n","Mrs. Death Rate: 0.208\n","\n","Total Mr. Surname: 517\n","Mr. Death Rate: 0.8433268858800773\n","\n","Total Miss. Surname: 182\n","Miss Death Rate: 0.3021978021978022\n","\n","Total Master Surname: 40\n","Master Death Rate: 0.425\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WWh0VekxQXiu","executionInfo":{"status":"ok","timestamp":1613092600518,"user_tz":300,"elapsed":62041,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"846378e0-a4f3-473c-8a83-f8873765afe8"},"source":["# train_death_stats is defined in the previous cell.\n","died_male, male, death_rate_male = train_death_stats('Sex', 'male')\n","print(\"Total Males: \" + str(male) + \"\\n\" + \"Male Death Rate: \" + str(death_rate_male) + \"\\n\")\n","\n","died_female, female, death_rate_female = train_death_stats('Sex', 'female')\n","print(\"Total Females: \" + str(female) + \"\\n\" + \"Female Death Rate: \" + str(death_rate_female))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Total Males:
577\n","Male Death Rate: 0.8110918544194108\n","\n","Total Females: 314\n","Female Death Rate: 0.25796178343949044\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"v0MDfY3vL9G1"},"source":["Family size"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":390},"id":"5Conos0sL-gd","executionInfo":{"status":"ok","timestamp":1613092600519,"user_tz":300,"elapsed":62035,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"b5aa56e1-cbb4-4afd-f3c0-3c27a084b74e"},"source":["data['family_size'] = data['SibSp'].astype(int) + data['Parch'].astype(int) + 1\n","# The Sage family apparently has 11 people on the cruise\n","# The father is Mr. John George Sage since he lists 9 children\n","# The mother is Mrs. Annie Bullen Sage since she lists 9 children\n","# lots of the kids use a surname of Mr. and only one person has an age listed\n","data[['Name','family_size', 'SibSp', 'Parch', 'Age']][data['family_size'] == 11]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Name</th>\n"," <th>family_size</th>\n"," <th>SibSp</th>\n"," <th>Parch</th>\n"," <th>Age</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>159</th>\n"," <td>Sage, Master. Thomas Henry</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>180</th>\n"," <td>Sage, Miss.
Constance Gladys</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>201</th>\n"," <td>Sage, Mr. Frederick</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>324</th>\n"," <td>Sage, Mr. George John Jr</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>792</th>\n"," <td>Sage, Miss. Stella Anna</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>846</th>\n"," <td>Sage, Mr. Douglas Bullen</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>863</th>\n"," <td>Sage, Miss. Dorothy Edith \"Dolly\"</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>1079</th>\n"," <td>Sage, Miss. Ada</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>1233</th>\n"," <td>Sage, Mr. John George</td>\n"," <td>11</td>\n"," <td>1</td>\n"," <td>9</td>\n"," <td>NaN</td>\n"," </tr>\n"," <tr>\n"," <th>1251</th>\n"," <td>Sage, Master. William Henry</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>14.5</td>\n"," </tr>\n"," <tr>\n"," <th>1256</th>\n"," <td>Sage, Mrs. John (Annie Bullen)</td>\n"," <td>11</td>\n"," <td>1</td>\n"," <td>9</td>\n"," <td>NaN</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Name family_size SibSp Parch Age\n","159 Sage, Master. Thomas Henry 11 8 2 NaN\n","180 Sage, Miss. Constance Gladys 11 8 2 NaN\n","201 Sage, Mr. Frederick 11 8 2 NaN\n","324 Sage, Mr. George John Jr 11 8 2 NaN\n","792 Sage, Miss. Stella Anna 11 8 2 NaN\n","846 Sage, Mr. Douglas Bullen 11 8 2 NaN\n","863 Sage, Miss. Dorothy Edith \"Dolly\" 11 8 2 NaN\n","1079 Sage, Miss. Ada 11 8 2 NaN\n","1233 Sage, Mr. John George 11 1 9 NaN\n","1251 Sage, Master. William Henry 11 8 2 14.5\n","1256 Sage, Mrs. 
John (Annie Bullen) 11 1 9 NaN"]},"metadata":{"tags":[]},"execution_count":9}]},{"cell_type":"markdown","metadata":{"id":"DqJv5L3U-s1f"},"source":["## Impute missing values"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jGQ0fDjNXxAK","executionInfo":{"status":"ok","timestamp":1613092600519,"user_tz":300,"elapsed":62028,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"5de7d709-8c4f-44b9-dd60-960769d6ae3e"},"source":["data.groupby('surname')['Age'].mean()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["surname\n","master 5.482642\n","miss 21.774238\n","mr 32.252151\n","mrs 36.994118\n","unknown 42.656250\n","Name: Age, dtype: float64"]},"metadata":{"tags":[]},"execution_count":10}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"05--52Qn3Bim","executionInfo":{"status":"ok","timestamp":1613092600520,"user_tz":300,"elapsed":62021,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"4e2fda79-d805-482c-8997-120bab95a5a2"},"source":["print(\"Missing ages: \" + str(sum(data['Age'].isnull())))\n","data['Age'] = data['Age'].astype(float)\n","# Age used to fill a missing value, keyed by the parsed title.\n","# NOTE(review): these are fixed constants, not the group means shown by the\n","# previous cell (e.g. 'unknown' averages 42.66 but 46 is used) - confirm intent.\n","fill_age_by_title = {'mr': 33, 'mrs': 36, 'master': 5, 'miss': 22, 'unknown': 46}\n","for title, fill_age in fill_age_by_title.items():\n","    needs_fill = data['Age'].isnull() & (data['surname'] == title)\n","    data.loc[needs_fill, 'Age'] = fill_age\n","print(\"Missing ages: \" + str(sum(data['Age'].isnull())))\n","#sns.catplot(x='Survived', y=\"Age\", kind=\"box\", dodge=False, height = 5, aspect =
1.5,data=data);"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Missing ages: 263\n","Missing ages: 0\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/","height":390},"id":"zI6bMe7MOGdu","executionInfo":{"status":"ok","timestamp":1613092600521,"user_tz":300,"elapsed":62015,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"83f3926b-c153-4868-d90e-88c58569cfdb"},"source":["# Re-display the 11-person family to check the ages that were just filled in.\n","data.loc[data['family_size'] == 11, ['Name', 'family_size', 'SibSp', 'Parch', 'Age']]"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Name</th>\n"," <th>family_size</th>\n"," <th>SibSp</th>\n"," <th>Parch</th>\n"," <th>Age</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>159</th>\n"," <td>Sage, Master. Thomas Henry</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>5.0</td>\n"," </tr>\n"," <tr>\n"," <th>180</th>\n"," <td>Sage, Miss. Constance Gladys</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>22.0</td>\n"," </tr>\n"," <tr>\n"," <th>201</th>\n"," <td>Sage, Mr. Frederick</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>33.0</td>\n"," </tr>\n"," <tr>\n"," <th>324</th>\n"," <td>Sage, Mr. George John Jr</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>33.0</td>\n"," </tr>\n"," <tr>\n"," <th>792</th>\n"," <td>Sage, Miss.
Stella Anna</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>22.0</td>\n"," </tr>\n"," <tr>\n"," <th>846</th>\n"," <td>Sage, Mr. Douglas Bullen</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>33.0</td>\n"," </tr>\n"," <tr>\n"," <th>863</th>\n"," <td>Sage, Miss. Dorothy Edith \"Dolly\"</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>22.0</td>\n"," </tr>\n"," <tr>\n"," <th>1079</th>\n"," <td>Sage, Miss. Ada</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>22.0</td>\n"," </tr>\n"," <tr>\n"," <th>1233</th>\n"," <td>Sage, Mr. John George</td>\n"," <td>11</td>\n"," <td>1</td>\n"," <td>9</td>\n"," <td>33.0</td>\n"," </tr>\n"," <tr>\n"," <th>1251</th>\n"," <td>Sage, Master. William Henry</td>\n"," <td>11</td>\n"," <td>8</td>\n"," <td>2</td>\n"," <td>14.5</td>\n"," </tr>\n"," <tr>\n"," <th>1256</th>\n"," <td>Sage, Mrs. John (Annie Bullen)</td>\n"," <td>11</td>\n"," <td>1</td>\n"," <td>9</td>\n"," <td>36.0</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Name family_size SibSp Parch Age\n","159 Sage, Master. Thomas Henry 11 8 2 5.0\n","180 Sage, Miss. Constance Gladys 11 8 2 22.0\n","201 Sage, Mr. Frederick 11 8 2 33.0\n","324 Sage, Mr. George John Jr 11 8 2 33.0\n","792 Sage, Miss. Stella Anna 11 8 2 22.0\n","846 Sage, Mr. Douglas Bullen 11 8 2 33.0\n","863 Sage, Miss. Dorothy Edith \"Dolly\" 11 8 2 22.0\n","1079 Sage, Miss. Ada 11 8 2 22.0\n","1233 Sage, Mr. John George 11 1 9 33.0\n","1251 Sage, Master. William Henry 11 8 2 14.5\n","1256 Sage, Mrs. 
John (Annie Bullen) 11 1 9 36.0"]},"metadata":{"tags":[]},"execution_count":12}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"jWuSOix4HFtT","executionInfo":{"status":"ok","timestamp":1613092600521,"user_tz":300,"elapsed":62008,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"c04c1b74-6aa9-45f6-ee9b-f3abb78b9794"},"source":["print(\"Missing Embarked Values: \" + str(sum(data['Embarked'].isnull())))\n","# Replace with the most common value since there are only 2 values missing.\n","# The filled column is assigned back instead of calling\n","# fillna(..., inplace=True) on data['Embarked']: the in-place call acts on an\n","# intermediate object and, under pandas copy-on-write, leaves `data` unchanged.\n","data['Embarked'] = data['Embarked'].fillna(\"S\")\n","print(\"Missing Embarked Values: \" + str(sum(data['Embarked'].isnull())))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Missing Embarked Values: 2\n","Missing Embarked Values: 0\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"h2jNwCxDLQ-j","executionInfo":{"status":"ok","timestamp":1613092600522,"user_tz":300,"elapsed":62003,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"5e606ef5-f3c4-4117-b5de-540045bd1d7f"},"source":["print(\"Missing Fares: \" + str(sum(data['Fare'].isnull())))\n","data['Fare'] = data['Fare'].astype(float)\n","# Median fare of each passenger class, broadcast to every row of that class.\n","data['Fare_median'] = data.groupby(['Pclass'])['Fare'].transform('median')\n","# A missing fare takes the median fare of the passenger's class.\n","data['Fare'] = data['Fare'].fillna(data['Fare_median'])\n","print(\"Missing Fares: \" + str(sum(data['Fare'].isnull())))"],"execution_count":null,"outputs":[{"output_type":"stream","text":["Missing Fares: 1\n","Missing Fares: 0\n"],"name":"stdout"}]},{"cell_type":"markdown","metadata":{"id":"qX72fexvLmm_"},"source":["## Bin continuous variables into
categories"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"pCRQTVTjLql5","executionInfo":{"status":"ok","timestamp":1613092600523,"user_tz":300,"elapsed":61997,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"de477675-0d5e-43e8-eb66-cfed0bc7df3a"},"source":["bins = [0, 15, 25, 35, 60, 81] # binning\n","bin_names = ['child','young','adult','middle','senior'] # label tagging\n","data['Age_bin'] = pd.cut(data['Age'],\n","                         bins = bins,\n","                         labels=bin_names,\n","                         include_lowest = True)\n","data['Age_bin'].value_counts()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["adult 457\n","young 378\n","middle 318\n","child 123\n","senior 33\n","Name: Age_bin, dtype: int64"]},"metadata":{"tags":[]},"execution_count":15}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"MB2B8lS0ZGXa","executionInfo":{"status":"ok","timestamp":1613092600886,"user_tz":300,"elapsed":62354,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"116d867d-0773-4d67-9d9b-9e6aeced31f0"},"source":["# A passenger is alone when family_size (SibSp + Parch + 1) equals 1.\n","# Assigning the comparison directly replaces the chained form\n","# data['alone'][mask] = True, which raised SettingWithCopyWarning and may\n","# write to a temporary copy instead of `data`.\n","data['alone'] = data['family_size'] == 1\n","data['alone'].value_counts()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["True 790\n","False 519\n","Name: alone, dtype:
int64"]},"metadata":{"tags":[]},"execution_count":16}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"3rn74OFaZs8W","executionInfo":{"status":"ok","timestamp":1613092600887,"user_tz":300,"elapsed":62351,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"7935a36c-bcc0-4c7f-b18a-cd3619d12135"},"source":["# The top bin is open-ended: with an upper edge of 100, fares above 100 fell\n","# outside every interval and pd.cut left fare_bin as NaN for those rows\n","# (the three counts then summed to 1225 of 1309 passengers).\n","bins = [0, 8, 30, np.inf] # binning\n","bin_names = ['economy','business','first_class'] # label tagging\n","data['fare_bin'] = pd.cut(data['Fare'],\n","                          bins = bins,\n","                          labels=bin_names,\n","                          include_lowest = True)\n","data['fare_bin'].value_counts()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["business 606\n","economy 360\n","first_class 343\n","Name: fare_bin, dtype: int64"]},"metadata":{"tags":[]},"execution_count":17}]},{"cell_type":"markdown","metadata":{"id":"8uGStVSmL61d"},"source":["## One-hot encode categorical variables"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"WXIjEWOsRfMe","executionInfo":{"status":"ok","timestamp":1613092600888,"user_tz":300,"elapsed":62348,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"069af06e-6bd7-4ef7-dae5-63ac721e55b3"},"source":["# One-hot encode the categorical variables\n","cat_vars = ['Age_bin', 'surname', 'Embarked', 'Sex', 'Pclass', 'alone', 'fare_bin']\n","for var in cat_vars:\n","    # dtype is pinned to uint8 (the older pandas default) so the indicator\n","    # columns stay numeric on pandas versions where get_dummies returns bool.\n","    dummies = pd.get_dummies(data[var], prefix=var, dtype='uint8')\n","    data = data.join(dummies)\n","\n","# Keep everything except the original categorical columns.\n","data_vars = data.columns.values.tolist()\n","to_keep = [i for i in data_vars if i not in
cat_vars]\n","\n","data_final = data[to_keep]\n","data_final.columns.values"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["array(['index', 'PassengerId', 'Survived', 'Name', 'Age', 'SibSp',\n"," 'Parch', 'Ticket', 'Fare', 'Cabin', 'train_test', 'cabin_letter',\n"," 'family_size', 'Fare_median', 'Age_bin_child', 'Age_bin_young',\n"," 'Age_bin_adult', 'Age_bin_middle', 'Age_bin_senior',\n"," 'surname_master', 'surname_miss', 'surname_mr', 'surname_mrs',\n"," 'surname_unknown', 'Embarked_C', 'Embarked_Q', 'Embarked_S',\n"," 'Sex_female', 'Sex_male', 'Pclass_1', 'Pclass_2', 'Pclass_3',\n"," 'alone_False', 'alone_True', 'fare_bin_economy',\n"," 'fare_bin_business', 'fare_bin_first_class'], dtype=object)"]},"metadata":{"tags":[]},"execution_count":18}]},{"cell_type":"markdown","metadata":{"id":"B5NaeogAVFyF"},"source":["## Scale numeric values"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"XEwxI7KuVE5L","executionInfo":{"status":"ok","timestamp":1613092600888,"user_tz":300,"elapsed":62343,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"58ce28ef-edf9-477a-b8e6-e759aca8c555"},"source":["import sklearn.preprocessing\n","from sklearn.preprocessing import StandardScaler\n","from sklearn.preprocessing import MinMaxScaler\n","sc = StandardScaler()\n","\n","numeric_vars = ['family_size', 'Age', 'Fare']\n","\n","# Only the numerical features are standardized (zero mean, unit variance).\n","# NOTE(review): data_final still holds train and test rows, so the scaler is\n","# fitted on both together - confirm that is intended.\n","features = data_final[numeric_vars]\n","features_standard = sc.fit_transform(features)  # Gaussian standardisation\n","temp = pd.DataFrame(features_standard, columns=numeric_vars)\n","temp.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n","
vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>family_size</th>\n"," <th>Age</th>\n"," <th>Fare</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0.073352</td>\n"," <td>-0.605528</td>\n"," <td>-0.503176</td>\n"," </tr>\n"," <tr>\n"," <th>1</th>\n"," <td>0.073352</td>\n"," <td>0.605797</td>\n"," <td>0.734809</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>-0.558346</td>\n"," <td>-0.302696</td>\n"," <td>-0.490126</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>0.073352</td>\n"," <td>0.378674</td>\n"," <td>0.383263</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>-0.558346</td>\n"," <td>0.378674</td>\n"," <td>-0.487709</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" family_size Age Fare\n","0 0.073352 -0.605528 -0.503176\n","1 0.073352 0.605797 0.734809\n","2 -0.558346 -0.302696 -0.490126\n","3 0.073352 0.378674 0.383263\n","4 -0.558346 0.378674 -0.487709"]},"metadata":{"tags":[]},"execution_count":19}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"iE2NhbpmV1YI","executionInfo":{"status":"ok","timestamp":1613092600889,"user_tz":300,"elapsed":62339,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"2f463e58-fd2e-426a-bbcf-d3101c231e82"},"source":["cat_data = data_final[['Survived', 'train_test', 'Age_bin_child', 'Pclass_1', 'Pclass_2', 'Pclass_3',\n"," 'Age_bin_young', 'Age_bin_adult', 'Age_bin_middle', 'Age_bin_senior',\n"," 'surname_master', 'surname_miss', 'surname_mr', 'surname_mrs', \n"," 'surname_unknown', 'Embarked_C', 'Embarked_Q', 'Embarked_S', 'Sex_female', 'Sex_male',\n"," 'alone_False', 
'alone_True', 'fare_bin_economy', 'fare_bin_business', 'fare_bin_first_class']]\n","data_final_after_standardizing = pd.concat([cat_data.reset_index(drop=True), temp], axis=1)\n","data_final = data_final_after_standardizing\n","data_final.head()"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/html":["<div>\n","<style scoped>\n"," .dataframe tbody tr th:only-of-type {\n"," vertical-align: middle;\n"," }\n","\n"," .dataframe tbody tr th {\n"," vertical-align: top;\n"," }\n","\n"," .dataframe thead th {\n"," text-align: right;\n"," }\n","</style>\n","<table border=\"1\" class=\"dataframe\">\n"," <thead>\n"," <tr style=\"text-align: right;\">\n"," <th></th>\n"," <th>Survived</th>\n"," <th>train_test</th>\n"," <th>Age_bin_child</th>\n"," <th>Pclass_1</th>\n"," <th>Pclass_2</th>\n"," <th>Pclass_3</th>\n"," <th>Age_bin_young</th>\n"," <th>Age_bin_adult</th>\n"," <th>Age_bin_middle</th>\n"," <th>Age_bin_senior</th>\n"," <th>surname_master</th>\n"," <th>surname_miss</th>\n"," <th>surname_mr</th>\n"," <th>surname_mrs</th>\n"," <th>surname_unknown</th>\n"," <th>Embarked_C</th>\n"," <th>Embarked_Q</th>\n"," <th>Embarked_S</th>\n"," <th>Sex_female</th>\n"," <th>Sex_male</th>\n"," <th>alone_False</th>\n"," <th>alone_True</th>\n"," <th>fare_bin_economy</th>\n"," <th>fare_bin_business</th>\n"," <th>fare_bin_first_class</th>\n"," <th>family_size</th>\n"," <th>Age</th>\n"," <th>Fare</th>\n"," </tr>\n"," </thead>\n"," <tbody>\n"," <tr>\n"," <th>0</th>\n"," <td>0.0</td>\n"," <td>train</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0.073352</td>\n"," <td>-0.605528</td>\n"," <td>-0.503176</td>\n"," </tr>\n"," <tr>\n"," 
<th>1</th>\n"," <td>1.0</td>\n"," <td>train</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0.073352</td>\n"," <td>0.605797</td>\n"," <td>0.734809</td>\n"," </tr>\n"," <tr>\n"," <th>2</th>\n"," <td>1.0</td>\n"," <td>train</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>-0.558346</td>\n"," <td>-0.302696</td>\n"," <td>-0.490126</td>\n"," </tr>\n"," <tr>\n"," <th>3</th>\n"," <td>1.0</td>\n"," <td>train</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0.073352</td>\n"," <td>0.378674</td>\n"," <td>0.383263</td>\n"," </tr>\n"," <tr>\n"," <th>4</th>\n"," <td>0.0</td>\n"," <td>train</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>1</td>\n"," <td>0</td>\n"," <td>-0.558346</td>\n"," 
<td>0.378674</td>\n"," <td>-0.487709</td>\n"," </tr>\n"," </tbody>\n","</table>\n","</div>"],"text/plain":[" Survived train_test Age_bin_child ... family_size Age Fare\n","0 0.0 train 0 ... 0.073352 -0.605528 -0.503176\n","1 1.0 train 0 ... 0.073352 0.605797 0.734809\n","2 1.0 train 0 ... -0.558346 -0.302696 -0.490126\n","3 1.0 train 0 ... 0.073352 0.378674 0.383263\n","4 0.0 train 0 ... -0.558346 0.378674 -0.487709\n","\n","[5 rows x 28 columns]"]},"metadata":{"tags":[]},"execution_count":20}]},{"cell_type":"markdown","metadata":{"id":"xWOmvxLrXDOw"},"source":["## Recursive Feature Elimination"]},{"cell_type":"code","metadata":{"id":"gYwpgvZ_XRSO"},"source":["# Training rows only; X_train drops the label and the train/test marker,\n","# y_train keeps 'Survived' as a one-column DataFrame.\n","train_rows = data_final[data_final['train_test'] == \"train\"]\n","X_train = train_rows.drop(columns=['Survived', 'train_test'])\n","y_train = train_rows[['Survived']]"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Q056M40IXCOy","executionInfo":{"status":"ok","timestamp":1613092602160,"user_tz":300,"elapsed":63603,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"3ebd8ab0-11c5-40a6-f959-4d1633585f1d"},"source":["import warnings\n","from itertools import compress\n","\n","import
statsmodels.api as sm\n","from sklearn.feature_selection import RFE\n","from sklearn.linear_model import LogisticRegression\n","\n","# NOTE(review): this silences every warning from here on, including\n","# convergence warnings from the fits below.\n","warnings.filterwarnings('ignore')\n","\n","data_final_vars = data_final.columns.values.tolist()\n","logreg = LogisticRegression()\n","# n_features_to_select is passed by keyword: recent scikit-learn releases\n","# reject it as a positional argument.\n","rfe = RFE(logreg, n_features_to_select=16)\n","rfe = rfe.fit(X_train, y_train.values.ravel())\n","print(rfe.support_)\n","print(rfe.ranking_)\n","\n","# Keep only the columns RFE marked as selected.\n","cols = list(compress(X_train.columns, rfe.support_))\n","X = X_train[cols]\n","y = y_train['Survived']\n","\n","# Implement the model\n","# NOTE(review): the stored summary shows nan / very large standard errors for\n","# the Sex_* and surname_* terms - worth checking whether keeping every level of\n","# a one-hot set makes those columns collinear.\n","logit_model = sm.Logit(y, X)\n","result = logit_model.fit()\n","print(result.summary2())"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[False True False True True False False False True True True True\n"," True False False True True True False True False False True True\n"," True True]\n","[ 6 1 10 1 1 7 11 4 1 1 1 1 1 8 9 1 1 1 3 1 2 5 1 1\n"," 1 1]\n","Optimization terminated successfully.\n"," Current function value: 0.402754\n"," Iterations 19\n"," Results: Logit\n","=======================================================================================\n","Model: Logit Pseudo R-squared: 0.395 \n","Dependent Variable: Survived AIC: 747.7080 \n","Date: 2021-02-12 01:16 BIC: 819.5932 \n","No. Observations: 891 Log-Likelihood: -358.85 \n","Df Model: 14 LL-Null: -593.33 \n","Df Residuals: 876 LLR p-value: 3.4984e-91\n","Converged: 1.0000 Scale: 1.0000 \n","No. Iterations: 19.0000 \n","---------------------------------------------------------------------------------------\n"," Coef. Std.Err.
z P>|z| [0.025 0.975] \n","---------------------------------------------------------------------------------------\n","Pclass_1 0.8267 0.3461 2.3883 0.0169 0.1483 1.5051\n","Pclass_3 -1.0152 0.2563 -3.9610 0.0001 -1.5175 -0.5128\n","Age_bin_young -0.3577 0.2468 -1.4495 0.1472 -0.8414 0.1260\n","surname_master 33.2789 18908497.6617 0.0000 1.0000 -37059941.1399 37060007.6976\n","surname_miss -41.2830 nan nan nan nan nan\n","surname_mr 30.1523 18908497.6617 0.0000 1.0000 -37059944.2665 37060004.5710\n","surname_mrs -40.4684 nan nan nan nan nan\n","surname_unknown 30.1393 18908497.6617 0.0000 1.0000 -37059944.2794 37060004.5580\n","Embarked_S -0.3726 0.2173 -1.7150 0.0863 -0.7985 0.0532\n","Sex_female 43.0897 nan nan nan nan nan\n","Sex_male -31.2708 18908497.6617 -0.0000 1.0000 -37060005.6895 37059943.1480\n","alone_True -0.3482 0.3071 -1.1338 0.2569 -0.9501 0.2537\n","fare_bin_first_class 0.4847 0.2806 1.7275 0.0841 -0.0652 1.0346\n","family_size -0.9611 0.1801 -5.3373 0.0000 -1.3141 -0.6082\n","Age -0.4611 0.1407 -3.2772 0.0010 -0.7369 -0.1853\n","Fare 0.2208 0.1388 1.5906 0.1117 -0.0513 0.4928\n","=======================================================================================\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"id":"oFeduKzxaaNN"},"source":["# remove_cols = ['Sex_male', 'surname_mr', 'surname_master', 'surname_unknown']\n","# new_cols = [x for x in cols if x not in remove_cols]\n","# X=X_train[new_cols]\n","# y=y_train['Survived']\n","# logit_model=sm.Logit(y,X)\n","# result=logit_model.fit()\n","# print(result.summary2())"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"Yptr-0gUawb6","executionInfo":{"status":"ok","timestamp":1613092602162,"user_tz":300,"elapsed":63597,"user":{"displayName":"Donald 
Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"27ab30fb-59da-49df-f692-1f6574e8c695"},"source":["from sklearn.linear_model import LogisticRegression\n","from sklearn import metrics\n","# Refit a plain scikit-learn classifier on the RFE-selected columns; this is the model used for the submission.\n","logreg = LogisticRegression()\n","logreg.fit(X, y)\n","X.columns"],"execution_count":null,"outputs":[{"output_type":"execute_result","data":{"text/plain":["Index(['Pclass_1', 'Pclass_3', 'Age_bin_young', 'surname_master',\n"," 'surname_miss', 'surname_mr', 'surname_mrs', 'surname_unknown',\n"," 'Embarked_S', 'Sex_female', 'Sex_male', 'alone_True',\n"," 'fare_bin_first_class', 'family_size', 'Age', 'Fare'],\n"," dtype='object')"]},"metadata":{"tags":[]},"execution_count":24}]},{"cell_type":"code","metadata":{"id":"JBz7mo3fbAzx"},"source":["# Kaggle's test rows carry no Survived labels, so only the feature matrix is built here (there is no y_test).\n","X_test = data_final[data_final['train_test'] == \"test\"]\n","X_test = X_test[cols].reset_index(drop = True)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"id":"VagUFjzTa7sQ"},"source":["y_pred = logreg.predict(X_test)\n","# Kaggle expects exactly two columns (PassengerId, Survived) with integer 0/1 labels.\n","# The predictions come back as floats because the training target is stored as 0.0/1.0, so cast them,\n","# and write the file without the DataFrame index so no extra unnamed column is added.\n","submission1 = pd.DataFrame({\"PassengerId\": test['PassengerId'].tolist(),\n"," \"Survived\": y_pred.astype(int)})\n","submission1.to_csv(data_dir + \"/koban_submission1.csv\", index=False)"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"s335sKrYxjrP"},"source":["# First submission was a hot mess inside a dumpster fire. I ranked in the bottom 20% of submissions.
This time we will do model selection using the training data for test and train sets."]},{"cell_type":"code","metadata":{"id":"kTEoCmQZxe3Q"},"source":["X_train = data_final[data_final['train_test'] == \"train\"]\n","X_train = X_train.loc[:, X_train.columns != 'Survived']\n","X_train = X_train.loc[:, X_train.columns != 'train_test']\n","\n","y_train = data_final[data_final['train_test'] == \"train\"]\n","y_train = y_train.loc[:, y_train.columns == 'Survived']\n","\n","# Hold out 20% of the labeled rows so model choices can be scored on data the model has not seen.\n","X_train2, X_test2, y_train2, y_test2 = train_test_split(X_train, y_train, test_size=0.2, random_state=0)"],"execution_count":null,"outputs":[]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CQsCHNIj2bau","executionInfo":{"status":"ok","timestamp":1613092603205,"user_tz":300,"elapsed":64614,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"d557c57b-0743-4cfb-a3ac-3bdcb7ad56c6"},"source":["logreg = LogisticRegression()\n","# Select features on the training split only so the hold-out rows stay unseen during selection.\n","# n_features_to_select is passed by keyword because newer scikit-learn releases no longer accept it positionally.\n","rfe = RFE(logreg, n_features_to_select=16)\n","rfe = rfe.fit(X_train2, y_train2.values.ravel())\n","print(rfe.support_)\n","print(rfe.ranking_)\n","\n","cols = list(compress(X_train.columns, rfe.support_))\n","# NOTE(review): X and y below are the full labeled set, which still contains the hold-out rows (X_test2, y_test2).\n","# They feed the statsmodels summary; use X_train2[cols] / y_train2 for any model that is scored on X_test2.\n","X=X_train[cols]\n","y=y_train['Survived']\n","\n","# Implement the model\n","import statsmodels.api as sm\n","logit_model=sm.Logit(y,X)\n","result=logit_model.fit()\n","print(result.summary2())"],"execution_count":null,"outputs":[{"output_type":"stream","text":["[False True False True True False False False True True True True\n"," True False False True True True True False True False True True\n"," True False]\n","[10 1 8 1 1 9 4 11 1 1 1 1 1 7 6 1 1 1 1 5 1 3 1 1\n"," 1 2]\n","Optimization terminated successfully.\n"," Current function value: 0.403995\n"," Iterations 20\n"," Results: Logit\n","=======================================================================================\n","Model: Logit Pseudo R-squared: 0.393
\n","Dependent Variable: Survived AIC: 749.9195 \n","Date: 2021-02-12 01:16 BIC: 821.8046 \n","No. Observations: 891 Log-Likelihood: -359.96 \n","Df Model: 14 LL-Null: -593.33 \n","Df Residuals: 876 LLR p-value: 1.0276e-90\n","Converged: 1.0000 Scale: 1.0000 \n","No. Iterations: 20.0000 \n","---------------------------------------------------------------------------------------\n"," Coef. Std.Err. z P>|z| [0.025 0.975] \n","---------------------------------------------------------------------------------------\n","Pclass_1 1.0840 0.3108 3.4872 0.0005 0.4747 1.6932\n","Pclass_3 -1.1732 0.2786 -4.2112 0.0000 -1.7193 -0.6272\n","Age_bin_young -0.3474 0.2472 -1.4056 0.1598 -0.8319 0.1370\n","surname_master 56.4272 8074028.1653 0.0000 1.0000 -15824747.9869 15824860.8413\n","surname_miss -72.4301 12498303.6419 -0.0000 1.0000 -24496297.4361 24496152.5759\n","surname_mr 53.3320 8074028.1653 0.0000 1.0000 -15824751.0821 15824857.7461\n","surname_mrs -71.6076 12498303.6419 -0.0000 1.0000 -24496296.6136 24496153.3984\n","surname_unknown 53.2466 8074028.1653 0.0000 1.0000 -15824751.1676 15824857.6607\n","Embarked_S -0.4045 0.2152 -1.8796 0.0602 -0.8263 0.0173\n","Sex_female 73.8304 12498303.6419 0.0000 1.0000 -24496151.1757 24496298.8364\n","Sex_male -54.8623 8074028.1653 -0.0000 1.0000 -15824859.2765 15824749.5518\n","alone_False 0.4389 0.3094 1.4184 0.1561 -0.1676 1.0454\n","fare_bin_economy 0.2492 0.2903 0.8585 0.3906 -0.3197 0.8181\n","fare_bin_first_class 0.4265 0.2791 1.5281 0.1265 -0.1205 0.9736\n","family_size -0.8862 0.1740 -5.0932 0.0000 -1.2272 -0.5451\n","Age -0.4702 0.1406 -3.3437 0.0008 -0.7458 -0.1946\n","=======================================================================================\n","\n"],"name":"stdout"}]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"CrDttQMWyh5i","executionInfo":{"status":"ok","timestamp":1613092603425,"user_tz":300,"elapsed":64828,"user":{"displayName":"Donald 
Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"8dae646a-c4db-4bc0-dcfa-d3d0b2cdee7b"},"source":["from sklearn.linear_model import LogisticRegression\n","from sklearn import metrics\n","\n","# Fit on the training split only. X_test2 is carved out of X_train, so fitting on the full\n","# training data would score the model on rows it has already seen and inflate every metric below.\n","logreg = LogisticRegression()\n","logreg.fit(X_train2[cols], y_train2.values.ravel())\n","y_pred = logreg.predict(X_test2[cols])\n","\n","print('Accuracy of logistic regression classifier on test set: {:.2f}'.format(logreg.score(X_test2[cols], y_test2)))\n","\n","# Confusion matrix layout: rows are the true class, columns are the predicted class.\n","cm = metrics.confusion_matrix(y_test2, y_pred)\n","true_pos = cm[1,1]\n","true_neg = cm[0,0]\n","false_pos = cm[0,1]\n","false_neg = cm[1,0]\n","precision = true_pos/(true_pos + false_pos)\n","recall = true_pos/(true_pos + false_neg)\n","\n","print(\"When we check precision of our model against the test data set, \" + str(len(y_test2)) + \" passengers\")\n","print(\"\")\n","print(\"Precision - We predicted survival \" + str(true_pos + false_pos) + \" times and were correct \" + str(true_pos) + \" times: \" + str(precision)[0:5])\n","print(\"Recall - We predicted \" + str(true_pos) + \" out of the \" + str(true_pos + false_neg) + \" surviving passengers: \" + str(recall)[0:5])\n","print(\"\")\n","print(\"Our total accuracy was \" + str((true_neg + true_pos)/len(y_test2))[0:5])\n","print(\"Our F1 score was \" + str(2*(precision*recall)/(precision+recall))[0:5])"],"execution_count":null,"outputs":[]},{"cell_type":"markdown","metadata":{"id":"ScqylR9I4hl_"},"source":["## Logistic Regression classifier still not performing, so we will try a
Random Forest"]},{"cell_type":"code","metadata":{"colab":{"base_uri":"https://localhost:8080/"},"id":"VKz1-_nZ4g23","executionInfo":{"status":"ok","timestamp":1613092691438,"user_tz":300,"elapsed":14720,"user":{"displayName":"Donald Koban","photoUrl":"https://lh3.googleusercontent.com/a-/AOh14GgIl_q-klTdMSVMcpQ2RqU9YBN_aDPqg2-7Pd4=s64","userId":"12205738029019728376"}},"outputId":"a8e4903c-8f7b-40ad-944e-ee5d3fe86c8e"},"source":["from sklearn.model_selection import cross_val_score, cross_val_predict, cross_validate, KFold\n","from sklearn.ensemble import RandomForestClassifier\n","from statistics import mean\n","from numpy import std\n","\n","# Ten contiguous, unshuffled folds. random_state is omitted because newer scikit-learn releases\n","# raise a ValueError when it is set while shuffle=False.\n","cv = KFold(n_splits=10, shuffle = False)\n","# Seed the forest so the scores below are reproducible from run to run.\n","rf = RandomForestClassifier(n_estimators=100, n_jobs=-1, criterion=\"gini\", random_state=1)\n","\n","# Score all four metrics in a single cross-validation pass so they describe the same fitted forests.\n","# The target is flattened to 1-D, which is the shape scikit-learn estimators expect.\n","cv_scores = cross_validate(rf, X_train, y_train.values.ravel(), scoring = ['accuracy', 'f1', 'precision', 'recall'], cv = cv)\n","accuracy = list(cv_scores['test_accuracy'])\n","f1_scores = list(cv_scores['test_f1'])\n","precision_scores = list(cv_scores['test_precision'])\n","recall_scores = list(cv_scores['test_recall'])\n","\n","print('accuracy score: ' + str(mean(accuracy))[0:5] + \" +/- \" + str(std(accuracy))[0:5])\n","print('f1 score: ' + str(mean(f1_scores))[0:5] + \" +/- \" + str(std(f1_scores))[0:5])\n","print('precision: ' + str(mean(precision_scores))[0:5] + \" +/- \" + str(std(precision_scores))[0:5])\n","print('recall: '+ str(mean(recall_scores))[0:5] + \" +/- \" + str(std(recall_scores))[0:5])"],"execution_count":null,"outputs":[{"output_type":"stream","text":["accuracy score: 0.799 +/- 0.041\n","f1 score: 0.737 +/- 0.053\n","precision: 0.756 +/- 0.067\n","recall: 0.715 +/- 0.084\n"],"name":"stdout"}]}]}