{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Bagging trees on Titanic\n", "\n", "\n", "sklearn.tree.DecisionTreeClassifier\n", "\n", "* Créer les train et test sets\n", "* Comme baseline : arbre de décision simple, not pruned ; quelle accuracy sur le test set ?\n", "* Maintenant, prendre 20 arbres, en limitant la taille à 2 niveaux\n", "* Pour chaque arbre, prédire les probas des échantillons du test set\n", "* Puis moyenner les probas et utiliser le résultat pour déterminer la classe prédite.\n", "* Quelle accuracy sur le test set ?" ] }, { "cell_type": "code", "execution_count": 112, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd\n", "import pandas as pd\n", "import numpy as np\n", "import re\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.metrics import confusion_matrix, classification_report\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.metrics import roc_auc_score" ] }, { "cell_type": "code", "execution_count": 113, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1309, 12)\n" ] }, { "data": { "text/html": [ "
\n", " | pclass | \n", "survived | \n", "name | \n", "sex | \n", "age | \n", "sibsp | \n", "parch | \n", "ticket | \n", "fare | \n", "cabin | \n", "embarked | \n", "home.dest | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "1 | \n", "1 | \n", "Allen, Miss. Elisabeth Walton | \n", "female | \n", "29.00 | \n", "0 | \n", "0 | \n", "24160 | \n", "211.3375 | \n", "B5 | \n", "S | \n", "St Louis, MO | \n", "
1 | \n", "1 | \n", "1 | \n", "Allison, Master. Hudson Trevor | \n", "male | \n", "0.92 | \n", "1 | \n", "2 | \n", "113781 | \n", "151.5500 | \n", "C22 C26 | \n", "S | \n", "Montreal, PQ / Chesterville, ON | \n", "
2 | \n", "1 | \n", "0 | \n", "Allison, Miss. Helen Loraine | \n", "female | \n", "2.00 | \n", "1 | \n", "2 | \n", "113781 | \n", "151.5500 | \n", "C22 C26 | \n", "S | \n", "Montreal, PQ / Chesterville, ON | \n", "
3 | \n", "1 | \n", "0 | \n", "Allison, Mr. Hudson Joshua Creighton | \n", "male | \n", "30.00 | \n", "1 | \n", "2 | \n", "113781 | \n", "151.5500 | \n", "C22 C26 | \n", "S | \n", "Montreal, PQ / Chesterville, ON | \n", "
4 | \n", "1 | \n", "0 | \n", "Allison, Mrs. Hudson J C (Bessie Waldo Daniels) | \n", "female | \n", "25.00 | \n", "1 | \n", "2 | \n", "113781 | \n", "151.5500 | \n", "C22 C26 | \n", "S | \n", "Montreal, PQ / Chesterville, ON | \n", "