{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "# Standard library\n", "import sys\n", "\n", "# Third-party\n", "import pandas as pd\n", "from pyspark.sql import SparkSession\n", "\n", "# Add the parent directory to the import path so that utils.pysparkutils can be found.\n", "sys.path.append('..')\n", "# NOTE(review): the wildcard import hides which helpers the later cells rely on;\n", "# replace it with explicit names once those are confirmed.\n", "from utils.pysparkutils import *\n", "\n", "# getOrCreate() reuses an already-running session instead of starting a second one.\n", "spark = SparkSession.builder.appName(\"titanic\").getOrCreate()" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "root\n", " |-- PassengerId: integer (nullable = true)\n", " |-- Survived: integer (nullable = true)\n", " |-- Pclass: integer (nullable = true)\n", " |-- Name: string (nullable = true)\n", " |-- Sex: string (nullable = true)\n", " |-- Age: double (nullable = true)\n", " |-- SibSp: integer (nullable = true)\n", " |-- Parch: integer (nullable = true)\n", " |-- Ticket: string (nullable = true)\n", " |-- Fare: double (nullable = true)\n", " |-- Cabin: string (nullable = true)\n", " |-- Embarked: string (nullable = true)\n", "\n", "root\n", " |-- PassengerId: integer (nullable = true)\n", " |-- Pclass: integer (nullable = true)\n", " |-- Name: string (nullable = true)\n", " |-- Sex: string (nullable = true)\n", " |-- Age: double (nullable = true)\n", " |-- SibSp: integer (nullable = true)\n", " |-- Parch: integer (nullable = true)\n", " |-- Ticket: string (nullable = true)\n", " |-- Fare: double (nullable = true)\n", " |-- Cabin: string (nullable = true)\n", " |-- Embarked: string (nullable = true)\n", "\n" ] } ], "source": [ "# Load the train and test CSVs with a header row and inferred column types.\n", "# escape='\"' makes Spark read a doubled quote inside a quoted field as a literal quote;\n", "# Spark's default escape character is a backslash, which leaves such Name values with their raw quoting.\n", "train = spark.read.csv('./train.csv', header=True, inferSchema=True, escape='\"')\n", "test = spark.read.csv('./test.csv', header=True, inferSchema=True, escape='\"')\n", "\n", "# Print both schemas; the output recorded for this cell shows test without the Survived column.\n", "train.printSchema()\n", "test.printSchema()\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
| \n", " | PassengerId | \n", "Survived | \n", "Pclass | \n", "Name | \n", "Sex | \n", "Age | \n", "SibSp | \n", "Parch | \n", "Ticket | \n", "Fare | \n", "Cabin | \n", "Embarked | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "0 | \n", "3 | \n", "Braund, Mr. Owen Harris | \n", "male | \n", "22.0 | \n", "1 | \n", "0 | \n", "A/5 21171 | \n", "7.2500 | \n", "None | \n", "S | \n", "
| 1 | \n", "2 | \n", "1 | \n", "1 | \n", "Cumings, Mrs. John Bradley (Florence Briggs Th... | \n", "female | \n", "38.0 | \n", "1 | \n", "0 | \n", "PC 17599 | \n", "71.2833 | \n", "C85 | \n", "C | \n", "
| 2 | \n", "3 | \n", "1 | \n", "3 | \n", "Heikkinen, Miss. Laina | \n", "female | \n", "26.0 | \n", "0 | \n", "0 | \n", "STON/O2. 3101282 | \n", "7.9250 | \n", "None | \n", "S | \n", "
| 3 | \n", "4 | \n", "1 | \n", "1 | \n", "Futrelle, Mrs. Jacques Heath (Lily May Peel) | \n", "female | \n", "35.0 | \n", "1 | \n", "0 | \n", "113803 | \n", "53.1000 | \n", "C123 | \n", "S | \n", "
| 4 | \n", "5 | \n", "0 | \n", "3 | \n", "Allen, Mr. William Henry | \n", "male | \n", "35.0 | \n", "0 | \n", "0 | \n", "373450 | \n", "8.0500 | \n", "None | \n", "S | \n", "
| 5 | \n", "6 | \n", "0 | \n", "3 | \n", "Moran, Mr. James | \n", "male | \n", "NaN | \n", "0 | \n", "0 | \n", "330877 | \n", "8.4583 | \n", "None | \n", "Q | \n", "
| 6 | \n", "7 | \n", "0 | \n", "1 | \n", "McCarthy, Mr. Timothy J | \n", "male | \n", "54.0 | \n", "0 | \n", "0 | \n", "17463 | \n", "51.8625 | \n", "E46 | \n", "S | \n", "
| 7 | \n", "8 | \n", "0 | \n", "3 | \n", "Palsson, Master. Gosta Leonard | \n", "male | \n", "2.0 | \n", "3 | \n", "1 | \n", "349909 | \n", "21.0750 | \n", "None | \n", "S | \n", "
| 8 | \n", "9 | \n", "1 | \n", "3 | \n", "Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) | \n", "female | \n", "27.0 | \n", "0 | \n", "2 | \n", "347742 | \n", "11.1333 | \n", "None | \n", "S | \n", "
| 9 | \n", "10 | \n", "1 | \n", "2 | \n", "Nasser, Mrs. Nicholas (Adele Achem) | \n", "female | \n", "14.0 | \n", "1 | \n", "0 | \n", "237736 | \n", "30.0708 | \n", "None | \n", "C | \n", "
| 10 | \n", "11 | \n", "1 | \n", "3 | \n", "Sandstrom, Miss. Marguerite Rut | \n", "female | \n", "4.0 | \n", "1 | \n", "1 | \n", "PP 9549 | \n", "16.7000 | \n", "G6 | \n", "S | \n", "
| 11 | \n", "12 | \n", "1 | \n", "1 | \n", "Bonnell, Miss. Elizabeth | \n", "female | \n", "58.0 | \n", "0 | \n", "0 | \n", "113783 | \n", "26.5500 | \n", "C103 | \n", "S | \n", "
| 12 | \n", "13 | \n", "0 | \n", "3 | \n", "Saundercock, Mr. William Henry | \n", "male | \n", "20.0 | \n", "0 | \n", "0 | \n", "A/5. 2151 | \n", "8.0500 | \n", "None | \n", "S | \n", "
| 13 | \n", "14 | \n", "0 | \n", "3 | \n", "Andersson, Mr. Anders Johan | \n", "male | \n", "39.0 | \n", "1 | \n", "5 | \n", "347082 | \n", "31.2750 | \n", "None | \n", "S | \n", "
| 14 | \n", "15 | \n", "0 | \n", "3 | \n", "Vestrom, Miss. Hulda Amanda Adolfina | \n", "female | \n", "14.0 | \n", "0 | \n", "0 | \n", "350406 | \n", "7.8542 | \n", "None | \n", "S | \n", "
| 15 | \n", "16 | \n", "1 | \n", "2 | \n", "Hewlett, Mrs. (Mary D Kingcome) | \n", "female | \n", "55.0 | \n", "0 | \n", "0 | \n", "248706 | \n", "16.0000 | \n", "None | \n", "S | \n", "
| 16 | \n", "17 | \n", "0 | \n", "3 | \n", "Rice, Master. Eugene | \n", "male | \n", "2.0 | \n", "4 | \n", "1 | \n", "382652 | \n", "29.1250 | \n", "None | \n", "Q | \n", "
| 17 | \n", "18 | \n", "1 | \n", "2 | \n", "Williams, Mr. Charles Eugene | \n", "male | \n", "NaN | \n", "0 | \n", "0 | \n", "244373 | \n", "13.0000 | \n", "None | \n", "S | \n", "
| 18 | \n", "19 | \n", "0 | \n", "3 | \n", "Vander Planke, Mrs. Julius (Emelia Maria Vande... | \n", "female | \n", "31.0 | \n", "1 | \n", "0 | \n", "345763 | \n", "18.0000 | \n", "None | \n", "S | \n", "
| 19 | \n", "20 | \n", "1 | \n", "3 | \n", "Masselmani, Mrs. Fatima | \n", "female | \n", "NaN | \n", "0 | \n", "0 | \n", "2649 | \n", "7.2250 | \n", "None | \n", "C | \n", "
| \n", " | Survived | \n", "count | \n", "
|---|---|---|
| 0 | \n", "1 | \n", "340 | \n", "
| 1 | \n", "0 | \n", "549 | \n", "
| \n", " | Survived_Sex | \n", "female | \n", "male | \n", "
|---|---|---|---|
| 0 | \n", "1 | \n", "231 | \n", "109 | \n", "
| 1 | \n", "0 | \n", "81 | \n", "468 | \n", "
| \n", " | Sex | \n", "Survived | \n", "Normalized PMI | \n", "
|---|---|---|---|
| 2 | \n", "female | \n", "0 | \n", "-0.361721 | \n", "
| 1 | \n", "female | \n", "1 | \n", "0.490151 | \n", "
| 0 | \n", "male | \n", "0 | \n", "0.424895 | \n", "
| 3 | \n", "male | \n", "1 | \n", "-0.336078 | \n", "
| \n", " | Survived_Pclass | \n", "1 | \n", "2 | \n", "3 | \n", "
|---|---|---|---|---|
| 0 | \n", "1 | \n", "134 | \n", "87 | \n", "119 | \n", "
| 1 | \n", "0 | \n", "80 | \n", "97 | \n", "372 | \n", "
| \n", " | Pclass | \n", "Survived | \n", "Normalized PMI | \n", "
|---|---|---|---|
| 0 | \n", "1 | \n", "0 | \n", "-0.208445 | \n", "
| 2 | \n", "1 | \n", "1 | \n", "0.260544 | \n", "
| 4 | \n", "2 | \n", "0 | \n", "-0.071421 | \n", "
| 3 | \n", "2 | \n", "1 | \n", "0.091268 | \n", "
| 5 | \n", "3 | \n", "0 | \n", "0.234674 | \n", "
| 1 | \n", "3 | \n", "1 | \n", "-0.226840 | \n", "
| \n", " | Survived_Embarked | \n", "C | \n", "Q | \n", "S | \n", "
|---|---|---|---|---|
| 0 | \n", "1 | \n", "93 | \n", "30 | \n", "217 | \n", "
| 1 | \n", "0 | \n", "75 | \n", "47 | \n", "427 | \n", "
| \n", " | Embarked | \n", "Survived | \n", "Normalized PMI | \n", "
|---|---|---|---|
| 5 | \n", "C | \n", "0 | \n", "-0.131229 | \n", "
| 3 | \n", "C | \n", "1 | \n", "0.163804 | \n", "
| 4 | \n", "Q | \n", "0 | \n", "-0.003966 | \n", "
| 0 | \n", "Q | \n", "1 | \n", "0.005472 | \n", "
| 1 | \n", "S | \n", "0 | \n", "0.096935 | \n", "
| 2 | \n", "S | \n", "1 | \n", "-0.089810 | \n", "
| \n", " | Survived_SibSp | \n", "0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "8 | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "208 | \n", "112 | \n", "13 | \n", "4 | \n", "3 | \n", "0 | \n", "0 | \n", "
| 1 | \n", "0 | \n", "398 | \n", "97 | \n", "15 | \n", "12 | \n", "15 | \n", "5 | \n", "7 | \n", "
| \n", " | SibSp | \n", "Survived | \n", "Normalized PMI | \n", "
|---|---|---|---|
| 4 | \n", "0 | \n", "0 | \n", "0.076614 | \n", "
| 6 | \n", "0 | \n", "1 | \n", "-0.074483 | \n", "
| 0 | \n", "1 | \n", "0 | \n", "-0.128928 | \n", "
| 3 | \n", "1 | \n", "1 | \n", "0.162829 | \n", "
| 7 | \n", "2 | \n", "0 | \n", "-0.034825 | \n", "
| 5 | \n", "2 | \n", "1 | \n", "0.045891 | \n", "
| 10 | \n", "3 | \n", "0 | \n", "0.045135 | \n", "
| 1 | \n", "3 | \n", "1 | \n", "-0.078675 | \n", "
| 2 | \n", "4 | \n", "0 | \n", "0.073413 | \n", "
| 11 | \n", "4 | \n", "1 | \n", "-0.145939 | \n", "
| 8 | \n", "5 | \n", "0 | \n", "0.093038 | \n", "
| 9 | \n", "8 | \n", "0 | \n", "0.099500 | \n", "
| \n", " | Survived_Parch | \n", "0 | \n", "1 | \n", "2 | \n", "3 | \n", "4 | \n", "5 | \n", "6 | \n", "
|---|---|---|---|---|---|---|---|---|
| 0 | \n", "1 | \n", "231 | \n", "65 | \n", "40 | \n", "3 | \n", "0 | \n", "1 | \n", "0 | \n", "
| 1 | \n", "0 | \n", "445 | \n", "53 | \n", "40 | \n", "2 | \n", "4 | \n", "4 | \n", "1 | \n", "
| \n", " | Parch | \n", "Survived | \n", "Normalized PMI | \n", "
|---|---|---|---|
| 5 | \n", "0 | \n", "0 | \n", "0.092309 | \n", "
| 7 | \n", "0 | \n", "1 | \n", "-0.083569 | \n", "
| 0 | \n", "1 | \n", "0 | \n", "-0.112913 | \n", "
| 4 | \n", "1 | \n", "1 | \n", "0.139486 | \n", "
| 8 | \n", "2 | \n", "0 | \n", "-0.068086 | \n", "
| 6 | \n", "2 | \n", "1 | \n", "0.086419 | \n", "
| 11 | \n", "3 | \n", "0 | \n", "-0.071231 | \n", "
| 1 | \n", "3 | \n", "1 | \n", "0.079123 | \n", "
| 3 | \n", "4 | \n", "0 | \n", "0.089196 | \n", "
| 9 | \n", "5 | \n", "0 | \n", "0.047902 | \n", "
| 10 | \n", "5 | \n", "1 | \n", "-0.095475 | \n", "
| 2 | \n", "6 | \n", "0 | \n", "0.070986 | \n", "
| \n", " | Feature | \n", "Entropy | \n", "
|---|---|---|
| 0 | \n", "Sex | \n", "0.934919 | \n", "
| 1 | \n", "Pclass | \n", "0.907245 | \n", "
| 2 | \n", "Embarked | \n", "0.692048 | \n", "
| 3 | \n", "SibSp | \n", "0.477435 | \n", "
| 4 | \n", "Parch | \n", "0.402510 | \n", "
| \n", " | PassengerId | \n", "Pclass | \n", "Name | \n", "Sex | \n", "Age | \n", "SibSp | \n", "Parch | \n", "Ticket | \n", "Fare | \n", "Cabin | \n", "... | \n", "bucketedAge | \n", "bucketedFare | \n", "oneHotSex | \n", "oneHotEmbarked | \n", "oneHotFare | \n", "oneHotAge | \n", "features | \n", "rawPrediction | \n", "probability | \n", "prediction | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "892 | \n", "3 | \n", "Kelly, Mr. James | \n", "male | \n", "34.5 | \n", "0 | \n", "0 | \n", "330911 | \n", "7.8292 | \n", "None | \n", "... | \n", "2.0 | \n", "0.0 | \n", "(1.0) | \n", "(0.0, 0.0) | \n", "(1.0, 0.0, 0.0) | \n", "(0.0, 0.0, 1.0, 0.0) | \n", "(3.0, 0.0, 0.0, 2.0, 0.0, 2.0, 0.0) | \n", "[29.2960574888, 0.703942511236] | \n", "[0.976535249625, 0.0234647503745] | \n", "0.0 | \n", "
| 1 | \n", "893 | \n", "3 | \n", "Wilkes, Mrs. James (Ellen Needs) | \n", "female | \n", "47.0 | \n", "1 | \n", "0 | \n", "363272 | \n", "7.0000 | \n", "None | \n", "... | \n", "2.0 | \n", "0.0 | \n", "(0.0) | \n", "(1.0, 0.0) | \n", "(1.0, 0.0, 0.0) | \n", "(0.0, 0.0, 1.0, 0.0) | \n", "[3.0, 1.0, 0.0, 2.0, 0.0, 0.0, 1.0] | \n", "[17.9363522365, 12.0636477635] | \n", "[0.597878407882, 0.402121592118] | \n", "0.0 | \n", "
| 2 | \n", "894 | \n", "2 | \n", "Myles, Mr. Thomas Francis | \n", "male | \n", "62.0 | \n", "0 | \n", "0 | \n", "240276 | \n", "9.6875 | \n", "None | \n", "... | \n", "3.0 | \n", "1.0 | \n", "(1.0) | \n", "(0.0, 0.0) | \n", "(0.0, 1.0, 0.0) | \n", "(0.0, 0.0, 0.0, 1.0) | \n", "[2.0, 0.0, 0.0, 3.0, 1.0, 2.0, 0.0] | \n", "[26.2911066277, 3.70889337228] | \n", "[0.876370220924, 0.123629779076] | \n", "0.0 | \n", "
| 3 | \n", "895 | \n", "3 | \n", "Wirz, Mr. Albert | \n", "male | \n", "27.0 | \n", "0 | \n", "0 | \n", "315154 | \n", "8.6625 | \n", "None | \n", "... | \n", "1.0 | \n", "1.0 | \n", "(1.0) | \n", "(1.0, 0.0) | \n", "(0.0, 1.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "(3.0, 0.0, 0.0, 1.0, 1.0, 0.0, 0.0) | \n", "[25.9086506078, 4.09134939223] | \n", "[0.863621686926, 0.136378313074] | \n", "0.0 | \n", "
| 4 | \n", "896 | \n", "3 | \n", "Hirvonen, Mrs. Alexander (Helga E Lindqvist) | \n", "female | \n", "22.0 | \n", "1 | \n", "1 | \n", "3101298 | \n", "12.2875 | \n", "None | \n", "... | \n", "1.0 | \n", "1.0 | \n", "(0.0) | \n", "(1.0, 0.0) | \n", "(0.0, 1.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[3.0, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0] | \n", "[21.2731321988, 8.72686780122] | \n", "[0.709104406626, 0.290895593374] | \n", "0.0 | \n", "
| 5 | \n", "897 | \n", "3 | \n", "Svensson, Mr. Johan Cervin | \n", "male | \n", "14.0 | \n", "0 | \n", "0 | \n", "7538 | \n", "9.2250 | \n", "None | \n", "... | \n", "0.0 | \n", "1.0 | \n", "(1.0) | \n", "(1.0, 0.0) | \n", "(0.0, 1.0, 0.0) | \n", "(1.0, 0.0, 0.0, 0.0) | \n", "(3.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0) | \n", "[21.79357844, 8.20642155996] | \n", "[0.726452614668, 0.273547385332] | \n", "0.0 | \n", "
| 6 | \n", "898 | \n", "3 | \n", "Connolly, Miss. Kate | \n", "female | \n", "30.0 | \n", "0 | \n", "0 | \n", "330972 | \n", "7.6292 | \n", "None | \n", "... | \n", "1.0 | \n", "0.0 | \n", "(0.0) | \n", "(0.0, 0.0) | \n", "(1.0, 0.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[3.0, 0.0, 0.0, 1.0, 0.0, 2.0, 1.0] | \n", "[6.58546403436, 23.4145359656] | \n", "[0.219515467812, 0.780484532188] | \n", "1.0 | \n", "
| 7 | \n", "899 | \n", "2 | \n", "Caldwell, Mr. Albert Francis | \n", "male | \n", "26.0 | \n", "1 | \n", "1 | \n", "248738 | \n", "29.0000 | \n", "None | \n", "... | \n", "1.0 | \n", "2.0 | \n", "(1.0) | \n", "(1.0, 0.0) | \n", "(0.0, 0.0, 1.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[2.0, 1.0, 1.0, 1.0, 2.0, 0.0, 0.0] | \n", "[26.9219114219, 3.07808857809] | \n", "[0.897397047397, 0.102602952603] | \n", "0.0 | \n", "
| 8 | \n", "900 | \n", "3 | \n", "Abrahim, Mrs. Joseph (Sophie Halaut Easu) | \n", "female | \n", "18.0 | \n", "0 | \n", "0 | \n", "2657 | \n", "7.2292 | \n", "None | \n", "... | \n", "1.0 | \n", "0.0 | \n", "(0.0) | \n", "(0.0, 1.0) | \n", "(1.0, 0.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 1.0] | \n", "[5.95376028838, 24.0462397116] | \n", "[0.198458676279, 0.801541323721] | \n", "1.0 | \n", "
| 9 | \n", "901 | \n", "3 | \n", "Davies, Mr. John Samuel | \n", "male | \n", "21.0 | \n", "2 | \n", "0 | \n", "A/4 48871 | \n", "24.1500 | \n", "None | \n", "... | \n", "1.0 | \n", "2.0 | \n", "(1.0) | \n", "(1.0, 0.0) | \n", "(0.0, 0.0, 1.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[3.0, 2.0, 0.0, 1.0, 2.0, 0.0, 0.0] | \n", "[27.683257006, 2.31674299405] | \n", "[0.922775233532, 0.0772247664683] | \n", "0.0 | \n", "
| 10 | \n", "902 | \n", "3 | \n", "Ilieff, Mr. Ylio | \n", "male | \n", "NaN | \n", "0 | \n", "0 | \n", "349220 | \n", "7.8958 | \n", "None | \n", "... | \n", "1.0 | \n", "0.0 | \n", "(1.0) | \n", "(1.0, 0.0) | \n", "(1.0, 0.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "(3.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0) | \n", "[27.1796680333, 2.8203319667] | \n", "[0.905988934443, 0.0940110655565] | \n", "0.0 | \n", "
| 11 | \n", "903 | \n", "1 | \n", "Jones, Mr. Charles Cresson | \n", "male | \n", "46.0 | \n", "0 | \n", "0 | \n", "694 | \n", "26.0000 | \n", "None | \n", "... | \n", "2.0 | \n", "2.0 | \n", "(1.0) | \n", "(1.0, 0.0) | \n", "(0.0, 0.0, 1.0) | \n", "(0.0, 0.0, 1.0, 0.0) | \n", "(1.0, 0.0, 0.0, 2.0, 2.0, 0.0, 0.0) | \n", "[14.2653782322, 15.7346217678] | \n", "[0.475512607739, 0.524487392261] | \n", "1.0 | \n", "
| 12 | \n", "904 | \n", "1 | \n", "Snyder, Mrs. John Pillsbury (Nelle Stevenson) | \n", "female | \n", "23.0 | \n", "1 | \n", "0 | \n", "21228 | \n", "82.2667 | \n", "B45 | \n", "... | \n", "1.0 | \n", "3.0 | \n", "(0.0) | \n", "(1.0, 0.0) | \n", "(0.0, 0.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[1.0, 1.0, 0.0, 1.0, 3.0, 0.0, 1.0] | \n", "[0.295454545455, 29.7045454545] | \n", "[0.00984848484848, 0.990151515152] | \n", "1.0 | \n", "
| 13 | \n", "905 | \n", "2 | \n", "Howard, Mr. Benjamin | \n", "male | \n", "63.0 | \n", "1 | \n", "0 | \n", "24065 | \n", "26.0000 | \n", "None | \n", "... | \n", "3.0 | \n", "2.0 | \n", "(1.0) | \n", "(1.0, 0.0) | \n", "(0.0, 0.0, 1.0) | \n", "(0.0, 0.0, 0.0, 1.0) | \n", "[2.0, 1.0, 0.0, 3.0, 2.0, 0.0, 0.0] | \n", "[27.5151896549, 2.4848103451] | \n", "[0.917172988497, 0.0828270115032] | \n", "0.0 | \n", "
| 14 | \n", "906 | \n", "1 | \n", "Chaffee, Mrs. Herbert Fuller (Carrie Constance... | \n", "female | \n", "47.0 | \n", "1 | \n", "0 | \n", "W.E.P. 5734 | \n", "61.1750 | \n", "E31 | \n", "... | \n", "2.0 | \n", "3.0 | \n", "(0.0) | \n", "(1.0, 0.0) | \n", "(0.0, 0.0, 0.0) | \n", "(0.0, 0.0, 1.0, 0.0) | \n", "[1.0, 1.0, 0.0, 2.0, 3.0, 0.0, 1.0] | \n", "[0.0, 30.0] | \n", "[0.0, 1.0] | \n", "1.0 | \n", "
| 15 | \n", "907 | \n", "2 | \n", "del Carlo, Mrs. Sebastiano (Argenia Genovesi) | \n", "female | \n", "24.0 | \n", "1 | \n", "0 | \n", "SC/PARIS 2167 | \n", "27.7208 | \n", "None | \n", "... | \n", "1.0 | \n", "2.0 | \n", "(0.0) | \n", "(0.0, 1.0) | \n", "(0.0, 0.0, 1.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[2.0, 1.0, 0.0, 1.0, 2.0, 1.0, 1.0] | \n", "[2.02801517479, 27.9719848252] | \n", "[0.0676005058263, 0.932399494174] | \n", "1.0 | \n", "
| 16 | \n", "908 | \n", "2 | \n", "Keane, Mr. Daniel | \n", "male | \n", "35.0 | \n", "0 | \n", "0 | \n", "233734 | \n", "12.3500 | \n", "None | \n", "... | \n", "2.0 | \n", "1.0 | \n", "(1.0) | \n", "(0.0, 0.0) | \n", "(0.0, 1.0, 0.0) | \n", "(0.0, 0.0, 1.0, 0.0) | \n", "[2.0, 0.0, 0.0, 2.0, 1.0, 2.0, 0.0] | \n", "[26.5861882672, 3.41381173276] | \n", "[0.886206275575, 0.113793724425] | \n", "0.0 | \n", "
| 17 | \n", "909 | \n", "3 | \n", "Assaf, Mr. Gerios | \n", "male | \n", "21.0 | \n", "0 | \n", "0 | \n", "2692 | \n", "7.2250 | \n", "None | \n", "... | \n", "1.0 | \n", "0.0 | \n", "(1.0) | \n", "(0.0, 1.0) | \n", "(1.0, 0.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "(3.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0) | \n", "[25.8019873912, 4.19801260875] | \n", "[0.860066246375, 0.139933753625] | \n", "0.0 | \n", "
| 18 | \n", "910 | \n", "3 | \n", "Ilmakangas, Miss. Ida Livija | \n", "female | \n", "27.0 | \n", "1 | \n", "0 | \n", "STON/O2. 3101270 | \n", "7.9250 | \n", "None | \n", "... | \n", "1.0 | \n", "1.0 | \n", "(0.0) | \n", "(1.0, 0.0) | \n", "(0.0, 1.0, 0.0) | \n", "(0.0, 1.0, 0.0, 0.0) | \n", "[3.0, 1.0, 0.0, 1.0, 1.0, 0.0, 1.0] | \n", "[20.8412096143, 9.15879038571] | \n", "[0.694706987143, 0.305293012857] | \n", "0.0 | \n", "
| 19 | \n", "911 | \n", "3 | \n", "\"Assaf Khalil, Mrs. Mariana (Miriam\"\")\"\"\" | \n", "female | \n", "45.0 | \n", "0 | \n", "0 | \n", "2696 | \n", "7.2250 | \n", "None | \n", "... | \n", "2.0 | \n", "0.0 | \n", "(0.0) | \n", "(0.0, 1.0) | \n", "(1.0, 0.0, 0.0) | \n", "(0.0, 0.0, 1.0, 0.0) | \n", "[3.0, 0.0, 0.0, 2.0, 0.0, 1.0, 1.0] | \n", "[17.3386478658, 12.6613521342] | \n", "[0.577954928861, 0.422045071139] | \n", "0.0 | \n", "
20 rows × 24 columns
\n", "