{"cells":[{"cell_type":"code","source":"from google.colab import drive\nimport os\ndrive.mount('/content/gdrive')\n# Establecer ruta de acceso en drive\nimport os\nprint(os.getcwd())\nos.chdir(\"/content/gdrive/My Drive\")","metadata":{"id":"j2626UpFhsvN","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"9843b2ca1bcd4ff3970c38b0de626e7b","outputId":"3bd1ed8a-1d5d-4696-ac38-f25cb4e95bd9","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3774,"user_tz":180,"timestamp":1650297871775},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount(\"/content/gdrive\", force_remount=True).\n/content/gdrive/My Drive\n"}],"execution_count":97},{"cell_type":"code","source":"import pandas as pd\nimport matplotlib.pyplot as plt\nimport seaborn as sns\nimport numpy as np\ntrain=pd.read_csv('train_titanic.csv')\ntest=pd.read_csv('test_titanic.csv')\ntest","metadata":{"id":"oj-XQ1jxiGt4","colab":{"height":423,"base_uri":"https://localhost:8080/"},"cell_id":"f4f3d2aab6eb42fa83e583803627deba","outputId":"0c469fb3-ab29-4c5a-ad16-74e382fc304e","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":26,"user_tz":180,"timestamp":1650297871777},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Pclass Name \\\n0 892 3 Kelly, Mr. James \n1 893 3 Wilkes, Mrs. James (Ellen Needs) \n2 894 2 Myles, Mr. Thomas Francis \n3 895 3 Wirz, Mr. Albert \n4 896 3 Hirvonen, Mrs. Alexander (Helga E Lindqvist) \n.. ... ... ... \n413 1305 3 Spector, Mr. Woolf \n414 1306 1 Oliva y Ocana, Dona. Fermina \n415 1307 3 Saether, Mr. Simon Sivertsen \n416 1308 3 Ware, Mr. Frederick \n417 1309 3 Peter, Master. Michael J \n\n Sex Age SibSp Parch Ticket Fare Cabin Embarked \n0 male 34.5 0 0 330911 7.8292 NaN Q \n1 female 47.0 1 0 363272 7.0000 NaN S \n2 male 62.0 0 0 240276 9.6875 NaN Q \n3 male 27.0 0 0 315154 8.6625 NaN S \n4 female 22.0 1 1 3101298 12.2875 NaN S \n.. ... ... ... ... ... ... ... ... \n413 male NaN 0 0 A.5. 3236 8.0500 NaN S \n414 female 39.0 0 0 PC 17758 108.9000 C105 C \n415 male 38.5 0 0 SOTON/O.Q. 3101262 7.2500 NaN S \n416 male NaN 0 0 359309 8.0500 NaN S \n417 male NaN 1 1 2668 22.3583 NaN C \n\n[418 rows x 11 columns]","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
08923Kelly, Mr. Jamesmale34.5003309117.8292NaNQ
18933Wilkes, Mrs. James (Ellen Needs)female47.0103632727.0000NaNS
28942Myles, Mr. Thomas Francismale62.0002402769.6875NaNQ
38953Wirz, Mr. Albertmale27.0003151548.6625NaNS
48963Hirvonen, Mrs. Alexander (Helga E Lindqvist)female22.011310129812.2875NaNS
....................................
41313053Spector, Mr. WoolfmaleNaN00A.5. 32368.0500NaNS
41413061Oliva y Ocana, Dona. Ferminafemale39.000PC 17758108.9000C105C
41513073Saether, Mr. Simon Sivertsenmale38.500SOTON/O.Q. 31012627.2500NaNS
41613083Ware, Mr. FrederickmaleNaN003593098.0500NaNS
41713093Peter, Master. Michael JmaleNaN11266822.3583NaNC
\n

418 rows × 11 columns

\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":98}],"execution_count":98},{"cell_type":"markdown","source":"Variable | Descripcion\n-------------------|------------------\nSurvived|\tSurvived (1) or died (0)\nPclass\t|Passenger’s class\nName\t|Passenger’s name\nSex\t|Passenger’s sex\nAge\t|Passenger’s age\nSibSp\t|Number of siblings/spouses aboard\nParch\t|Number of parents/children aboard\nTicket\t|Ticket number\nFare\t|Fare\nCabin\t|Cabin\nEmbarked\t|Port of embarkation","metadata":{"id":"7IKUg4AiqUEe","cell_id":"305735206504454a9aa18c17c3a0aaf0","deepnote_cell_type":"markdown"}},{"cell_type":"markdown","source":"# Feature Engineering","metadata":{"id":"GjY-wdXUVY-E","cell_id":"9ba45f99eba7436e8b9fc6e9febfa4a3","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full= pd.concat([train,test], axis=0)\nfull.head()","metadata":{"id":"eHmQy8Dvq8rn","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"20d84e0693944b648bbb7e13f4a30043","outputId":"df76faa1-553c-40af-8030-5abd5c7b7dc7","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":419,"user_tz":180,"timestamp":1650297876806},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Survived Pclass \\\n0 1 0.0 3 \n1 2 1.0 1 \n2 3 1.0 3 \n3 4 1.0 1 \n4 5 0.0 3 \n\n Name Sex Age SibSp \\\n0 Braund, Mr. Owen Harris male 22.0 1 \n1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n2 Heikkinen, Miss. Laina female 26.0 0 \n3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n4 Allen, Mr. William Henry male 35.0 0 \n\n Parch Ticket Fare Cabin Embarked \n0 0 A/5 21171 7.2500 NaN S \n1 0 PC 17599 71.2833 C85 C \n2 0 STON/O2. 3101282 7.9250 NaN S \n3 0 113803 53.1000 C123 S \n4 0 373450 8.0500 NaN S ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarked
010.03Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNS
121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85C
231.03Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNS
341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123S
450.03Allen, Mr. William Henrymale35.0003734508.0500NaNS
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":99}],"execution_count":99},{"cell_type":"code","source":"# Exploraremos la columna passenger name y nos interesa la abreviatura de cada uno\nimport re\nfull['Title']=full['Name'].apply(lambda x : re.sub(\"(.*, )|(\\\\..*)\", \"\", x)) \nfull.head()","metadata":{"id":"-wtvYTKIsJBh","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"871ccd9dd15345958101d1f325c84fbc","outputId":"77ba6fb2-be1d-4067-b35d-76587c6360e2","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":18,"user_tz":180,"timestamp":1650297901127},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Survived Pclass \\\n0 1 0.0 3 \n1 2 1.0 1 \n2 3 1.0 3 \n3 4 1.0 1 \n4 5 0.0 3 \n\n Name Sex Age SibSp \\\n0 Braund, Mr. Owen Harris male 22.0 1 \n1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n2 Heikkinen, Miss. Laina female 26.0 0 \n3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n4 Allen, Mr. William Henry male 35.0 0 \n\n Parch Ticket Fare Cabin Embarked Title \n0 0 A/5 21171 7.2500 NaN S Mr \n1 0 PC 17599 71.2833 C85 C Mrs \n2 0 STON/O2. 3101282 7.9250 NaN S Miss \n3 0 113803 53.1000 C123 S Mrs \n4 0 373450 8.0500 NaN S Mr ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitle
010.03Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNSMr
121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85CMrs
231.03Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNSMiss
341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123SMrs
450.03Allen, Mr. William Henrymale35.0003734508.0500NaNSMr
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":100}],"execution_count":100},{"cell_type":"code","source":"pd.crosstab(full.Sex, full.Title)","metadata":{"id":"UZ1Uu2C1sBc3","colab":{"height":143,"base_uri":"https://localhost:8080/"},"cell_id":"9fd856110eb74fef85874f8c8295cac4","outputId":"b5d2c5a2-7aaf-4c04-88f3-4c7920ae4b8d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":849,"user_tz":180,"timestamp":1650297922960},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"Title Capt Col Don Dona Dr Jonkheer Lady Major Master Miss Mlle \\\nSex \nfemale 0 0 0 1 1 0 1 0 0 260 2 \nmale 1 4 1 0 7 1 0 2 61 0 0 \n\nTitle Mme Mr Mrs Ms Rev Sir the Countess \nSex \nfemale 1 0 197 2 0 0 1 \nmale 0 757 0 0 8 1 0 ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
TitleCaptColDonDonaDrJonkheerLadyMajorMasterMissMlleMmeMrMrsMsRevSirthe Countess
Sex
female0001101002602101972001
male141071026100075700810
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":101}],"execution_count":101},{"cell_type":"code","source":"rare_title= ['Dona', 'Lady', 'the Countess','Capt', 'Col', 'Don', \n 'Dr', 'Major', 'Rev', 'Sir', 'Jonkheer']\nfull.loc[full.Title == 'Mlle','Title'] ='Miss'\nfull.loc[full.Title == 'Ms','Title'] ='Miss'\nfull.loc[full.Title == 'Mme','Title'] ='Mrs'\nfull.loc[full.Title.isin(rare_title),'Title'] ='Rare Title'\npd.crosstab(full.Sex, full.Title)","metadata":{"id":"lH1CKzPnvqpH","colab":{"height":143,"base_uri":"https://localhost:8080/"},"cell_id":"a7b6d001ae764eb4b4602f3b0055c231","outputId":"8f2e64ee-7883-4337-87c9-2ee9a2ed1d4e","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":6,"user_tz":180,"timestamp":1650297927980},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"Title Master Miss Mr Mrs Rare Title\nSex \nfemale 0 264 0 198 4\nmale 61 0 757 0 25","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
TitleMasterMissMrMrsRare Title
Sex
female026401984
male610757025
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":102}],"execution_count":102},{"cell_type":"code","source":"# Extrayendo el apellido\nfull['Surname']=full['Name'].apply(lambda x : x[:x.index(',')])\nfull.head()","metadata":{"id":"3XIOqEcVcn5J","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"daa05e9a6a7b48919d21b7a82284f881","outputId":"5bc83ca7-7434-49ad-b452-a313940b6707","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":369,"user_tz":180,"timestamp":1650297934276},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Survived Pclass \\\n0 1 0.0 3 \n1 2 1.0 1 \n2 3 1.0 3 \n3 4 1.0 1 \n4 5 0.0 3 \n\n Name Sex Age SibSp \\\n0 Braund, Mr. Owen Harris male 22.0 1 \n1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n2 Heikkinen, Miss. Laina female 26.0 0 \n3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n4 Allen, Mr. William Henry male 35.0 0 \n\n Parch Ticket Fare Cabin Embarked Title Surname \n0 0 A/5 21171 7.2500 NaN S Mr Braund \n1 0 PC 17599 71.2833 C85 C Mrs Cumings \n2 0 STON/O2. 3101282 7.9250 NaN S Miss Heikkinen \n3 0 113803 53.1000 C123 S Mrs Futrelle \n4 0 373450 8.0500 NaN S Mr Allen ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitleSurname
010.03Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNSMrBraund
121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85CMrsCumings
231.03Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNSMissHeikkinen
341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123SMrsFutrelle
450.03Allen, Mr. William Henrymale35.0003734508.0500NaNSMrAllen
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":103}],"execution_count":103},{"cell_type":"markdown","source":"## Las familias se hundieron o nadaron juntos\n\nPrimero vamos a hacer una variable del tamaño de la familia basada en el número de hermanos/cónyuge(s) (¿quizás alguien tiene más de un cónyuge?) y el número de hijos/padres.","metadata":{"id":"wh1WDhe0wL4n","cell_id":"ef3bad66940d4d658a0cb4873aeb6c58","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full['Fsize']=full['SibSp']+full['Parch']+1\nfull['Fsize']=full['Fsize'].astype('str')\nfull['Family']=full[['Surname', 'Fsize']].agg('_'.join, axis=1)\nfull.head()","metadata":{"id":"zlgi5Fm5wWgT","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"75d8dcd999d44275b9a22f0390747060","outputId":"e302d860-566e-40bb-8f13-4de910c8274d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":552,"user_tz":180,"timestamp":1650297939484},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Survived Pclass \\\n0 1 0.0 3 \n1 2 1.0 1 \n2 3 1.0 3 \n3 4 1.0 1 \n4 5 0.0 3 \n\n Name Sex Age SibSp \\\n0 Braund, Mr. Owen Harris male 22.0 1 \n1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n2 Heikkinen, Miss. Laina female 26.0 0 \n3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n4 Allen, Mr. William Henry male 35.0 0 \n\n Parch Ticket Fare Cabin Embarked Title Surname Fsize \\\n0 0 A/5 21171 7.2500 NaN S Mr Braund 2 \n1 0 PC 17599 71.2833 C85 C Mrs Cumings 2 \n2 0 STON/O2. 3101282 7.9250 NaN S Miss Heikkinen 1 \n3 0 113803 53.1000 C123 S Mrs Futrelle 2 \n4 0 373450 8.0500 NaN S Mr Allen 1 \n\n Family \n0 Braund_2 \n1 Cumings_2 \n2 Heikkinen_1 \n3 Futrelle_2 \n4 Allen_1 ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitleSurnameFsizeFamily
010.03Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNSMrBraund2Braund_2
121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85CMrsCumings2Cumings_2
231.03Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNSMissHeikkinen1Heikkinen_1
341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123SMrsFutrelle2Futrelle_2
450.03Allen, Mr. William Henrymale35.0003734508.0500NaNSMrAllen1Allen_1
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":104}],"execution_count":104},{"cell_type":"code","source":"full.Fsize.value_counts().values","metadata":{"id":"SOtNW-48eHjk","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"308a7a3cf510456f81c64a244a0a20c6","outputId":"3194ddec-c84b-44a1-c44f-b68562cb56d3","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":462,"user_tz":180,"timestamp":1650297945143},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([790, 235, 159, 43, 25, 22, 16, 11, 8])"},"metadata":{},"execution_count":105}],"execution_count":105},{"cell_type":"code","source":"full.Survived.unique()","metadata":{"id":"ZhObTeQaeaCD","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"c58d706e61df4fc3a1b70592ace312cc","outputId":"197e7eab-9cfa-4b38-ef46-5bce6ed074aa","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":180,"timestamp":1650297950302},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([ 0., 1., nan])"},"metadata":{},"execution_count":106}],"execution_count":106},{"cell_type":"code","source":"tr=pd.DataFrame(full.Fsize.value_counts())\ntr=tr.reset_index()\ntr.columns=['F','Fsize']\ntr","metadata":{"id":"AvzAL7MNegzu","colab":{"height":331,"base_uri":"https://localhost:8080/"},"cell_id":"208fa628519d4cbea77ba4598961f227","outputId":"196eb520-307b-487c-ed90-3c56a68f7a61","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":6,"user_tz":180,"timestamp":1650297955980},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" F Fsize\n0 1 790\n1 2 235\n2 3 159\n3 4 43\n4 6 25\n5 5 22\n6 7 16\n7 11 11\n8 8 8","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FFsize
01790
12235
23159
3443
4625
5522
6716
71111
888
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":107}],"execution_count":107},{"cell_type":"code","source":"tr.dtypes","metadata":{"id":"wuAN8Wxag7Hr","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"090c01db351e42e7a7db6e35a5b9dca5","outputId":"4bde5dad-c205-4dd3-f157-79f0b0e13308","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":180,"timestamp":1650297957818},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"F object\nFsize int64\ndtype: object"},"metadata":{},"execution_count":108}],"execution_count":108},{"cell_type":"code","source":"tr=full[['Fsize','Survived','Sex']].groupby(['Fsize','Survived']).count()\ntr=tr.reset_index()\ntr.columns=['Fsize','Survived','F']\ntr['Fsize']=tr['Fsize'].astype('int')\ntr.sort_values(by='Fsize')","metadata":{"id":"6oJRNT0pfqYI","colab":{"height":551,"base_uri":"https://localhost:8080/"},"cell_id":"f7c482472de943b58d06d54a2ae2d1f4","outputId":"01fc985c-6ac3-4ca5-9b9d-b6424c16615c","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":2355,"user_tz":180,"timestamp":1650298273179},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" Fsize Survived F\n0 1 0.0 374\n1 1 1.0 163\n3 2 0.0 72\n4 2 1.0 89\n5 3 0.0 43\n6 3 1.0 59\n7 4 0.0 8\n8 4 1.0 21\n9 5 0.0 12\n10 5 1.0 3\n11 6 0.0 19\n12 6 1.0 3\n13 7 0.0 8\n14 7 1.0 4\n15 8 0.0 6\n2 11 0.0 7","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
FsizeSurvivedF
010.0374
111.0163
320.072
421.089
530.043
631.059
740.08
841.021
950.012
1051.03
1160.019
1261.03
1370.08
1471.04
1580.06
2110.07
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":109}],"execution_count":109},{"cell_type":"code","source":"# algunos graficos descriptivos\nplt.figure(figsize=(10,7))\nsns.barplot(y=\"F\", x=\"Fsize\", data=tr,hue='Survived', orient='v')\nplt.xlabel('Family size')\nplt.ylabel('Conteo')\nplt.legend(loc='upper right')","metadata":{"id":"VGjLRmmedNRD","colab":{"height":458,"base_uri":"https://localhost:8080/"},"cell_id":"608e76f3924747f3af10c39fa63108c3","outputId":"5065d9df-8901-43f1-a16c-46de8781cf8e","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":28,"user_tz":180,"timestamp":1650298287958},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":""},"metadata":{},"execution_count":110},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":110},{"cell_type":"markdown","source":"Podemos ver que hay una penalización de supervivencia para los hijos únicos y aquellos con familias de más de 4. Podemos colapsar esta variable en tres niveles que serán útiles ya que hay comparativamente menos familias numerosas. Vamos a crear una variable de tamaño de familia discretizada.","metadata":{"id":"rFUqpskJhjwn","cell_id":"abe62881e7de43829aa901f066ad39ca","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full['Fsize'].unique()","metadata":{"id":"2qcThq0UiWYA","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"fb78353a53ff4ab0a16d453449887767","outputId":"72422d06-b7c0-4d0e-849f-c1b1c55f4a94","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":311,"user_tz":180,"timestamp":1650298296050},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array(['2', '1', '5', '3', '7', '6', '4', '8', '11'], dtype=object)"},"metadata":{},"execution_count":111}],"execution_count":111},{"cell_type":"code","source":"# Discretizar la variable family size\nfull['Fsize']=full['Fsize'].astype('int')\nfull['FsizeD']=np.where(full.Fsize ==1, 'singleton',# aqui viene el else\n np.where(((full.Fsize <5) & (full.Fsize >1)),'small','large'))\nfull.head()","metadata":{"id":"iTH-5cqMhm6P","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"ab80d7259da34a6d910562233f79dbe5","outputId":"0362a46d-34cb-4f3b-b4ca-728ee46f8855","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":602,"user_tz":180,"timestamp":1650298297982},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Survived Pclass \\\n0 1 0.0 3 \n1 2 1.0 1 \n2 3 1.0 3 \n3 4 1.0 1 \n4 5 0.0 3 \n\n Name Sex Age SibSp \\\n0 Braund, Mr. Owen Harris male 22.0 1 \n1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n2 Heikkinen, Miss. Laina female 26.0 0 \n3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n4 Allen, Mr. William Henry male 35.0 0 \n\n Parch Ticket Fare Cabin Embarked Title Surname Fsize \\\n0 0 A/5 21171 7.2500 NaN S Mr Braund 2 \n1 0 PC 17599 71.2833 C85 C Mrs Cumings 2 \n2 0 STON/O2. 3101282 7.9250 NaN S Miss Heikkinen 1 \n3 0 113803 53.1000 C123 S Mrs Futrelle 2 \n4 0 373450 8.0500 NaN S Mr Allen 1 \n\n Family FsizeD \n0 Braund_2 small \n1 Cumings_2 small \n2 Heikkinen_1 singleton \n3 Futrelle_2 small \n4 Allen_1 singleton ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitleSurnameFsizeFamilyFsizeD
010.03Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNSMrBraund2Braund_2small
121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85CMrsCumings2Cumings_2small
231.03Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNSMissHeikkinen1Heikkinen_1singleton
341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123SMrsFutrelle2Futrelle_2small
450.03Allen, Mr. William Henrymale35.0003734508.0500NaNSMrAllen1Allen_1singleton
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":112}],"execution_count":112},{"cell_type":"code","source":"# Crear el mosaic plot\nfrom statsmodels.graphics.mosaicplot import mosaic\nts=pd.crosstab(full.FsizeD, full.Survived)\nts.columns=['No','Si']\nts=ts.reset_index()\nts1=pd.melt(ts,id_vars=['FsizeD'])\nG = ts1.groupby([\"FsizeD\", \"variable\"]).sum()\nmosaic(G[\"value\"])","metadata":{"id":"bO4sfgd9iqZk","colab":{"height":755,"base_uri":"https://localhost:8080/"},"cell_id":"b60b7ea81223444ab5a24813ca78e4b6","outputId":"8490cdb2-12c8-4a17-b715-a16751068082","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":438,"user_tz":180,"timestamp":1650298301912},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"(
,\n OrderedDict([(('large', 'No'),\n (0.0, 0.0, 0.06889577846673556, 0.8359232665309184)),\n (('large', 'Si'),\n (0.0,\n 0.839245525667131,\n 0.06889577846673556,\n 0.16075447433286888)),\n (('singleton', 'No'),\n (0.07384627351624051,\n 0.0,\n 0.5967263393005967,\n 0.6941479982924702)),\n (('singleton', 'Si'),\n (0.07384627351624051,\n 0.6974702574286827,\n 0.5967263393005967,\n 0.30252974257131715)),\n (('small', 'No'),\n (0.6755231078663423,\n 0.0,\n 0.32447689213365777,\n 0.4198334319391981)),\n (('small', 'Si'),\n (0.6755231078663423,\n 0.42315569107541073,\n 0.32447689213365777,\n 0.5768443089245892))]))"},"metadata":{},"execution_count":113},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":113},{"cell_type":"markdown","source":"La trama de mosaico muestra que preservamos nuestra regla de que hay una penalización de supervivencia entre los solteros y las familias numerosas, pero un beneficio para los pasajeros de familias pequeñas. Podemos hacer algo más con nuestra variable de edad, pero faltan valores de edad en 263 filas","metadata":{"id":"nLIx9tTqaDXe","cell_id":"5ac5a7fbc3bc45bcb732278f5191148d","deepnote_cell_type":"markdown"}},{"cell_type":"markdown","source":"## Tratar algunas otras variables","metadata":{"id":"sNr2WekbaP6W","cell_id":"7a3959cb8daf4ad5aa69aab2a26b6ecb","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"# Esta variable tiene muchos nulos\nfull.Cabin[1:28]","metadata":{"id":"aip43ji8aRxk","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"f7bc41ccabc44612b9ddcda64ae13b58","outputId":"819268cc-f6df-4e82-b6bd-c1b388d85cf6","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":411,"user_tz":180,"timestamp":1650298305484},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"1 C85\n2 NaN\n3 C123\n4 NaN\n5 NaN\n6 E46\n7 NaN\n8 NaN\n9 NaN\n10 G6\n11 C103\n12 NaN\n13 NaN\n14 NaN\n15 NaN\n16 NaN\n17 NaN\n18 NaN\n19 NaN\n20 NaN\n21 D56\n22 NaN\n23 A6\n24 NaN\n25 NaN\n26 NaN\n27 C23 C25 C27\nName: Cabin, dtype: object"},"metadata":{},"execution_count":114}],"execution_count":114},{"cell_type":"markdown","source":"Extraigamos las primeras letras de la columna Cabin que representa el Deck","metadata":{"id":"hKFWT7O0b9QR","cell_id":"0835b8a0cfe945ebabb661ab33ba3ebf","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full['Deck']=full['Cabin'].astype(str).str[0]\nfull['Deck']=full.Deck.str.upper()\nfull.head()","metadata":{"id":"WIjnuzdZcDQS","colab":{"height":206,"base_uri":"https://localhost:8080/"},"cell_id":"8e1994763fbb4b4fa697f0bb473fbac6","outputId":"27826faf-69f5-4a4f-ac2a-4c761b589829","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":403,"user_tz":180,"timestamp":1650298307739},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Survived Pclass \\\n0 1 0.0 3 \n1 2 1.0 1 \n2 3 1.0 3 \n3 4 1.0 1 \n4 5 0.0 3 \n\n Name Sex Age SibSp \\\n0 Braund, Mr. Owen Harris male 22.0 1 \n1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n2 Heikkinen, Miss. Laina female 26.0 0 \n3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n4 Allen, Mr. William Henry male 35.0 0 \n\n Parch Ticket Fare Cabin Embarked Title Surname Fsize \\\n0 0 A/5 21171 7.2500 NaN S Mr Braund 2 \n1 0 PC 17599 71.2833 C85 C Mrs Cumings 2 \n2 0 STON/O2. 3101282 7.9250 NaN S Miss Heikkinen 1 \n3 0 113803 53.1000 C123 S Mrs Futrelle 2 \n4 0 373450 8.0500 NaN S Mr Allen 1 \n\n Family FsizeD Deck \n0 Braund_2 small N \n1 Cumings_2 small C \n2 Heikkinen_1 singleton N \n3 Futrelle_2 small C \n4 Allen_1 singleton N ","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitleSurnameFsizeFamilyFsizeDDeck
010.03Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNSMrBraund2Braund_2smallN
121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85CMrsCumings2Cumings_2smallC
231.03Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNSMissHeikkinen1Heikkinen_1singletonN
341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123SMrsFutrelle2Futrelle_2smallC
450.03Allen, Mr. William Henrymale35.0003734508.0500NaNSMrAllen1Allen_1singletonN
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":115}],"execution_count":115},{"cell_type":"code","source":"full.Deck.value_counts()","metadata":{"id":"TQLCschkc_qQ","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"b12dd5df079c46fba378a3ba65f77534","outputId":"abde8b19-04ba-4653-a5d2-8fa2f1269f6a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":255,"user_tz":180,"timestamp":1650298310866},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"N 1014\nC 94\nB 65\nD 46\nE 41\nA 22\nF 21\nG 5\nT 1\nName: Deck, dtype: int64"},"metadata":{},"execution_count":116}],"execution_count":116},{"cell_type":"markdown","source":"# Problema de nulos\n\nAhora estamos listos para comenzar a explorar los datos faltantes y rectificarlos a través de la imputación. Hay varias maneras diferentes en las que podríamos hacer esto. \n\nDado el pequeño tamaño del conjunto de datos, probablemente no deberíamos optar por eliminar observaciones completas (filas) o variables (columnas) que contengan valores faltantes. \n\nNos queda la opción de reemplazar los valores faltantes con valores sensibles dada la distribución de los datos, por ejemplo, la media, la mediana o la moda. Finalmente, podríamos ir con la predicción. Usaremos los dos últimos métodos y confiaré en alguna visualización de datos para guiar nuestras decisiones.","metadata":{"id":"2N6sMTnzdGuQ","cell_id":"8a89db6bea16485185f9b12300741797","deepnote_cell_type":"markdown"}},{"cell_type":"markdown","source":"## Imputacion sensible","metadata":{"id":"j9bCj2oadzp2","cell_id":"33b2f33649d84a6d81b29ddde0a4ec38","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full.dtypes","metadata":{"id":"8SlTBp4ve88B","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"90d451c5abb34ca1990097430f147345","outputId":"7753aa03-38a1-4d76-ba06-60fc32e07d5e","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":289,"user_tz":180,"timestamp":1650298313893},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId int64\nSurvived float64\nPclass int64\nName object\nSex object\nAge float64\nSibSp int64\nParch int64\nTicket object\nFare float64\nCabin object\nEmbarked object\nTitle object\nSurname object\nFsize int64\nFamily object\nFsizeD object\nDeck object\ndtype: object"},"metadata":{},"execution_count":117}],"execution_count":117},{"cell_type":"code","source":"full.Embarked.isnull().sum()","metadata":{"id":"3VuNAN5hfjJq","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"0ceb05c8aa654b84af5db40898d2c1a0","outputId":"5763c492-36f4-4d2d-b842-42f5041b0a96","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":399,"user_tz":180,"timestamp":1650298319500},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"2"},"metadata":{},"execution_count":118}],"execution_count":118},{"cell_type":"code","source":" # Pasajeros 62 y 830 no tienen datos\n full[(full.PassengerId == 62) | (full.PassengerId == 830)]['Embarked']","metadata":{"id":"_tZBx302fBN1","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"ff762ac88de4481d9565ac3160d7817c","outputId":"5719eb09-9164-456c-d30b-d5bf5393e7e5","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":180,"timestamp":1650298320894},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"61 NaN\n829 NaN\nName: Embarked, dtype: object"},"metadata":{},"execution_count":119}],"execution_count":119},{"cell_type":"code","source":"# elimiar los ids de pasajeros\nembark_fare = full[(full.PassengerId !=62)|(full.PassengerId !=830)]\nembark_fare","metadata":{"id":"e6IQaPa9fveg","colab":{"height":520,"base_uri":"https://localhost:8080/"},"cell_id":"b6d86a129ebd4aef8cc0592f247a8fa7","outputId":"fcee2067-1c7e-4b9b-ebc6-4a2431e8054f","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":7,"user_tz":180,"timestamp":1650298322434},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" PassengerId Survived Pclass \\\n0 1 0.0 3 \n1 2 1.0 1 \n2 3 1.0 3 \n3 4 1.0 1 \n4 5 0.0 3 \n.. ... ... ... \n413 1305 NaN 3 \n414 1306 NaN 1 \n415 1307 NaN 3 \n416 1308 NaN 3 \n417 1309 NaN 3 \n\n Name Sex Age SibSp \\\n0 Braund, Mr. Owen Harris male 22.0 1 \n1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n2 Heikkinen, Miss. Laina female 26.0 0 \n3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n4 Allen, Mr. William Henry male 35.0 0 \n.. ... ... ... ... \n413 Spector, Mr. Woolf male NaN 0 \n414 Oliva y Ocana, Dona. Fermina female 39.0 0 \n415 Saether, Mr. Simon Sivertsen male 38.5 0 \n416 Ware, Mr. Frederick male NaN 0 \n417 Peter, Master. Michael J male NaN 1 \n\n Parch Ticket Fare Cabin Embarked Title \\\n0 0 A/5 21171 7.2500 NaN S Mr \n1 0 PC 17599 71.2833 C85 C Mrs \n2 0 STON/O2. 3101282 7.9250 NaN S Miss \n3 0 113803 53.1000 C123 S Mrs \n4 0 373450 8.0500 NaN S Mr \n.. ... ... ... ... ... ... \n413 0 A.5. 3236 8.0500 NaN S Mr \n414 0 PC 17758 108.9000 C105 C Rare Title \n415 0 SOTON/O.Q. 3101262 7.2500 NaN S Mr \n416 0 359309 8.0500 NaN S Mr \n417 1 2668 22.3583 NaN C Master \n\n Surname Fsize Family FsizeD Deck \n0 Braund 2 Braund_2 small N \n1 Cumings 2 Cumings_2 small C \n2 Heikkinen 1 Heikkinen_1 singleton N \n3 Futrelle 2 Futrelle_2 small C \n4 Allen 1 Allen_1 singleton N \n.. ... ... ... ... ... \n413 Spector 1 Spector_1 singleton N \n414 Oliva y Ocana 1 Oliva y Ocana_1 singleton C \n415 Saether 1 Saether_1 singleton N \n416 Ware 1 Ware_1 singleton N \n417 Peter 3 Peter_3 small N \n\n[1309 rows x 18 columns]","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
PassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitleSurnameFsizeFamilyFsizeDDeck
010.03Braund, Mr. Owen Harrismale22.010A/5 211717.2500NaNSMrBraund2Braund_2smallN
121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.010PC 1759971.2833C85CMrsCumings2Cumings_2smallC
231.03Heikkinen, Miss. Lainafemale26.000STON/O2. 31012827.9250NaNSMissHeikkinen1Heikkinen_1singletonN
341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.01011380353.1000C123SMrsFutrelle2Futrelle_2smallC
450.03Allen, Mr. William Henrymale35.0003734508.0500NaNSMrAllen1Allen_1singletonN
.........................................................
4131305NaN3Spector, Mr. WoolfmaleNaN00A.5. 32368.0500NaNSMrSpector1Spector_1singletonN
4141306NaN1Oliva y Ocana, Dona. Ferminafemale39.000PC 17758108.9000C105CRare TitleOliva y Ocana1Oliva y Ocana_1singletonC
4151307NaN3Saether, Mr. Simon Sivertsenmale38.500SOTON/O.Q. 31012627.2500NaNSMrSaether1Saether_1singletonN
4161308NaN3Ware, Mr. FrederickmaleNaN003593098.0500NaNSMrWare1Ware_1singletonN
4171309NaN3Peter, Master. Michael JmaleNaN11266822.3583NaNCMasterPeter3Peter_3smallN
\n

1309 rows × 18 columns

\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":120}],"execution_count":120},{"cell_type":"markdown","source":"Inferiremos sus valores para el embarque en base a los datos actuales que imaginamos pueden ser relevantes: clase de pasajero y tarifa. Vemos que pagaron $80 y $NA respectivamente y sus clases son 1 y NA. Entonces, ¿de dónde se embarcaron?","metadata":{"id":"qf2nOUy8gOZn","cell_id":"69ba022c46c14ee4a0b8475fe32058f2","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"plt.figure(figsize=(10,6))\nsns.boxplot(x='Embarked',y= 'Fare',hue='Pclass',data= embark_fare)\nplt.axhline(y = 80, color = 'r', linestyle = '--')","metadata":{"id":"lKSEhlmsgOBa","colab":{"height":405,"base_uri":"https://localhost:8080/"},"cell_id":"53ddc5680f2d41b780f5248cd5e7fe9c","outputId":"aae78336-255c-4c40-8839-1c046ce389d6","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":687,"user_tz":180,"timestamp":1650298328791},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":""},"metadata":{},"execution_count":121},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":121},{"cell_type":"markdown","source":"¡Voila! La tarifa mediana para un pasajero de primera clase que sale de Charbourg (\"C\") coincide muy bien con los $80 que pagan nuestros pasajeros con problemas de embarque. Creo que podemos reemplazar con seguridad los valores NA con 'C'.","metadata":{"id":"RZUAO1tyhOlA","cell_id":"f563514018c747638596d539c7a09721","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"# Dado que su tarifa era de $ 80 por primera clase, lo más probable es que se embarcaran desde 'C' \nfull.loc[full.PassengerId == 62, 'Embarked']= 'C'\nfull.loc[full.PassengerId == 380, 'Embarked']= 'C'\nfull.Embarked.value_counts() # Verificamos ","metadata":{"id":"-k1NJVPHhTeU","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"31f13b000f3048c99590dffebd7a9e4a","outputId":"b64d56b2-a7e1-4d60-9842-acb5a5b1316a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":180,"timestamp":1650298330723},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"S 913\nC 272\nQ 123\nName: Embarked, dtype: int64"},"metadata":{},"execution_count":122}],"execution_count":122},{"cell_type":"markdown","source":"Estamos cerca de arreglar el puñado de valores de NA aquí y allá. El pasajero de la fila 1043 tiene un valor de tarifa NA.","metadata":{"id":"1low24kUiAjg","cell_id":"d38458ec379748ebb38529837b161fd6","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full.iloc[1043,:]","metadata":{"id":"7LkA-raqiCyf","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"a80e67bb669643259bb147f8f546a910","outputId":"1d018e3d-283d-493f-f9bd-d82f6416f0c7","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":415,"user_tz":180,"timestamp":1650298333219},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId 1044\nSurvived NaN\nPclass 3\nName Storey, Mr. Thomas\nSex male\nAge 60.5\nSibSp 0\nParch 0\nTicket 3701\nFare NaN\nCabin NaN\nEmbarked S\nTitle Mr\nSurname Storey\nFsize 1\nFamily Storey_1\nFsizeD singleton\nDeck N\nName: 152, dtype: object"},"metadata":{},"execution_count":123}],"execution_count":123},{"cell_type":"markdown","source":"Este es un pasajero de tercera clase que partió de Southampton ('S'). Visualicemos las tarifas entre todos los demás compartiendo su clase y embarque (n = 494).","metadata":{"id":"-RjLPzuJiY4A","cell_id":"3b7e19f9e8c24225bf131c2eb8a06c1a","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full[(full.Pclass ==3)& (full.Embarked == 'S')]['Fare'].median()","metadata":{"id":"pVJsvks8kFLc","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"170061f5d64241b2ae36341ff8fa84dd","outputId":"2d28856f-2b42-4927-d800-327f09ede9eb","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":406,"user_tz":180,"timestamp":1650298337639},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"8.05"},"metadata":{},"execution_count":124}],"execution_count":124},{"cell_type":"code","source":"plt.figure(figsize=(10,6))\nsns.distplot(full[(full.Pclass ==3)& (full.Embarked == 'S')]['Fare'],kde=True,hist=False)\nplt.axvline(x = full[(full.Pclass ==3)& (full.Embarked == 'S')]['Fare'].median(), color = 'r', linestyle = '--')","metadata":{"id":"XEGuZxBliqxb","colab":{"height":460,"base_uri":"https://localhost:8080/"},"cell_id":"f1774cb780a441ba8b307d5ad5c4a239","outputId":"d3120b34-f0c8-4120-a55b-6b92e0d7bbf3","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":375,"user_tz":180,"timestamp":1650298339409},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stderr","text":"/usr/local/lib/python3.7/dist-packages/seaborn/distributions.py:2619: FutureWarning: `distplot` is a deprecated function and will be removed in a future version. Please adapt your code to use either `displot` (a figure-level function with similar flexibility) or `kdeplot` (an axes-level function for kernel density plots).\n warnings.warn(msg, FutureWarning)\n"},{"output_type":"execute_result","data":{"text/plain":""},"metadata":{},"execution_count":125},{"output_type":"display_data","data":{"text/plain":"
","image/png":"iVBORw0KGgoAAAANSUhEUgAAAmcAAAFzCAYAAAB7Ha4BAAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4yLjIsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+WH4yJAAAgAElEQVR4nOzdeZxcdZ3v/9enqvd976S7k3RnIyQBEgiLgMgVF2BkUUFcR5QZdBxmBh3vvYxX/flTfzo6zqhzBx256hXRGXBAMSIOIyKKspiFAAnZ1053J+l93+v7++NUh7bpJL3UqXOq+/18PL6Pqq46deqTaZm88z3n+/macw4RERERCYdI0AWIiIiIyCsUzkRERERCROFMREREJEQUzkRERERCROFMREREJEQUzkRERERCJC3oAhKlrKzM1dbWBl2GhEUs5j1G9O8PEREJny1btrQ458one2/OhLPa2lo2b94cdBkiIiIiZ2Rmh0/1nqYVZG76xje8ISIikmIUzmRu+tGPvCEiIpJiFM5EREREQkThTERERCREFM5EREREQkThTERERCRE5kwrDZE/8uSTQVcgIiIyI5o5ExEREQkRhTOZm77yFW+IiIikGIUzmZseecQbIiIiKUbhTERERCREFM5EREREQkThTOYc5xxNnQMcbe/np9saONjSG3RJIiIiU6ZWGjLn/NsfjlDdOQLA39y/jbSI8dM7LmNNVWHAlYmIiJyZZs5kTtnf3MPnHnmZ7/yvu7l413P84m9eS2F2Op98eDuxmAu6PBERkTPyNZyZ2dVmttvM9pnZXZO8n2lmD8Tff87MauOvv8fMto0bMTNb52etkvqGRmLcef82stOjfOXm88jOiHL2wgI+ce3ZPH+kgx9trg+6RBERkTPyLZyZWRS4G7gGWA28y8xWTzjsNqDdObcc+CrwJQDn3A+dc+ucc+uA9wEHnXPb/KpV5obv/v4gLzV08sW3nUvl1/8BPvc5AN52fjUX1ZXw9/+5i7beoYCrFBEROT0/Z84uAvY55w4454aA+4EbJhxzA3Bv/PmDwFVmZhOOeVf8syKn5JzjPzbXc1FdCVevXQC/+pU3ADPj8zeupWdghH95Yl/AlYqIiJyen+GsGhh/Helo/LVJj3HOjQCdQOmEY24B/t2nGmWO2HWsm/3NvVx3XtWk76+szOeNqyv52YuNjOreMxERCbFQLwgws4uBPufc9lO8f7uZbTazzc3NzUmuTsLkkRcbiRhcs3bBKY+55pyFNHcPsuVwexIrExERmR4/w1kDsGjczzXx1yY9xszSgEKgddz77+Q0s2bOuXuccxuccxvKy8sTUrSkHuccj7zYxGXLyyjLyzzlca9fVUFGWoRHX2pKYnUiIiLT42c42wSsMLM6M8vAC1obJxyzEXh//PlNwBPOOQdgZhHgHeh+MzmD7Q1dHG7t4y3nLnzlxdJSb4yTl5nGlSvL+cX2JrXVEBGR0PItnMXvIbsDeAzYCfzIObfDzD5rZtfHD/sOUGpm+4CPAePbbVwB1DvnDvhVo8wNP3uxkbSI8eY14y5pPvSQNya49pyFHO8a5Pl6XdoUEZFw8nWHAOfco8CjE1779LjnA8DNp/jsk8AlftYnqc85x89fbOK1K8ooysk44/FXnV1BRjTCz188xgVLSpJQoYiIyPSEekGAyJnsOd5DQ0c/16xd+Mdv/N3feWOC/Kx0rlhZpkubIiISWgpnktLGVl5eVDdhFuyZZ7wxiavXLqSpc4Cdx7r8Lk9ERGTaFM4kpW0+3EZZXgZLSnOm/JnXLPMWCmw+pPvOREQkfBTOJKVtPdzO+YuLefXGEqdWXZRNVWEWmw61+ViZiIjIzCicScpq7h7kUGsfFywpnvZnN9SWsOlQG/HOLSIiIqGhcCYpa+x+sw21k4SzmhpvnMKFtcUc7xrkaHu/X+WJiIjMiK+tNET8tPVIOxnRCGuqCl/95g9+cNrPbqj1FhBsOtTGopKp368mIiLiN82cScrafKiNc2oKyUqPTvuzKyvzyc9KY5MWBYiISMgonElKGhgeZXtD16nvN7vzTm+cQjRibFhSzGYtChARkZBROJOUtL2hk6HR2KnD2bZt3jiNDbUl7D3RQ3vvkA8VioiIzIzCmaSkscUA5y+e/krNMRfG7zsbO5eIiEgYKJxJStpW38HikhzK8zNnfI5zawrJiEbYdFiXNkVEJDwUziQl7TrWzeqFBbM6R1Z6lDXVBTx/pCNBVYmIiMyewpmknP6hUQ619nLWgvxTH7RypTfOYG1VITubutSMVkREQkPhTFLOnuPdOAerThfO7rnHG2ewuqqA7oERNaMVEZHQUDiTlLP7WDfA6WfOpmjs0uiOxs5Zn0tERCQRFM4k5ew61k1WeoQlpbmnPuj2271xBmctyCcaMV5u7EpghSIiIjOn7Zsk5ew+3sXKSi9UndKePVM6V1Z6lGXluexQOBMRkZDQzJmknN3HujmrcvaXNMesqSrk5SaFMxERCQeFM0kpzd2DtPQMJeR+szGrFxbQ1DlAm3YKEBGREFA4k5Qythhg1YLZ9Tgbb02Vdy7ddyYiImGgcCYpZdcxL0CtWniGmbN167wxBWdrxaaIiISIFgRIStl9rJuyvAzK8s6wbdPXvjblcxbnZlBVmKX7zkREJBQ0cyYpZdex7oTebzZmdVWhVmyKiEgoKJxJyhiNOfYc7+asyincb/be93pjilZXFXCguYf+odFZVCgiIjJ7CmeSMg639jI4Ejv9tk1jjh71xhStqSog5l65p01ERCQoCmeSMg409wKwvDIv4ece65u290RPws8tIiIyHQpnkjIOtnjhbGnZabZtmqFFJTlkpkXYe7w74ecWERGZDoUzSRkHWnopzkmnKCcj4eeORoxl5XmaORMRkcCplYakjIMtPdRNddbsNa+Z9vlXVOax+VD7tD8nIiKSSApnkjIOtvRy+fLyqR38xS9O+/wrKvL46bZGegdHyM3UfxoiIhIMXdaUlNA7OMLxrkGWlif+frMxyyu8RQH7m3VpU0REgqNwJilhbDHAlC9rvv3t3piGFfFVoHuOK5yJiEhwdO1GUsK0w1lr67S/Y0lJDhnRCHtPaMWmiIgERzNnkhLGwlltqX+XNdOiEZaW57JPM2ciIhIgX8OZmV1tZrvNbJ+Z3TXJ+5lm9kD8/efMrHbce+ea2TNmtsPMXjKzLD9rlXA72NJLVWEW2RlRX79neYXaaYiISLB8C2dmFgXuBq4BVgPvMrPVEw67DWh3zi0Hvgp8Kf7ZNOAHwIedc2uAK4Fhv2qV8DvQ0kudj4sBxqyoyKe+vU97bIqISGD8nDm7CNjnnDvgnBsC7gdumHDMDcC98ecPAleZmQFvAl50zr0A4Jxrdc7pb8t5yjnHweae6V3SvOoqb0zTiso8nNOKTRERCY6fCwKqgfpxPx8FLj7VMc65ETPrBEqBlYAzs8eAcuB+59yXJ36Bmd0O3A6wePHihP8BJBza+4bpGhiZ+mIAgE99akbftTK+YnPviW7WVhfO6BwiIiKzEdYFAWnA5cB74o9vNbNXTYM45+5xzm1wzm0oL59ic1JJOQdbvFksP3ucjVlSmktaxNirRQEiIhIQP8NZA7Bo3M818dcmPSZ+n1kh0Io3y/Zb51yLc64PeBQ438daJcQONI+10cib+oeuucYb05QejVBXlqtFASIiEhg/w9kmYIWZ1ZlZBvBOYOOEYzYC748/vwl4wjnngMeAc8wsJx7aXge87GOtEmIHW3pJixg1xdlT/1B/vzdmYHlFHvsVzkREJCC+hTPn3AhwB17Q2gn8yDm3w8w+a2bXxw/7DlBqZvuAjwF3xT/bDvwTXsDbBmx1zv3cr1ol3A629LK4JIf0aHKuwi8tz+VIWx/Do7GkfJ+IiMh4vu4Q4Jx7FO+S5PjXPj3u+QBw8yk++wO8dhoyzx1q7WNJaU7Svm9pWR4jMceRtj6WlU/jUqqIiEgChHVBgAjgtdGob+tjcUnywtlYP7WD8XvdREREkkl7a0qodfQN0zM4wqLphrO3vGXG37ksvvDgQEsPUDnj84iIiMyEwpmE2pG2PoDpz5x9/OMz/s7CnHRKczNOrhIVERFJJl3WlFA7Gc6SeM8ZeIsCFM5ERCQICmcSavXtXjhbVDzNcHblld6YobqyXA60KJyJiEjyKZxJqNW39VGam0FuZnKvwC8tz6OlZ5CugeGkfq+IiIjCmYTakba+6S8GSICl8X08dWlTRESSTeFMQu1IkttojFka7292oFk7BYiISHIpnElojYzGaOwYYFHJNLZtSpDFJTlEI8ZB3XcmIiJJplYaElpNnQOMxtzMZs7e8Y5ZfXdGWoRFxdm6rCkiIkmncCahNdZGY0b3nH3kI7P+/qXleezXZU0REUkyXdaU0KqfaQNagL4+b8zC0rJcDrX2Eou5WZ1HRERkOhTOJLSOtPWRFjEWFs7gnrNrr/XGLNSV5zIwHKOpa2BW5xEREZkOhTMJrSNtfVQXZxONWCDfv7RMKzZFRCT5FM4ktOoDaqMxZmm51+tMKzZFRCSZFM4ktOrb+wNpQDumIj+TnIyowpmIiCSVwpmEUvfAMG29Q9PfUzOBzIwlpbkcUjgTEZEkUisNCaX6tn5ghis1AW69NSF11JXlsLOpOyHnEhERmQqFMwml+vaxHmcz3B0gYeEsl8d2HGd4NEZ6VBPNIiLiP/1tI6HU2OHNnFUXzTCctbR4Y5ZqS3MZjTmOtvfP+lwiIiJToXAmodTQ3k9WeoSS3IyZneCmm7wxS3Vl3opN3XcmIiLJonAmodTY2U9VUTZmwfQ4G1NbpnYaIiKSXApnEkoN7f0zv6SZQKW5GeRnpnGoVeFMRESSQ+FMQqmhYyAU4czMqC3L1cyZiIgkjcKZhM7A8CgtPYNUhSCcgXdpUzNnIiKSLGqlIaHT1OltND6rmbO/+IsEVQN1pTn8/MVGBkdGyUyLJuy8IiIik1E4k9AZa6Mxq5mzW25JUDVQV55LzHl7fS6vyE/YeUVERCajy5oSOg3ts+xxBlBf740EqC0dW7HZl5DziYiInI5mziR0Gjr6MYMFhVkzP8n73uc9PvnkrOtRrzMREUkmzZxJ6DR29FORn0lGWjj+51mUk0FRTjoHtShARESSIBx/+4mM09ARjh5n49WW5mrmTEREkkLhTEKnsaM/NG00xtSVKZyJiEhyKJxJqMRijsaOAaqLwxXOaktzaewcYGB4NOhSRERkjtOCAAmVlt5BhkZjs7+s+bd/m5iC4mrLcgA43NrHWQvUTkNERPyjcCah0tjhNaCtKpxlOLvuugRU84q6cRugK5yJiIiffL2saWZXm9luM9tnZndN8n6mmT0Qf/85M6uNv15rZv1mti0+/tXPOiU8TvY4m+1lzd27vZEgtePCmYiIiJ98mzkzsyhwN/BG4Ciwycw2OudeHnfYbUC7c265mb0T+BIw1tp9v3NunV/1STglZHcAgA99yHtMQJ8zgIKsdMryMrQoQEREfOfnzNlFwD7n3AHn3BBwP3DDhGNuAO6NP38QuMrMzMeaJOQaOvrJz0yjMDs96FJepbY0V73ORETEd36Gs2pg/P45R+OvTXqMc24E6ARK4+/VmdnzZvYbM3vtZF9gZreb2WYz29zc3JzY6iUQDSFsozGmVu00REQkCcLaSqMJWOycWw98DPg3MyuYeJBz7h7n3Abn3Iby8vKkFymJ5/U4m8W2TT6qK8vlRPcgvYMjQZciIiJzmJ/hrAFYNO7nmvhrkx5jZmlAIdDqnBt0zrUCOOe2APuBlT7WKiHR0NEfuh5nY8Y2QD+kS5siIuIjP1tpbAJWmFkdXgh7J/DuCcdsBN4PPAPcBDzhnHNmVg60OedGzWwpsAI44GOtEgK9gyN09A0n5rLmJz85+3NMMNbr7FBLH2uqChN+fhEREfAxnDnnRszsDuAxIAp81zm3w8w+C2x2zm0EvgPcZ2b7gDa8AAdwBfBZMxsGYsCHnXNtftUq4TC2UjMh+2q+4Q2zP8cEmjkTEZFk8LUJrXPuUeDRCa99etzzAeDmST73EPCQn7VJ+DQkMpxt2+Y9rktcN5bczDQqCzI50KxwJiIi/tEOARIaJ3cHSEQ4u/NO7zFBfc7G1JbmauZMRER8FdbVmjIPNXT0EY0YlQXhXK0J3opNtdMQERE/KZxJaDR2DLCgIItoJLx9iGvLcmntHaJrYDjoUkREZI5SOJPQaGgPbxuNMScXBWj2TEREfKJwJqHR0NGfmMUAPqrTBugiIuIzLQiQUBiNOY51DSRud4AvfCEx55lgSekrvc5ERET8oHAmoXC8a4DRmKO6KCcxJ7z00sScZ4Ks9ChVhVlasSkiIr7RZU0JhbEGtAmbOXv6aW/4oLYsV5c1RUTEN5o5k1AYa0Bbk6gFAZ/4hPeY4D5n4N139siLTQk/r4iICGjmTEJiLJwtLAz3ggDwwlln/zDtvUNBlyIiInOQwpmEQmNHP0U56eRmhn8yd6ydxkHddyYiIj5QOJNQaGgPfxuNMbVl6nUmIiL+UTiTUGjsGEjMnppJsLgkh4gpnImIiD/Cfw1J5jznHA0d/bxmWWniTvq1ryXuXBNkpEWoLs7mYKt6nYmISOIpnEngugZG6BkcSexlzXXrEneuSdSWagN0ERHxhy5rSuDGepwldF/Nxx/3hk/qyrxw5pzz7TtERGR+0syZBK6hfawBbQLD2ec/7z2+4Q2JO+c4dWW5dA+O0NIzRHl+pi/fISIi85NmziRwjZ0J3h0gCU6u2FQ7DRERSTCFMwlcQ3s/GWkRynJTZwaqbqzXme47ExGRBFM4k8A1dPRTVZhFJGJBlzJlNcXZpEVMiwJERCThFM4kcA0d/YldDJAEadEIi0pydFlTREQSTgsCJHCNHf1csaI8sSf91rcSe75J1JbmcLBFvc5ERCSxFM4kUEMjMU50DyZ+d4Czzkrs+SZRW5bLswfacM5hljqXZEVEJNx0WVMCdaxzAOcS3OMM4Gc/84aPlpbn0T88yrGuAV+/R0RE5hfNnEmgGsYa0CZ65uwf/9F7vO66xJ53nGXxdhoHmntZWJha98yJiEh4aeZMAuVbOEuCpeV5ABxo7gm4EhERmUsUziRQY1s3LShMnQa0YyoLMsnNiLK/WSs2RUQkcRTOJFCNHf2U5WWSlR4NupRpMzOWluexXzNnIiKSQApnEqhU7HE23tLyXA5o5kxERBJICwIkUA0d/axakJ/4E993X+LPOYmlZXlsfKGRgeHRlJz9ExGR8NHMmQTGOUdjR78/iwEWLfKGz5aW5+Kc9tgUEZHEUTiTwLT1DjEwHEt8A1qABx7whs+WlnvtNHTfmYiIJMqUwpmZ/djM/sTMFOYkYRo7vOatvoSzb37TGz5bWjbWTkMzZyIikhhTDVvfAN4N7DWzvzcz//fGkTmvocPblzIVe5yNyc6IUl2UrV5nIiKSMFMKZ865x51z7wHOBw4Bj5vZ02b2ATNLP9XnzOxqM9ttZvvM7K5J3s80swfi7z9nZrUT3l9sZj1m9vHp/KEkNTTEZ85SOZxBfMWm7jkTEZEEmfJlSjMrBW4F/gx4Hvg6Xlj75SmOjwJ3A9cAq4F3mdnqCYfdBrQ755YDXwW+NOH9fwJ+MdUaJbU0tPeTkxGlKOeU+T4lLC3z2mk454IuRURE5oCp3nP2E+ApIAe4zjl3vXPuAefcXwF5p/jYRcA+59wB59wQcD9ww4RjbgDujT9/ELjKzCz+nTcCB4Ed0/kDSepo7Oinqiib+K88ZS2ryKNncIQT3YNBlyIiInPAVPuc/R/n3KPjXzCzTOfcoHNuwyk+Uw3Uj/v5KHDxqY5xzo2YWSdQamYDwP8E3gjokuYc1djZ789iAIAHH/TnvJMYWxSwv7mHyoLU24ZKRETCZaqXNT8/yWvPJLKQCT4DfNU5d9q7rM3sdjPbbGabm5ubfSxH/NDQ7lOPM4CyMm8kwVg7Da3YFBGRRDjtzJmZLcCb3co2s/XA2PWnArxLnKfTAIzvAloTf22yY46aWRpQCLTizbDdZGZfBoqAmJkNOOf+ZfyHnXP3APcAbNiwQTf8pJCB4VFae4eoLvJppul73/Meb73Vn/OPs6Agi5yMqHqdiYhIQpzpsuab8RYB1ODdnD+mG/jEGT67CVhhZnV4IeydeO04xtsIvB9vFu4m4Ann3VX92rEDzOwzQM/EYCapraGjH8C/fTWTGM4iEWNZeR77TiiciYjI7J02nDnn7gXuNbO3O+cems6J4/eQ3QE8BkSB7zrndpjZZ4HNzrmNwHeA+8xsH9CGF+BkHmiMh7OqwtRuozFmRUUezxxoDboMERGZA850WfO9zrkfALVm9rGJ7zvn/mmSj41//1Hg0QmvfXrc8wHg5jOc4zOne19S08lwluI9zsYsr8zjx8830D0wTH5WarcGERGRYJ1pQUBu/DEPyJ9kiMxIQ3s/EYMFhXNjdeOKCu8/B13aFBGR2TrTZc1vxR//3+SUI/NFQ8cAlQVZpEfnxnatKyq8dhp7T/SwfnFxwNWIiEgqm2oT2i+bWYGZpZvZr8ys2cze63dxMnc1dPT5u23To496I0kWleSQkRbRzJmIiMzaVKct3uSc6wLegre35nLgv/tVlMx9jR0D/t5vlpPjjSSJxlds7j3enbTvFBGRuWmq4Wzs8uefAP/hnOv0qR6ZB2IxR1Nnv39tNAC+8Q1vJNGKijz2auZMRERmaarh7BEz2wVcAPzKzMqBAf/KkrmsuWeQ4VHn78zZj37kjSRaUZHH0fZ++oZGkvq9IiIyt0wpnDnn7gIuBTY454aBXl69ibnIlJxsQOvX7gABWVEZ32PzhLZxEhGRmZvqxucAq/D6nY3/zPcTXI/MAw3tY+EsefeEJcPyeDuNvSe6OaemMOBqREQkVU0pnJnZfcAyYBswGn/ZoXAmM/BKA9q5NXO2pDSH9KjpvjMREZmVqc6cbQBWx/e9FJmVxo5+CrLS5lwn/fRohLqyXPYeVzgTEZGZm2o42w4sAJp8rEXmiYaOfv+3bXrySX/PfworKvLZ0ajFzCIiMnNTDWdlwMtm9gdgcOxF59z1vlQlc1pDx4C/DWgDtLwij19sb2JgeJSs9GjQ5YiISAqaajj7jJ9FyPzS0N7HhbU+b3H0la94jx//uL/fM8HKynxizttjc221FgWIiMj0TbWVxm/wdgZIjz/fBGz1sS6Zo7oHhukaGPH/suYjj3gjyVYt9FZs7jqmnQJERGRmprq35p8DDwLfir9UDTzsV1EydzV1er2L5+plzdrSXDLTIuw+1hV0KSIikqKmukPAXwKXAV0Azrm9QIVfRcncNdbjzPeZs4BEI8bKynzNnImIyIxNNZwNOueGxn6IN6JVWw2Ztld2B5ib4Qxg1YJ8djYpnImIyMxMNZz9xsw+AWSb2RuB/wB+5l9ZMlc1dPSTHjXK8zP9/aLsbG8EYNXCAlp6BmnuHjzzwSIiIhNMdbXmXcBtwEvAh4BHgW/7VZTMXUfbvR5n0Yj5+0W/+IW/5z+Nsxd4iwJ2H+v2P4SKiMicM6Vw5pyLmdnDwMPOuWafa5I57Gh7HzXFc/eSJsBZC8ZWbHZx+YqygKsREZFUc9rLmub5jJm1ALuB3WbWbGafTk55Mtc0tPdTk4wNzz/3OW8EoDQvk/L8TC0KEBGRGTnTPWcfxVuleaFzrsQ5VwJcDFxmZh/1vTqZUwaGRznRPUh1MmbOfvUrbwRk1YJ8dqmdhoiIzMCZwtn7gHc55w6OveCcOwC8F/hTPwuTuacxvlJzrl/WBDh7YQF7jvcwMhoLuhQREUkxZwpn6c65lokvxu87S/enJJmrjraPhbMkXNYM2KoF+QyNxDjU2ht0KSIikmLOFM6GZvieyKu8Es7m/szZK4sCdN+ZiIhMz5lWa55nZpPdOGNAlg/1yBzW0NFHWsSoLEjC/3RKS/3/jtNYXpFHNGLsaurmLecGWoqIiKSY04Yz51w0WYXI3He0vZ+FRVn+9zgDeOgh/7/jNDLToiwvz+PlJi0KEBGR6ZnqDgEis3Y0WW00QmJNdQEvNXQGXYaIiKQYhTNJmqQ2oP27v/NGgM6pLqS5e5DjXQOB1iEiIqllqts3iczK4IjX4yxpKzWfeSY533Ma51QXAvDS0U4qV+sWTRERmRrNnElSNHUM4Nz8WKk55uyFBZihS5siIjItCmeSFPOpjcaY3Mw0lpXnsV3hTEREpkHhTJLiaHsfQHK2bgqRc6oL2d6ocCYiIlOncCZJcbS9n2jEWJCMHmcANTXeCNja6kKOdw1yoluLAkREZGq0IECSoqGjn4WFWaRFk/TvgR/8IDnfcwZjiwK2N3Ty+lVaFCAiImfm69+UZna1me02s31mdtck72ea2QPx958zs9r46xeZ2bb4eMHM3upnneK/pLbRCJHVVfFFAUfVjFZERKbGt3BmZlHgbuAaYDXwLjNbPeGw24B259xy4KvAl+Kvbwc2OOfWAVcD3zIzzfKlsKPt/cnd8PzOO70RsLzMNOrKcrViU0REpszPmbOLgH3OuQPOuSHgfuCGCcfcANwbf/4gcJWZmXOuzzk3En89C3A+1ik+GxqJcaxrgOqiJM6cbdvmjRA4p7pQKzZFRGTK/Axn1UD9uJ+Pxl+b9Jh4GOsESgHM7GIz2wG8BHx4XFg7ycxuN7PNZra5ubnZhz+CJEJjRz/OweKS+bN103jnVBdyrGuA5u7BoEsREZEUENrVms6555xza4ALgb8zs1fdTe2cu8c5t8E5t6G8vDz5RcqUHGnz2mgsLp2f4ezcmiIAXqjvCLgSERFJBX6GswZg0bifa+KvTXpM/J6yQqB1/AHOuZ1AD7DWt0rFVyfD2TydOTu3ppC0iLHlSHvQpYiISArwM5xtAlaYWZ2ZZQDvBDZOOGYj8P7485uAJ5xzLv6ZNAAzWwKsAg75WKv4qL6tj8y0COV5mcn70pUrvRECWelRVlcVsPWwwpmIiJyZbysgnXMjZnYH8BgQBb7rnNthZp8FNjvnNgLfAe4zs31AG16AA7gcuMvMhoEY8BHnXItftYq/jrT1sagkh0jEkvel99yTvO+agvMXF/PApnqGR2OkJ+qi4KIAACAASURBVKvXm4iIpCRf21M45x4FHp3w2qfHPR8Abp7kc/cB9/lZmyTP4da+eXtJc8z5S4r53tOH2NXUzTk1hUGXIyIiIaZ/wouvnHPUt/WxKNkNaG+/3RshccGSYgC26r4zERE5A4Uz8VVH3zDdgyMsSvbM2Z493giJqsIsKgsy2aL7zkRE5AwUzsRX832l5hgz4/zFxZo5ExGRM1I4E1/N9x5n412wpJij7f2c6BoIuhQREQkxhTPx1Vg4W5TMfTVDav1i3XcmIiJnpnAmvqpv66MsL4PczCTvW79unTdCZG11ARnRCFuPaKcAERE5tST/jSnzzViPs6T72teS/51nkJkWZW11AZsOtQVdioiIhJhmzsRXR9rU42y8S5aW8uLRTnoGR4IuRUREQkrhTHwzPBqjsaM/mHD23vd6I2QuXVbGaMxp9kxERE5J4Ux809jRT8wRzGXNo0e9ETIXLCkmPWo8s7816FJERCSkFM7EN+px9mrZGVHWLy5WOBMRkVNSOBPfKJxN7jVLS9ne2Eln33DQpYiISAgpnIlvjrT1kRGNUFmQFXQpoXLpslKcg+cOavZMREReTeFMfHO4pY+akmyiEUv+l7/mNd4IoXWLi8hMi/C0Lm2KiMgk1OdMfHOwpZe60txgvvyLXwzme6cgMy3KhbUlPHtA4UxERF5NM2fii1jMcai1l9qygMJZyL1mWSm7jnXT2jMYdCkiIhIyCmfii2NdAwyOxIILZ29/uzdC6tJlpQD8bl9LwJWIiEjYKJyJLw619AIEd1mztdUbIXVuTREluRn8eteJoEsREZGQUTgTXxxsjYezcl3WnEw0Yly5spwn9zQzGnNBlyMiIiGicCa+ONTSS2ZahIVqo3FKrz+7go6+YZ4/0h50KSIiEiIKZ+KLgy19LCnNIRJEG40U8doV5UQjxhO6tCkiIuMonIkvDrX2UhvU/WYAV13ljRArzE7nwtpihTMREfkjCmeScKMxx5HWPuqCbKPxqU95I+Rev6qCXce6aejoD7oUEREJCYUzSbjGjn6GRgNso5FCXr+qEkCzZyIicpLCmSTcwXgbjUAva15zjTdCbll5LktKc3hi5/GgSxERkZBQOJOEOzTWRiPImbP+fm+EnJnxhrMr+f2+Vjr7h4MuR0REQkDhTBLuYEsv2elRKgsygy4lJVx3XhVDozH+a8exoEsREZEQUDiThDvU4u2paaY2GlNxXk0hi0qy+dmLTUGXIiIiIaBwJgl3qLWPurKcoMtIGWbGdedW8ft9LdoIXUREFM4ksYZHY9S39QW7GADgLW/xRoq47rwqRmOOX2zXpU0RkfkuLegCZG452t7PSMwF30bj4x8P9vunadWCfJZX5PGzFxp57yVLgi5HREQCpJkzSah9J3oAWF6RF3AlqWXs0uYfDrVxvGsg6HJERCRACmeSUKEJZ1de6Y0Uct15C3EOfrqtIehSREQkQApnklB7T3RTWZBJQVZ60KWknKXleWxYUsz9f6jHORd0OSIiEhBfw5mZXW1mu81sn5ndNcn7mWb2QPz958ysNv76G81si5m9FH98vZ91SuLsP9ET/KxZCnv3xYs50NLLswfagi5FREQC4ls4M7MocDdwDbAaeJeZrZ5w2G1Au3NuOfBV4Evx11uA65xz5wDvB+7zq05JHOcc+070sLxc4Wymrj1nIQVZafz7H44EXYqIiATEz5mzi4B9zrkDzrkh4H7ghgnH3ADcG3/+IHCVmZlz7nnnXGP89R1Atpmp3XzINXUO0Ds0yvLK/KBLSVlZ6VHedn4N/7n9GG29Q0GXIyIiAfAznFUD9eN+Php/bdJjnHMjQCdQOuGYtwNbnXPqzhlyJxcDhGHm7B3v8EYKevfFixkajfHQlqNBlyIiIgEIdZ8zM1uDd6nzTad4/3bgdoDFixcnsTKZzN54OFtRGYJw9pGPBF3BjK2szGfDkmL+7Q9HuO3yOiIRbYMlIjKf+Dlz1gAsGvdzTfy1SY8xszSgEGiN/1wD/AT4U+fc/sm+wDl3j3Nug3NuQ3l5eYLLl+nad6KHopx0SnMzgi4F+vq8kaL+9NJaDrb08sudx4MuRUREkszPcLYJWGFmdWaWAbwT2DjhmI14N/wD3AQ84ZxzZlYE/By4yzn3ex9rlATaH18MEIoNz6+91hsp6tq1C1hcksM3n9yvthoiIvOMb+Esfg/ZHcBjwE7gR865HWb2WTO7Pn7Yd4BSM9sHfAwYa7dxB7Ac+LSZbYuPCr9qlcTYe6I7HJc054C0aIQ/v2Ip2+o7eO6g2mqIiMwnvt5z5px7FHh0wmufHvd8ALh5ks99Hvi8n7VJYrX2DNLeN8yyMCwGmCNuvqCGrz++h28+uZ9Llk5cJyMiInOVdgiQhNgblm2b5pCs9CgfuKyO3+xpZkdjZ9DliIhIkiicSULsO7lSUz3OEum9lywhPzONrz2+N+hSREQkSRTOJCH2neghJyNKVWFW0KV4br3VGymuMDudD71uKb98+TibD+neMxGR+UDhTBJiz/FulleEZKUmzJlwBvDBy+uoyM/ki7/YpZWbIiLzgMKZzJpzjp1NXZy9oCDoUl7R0uKNOSAnI42PvnElWw63818vq++ZiMhcp3Ams3a8y1upefbCEN1vdtNN3pgjbr6ghmXluXzpP3cxNBILuhwREfGRwpnM2s6mLgDOXhiimbM5Ji0a4RPXns2B5l6+/bsDQZcjIiI+UjiTWXs5Hs5WKZz56qqzK7l6zQK+/vhejrSm7tZUIiJyegpnMms7m7qoLsqmMDs96FLmvM9cv4b0aIRP/nS7FgeIiMxRCmcyazubunRJM0kWFGbx8Tet5Ld7mtn4QmPQ5YiIiA8UzmRWBoZHOdjSy+owLQYA+Iu/8MYc9L7X1LJuURGfeng7jR39QZcjIiIJpnAms7L7WDcxF8LFALfc4o05KBoxvnbLOkZijo8+sI3RmC5viojMJQpnMiuhXalZX++NOaq2LJfPXL+G5w628a3f7g+6HBERSSCFM5mVnU1d5GZEWVySE3Qpf+x97/PGHHbzBTX8yTkL+af/2qOtnURE5hCFM5mVnU3dnLUgn0gkJNs2zSNmxhfeeg7Vxdl8+AdbOdY5EHRJIiKSAApnMmPOOXYe00rNIBXmpHPP+zbQNzTCh3+whcGR0aBLEhGRWVI4kxk72t5P98CIwlnAzlqQzz/efB7b6jv4xI/V/0xEJNUpnMmM7WjsBGB1lcJZ0K45ZyF/c9UKHtp6lK8+vjfockREZBbSgi5AUte2+k7So8bqMM6c/e3fBl1B0t35hhU0dvTzz7/aS1VhFu+8aHHQJYmIyAwonMmMvVDfwdkLC8hKjwZdyqtdd13QFSSdmfGFt53Die5B/tfD2ynOzeDNaxYEXZaIiEyTLmvKjIzGHC8e7WDdoqKgS5nc7t3emGfSoxG+8Z7zObemkDv+bSu/3n0i6JJERGSaFM5kRvY399A7NMp5NSENZx/6kDfmodzMNL73gYtYWZnPh+/bwtP7WoIuSUREpkHhTGZkW30HAOeFdeZsnivMTue+2y6mtjSXD3xvE7/epRk0EZFUoXAmM7KtvoP8rDSWluUGXYqcQkluBv/25xezojKPP//+Zn7+YlPQJYmIyBQonMmMvFDfwXk1RdoZIORK8zL5tz+/hPWLi/irf9/KjzbN3f1GRUTmCoUzmbaB4VF2HevmvEWFQZciU1CQlc73P3gxl68o53889CLf+d3BoEsSEZHTUCsNmbbtDZ2MxhzrFhUHXcqpffKTQVcQKtkZUf7Pn17Anfdv43OPvExX/zB3vmEFZpr5FBEJG82cybS9shggxDNnb3iDN+SkzLQo//td67n5ghq+/qu9/I8HX2RoJBZ0WSIiMoFmzmTaXjjaSXVRNhX5WUGXcmrbtnmP69YFW0fIpEUjfPmmc6kqyubrv9pLQ0c/33zvBRRmpwddmoiIxGnmTKbFOceWQ23hbT475s47vSGvYmZ89I0r+crN57HpUBs3ffNp6tv6gi5LRETiFM5kWurb+mnsHOCSpSVBlyKzdNMFNdz7wYs43jXAW7/xNC/EL1eLiEiwFM5kWp490ArAJUtLA65EEuHSZWX8+COXkpUe4ZZ7nmHjC41BlyQiMu8pnMm0PHugldLcDJZX5AVdiiTI8op8Hv7LyzinupC//vfn+dJ/7mI05oIuS0Rk3lI4kylzzvHsgVYuWVqqFgxzTFleJj/8s0t498WL+eaT+/mzezfRNTAcdFkiIvOSwplM2dj9Zhenwv1mX/iCN2TKMtIifOGt5/D5G9fy1N4Wbrz79+xv7gm6LBGRecfXcGZmV5vZbjPbZ2Z3TfJ+ppk9EH//OTOrjb9eama/NrMeM/sXP2uUqXv2YArdb3bppd6QaXvvJUv44Z9dTEffMDf+y+95bMexoEsSEZlXfAtnZhYF7gauAVYD7zKz1RMOuw1od84tB74KfCn++gDwKeDjftUn0/fsgVZKcjNYkQr3mz39tDdkRi5eWsrGOy6jrjyXD923hS8+upORUTWsFRFJBj9nzi4C9jnnDjjnhoD7gRsmHHMDcG/8+YPAVWZmzrle59zv8EKahIBzjucOtHHJ0pLUuN/sE5/whsxYTXEO//Hh1/DeSxbzrd8e4N3ffo4TXfpPUkTEb36Gs2qgftzPR+OvTXqMc24E6ASmfM3MzG43s81mtrm5uXmW5crpHG3vp6GjPzUuaUrCZKZF+fyN5/C1W9bx0tFOrv3n3/HM/tagyxIRmdNSekGAc+4e59wG59yG8vLyoMuZ0363rwVIkfvNJOFuXF/NT++4jILsNN7z7Wf56i/36DKniIhP/AxnDcCicT/XxF+b9BgzSwMKAf2zPIR+tfME1UXZqXG/mfhiZWU+G++4nBvXV/P1X+3lHd96hiOt2vZJRCTR/Axnm4AVZlZnZhnAO4GNE47ZCLw//vwm4AnnnLpfhszA8Ci/39fCVWdXpMb9ZuKbvMw0/ukd6/jnd61n74kerv3np3hoy1H0n62ISOKk+XVi59yImd0BPAZEge8653aY2WeBzc65jcB3gPvMbB/QhhfgADCzQ0ABkGFmNwJvcs697Fe9cmrP7G+lf3iU16+qCLqUqfva14KuYE67/rwqLlhSzEcf2Mbf/scL/Hr3CT53w1qKczOCLk1EJOXZXPkX74YNG9zmzZuDLmNO+uTDL/HjrQ1s/dQbyUqPBl2OhMhozPGvv9nPV3+5h8LsdD593WquP69KM6wiImdgZluccxsmey+lFwSI/5xzPLHzBJcvL0utYPb4494QX0Ujxl/+t+X87K8up6Ykh7+5fxu3/t9N1LfpXjQRkZlSOJPT2tnUTWPnAG84uzLoUqbn85/3hiTF2QsL+PFfXMr/c91qNh1q401f/S3ffuqAVnSKiMyAwpmc1hO7jgNw5Sq1KpHTi0aMD1xWxy8/9jouXVbK53++kzd/7bc8tuOYFgyIiEyDwpmc1uM7T3DeoiIq8rOCLkVSRHVRNt9+/wbued8FAHzovi3c/K/PsOVwW8CViYikBoUzOaUjrX1sq+/gzWtS7JKmBM7MeNOaBTx25xV84a3ncLitj7d/8xlu//5mXm7sCro8EZFQ862VhqS+h7d5PYNvXDdx1y2RqUmLRnj3xYu5cX0V3/3dQf71Nwf4r5ef4tJlpfzZa+u4cmUFkYhWdoqIjKdwJpNyzvGT5xu4ZGkJVUXZQZczfd/6VtAVyDg5GWnc8foVvO81tdz/hyN87+lDfPB7m1lanssHL6vjbedXk5Oh/3ckIgLqcyan8PyRdt76jaf58tvP5R0XLjrzB0SmYXg0xqMvNfHtpw7yUkMnORlR3rS6khvWV/Pa5WWkRXXHhYjMbafrc6Z/qsqkfvJ8A5lpEa4+Z0HQpczMz37mPV53XbB1yKTSoxFuWFfN9edVseVwOw9tbeDRl5p4eFsjZXkZvOXcKv7k3IWsX1SkoCYi845mzuRVhkdjXPT/Pc6ly8u4+93nB13OzFx5pff45JNBViHTMDgyypO7m/nptgYe33mCoZEYRTnpvG5lOa9fVcHrVpZTlKPtoURkbtDMmUzLk7ubae8b5m3rtRBAkiczLcqb1yzgzWsW0DUwzFN7Wnhi1wme3H2Cn25rJGJw/uJirlhZzhUryzmnupCoFhOIyBykcCav8v1nDlGRn8kVK9V4VoJRkJXOn5y7kD85dyGjMceLRzt4YtcJfrOnma8+vod/+uUeinLSuWx5GVesKOOKleUsLEzBhSsiIpNQOJM/sutYF0/tbeG/v/ks0nWvj4RANGKsX1zM+sXF/O2bzqKtd4jf7Wvht3uaeWpvMz9/sQmA5RV5XLGinCtWlnFxXSnZGSm0F6yIyDgKZ/JHvvPUQbLTo7zn4sVBlyIyqZLcDK4/r4rrz6vCOcee4z38dk8zv93bzA+fO8x3f3+QjLQIF9WW8NoVZVx1diXLK/KCLltEZMq0IEBOOtE9wOV//2tuuXARn7txbdDlzE59vfe4SG1A5pOB4VH+cLDtZFjbc7wHgBUVeVy9dgFXr13A6oUFmOleNREJlhYEyJT84JnDDMdifOCy2qBLmT2FsnkpKz16csEAQGNHP798+Ti/2N7E3b/ex/9+Yh+LSrJ5y7lVvHV9NSsr8wOuWETk1TRzJgB0DwxzxZd/zQVLSvj2+ycN8qnlgQe8x1tuCbYOCY3WnkF++fJxHt1+jN/va2E05li9sIC3rq/m+nVVVBZkBV2iiMwjp5s5UzgTAP7hsV3c/ev9bLzjMs6tKQq6nNlTnzM5jZaeQR55oZGfbGvkhfoOzOCyZWXcuL6aq9cuIC9TFxVExF+6rCmn1djRz7efOsiN66rmRjATOYOyvExuvayOWy+r40BzDw9va+Th5xv4+H+8wCcffok3rl7A29ZX89oV2kpKRJJP4Uz4yn/txgEff/NZQZciknRLy/P42BtX8tE3rGDrkQ5+8vxRHnmxiZ+94G0ldd15VbxtfQ1rq7WQQESSQ+Fsntve0MlPnm/gQ1cso6Y4J+hyRAJjZlywpJgLlhTz6bes4de7T/CTrQ388Nkj/N/fH2J5RR5vXV/NDeuq9N+KiPhK4WweGxqJ8T8fepGSnAw+8t+WBV2OSGhkpEVObiXV2TfMIy818pOtDfzDY7v5h8d2c96iIq5du4Br1i5kcamCmogklhYEzGNf/s9dfOPJ/XzrfRfw5jULgi4nsVpavMeysmDrkDnlSGsfj7zUyH9uP8aLRzsBWFNVwNVrFvC6s8pZW1VIRPt9isgUaLWmvMqmQ23c8q1nuOmCGr5803lBlyOScurb+nhsxzEefamJrUc6ACjNzeDyFWW8bmU5l68ooyJf7TlEZHIKZ/JH2nqHuP5ffkfEjEf/5rVzs23A977nPd56a5BVyDzR0jPI7/a28Jv4fp8tPUMA1JbmcH78PrYLlhSzoiKfqGbWRASFMxmnf2iU93z7WbY3dnH/7Zdw/uLioEvyh/qcSUBiMcfLTV38fl8LWw63s/VI+8mwlpeZxqoF+axamM9ZCwo4e0E+Zy3IJz8rPeCqRSTZ1OdMABiNOf76/ud5vr6Db77n/LkbzEQCFIkYa6sLWVtdCIBzjiNtfWw53M7zRzrYdayLnz7fSPfgkZOfKcvLZElpDktKclhcmsOS0hwWFGRTWZBJZUEWuXNxdltCZ2gkRvfAMD2DIwyOxIiYt4o5NyONguw0stOjaieTJPovfp4YHvVWZv7y5eN85rrVXL12YdAlicwLZsaS0lyWlObytvNrAC+wNXT0s/tYN7uOdXO4tZfDrX08c6CVHz/f8Kpz5GWmUZGfSXFuBkXZ6RRmp1OQnU5RTjq5GWlkpkfITIuQlR4lMy1C5thjWpS0iBGNGGlRiz+PkBYxIhF75b2TjxGi8edjfzHL3OKc43BrH1uPtLP3RA8Hmns40tbPsc5+2vuGT/vZzLQIVUXZVBdls6w8lzVVhaypLmBFRT4ZaWrWnEgKZ/NA98AwH/nhVp7a28LH3riSWy+rC7okkXnNzKgpzqGmOIerzq78o/cGhkc52t7P8a6B+BjkRPcAJ7oGae8boqlzgF3HuunqH6Z7cMTXOnMyouRlppGXlUZ+/LEoJ4PyvEzK8jIoz8+kLC8+8r3XMtOivtYk0zMac+xs6mLTobb4aKe5exCAtIixpDSH2tJc1i8uYkFBFoXZ6eRmppGVHsE5iDlH39Aonf3DtPUO0dDRz9H2fh7ccpR7nzkMQEY0worKPNYvLuKyZWW8ZlkpRTkZQf6xU57C2Rx3sKWXv/zhVnYf7+bLbz+Xd1y4KOiSROQ0stKjLK/IY3lF3hmPHRmN0T88yuBIjMGRGAPDowwOxxgcGWUg/jgacyfHyB89xhiJOWKvet0xMuoYicXoHxqlZ3CE7sERegZG6B4YprGji5buwVMGw8LsdCryvcuxFfmZlBdkUpmfRUVBJhX5WVTGH7MzZhfiYjFH9+AIXf3DdA0M0z0w9vyV17r6R+KPw96f1bmTgcM5byYoLyvtjwJoaV4mCwuzqC7KZmFRdsotmBoYHuWF+g42HWrjD4fa2Xq4nZ7476q6KJvLlpVyYV0JG5aUsLQ8l/QZbk8WizkOtfayo7GL7Y2d7Gjo4sdbG/jBs0cwg7VVhVy6vJTXLi/nwrpihfZp0oKAOco5xw+ePcwXHt1FetT43+8+n9etLA+6rOTp6/Mec9QgVMQPA8OjtPQM0tIzREv3IC09gzR3D9LcM8jxrgFOdA9yIj7rNzz66r9nMtIiJ2fj8jLTyM1MIyMaweKXUyMGETOGR72Q2D8cH/HA2DM4wpn++srLTKMgK438rHQy0v743AYMjsS8cw14AXRoJPaqcxRkpbG4NIfl5XmsqMxnWXkeKyrzWFKSE4p9Vzv7htl82JsR23SojZeOdjI06v05VlbmcWFtCRfVlbChtoTqomxfaxkejfFCfQe/29fC0/taeb6+neFRR3Z6lMuWl/K6syq4cmU5i0r0/5dBqzXnnS2H2/nSL3bxh0NtXLGynC+//VwWFKrfkogkn3OOjr5hjscvzZ7o9sJb98AIPYPD9Ax4QatrYISR0RgOiDnAOUadIz0aIScjSnZ6lKz0KDkZUXIy0ijITqcga+wxnYLsNAqyvPvx8uOBb7rhaXBklJaeIZo6+mno6Kepc4DGjn4Ot/ax70QPDR39J49Njxq1pbmsqMxjWXkeS8tzWVrmPfq1+nZwZJRdTd281NDJ9oZOttV3sPt4N8559ZxTXciFtSVcWFvChtriwC8t9g2N8OyBVn69q5kn95ygvs37v9+y8lyuPKuCK88q56K6knk7q6ZwNg845/jDwTb+z1MHeHznCcryMvn4m1Zyy4WL5udNvd/4hvf4kY8EW4eIzBm9gyPsb+5h7/Ee9sUf9zf3cKStj9HYK3+XVuRnemGtPI+60lwqCsbdm5eXQXFOxqQ7ScRijt6hEVp6hjja3sfR9v6Tj/tO9LDnePfJWciinHTOrSniwiXFbKgtYd2iollfKvaTc44DLb08ubuZJ3ef4LmDbQyNxMhOj3LpslKuPKucK8+qmFezagpnc9hYl/IHNtWz90QPhdnp3H7FUj5wWS05Gal1r0RCqc+ZiCTJ0EiMI2297DvRy4GWHg4097K/2Xvs7H/1CkgzyEqLkpkeIWp28l6/3qFXX6pNixgLi7KoLc1lbXUh58bbtNQUZ6f0P7z7h0Z55kBLPKw1c6TNuxVlaXkuly4rZcMSb/avuii1/5ynE1g4M7Orga8DUeDbzrm/n/B+JvB94AKgFbjFOXco/t7fAbcBo8BfO+ceO913zZdw1jUwzJZD7Tx7oJWn9rbwclMXAOfVFPKeS5Zw3blVof7XU9IonIlIwJxzdPWP0NwzGL8/b5CW7kHaeocYiC/giDlH1LwWJ3mZUfKz0inJzaCmOJuakhwq8zNDcW+bn5xzHIzPqv1mTzObD7XROzQKwIKCLC6oLWZdTRGrFuazakEB5fmZAVecGIE0oTWzKHA38EbgKLDJzDY6514ed9htQLtzbrmZvRP4EnCLma0G3gmsAaqAx81spXNu1K96w2Q05jjeNcDh1j7q2/o40tbH/uYedjR2nfzXRXrUWLeoiE9cu4o3r1nAktLcgKsWEZHxzIzCnHQKc9KntPp2vjIzlpbnsbQ8jw9eXsdozLHrWBebD7Wz+XA7Ww618fMXm04eX5aXwVkL8llenseiEq8lzaKSbGqKcyjMnhu7bfh53esiYJ9z7gCAmd0P3ACMD2c3AJ+JP38Q+Bfz5i9vAO53zg0CB81sX/x8z/hY75TEYt5NqieXnceXnI9fhj7++XB8qXvf0Cj9QyP0DY3SO+5598AIbb1DtPYO0dY7SHvvMM3dgydX2wBEI0ZNcTZrqgp4x4Yazl9czPrFxZohExGROScaMa/BbVUh77+0FvD2hN51rItdTd3sPtbNzmNe646JLV3yMtMozcugNDeD0vg9fkU5GeTGF5LkZk54jDdxTo9GSI9a/DFCVnok0FuD/PzmaqB+3M9HgYtPdYxzbsTMOoHS+OvPTvhstX+lnlln3zDrPvdfZ1y6PV25GVFK8jIoiTd2PKvSm7JdXJJzciwsyppxLxoREZFUV5KbwaXLyrh0WdnJ18YuG9e3e1eZ6tv7aOoc8CY8eoaob+tjW30HHX1Dk7ZzOZ3XrSzn3g9elOg/xpSl9B3jZnY7cHv8xx4z2x1kPXNcGdASdBHTNkdvJJ2G1Py9iX5vqUm/t9T0qt/b94Hv3+b79y451Rt+hrMGYHw7+pr4a5Mdc9TM0oBCvIUBU/kszrl7gHsSWLOcgpltPtWNixJe+r2lJv3eUpN+b6kpjL83P6+VbQJWmFmdmWXg3eC/ccIxG4H3x5/fBDzhvOWjG4F3mlmmmdUBK4A/+FiriIiISCj4NnMWv4fsucQiWwAABQRJREFUDuAxvFYa33XO7TCzzwKbnXMbge8A98Vv+G/DC3DEj/sR3uKBEeAv58tKTREREZnf5kwTWvGXmd0ev4wsKUS/t9Sk31tq0u8tNYXx96ZwJiIiIhIi6s8gIiIiEiIKZ3JGZna1me02s31mdlfQ9cjkzGyRmf3azF42sx1m9jfx10vM7Jdmtjf+WBx0rfLHzCxqZs+b2SPxn+vM7Ln/v717DflzjuM4/v7UyOGuLac1G0aWQ8qGNNFyajnFPEDaSuSZcsghPPMAKTk8kiJRSzQKT1CjeGCLmUyNJ3ZAm60cZksMXw+u62837rnn0P3/ufd+1V3/639dd/3urr7/+9P/97t+377mnusfqlJDkkxLsizJx0nWJjnDWmtfklv6z8ePkjybZL8W681wpr80qg3XhcCJwNV9ey215yfg1qo6EZgP3NDfqzuB5VU1B1jeH6stNwFrRx0/ADxcVccCX9O1ulNbHgVerarjgZPp7p+11rAkM4EbgdOq6iS6hxUHrSObqjfDmcbzWxuuqvoRGLThUmOqalNVvd+//o7un8VMuvv1dH/Z08Ci4YxQY0kyC7gYeKI/DnAuXUs78J41J8lUYAHdjgNU1Y9V9Q3W2v/BFGD/fm/VA4BNNFhvhjONZ6w2XENtpaXxJZkNzANWAtOratA1eDMwfUjD0tgeAe4ABg11Dwa+qapB00Brrj1HA1uBp/rp6CeSHIi11rSq+gJ4ENhIF8q+BVbRYL0ZzqRJJskI8AJwc1VtG32u3+TZR7QbkeQSYEtVrRr2WPS3TAFOAR6rqnnADv4whWmttadfA3gZXbg+HDgQuGCog9oNw5nGs0ettNSGJPvQBbOlVfVi//aXSWb052cAW4Y1Pv3JmcClSdbTLRk4l24t07R+2gWsuRZ9DnxeVSv742V0Yc1aa9v5wLqq2lpVO4EX6WqwuXoznGk8e9KGSw3o1yo9CaytqodGnRrdJu0a4KWJHpvGVlV3VdWsqppNV1tvVNVi4E26lnbgPWtOVW0GPktyXP/WeXQdbay1tm0E5ic5oP+8HNy35urNTWg1riQX0a2LGbThunfIQ9IYkpwFvA2sYdf6pbvp1p09DxwJbACurKqvhjJI7VaSs4HbquqSJMfQfZN2ELAaWFJVPwxzfPq9JHPpHuLYF/gUuJbuCw9rrWFJ7gGuonu6fTVwPd0as6bqzXAmSZLUEKc1JUmSGmI4kyRJaojhTJIkqSGGM0mSpIYYziRJkhoyZfxLJGlySvIz3dYjA4uqav2QhiNJgFtpSNqLJdleVSN/83dC99n5y7gXS9I/4LSmJPWSjCRZnuT9JGuSXNa/PzvJJ0meAT4Cjkhye5J3k3zYb2wpSf8JpzUl7c32T/JB/3odcAVweVVtS3IIsCLJoF3ZHOCaqlqRZGF/fDoQ4OUkC6rqrYn+AyRNPoYzSXuz76tq7uCgbxx/X5IFdC2wZgLT+9MbqmpF/3ph/7O6Px6hC2uGM0n/muFMknZZDBwKnFpVO5OsB/brz+0YdV2A+6vq8Qken6S9gGvOJGmXqcCWPpidAxy1m+teA65LMgKQZGaSwyZqkJImN785k6RdlgKvJFkDvAd8PNZFVfV6khOAd7qHN9kOLAG2TNRAJU1ebqUhSZLUEKc1JUmSGmI4kyRJaojhTJIkqSGGM0mSpIYYziRJkhpiOJMkSWqI4UySJKkhhjNJkqSG/AraiuEkMM+41AAAAABJRU5ErkJggg==\n"},"metadata":{"needs_background":"light"}}],"execution_count":125},{"cell_type":"markdown","source":"A partir de esta visualización, parece bastante razonable reemplazar el valor de la tarifa NA con una mediana para su clase y embarque, que es de $8,05.","metadata":{"id":"YtD68Hnrj4GE","cell_id":"bbbd18de53dc4a6999dd3a60c57e5a92","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":" full.iloc[1043,:]","metadata":{"id":"qtHXkLdNl43h","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"277a2c3cefe04742a9655f1ba4276f65","outputId":"adb10ed6-16d6-4899-c56c-2a472ae98d0a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3,"user_tz":180,"timestamp":1650298342254},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId 1044\nSurvived NaN\nPclass 3\nName Storey, Mr. Thomas\nSex male\nAge 60.5\nSibSp 0\nParch 0\nTicket 3701\nFare NaN\nCabin NaN\nEmbarked S\nTitle Mr\nSurname Storey\nFsize 1\nFamily Storey_1\nFsizeD singleton\nDeck N\nName: 152, dtype: object"},"metadata":{},"execution_count":126}],"execution_count":126},{"cell_type":"code","source":"full.loc[full.PassengerId == 1044, 'Fare']= full[(full.Pclass ==3)& (full.Embarked == 'S')]['Fare'].median()\nfull.iloc[1043,:]","metadata":{"id":"kGvHafLtmF0g","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"a738a285503749dba99d8a1726d5f493","outputId":"b8d00522-045b-4214-d438-2c11fcf22495","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":4,"user_tz":180,"timestamp":1650298343763},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId 1044\nSurvived NaN\nPclass 3\nName Storey, Mr. Thomas\nSex male\nAge 60.5\nSibSp 0\nParch 0\nTicket 3701\nFare 8.05\nCabin NaN\nEmbarked S\nTitle Mr\nSurname Storey\nFsize 1\nFamily Storey_1\nFsizeD singleton\nDeck N\nName: 152, dtype: object"},"metadata":{},"execution_count":127}],"execution_count":127},{"cell_type":"markdown","source":"## Imputacion predictiva","metadata":{"id":"_kqnD5eImbZS","cell_id":"ef385687d72441a9927fbc9021c1b8c6","deepnote_cell_type":"markdown"}},{"cell_type":"markdown","source":"Finalmente, como señalamos anteriormente, faltan bastantes valores de Edad en nuestros datos. Vamos a ser un poco más sofisticados en la imputación de valores de edad faltantes. ¿Por qué? Porque podemos. Crearemos un modelo que prediga las edades en función de otras variables.","metadata":{"id":"rQmgybR8tg_h","cell_id":"d090fe8bc4c64755b599503e9ecf2aa5","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"sum(full.Age.isna())","metadata":{"id":"i5WrV9pGmdMa","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"77248a2b12154cccb600e367501344fa","outputId":"68e4902e-8c0a-4108-99ee-80a3caa77913","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":346,"user_tz":180,"timestamp":1650298347097},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"263"},"metadata":{},"execution_count":128}],"execution_count":128},{"cell_type":"code","source":"!pip install fancyimpute","metadata":{"id":"Q6tpIlCHMysJ","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"adc0502760d749f9bd719b3ab0b2c685","outputId":"a9bb7984-60fa-4dfc-8b55-9249364fe5f8","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":3204,"user_tz":180,"timestamp":1650298351469},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"Requirement already satisfied: fancyimpute in /usr/local/lib/python3.7/dist-packages (0.7.0)\nRequirement already satisfied: nose in /usr/local/lib/python3.7/dist-packages (from fancyimpute) (1.3.7)\nRequirement already satisfied: knnimpute>=0.1.0 in /usr/local/lib/python3.7/dist-packages (from fancyimpute) (0.1.0)\nRequirement already satisfied: scikit-learn>=0.24.2 in /usr/local/lib/python3.7/dist-packages (from fancyimpute) (1.0.2)\nRequirement already satisfied: pytest in /usr/local/lib/python3.7/dist-packages (from fancyimpute) (3.6.4)\nRequirement already satisfied: cvxopt in /usr/local/lib/python3.7/dist-packages (from fancyimpute) (1.2.7)\nRequirement already satisfied: cvxpy in /usr/local/lib/python3.7/dist-packages (from fancyimpute) (1.0.31)\nRequirement already satisfied: six in /usr/local/lib/python3.7/dist-packages (from knnimpute>=0.1.0->fancyimpute) (1.15.0)\nRequirement already satisfied: numpy>=1.10 in /usr/local/lib/python3.7/dist-packages (from knnimpute>=0.1.0->fancyimpute) (1.21.5)\nRequirement already satisfied: scipy>=1.1.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.24.2->fancyimpute) (1.4.1)\nRequirement already satisfied: threadpoolctl>=2.0.0 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.24.2->fancyimpute) (3.1.0)\nRequirement already satisfied: joblib>=0.11 in /usr/local/lib/python3.7/dist-packages (from scikit-learn>=0.24.2->fancyimpute) (1.1.0)\nRequirement already satisfied: scs>=1.1.3 in /usr/local/lib/python3.7/dist-packages (from cvxpy->fancyimpute) (3.2.0)\nRequirement already satisfied: multiprocess in /usr/local/lib/python3.7/dist-packages (from cvxpy->fancyimpute) (0.70.12.2)\nRequirement already satisfied: osqp>=0.4.1 in /usr/local/lib/python3.7/dist-packages (from cvxpy->fancyimpute) (0.6.2.post0)\nRequirement already satisfied: ecos>=2 in /usr/local/lib/python3.7/dist-packages (from cvxpy->fancyimpute) (2.0.10)\nRequirement already satisfied: qdldl in /usr/local/lib/python3.7/dist-packages (from osqp>=0.4.1->cvxpy->fancyimpute) (0.1.5.post2)\nRequirement already satisfied: dill>=0.3.4 in /usr/local/lib/python3.7/dist-packages (from multiprocess->cvxpy->fancyimpute) (0.3.4)\nRequirement already satisfied: attrs>=17.4.0 in /usr/local/lib/python3.7/dist-packages (from pytest->fancyimpute) (21.4.0)\nRequirement already satisfied: py>=1.5.0 in /usr/local/lib/python3.7/dist-packages (from pytest->fancyimpute) (1.11.0)\nRequirement already satisfied: more-itertools>=4.0.0 in /usr/local/lib/python3.7/dist-packages (from pytest->fancyimpute) (8.12.0)\nRequirement already satisfied: atomicwrites>=1.0 in /usr/local/lib/python3.7/dist-packages (from pytest->fancyimpute) (1.4.0)\nRequirement already satisfied: pluggy<0.8,>=0.5 in /usr/local/lib/python3.7/dist-packages (from pytest->fancyimpute) (0.7.1)\nRequirement already satisfied: setuptools in /usr/local/lib/python3.7/dist-packages (from pytest->fancyimpute) (57.4.0)\n"}],"execution_count":129},{"cell_type":"code","source":"full.dtypes","metadata":{"id":"jIJD8cSWcoCl","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"8cb4243904704996af281e3e64398942","outputId":"f6433683-2887-4f4a-dad9-0ee49636f25a","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":393,"user_tz":180,"timestamp":1650298360358},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId int64\nSurvived float64\nPclass int64\nName object\nSex object\nAge float64\nSibSp int64\nParch int64\nTicket object\nFare float64\nCabin object\nEmbarked object\nTitle object\nSurname object\nFsize int64\nFamily object\nFsizeD object\nDeck object\ndtype: object"},"metadata":{},"execution_count":130}],"execution_count":130},{"cell_type":"code","source":"from fancyimpute import IterativeImputer\nmice_impute = IterativeImputer()\ntraindatafill = mice_impute.fit_transform(full[['Age','SibSp','Fare','Survived']])\ntraindatafill= pd.DataFrame(traindatafill)","metadata":{"id":"yazEuLQnM1aV","cell_id":"3a24a71026e54ed2b2a0fcc1e15c07cf","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":318,"user_tz":180,"timestamp":1650298376424},"deepnote_cell_type":"code"},"outputs":[],"execution_count":131},{"cell_type":"code","source":"traindatafill[3].isnull().sum()","metadata":{"id":"SvAQ0_Zlct__","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"f7e09d1ccd804606a48a101eb98fad6c","outputId":"60cb93b2-4534-472a-988b-e22bab4030ae","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":282,"user_tz":180,"timestamp":1650298397863},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0"},"metadata":{},"execution_count":134}],"execution_count":134},{"cell_type":"code","source":"plt.figure(figsize=(10,6)) \nplt.subplot(121)\nsns.histplot(full.Age)\nplt.ylim([0,170])\nplt.title('Original Age')\nplt.subplot(122)\nsns.histplot(traindatafill[0])\nplt.ylim([0,170])\nplt.title('Modificacion MICE Age')","metadata":{"id":"2V9Qo9DUQYtK","colab":{"height":421,"base_uri":"https://localhost:8080/"},"cell_id":"e83530c57f2b4bc78697dc350fb0ea2a","outputId":"f9d37d38-9e0a-4559-a907-cd512e70ad68","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1353,"user_tz":180,"timestamp":1650298402150},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"Text(0.5, 1.0, 'Modificacion MICE Age')"},"metadata":{},"execution_count":135},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":135},{"cell_type":"code","source":"# Reasignacion\nfull['Age']= traindatafill[0] \nfull['Survived']= traindatafill[3] \nprint(full.Age.isnull().sum())\nprint(full.Survived.isnull().sum())","metadata":{"id":"vQZ5syj6RErz","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"4a5977d04e9345d0b3371df6811063f0","outputId":"b367d531-0797-4009-ea47-d570fe0c7c30","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1809,"user_tz":180,"timestamp":1650298439808},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":"0\n0\n"}],"execution_count":136},{"cell_type":"markdown","source":"## Feature Engineering Parte II","metadata":{"id":"Sdd3OYDHTnuj","cell_id":"d0ab58095aa048dd8fc6a59d93d6440f","deepnote_cell_type":"markdown"}},{"cell_type":"markdown","source":"Ahora que sabemos la edad de todos, podemos crear un par de nuevas variables dependientes de la edad: 'Child' y 'Mother'. Un niño será simplemente alguien menor de 18 años y una madre es un pasajero que es 1) mujer, 2) tiene más de 18 años, 3) tiene más de 0 hijos (¡no es broma!), y 4) no tiene el título 'Miss'.","metadata":{"id":"kapmgAx4TqGW","cell_id":"0647699bbace4d71bccaaa06a6404aa7","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"full['Survived']=full['Survived'].astype('str')\nfull.dtypes","metadata":{"id":"xgtskUuEVNEt","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"cd0eb2f869c14c10bb400c9e45618fff","outputId":"0a43f6b0-985f-46f1-a216-4db038ccdfa6","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1369,"user_tz":180,"timestamp":1650298486518},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId int64\nSurvived object\nPclass int64\nName object\nSex object\nAge float64\nSibSp int64\nParch int64\nTicket object\nFare float64\nCabin object\nEmbarked object\nTitle object\nSurname object\nFsize int64\nFamily object\nFsizeD object\nDeck object\ndtype: object"},"metadata":{},"execution_count":138}],"execution_count":138},{"cell_type":"code","source":"df_male= full.loc[full.Sex == 'male']\ndf_male.reset_index()\ndf_female= full.loc[full.Sex == 'female']\ndf_female.reset_index()","metadata":{"id":"fLhJ9ZwYVeSy","colab":{"height":606,"base_uri":"https://localhost:8080/"},"cell_id":"c5d8fd93e258490bab49efc77677b172","outputId":"fc4c7d5f-b65d-4aeb-b1fe-5ba6b2478ff6","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":888,"user_tz":180,"timestamp":1650298493643},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":" index PassengerId Survived Pclass \\\n0 1 2 1.0 1 \n1 2 3 1.0 3 \n2 3 4 1.0 1 \n3 8 9 1.0 3 \n4 9 10 1.0 2 \n.. ... ... ... ... \n461 409 1301 0.0 3 \n462 410 1302 0.0 3 \n463 411 1303 0.0 1 \n464 412 1304 1.0 3 \n465 414 1306 1.0 1 \n\n Name Sex Age \\\n0 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.000000 \n1 Heikkinen, Miss. Laina female 26.000000 \n2 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.000000 \n3 Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg) female 27.000000 \n4 Nasser, Mrs. Nicholas (Adele Achem) female 14.000000 \n.. ... ... ... \n461 Peacock, Miss. Treasteall female 20.235103 \n462 Naughton, Miss. Hannah female 32.368066 \n463 Minahan, Mrs. William Edward (Lillian E Thorpe) female 32.295987 \n464 Henriksson, Miss. Jenny Lovisa female 33.000000 \n465 Oliva y Ocana, Dona. Fermina female 44.000000 \n\n SibSp Parch Ticket Fare Cabin Embarked Title \\\n0 1 0 PC 17599 71.2833 C85 C Mrs \n1 0 0 STON/O2. 3101282 7.9250 NaN S Miss \n2 1 0 113803 53.1000 C123 S Mrs \n3 0 2 347742 11.1333 NaN S Mrs \n4 1 0 237736 30.0708 NaN C Mrs \n.. ... ... ... ... ... ... ... \n461 1 1 SOTON/O.Q. 3101315 13.7750 NaN S Miss \n462 0 0 365237 7.7500 NaN Q Miss \n463 1 0 19928 90.0000 C78 Q Mrs \n464 0 0 347086 7.7750 NaN S Miss \n465 0 0 PC 17758 108.9000 C105 C Rare Title \n\n Surname Fsize Family FsizeD Deck \n0 Cumings 2 Cumings_2 small C \n1 Heikkinen 1 Heikkinen_1 singleton N \n2 Futrelle 2 Futrelle_2 small C \n3 Johnson 3 Johnson_3 small N \n4 Nasser 2 Nasser_2 small N \n.. ... ... ... ... ... \n461 Peacock 3 Peacock_3 small N \n462 Naughton 1 Naughton_1 singleton N \n463 Minahan 2 Minahan_2 small C \n464 Henriksson 1 Henriksson_1 singleton N \n465 Oliva y Ocana 1 Oliva y Ocana_1 singleton C \n\n[466 rows x 19 columns]","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
indexPassengerIdSurvivedPclassNameSexAgeSibSpParchTicketFareCabinEmbarkedTitleSurnameFsizeFamilyFsizeDDeck
0121.01Cumings, Mrs. John Bradley (Florence Briggs Th...female38.00000010PC 1759971.2833C85CMrsCumings2Cumings_2smallC
1231.03Heikkinen, Miss. Lainafemale26.00000000STON/O2. 31012827.9250NaNSMissHeikkinen1Heikkinen_1singletonN
2341.01Futrelle, Mrs. Jacques Heath (Lily May Peel)female35.0000001011380353.1000C123SMrsFutrelle2Futrelle_2smallC
3891.03Johnson, Mrs. Oscar W (Elisabeth Vilhelmina Berg)female27.0000000234774211.1333NaNSMrsJohnson3Johnson_3smallN
49101.02Nasser, Mrs. Nicholas (Adele Achem)female14.0000001023773630.0708NaNCMrsNasser2Nasser_2smallN
............................................................
46140913010.03Peacock, Miss. Treasteallfemale20.23510311SOTON/O.Q. 310131513.7750NaNSMissPeacock3Peacock_3smallN
46241013020.03Naughton, Miss. Hannahfemale32.368066003652377.7500NaNQMissNaughton1Naughton_1singletonN
46341113030.01Minahan, Mrs. William Edward (Lillian E Thorpe)female32.295987101992890.0000C78QMrsMinahan2Minahan_2smallC
46441213041.03Henriksson, Miss. Jenny Lovisafemale33.000000003470867.7750NaNSMissHenriksson1Henriksson_1singletonN
46541413061.01Oliva y Ocana, Dona. Ferminafemale44.00000000PC 17758108.9000C105CRare TitleOliva y Ocana1Oliva y Ocana_1singletonC
\n

466 rows × 19 columns

\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":139}],"execution_count":139},{"cell_type":"code","source":"# Ahora miremos la relacion edad vs survival\nplt.figure(figsize=(12,6))\nplt.subplot(121)\nsns.histplot(df_male.loc[df_male['Survived']=='1.0']['Age'], color='orange',label='1')\nsns.histplot(df_male.loc[df_male['Survived']=='0.0']['Age'],color='red',label='0')\nplt.legend()\nplt.title('Male')\nplt.ylim([0,100])\nplt.subplot(122)\nsns.histplot(df_female.loc[df_female['Survived']=='1.0']['Age'], color='orange',label='1')\nsns.histplot(df_female.loc[df_female['Survived']=='0.0']['Age'],color='red',label='0')\nplt.legend()\nplt.title('Female')\nplt.ylim([0,100])","metadata":{"id":"JUxNwuKIRN5l","colab":{"height":421,"base_uri":"https://localhost:8080/"},"cell_id":"ecb9b395e0f84806944e0bde5c167cfd","outputId":"ddaedcc9-469c-40d0-d2f7-5b2beeba21de","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":22,"user_tz":180,"timestamp":1650298503963},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"(0.0, 100.0)"},"metadata":{},"execution_count":140},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":140},{"cell_type":"code","source":"# Crear la columna child\nfull['Child']= np.where(full['Age']<18, 'Child','Adult')\n# mostrar conteos\npd.crosstab(full.Child, full.Survived)","metadata":{"id":"8YT3cVnGXWyZ","colab":{"height":143,"base_uri":"https://localhost:8080/"},"cell_id":"2ab2c90705e940c2ab90af473eb22e49","outputId":"9c34ca15-3310-40b2-bb45-794987ca5ddf","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":8,"user_tz":180,"timestamp":1650298524695},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"Survived 0.0 1.0\nChild \nAdult 714 415\nChild 90 90","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Survived0.01.0
Child
Adult714415
Child9090
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":141}],"execution_count":141},{"cell_type":"markdown","source":"Parece que ser un niño no duele, ¡pero tampoco necesariamente te salvará! Terminaremos nuestra ingeniería de características creando la variable Madre. Tal vez podamos esperar que las madres tengan más probabilidades de haber sobrevivido en el Titanic.","metadata":{"id":"Z968WIvodc55","cell_id":"2f4c0643e50e45b69eda5057fc2734a3","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"# Creando la variable Mother\nfull['Mother'] = 'Not Mother'\nfull.loc[(full.Sex == 'female') & (full.Parch >0) & (full.Age >18) & (full.Title != 'Miss'), 'Mother']= 'Mother'\nfull.Mother.value_counts()","metadata":{"id":"m_NECJKddf0Y","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"386cfdc999f24aad83699c6c8f7a6191","outputId":"279cb2cb-0deb-4c3b-9ae5-0c1bf98c9a37","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":287,"user_tz":180,"timestamp":1650298742385},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"Not Mother 1231\nMother 78\nName: Mother, dtype: int64"},"metadata":{},"execution_count":142}],"execution_count":142},{"cell_type":"code","source":"pd.crosstab(full.Mother, full.Survived)","metadata":{"id":"j9K679N4eHOE","colab":{"height":143,"base_uri":"https://localhost:8080/"},"cell_id":"9761faf048bf43dab9e657a037779337","outputId":"a22fbc08-270f-4b30-b83f-d8bd65ac1e2d","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":509,"user_tz":180,"timestamp":1650298775164},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"Survived 0.0 1.0\nMother \nMother 33 45\nNot Mother 771 460","text/html":"\n
\n
\n
\n\n\n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n \n
Survived0.01.0
Mother
Mother3345
Not Mother771460
\n
\n \n \n \n\n \n
\n
\n "},"metadata":{},"execution_count":143}],"execution_count":143},{"cell_type":"code","source":"full.isnull().sum()","metadata":{"id":"t0ox4cAieYKA","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"873ccd125b064f92898e06e861fa18f6","outputId":"dc82762b-ae98-40da-cb34-690867314f92","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":5,"user_tz":180,"timestamp":1650298821453},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId 0\nSurvived 0\nPclass 0\nName 0\nSex 0\nAge 0\nSibSp 0\nParch 0\nTicket 0\nFare 0\nCabin 1014\nEmbarked 1\nTitle 0\nSurname 0\nFsize 0\nFamily 0\nFsizeD 0\nDeck 0\nChild 0\nMother 0\ndtype: int64"},"metadata":{},"execution_count":144}],"execution_count":144},{"cell_type":"code","source":"full= full.drop(columns='Cabin')\nfull.dropna()\nfull.isnull().sum()","metadata":{"id":"N-XlHd7Yeg-I","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"1c98ee18d44e4ed8957964287d0c735e","outputId":"52b1bad0-a13f-4c5d-d159-37eddca1664c","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":16,"user_tz":180,"timestamp":1650298901368},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"PassengerId 0\nSurvived 0\nPclass 0\nName 0\nSex 0\nAge 0\nSibSp 0\nParch 0\nTicket 0\nFare 0\nEmbarked 1\nTitle 0\nSurname 0\nFsize 0\nFamily 0\nFsizeD 0\nDeck 0\nChild 0\nMother 0\ndtype: int64"},"metadata":{},"execution_count":145}],"execution_count":145},{"cell_type":"markdown","source":"# Prediccion","metadata":{"id":"uJ_Ogsnpeun4","cell_id":"d9e7cbf31d064e7d8010ffe5b780065e","deepnote_cell_type":"markdown"}},{"cell_type":"code","source":"X=full[['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked','Title','FsizeD','Child','Mother']]\ny= full['Survived']","metadata":{"id":"rOtossUve_hz","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"a3c191c9157e4f4da498b6dd914a11bc","outputId":"2c83d8f8-97ee-438b-879c-983b60b220b3","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":300,"user_tz":180,"timestamp":1650299061178},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"0 0.0\n1 1.0\n2 1.0\n3 1.0\n4 0.0\n ... \n413 0.0\n414 1.0\n415 0.0\n416 1.0\n417 1.0\nName: Survived, Length: 1309, dtype: object"},"metadata":{},"execution_count":149}],"execution_count":149},{"cell_type":"code","source":"import numpy as np\nfrom sklearn.model_selection import train_test_split\nX=full[['Pclass','Sex','Age','SibSp','Parch','Fare','Embarked','Title','FsizeD','Child','Mother']]\ny= full['Survived'].astype('str')","metadata":{"id":"w_kE1T8jexFw","cell_id":"7ea0cbd638294abb846bf4e21cd210ed","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1117,"user_tz":180,"timestamp":1650300345785},"deepnote_cell_type":"code"},"outputs":[],"execution_count":172},{"cell_type":"code","source":"A=X[['Pclass','Age','SibSp','Parch','Fare']]\nB=pd.get_dummies(X[['Sex','Embarked','Title','FsizeD','Child','Mother']])\nX_new=pd.concat([A,B],axis=1)","metadata":{"id":"deMZk6Ouimj3","cell_id":"62f23bc47ca64d9d99563f56a2e78be5","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":284,"user_tz":180,"timestamp":1650300349165},"deepnote_cell_type":"code"},"outputs":[],"execution_count":173},{"cell_type":"code","source":"X_train, X_test, y_train, y_test = train_test_split(X_new, y, test_size=0.33, random_state=42)","metadata":{"id":"h_z97SNWkTJj","cell_id":"738fbe118d7345469de689237b2550dd","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":267,"user_tz":180,"timestamp":1650300374317},"deepnote_cell_type":"code"},"outputs":[],"execution_count":174},{"cell_type":"code","source":"from sklearn.ensemble import RandomForestClassifier\nclf = RandomForestClassifier(max_depth=4, random_state=42)\nclf.fit(X_train, y_train)","metadata":{"id":"KZa7JxplfkYf","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"993a7c0c52dd4074a55763afdb730acd","outputId":"6f12b2db-2b03-41e4-afe1-21911f275dc5","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1546,"user_tz":180,"timestamp":1650300386364},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"RandomForestClassifier(max_depth=4, random_state=42)"},"metadata":{},"execution_count":175}],"execution_count":175},{"cell_type":"code","source":"# Obtener predicciones\ny_pred= clf.predict(X_test)","metadata":{"id":"hcnBSMwWkem_","cell_id":"b300fbe16e6e4c1a98afedfac149ab92","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":289,"user_tz":180,"timestamp":1650300451277},"deepnote_cell_type":"code"},"outputs":[],"execution_count":178},{"cell_type":"code","source":"from sklearn.metrics import classification_report\nprint(classification_report(y_pred=y_pred, y_true= y_test))","metadata":{"id":"ZxGK2tOgkau0","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"499d988595ae4293908356950e8739b9","outputId":"12c2fc2f-ab61-472b-aac0-369ba5fd0945","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":6,"user_tz":180,"timestamp":1650300479248},"deepnote_cell_type":"code"},"outputs":[{"output_type":"stream","name":"stdout","text":" precision recall f1-score support\n\n 0.0 0.76 0.84 0.80 267\n 1.0 0.69 0.58 0.63 165\n\n accuracy 0.74 432\n macro avg 0.72 0.71 0.71 432\nweighted avg 0.73 0.74 0.73 432\n\n"}],"execution_count":180},{"cell_type":"code","source":"clf.feature_importances_","metadata":{"id":"GPtxcAGgk55e","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"8bbeb242858a412cbc4d1f4c4e140948","outputId":"4b8f1e76-b723-4b2b-e838-590505be8734","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":419,"user_tz":180,"timestamp":1650300531157},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"array([0.09985367, 0.08764557, 0.04983003, 0.01964413, 0.1008589 ,\n 0.15419572, 0.11766094, 0.01818664, 0.00641462, 0.02050968,\n 0.01440092, 0.02383615, 0.14245717, 0.06134339, 0.00428262,\n 0.02346692, 0.0043631 , 0.01823211, 0.01080604, 0.0110123 ,\n 0.00454172, 0.00645766])"},"metadata":{},"execution_count":181}],"execution_count":181},{"cell_type":"code","source":"len(clf.feature_importances_)","metadata":{"id":"Hu3jMf--lH8v","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"9aab7615606247e2bb62706d881f5bfc","outputId":"6009ecbd-ae7b-401f-f7f7-fad3090e0bad","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1018,"user_tz":180,"timestamp":1650300591889},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"22"},"metadata":{},"execution_count":182}],"execution_count":182},{"cell_type":"code","source":"len(X_train.columns)","metadata":{"id":"rE82cP7UlKyw","colab":{"base_uri":"https://localhost:8080/"},"cell_id":"13c057925d13445cb0b3c28017a3dcc2","outputId":"a81e522b-13ac-4c70-eb4c-78bf37e072d1","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":509,"user_tz":180,"timestamp":1650300628036},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"22"},"metadata":{},"execution_count":186}],"execution_count":186},{"cell_type":"code","source":"plt.barh(X_train.columns, clf.feature_importances_)","metadata":{"id":"jrmqB5WNlFiX","colab":{"height":282,"base_uri":"https://localhost:8080/"},"cell_id":"55a91905355f4170ac71c17ea550574a","outputId":"095d8f44-00ed-4243-cc34-e52203615f19","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":8,"user_tz":180,"timestamp":1650300645706},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":""},"metadata":{},"execution_count":187},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":187},{"cell_type":"code","source":"sorted_idx = clf.feature_importances_.argsort()\nplt.barh(X_train.columns[sorted_idx], clf.feature_importances_[sorted_idx])\nplt.xlabel(\"Random Forest Feature Importance\")","metadata":{"id":"eIq_MMwWlhte","colab":{"height":296,"base_uri":"https://localhost:8080/"},"cell_id":"36ed142f922e4968897c709b033a6155","outputId":"5e1fc2c3-8171-4749-ff54-3af1f72f3150","executionInfo":{"user":{"userId":"09471607480253994520","displayName":"David Francisco Bustos Usta"},"status":"ok","elapsed":1030,"user_tz":180,"timestamp":1650300719485},"deepnote_cell_type":"code"},"outputs":[{"output_type":"execute_result","data":{"text/plain":"Text(0.5, 0, 'Random Forest Feature Importance')"},"metadata":{},"execution_count":188},{"output_type":"display_data","data":{"text/plain":"
","image/png":"\n"},"metadata":{"needs_background":"light"}}],"execution_count":188},{"cell_type":"markdown","source":"\nCreated in deepnote.com \nCreated in Deepnote","metadata":{"created_in_deepnote_cell":true,"deepnote_cell_type":"markdown"}}],"nbformat":4,"nbformat_minor":0,"metadata":{"colab":{"name":"Clase 2.ipynb","provenance":[],"authorship_tag":"ABX9TyPfS2BJ8Dng//02m9e34b+Y","collapsed_sections":[]},"deepnote":{},"kernelspec":{"name":"python3","display_name":"Python 3"},"language_info":{"name":"python"},"deepnote_notebook_id":"1895494b43794a3494191e6dec03da0a","deepnote_execution_queue":[]}}