{ "cells": [
 { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Open the workbook and load its first worksheet (index 0) into sheet_1.\n", "# NOTE(review): xls is not closed in the cells visible here; call xls.close() once it is no longer needed.\n", "xls = pd.ExcelFile('code/titanic3.xls')\n", "sheet_1 = xls.parse(0)" ] },
 { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['pclass', 'survived', 'name', 'sex', 'age', 'sibsp', 'parch', 'ticket',\n", " 'fare', 'cabin', 'embarked', 'boat', 'body', 'home.dest'],\n", " dtype='object')" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Column labels of the loaded sheet.\n", "sheet_1.columns" ] },
 { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n" ] } ], "source": [ "# Show the Python type of the object returned by xls.parse().\n", "print(type(sheet_1))" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Preview only the first rows: displaying the whole frame stores a very large\n", "# HTML + text dump in the saved notebook. Re-run the cell to regenerate the output.\n", "sheet_1.head()" ] },
 { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pclass 1\n", "survived 1\n", "name Allen, Miss.
Elisabeth Walton\n", "sex female\n", "age 29\n", "sibsp 0\n", "parch 0\n", "ticket 24160\n", "fare 211.338\n", "cabin B5\n", "embarked S\n", "boat 2\n", "body NaN\n", "home.dest St Louis, MO\n", "Name: 0, dtype: object" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# First row of the sheet, returned as a Series.\n", "sheet_1.iloc[0]" ] },
 { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Counter({'a': 5, 'b': 2, 'r': 2, 'c': 1, 'd': 1})\n", "[('a', 5), ('b', 2), ('r', 2)]\n" ] } ], "source": [ "from collections import Counter\n", "\n", "# Counter warm-up on a plain string: tally its characters, then take the three most common.\n", "c = Counter('baracadabra')\n", "print(c)\n", "d = c.most_common(3)\n", "print(d)" ] },
 { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Counter({0: 809, 1: 500})\n", "survived: 500\n", "survival rate: 0.3819709702062643\n" ] } ], "source": [ "# Tally the 0/1 values of the 'survived' column.\n", "counter = Counter(sheet_1[\"survived\"].values)\n", "print(counter)\n", "print(\"survived:\", counter[1])\n", "# Share of rows with survived == 1 among all tallied rows.\n", "print(\"survival rate:\", counter[1]/(counter[0]+counter[1]))" ] },
 { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Counter({0: 121})\n" ] } ], "source": [ "# Keep only rows with a non-missing 'body' value and tally 'survived' among them.\n", "counter = Counter(sheet_1.loc[sheet_1[\"body\"].notna(), \"survived\"].values)\n", "print(counter)" ] },
 { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "pclass\n", "1 0.619195\n", "2 0.429603\n", "3 0.255289\n", "Name: survived, dtype: float64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Mean of 'survived' per 'pclass' value. Left as a bare last expression so the cell\n", "# yields an execute_result, which is what the stored output of this cell is.\n", "sheet_1.groupby('pclass')['survived'].mean()" ] },
 { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "sex\n", "female 0.727468\n", "male 0.190985\n", "Name: survived, dtype: float64\n" ] } ], "source": [ "# Mean of 'survived' per 'sex' value.\n", "print(sheet_1.groupby('sex')['survived'].mean())" ] },
 { "cell_type": "code", "execution_count": 144, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pclass sex \n", "1 female 0.965278\n", " male 0.340782\n", "2 female 0.886792\n", " male 0.146199\n", "3 female 0.490741\n", " male 0.152130\n", "Name: survived, dtype: float64\n" ] } ], "source": [ "# Mean of 'survived' for every ('pclass', 'sex') combination.\n", "print(sheet_1.groupby([\"pclass\",\"sex\"])[\"survived\"].mean())" ] },
 { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "age\n", "0.1667 1.000000\n", "0.3333 0.000000\n", "0.4167 1.000000\n", "0.6667 1.000000\n", "0.7500 0.666667\n", "0.8333 1.000000\n", "0.9167 1.000000\n", "1.0000 0.700000\n", "2.0000 0.333333\n", "3.0000 0.714286\n", "4.0000 0.700000\n", "5.0000 0.800000\n", "6.0000 0.500000\n", "7.0000 0.500000\n", "8.0000 0.666667\n", "9.0000 0.400000\n", "10.0000 0.000000\n", "11.0000 0.250000\n", "11.5000 0.000000\n", "12.0000 1.000000\n", "13.0000 0.600000\n", "14.0000 0.500000\n", "14.5000 0.000000\n", "15.0000 0.833333\n", "16.0000 0.421053\n", "17.0000 0.350000\n", "18.0000 0.358974\n", "18.5000 0.000000\n", "19.0000 0.379310\n", "20.0000 0.347826\n", " ... \n", "46.0000 0.000000\n", "47.0000 0.214286\n", "48.0000 0.714286\n", "49.0000 0.555556\n", "50.0000 0.400000\n", "51.0000 0.375000\n", "52.0000 0.500000\n", "53.0000 1.000000\n", "54.0000 0.500000\n", "55.0000 0.500000\n", "55.5000 0.000000\n", "56.0000 0.500000\n", "57.0000 0.000000\n", "58.0000 0.666667\n", "59.0000 0.333333\n", "60.0000 0.571429\n", "60.5000 0.000000\n", "61.0000 0.000000\n", "62.0000 0.400000\n", "63.0000 0.500000\n", "64.0000 0.400000\n", "65.0000 0.000000\n", "66.0000 0.000000\n", "67.0000 0.000000\n", "70.0000 0.000000\n", "70.5000 0.000000\n", "71.0000 0.000000\n", "74.0000 0.000000\n", "76.0000 1.000000\n", "80.0000 1.000000\n", "Name: survived, Length: 98, dtype: float64\n" ] } ], "source": [ "# Mean of 'survived' for each distinct 'age' value.\n", "print(sheet_1.groupby([\"age\"])[\"survived\"].mean())" ] },
 { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Frequency of each age. Counter(sheet_1[\"age\"]) creates a separate key for every\n", "# missing value because NaN != NaN; value_counts(dropna=False) reports all missing\n", "# ages as a single NaN row. Re-run the cell to regenerate the output.\n", "sheet_1[\"age\"].value_counts(dropna=False)" ] },
 { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1309, 14)\n" ] }, { "data": { "text/plain": [ "pclass 1309\n", "survived 1309\n", "name 1309\n", "sex 1309\n", "age 1046\n", "sibsp 1309\n", "parch 1309\n", "ticket 1309\n", "fare 1308\n", "cabin 295\n", "embarked 1307\n", "boat 486\n", "body 121\n", "home.dest 745\n", "dtype: int64" ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Compare the frame's shape with the number of non-missing values in each column.\n", "print(sheet_1.shape)\n", "sheet_1.count()" ] },
 { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# True for columns whose non-missing count equals the row count, i.e. columns without gaps.\n", "criteria = sheet_1.count() == sheet_1.shape[0]" ] },
 { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "pclass True\n", "survived True\n", "name True\n", "sex True\n", "age False\n", "sibsp True\n", "parch True\n", "ticket True\n", "fare False\n", "cabin False\n", "embarked False\n", "boat False\n", "body False\n", "home.dest False\n", "dtype: bool\n" ] } ], "source": [ "print(criteria)" ] },
 { "cell_type": "code", "execution_count": 83, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssurvivednamesexsibspparchticket
011Allen, Miss. Elisabeth Waltonfemale0024160
111Allison, Master. Hudson Trevormale12113781
210Allison, Miss. Helen Lorainefemale12113781
310Allison, Mr. Hudson Joshua Creightonmale12113781
410Allison, Mrs. Hudson J C (Bessie Waldo Daniels)female12113781
511Anderson, Mr. Harrymale0019952
611Andrews, Miss. Kornelia Theodosiafemale1013502
710Andrews, Mr. Thomas Jrmale00112050
811Appleton, Mrs. Edward Dale (Charlotte Lamson)female2011769
910Artagaveytia, Mr. Ramonmale00PC 17609
1010Astor, Col. John Jacobmale10PC 17757
1111Astor, Mrs. John Jacob (Madeleine Talmadge Force)female10PC 17757
1211Aubart, Mme. Leontine Paulinefemale00PC 17477
1311Barber, Miss. Ellen \"Nellie\"female0019877
1411Barkworth, Mr. Algernon Henry Wilsonmale0027042
1510Baumann, Mr. John Dmale00PC 17318
1610Baxter, Mr. Quigg Edmondmale01PC 17558
1711Baxter, Mrs. James (Helene DeLaudeniere Chaput)female01PC 17558
1811Bazzani, Miss. Albinafemale0011813
1910Beattie, Mr. Thomsonmale0013050
2011Beckwith, Mr. Richard Leonardmale1111751
2111Beckwith, Mrs. Richard Leonard (Sallie Monypeny)female1111751
2211Behr, Mr. Karl Howellmale00111369
2311Bidois, Miss. Rosaliefemale00PC 17757
2411Bird, Miss. Ellenfemale00PC 17483
2510Birnbaum, Mr. Jakobmale0013905
2611Bishop, Mr. Dickinson Hmale1011967
2711Bishop, Mrs. Dickinson H (Helen Walton)female1011967
2811Bissette, Miss. Ameliafemale00PC 17760
2911Bjornstrom-Steffansson, Mr. Mauritz Hakanmale00110564
........................
127930Vestrom, Miss. Hulda Amanda Adolfinafemale00350406
128030Vovk, Mr. Jankomale00349252
128130Waelens, Mr. Achillemale00345767
128230Ware, Mr. Frederickmale00359309
128330Warren, Mr. Charles Williammale00C.A. 49867
128430Webber, Mr. Jamesmale00SOTON/OQ 3101316
128530Wenzel, Mr. Linhartmale00345775
128631Whabee, Mrs. George Joseph (Shawneene Abi-Saab)female002688
128730Widegren, Mr. Carl/Charles Petermale00347064
128830Wiklund, Mr. Jakob Alfredmale103101267
128930Wiklund, Mr. Karl Johanmale103101266
129031Wilkes, Mrs. James (Ellen Needs)female10363272
129130Willer, Mr. Aaron (\"Abi Weller\")male003410
129230Willey, Mr. Edwardmale00S.O./P.P. 751
129330Williams, Mr. Howard Hugh \"Harry\"male00A/5 2466
129430Williams, Mr. Lesliemale0054636
129530Windelov, Mr. Einarmale00SOTON/OQ 3101317
129630Wirz, Mr. Albertmale00315154
129730Wiseman, Mr. Phillippemale00A/4. 34244
129830Wittevrongel, Mr. Camillemale00345771
129930Yasbeck, Mr. Antonimale102659
130031Yasbeck, Mrs. Antoni (Selini Alexander)female102659
130130Youseff, Mr. Geriousmale002628
130230Yousif, Mr. Wazlimale002647
130330Yousseff, Mr. Geriousmale002627
130430Zabour, Miss. Hilenifemale102665
130530Zabour, Miss. Thaminefemale102665
130630Zakarian, Mr. Mapriededermale002656
130730Zakarian, Mr. Ortinmale002670
130830Zimmerman, Mr. Leomale00315082
\n", "

1309 rows × 7 columns

\n", "
" ], "text/plain": [ " pclass survived name \\\n", "0 1 1 Allen, Miss. Elisabeth Walton \n", "1 1 1 Allison, Master. Hudson Trevor \n", "2 1 0 Allison, Miss. Helen Loraine \n", "3 1 0 Allison, Mr. Hudson Joshua Creighton \n", "4 1 0 Allison, Mrs. Hudson J C (Bessie Waldo Daniels) \n", "5 1 1 Anderson, Mr. Harry \n", "6 1 1 Andrews, Miss. Kornelia Theodosia \n", "7 1 0 Andrews, Mr. Thomas Jr \n", "8 1 1 Appleton, Mrs. Edward Dale (Charlotte Lamson) \n", "9 1 0 Artagaveytia, Mr. Ramon \n", "10 1 0 Astor, Col. John Jacob \n", "11 1 1 Astor, Mrs. John Jacob (Madeleine Talmadge Force) \n", "12 1 1 Aubart, Mme. Leontine Pauline \n", "13 1 1 Barber, Miss. Ellen \"Nellie\" \n", "14 1 1 Barkworth, Mr. Algernon Henry Wilson \n", "15 1 0 Baumann, Mr. John D \n", "16 1 0 Baxter, Mr. Quigg Edmond \n", "17 1 1 Baxter, Mrs. James (Helene DeLaudeniere Chaput) \n", "18 1 1 Bazzani, Miss. Albina \n", "19 1 0 Beattie, Mr. Thomson \n", "20 1 1 Beckwith, Mr. Richard Leonard \n", "21 1 1 Beckwith, Mrs. Richard Leonard (Sallie Monypeny) \n", "22 1 1 Behr, Mr. Karl Howell \n", "23 1 1 Bidois, Miss. Rosalie \n", "24 1 1 Bird, Miss. Ellen \n", "25 1 0 Birnbaum, Mr. Jakob \n", "26 1 1 Bishop, Mr. Dickinson H \n", "27 1 1 Bishop, Mrs. Dickinson H (Helen Walton) \n", "28 1 1 Bissette, Miss. Amelia \n", "29 1 1 Bjornstrom-Steffansson, Mr. Mauritz Hakan \n", "... ... ... ... \n", "1279 3 0 Vestrom, Miss. Hulda Amanda Adolfina \n", "1280 3 0 Vovk, Mr. Janko \n", "1281 3 0 Waelens, Mr. Achille \n", "1282 3 0 Ware, Mr. Frederick \n", "1283 3 0 Warren, Mr. Charles William \n", "1284 3 0 Webber, Mr. James \n", "1285 3 0 Wenzel, Mr. Linhart \n", "1286 3 1 Whabee, Mrs. George Joseph (Shawneene Abi-Saab) \n", "1287 3 0 Widegren, Mr. Carl/Charles Peter \n", "1288 3 0 Wiklund, Mr. Jakob Alfred \n", "1289 3 0 Wiklund, Mr. Karl Johan \n", "1290 3 1 Wilkes, Mrs. James (Ellen Needs) \n", "1291 3 0 Willer, Mr. Aaron (\"Abi Weller\") \n", "1292 3 0 Willey, Mr. Edward \n", "1293 3 0 Williams, Mr. 
Howard Hugh \"Harry\" \n", "1294 3 0 Williams, Mr. Leslie \n", "1295 3 0 Windelov, Mr. Einar \n", "1296 3 0 Wirz, Mr. Albert \n", "1297 3 0 Wiseman, Mr. Phillippe \n", "1298 3 0 Wittevrongel, Mr. Camille \n", "1299 3 0 Yasbeck, Mr. Antoni \n", "1300 3 1 Yasbeck, Mrs. Antoni (Selini Alexander) \n", "1301 3 0 Youseff, Mr. Gerious \n", "1302 3 0 Yousif, Mr. Wazli \n", "1303 3 0 Yousseff, Mr. Gerious \n", "1304 3 0 Zabour, Miss. Hileni \n", "1305 3 0 Zabour, Miss. Thamine \n", "1306 3 0 Zakarian, Mr. Mapriededer \n", "1307 3 0 Zakarian, Mr. Ortin \n", "1308 3 0 Zimmerman, Mr. Leo \n", "\n", " sex sibsp parch ticket \n", "0 female 0 0 24160 \n", "1 male 1 2 113781 \n", "2 female 1 2 113781 \n", "3 male 1 2 113781 \n", "4 female 1 2 113781 \n", "5 male 0 0 19952 \n", "6 female 1 0 13502 \n", "7 male 0 0 112050 \n", "8 female 2 0 11769 \n", "9 male 0 0 PC 17609 \n", "10 male 1 0 PC 17757 \n", "11 female 1 0 PC 17757 \n", "12 female 0 0 PC 17477 \n", "13 female 0 0 19877 \n", "14 male 0 0 27042 \n", "15 male 0 0 PC 17318 \n", "16 male 0 1 PC 17558 \n", "17 female 0 1 PC 17558 \n", "18 female 0 0 11813 \n", "19 male 0 0 13050 \n", "20 male 1 1 11751 \n", "21 female 1 1 11751 \n", "22 male 0 0 111369 \n", "23 female 0 0 PC 17757 \n", "24 female 0 0 PC 17483 \n", "25 male 0 0 13905 \n", "26 male 1 0 11967 \n", "27 female 1 0 11967 \n", "28 female 0 0 PC 17760 \n", "29 male 0 0 110564 \n", "... ... ... ... ... \n", "1279 female 0 0 350406 \n", "1280 male 0 0 349252 \n", "1281 male 0 0 345767 \n", "1282 male 0 0 359309 \n", "1283 male 0 0 C.A. 49867 \n", "1284 male 0 0 SOTON/OQ 3101316 \n", "1285 male 0 0 345775 \n", "1286 female 0 0 2688 \n", "1287 male 0 0 347064 \n", "1288 male 1 0 3101267 \n", "1289 male 1 0 3101266 \n", "1290 female 1 0 363272 \n", "1291 male 0 0 3410 \n", "1292 male 0 0 S.O./P.P. 751 \n", "1293 male 0 0 A/5 2466 \n", "1294 male 0 0 54636 \n", "1295 male 0 0 SOTON/OQ 3101317 \n", "1296 male 0 0 315154 \n", "1297 male 0 0 A/4. 
34244 \n", "1298 male 0 0 345771 \n", "1299 male 1 0 2659 \n", "1300 female 1 0 2659 \n", "1301 male 0 0 2628 \n", "1302 male 0 0 2647 \n", "1303 male 0 0 2627 \n", "1304 female 1 0 2665 \n", "1305 female 1 0 2665 \n", "1306 male 0 0 2656 \n", "1307 male 0 0 2670 \n", "1308 male 0 0 315082 \n", "\n", "[1309 rows x 7 columns]" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df = sheet_1[criteria.index[criteria]]\n", "df" ] }, { "cell_type": "code", "execution_count": 75, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "(1309, 14)\n", "(1309, 7)\n" ] } ], "source": [ "print(sheet_1.shape)\n", "print(df.shape)" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "from sklearn import preprocessing" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "le = preprocessing.LabelEncoder()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/Users/yueli/anaconda3/lib/python3.7/site-packages/pandas/core/indexing.py:543: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy\n", " self.obj[item] = s\n" ] } ], "source": [ "data = sheet_1[criteria.index[criteria]]\n", "data.loc[0:data.shape[0],'sex'] = le.fit_transform(data.sex)" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "data = data.drop(['name','ticket'],axis=1)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssurvivedsexsibspparch
011000
111112
210012
310112
410012
511100
611010
710100
811020
910100
1010110
1111010
1211000
1311000
1411100
1510100
1610101
1711001
1811000
1910100
2011111
2111011
2211100
2311000
2411000
2510100
2611110
2711010
2811000
2911100
..................
127930000
128030100
128130100
128230100
128330100
128430100
128530100
128631000
128730100
128830110
128930110
129031010
129130100
129230100
129330100
129430100
129530100
129630100
129730100
129830100
129930110
130031010
130130100
130230100
130330100
130430010
130530010
130630100
130730100
130830100
\n", "

1309 rows × 5 columns

\n", "
" ], "text/plain": [ " pclass survived sex sibsp parch\n", "0 1 1 0 0 0\n", "1 1 1 1 1 2\n", "2 1 0 0 1 2\n", "3 1 0 1 1 2\n", "4 1 0 0 1 2\n", "5 1 1 1 0 0\n", "6 1 1 0 1 0\n", "7 1 0 1 0 0\n", "8 1 1 0 2 0\n", "9 1 0 1 0 0\n", "10 1 0 1 1 0\n", "11 1 1 0 1 0\n", "12 1 1 0 0 0\n", "13 1 1 0 0 0\n", "14 1 1 1 0 0\n", "15 1 0 1 0 0\n", "16 1 0 1 0 1\n", "17 1 1 0 0 1\n", "18 1 1 0 0 0\n", "19 1 0 1 0 0\n", "20 1 1 1 1 1\n", "21 1 1 0 1 1\n", "22 1 1 1 0 0\n", "23 1 1 0 0 0\n", "24 1 1 0 0 0\n", "25 1 0 1 0 0\n", "26 1 1 1 1 0\n", "27 1 1 0 1 0\n", "28 1 1 0 0 0\n", "29 1 1 1 0 0\n", "... ... ... ... ... ...\n", "1279 3 0 0 0 0\n", "1280 3 0 1 0 0\n", "1281 3 0 1 0 0\n", "1282 3 0 1 0 0\n", "1283 3 0 1 0 0\n", "1284 3 0 1 0 0\n", "1285 3 0 1 0 0\n", "1286 3 1 0 0 0\n", "1287 3 0 1 0 0\n", "1288 3 0 1 1 0\n", "1289 3 0 1 1 0\n", "1290 3 1 0 1 0\n", "1291 3 0 1 0 0\n", "1292 3 0 1 0 0\n", "1293 3 0 1 0 0\n", "1294 3 0 1 0 0\n", "1295 3 0 1 0 0\n", "1296 3 0 1 0 0\n", "1297 3 0 1 0 0\n", "1298 3 0 1 0 0\n", "1299 3 0 1 1 0\n", "1300 3 1 0 1 0\n", "1301 3 0 1 0 0\n", "1302 3 0 1 0 0\n", "1303 3 0 1 0 0\n", "1304 3 0 0 1 0\n", "1305 3 0 0 1 0\n", "1306 3 0 1 0 0\n", "1307 3 0 1 0 0\n", "1308 3 0 1 0 0\n", "\n", "[1309 rows x 5 columns]" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "from sklearn import model_selection\n", "X = data.drop([\"survived\"], axis=1).values\n", "y = data[\"survived\"].values\n", "X_train, X_test, y_train, y_test = model_selection.train_test_split(X,y,test_size=0.2, shuffle=True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[1, 0, 1, 2],\n", " [2, 0, 0, 0],\n", " [3, 1, 0, 0],\n", " ...,\n", " [3, 1, 0, 0],\n", " [1, 1, 0, 0],\n", " [3, 0, 0, 0]])" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], 
"source": [ "X_train" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "from sklearn.linear_model import LogisticRegression" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "logitreg = LogisticRegression(solver='liblinear')\n", "fit = logitreg.fit(X_train, y_train)" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[-0.80616445, -2.53244105, -0.20512107, 0.04807778]])" ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "fit.coef_" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
pclasssexsibspparch
0-0.806164-2.532441-0.2051210.048078
\n", "
" ], "text/plain": [ " pclass sex sibsp parch\n", "0 -0.806164 -2.532441 -0.205121 0.048078" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "effect_size = pd.DataFrame(fit.coef_)\n", "effect_size.columns = data.drop([\"survived\"], axis=1).columns\n", "effect_size" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "y_train_pred = fit.predict(X_train)\n", "y_test_pred = fit.predict(X_test)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The predictions are already thresholded to be either 0 or 1." ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "[0 0 1 1 0 1 0 0 0 1 1 0 1 1 1 0 0 0 0 0 1 0 0 0 1 0 0 1 1 1 1 0 1 1 1 1 0\n", " 1 1 1 1 1 0 1 0 0 0 0 0 0 1 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 0 1 1 0 0 0 1\n", " 1 0 0 1 1 0 0 0 0 1 0 1 0 0 1 1 0 0 1 0 0 0 0 0 1 1 0 0 0 0 1 0 0 0 1 1 1\n", " 1 1 0 1 0 0 0 0 0 0 0 0 1 0 0 0 1 1 0 0 1 1 0 0 1 0 1 0 0 1 1 1 1 0 1 0 1\n", " 0 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 1 0 0 1 1 1 0 0 1 0 0 1 0 1 0 1 1 0 1 0 1\n", " 1 0 0 0 1 1 0 0 0 1 0 1 1 1 0 0 0 0 0 0 0 0 0 0 1 1 1 1 1 0 0 0 0 0 0 1 0\n", " 1 0 1 0 0 0 0 1 0 0 0 0 1 0 0 0 0 0 0 0 0 0 0 0 0 1 0 1 0 1 0 0 0 0 0 1 0\n", " 0 0 0]\n" ] } ], "source": [ "print(y_test_pred)" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "train accuracy: 0.799; test accuracy: 0.760\n" ] } ], "source": [ "acc_train = sum(y_train_pred==y_train)/len(y_train)\n", "acc_test = sum(y_test_pred==y_test)/len(y_test)\n", "print(f\"train accuracy: {acc_train:.3f}; \\\n", "test accuracy: {acc_test:.3f}\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To get class probabilities from logistic regression, we need to call `predict_proba`:" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ 
"y_test_prob = fit.predict_proba(X_test)" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([[0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.38132888, 0.61867112],\n", " [0.38132888, 0.61867112],\n", " [0.88579894, 0.11420106],\n", " [0.48159095, 0.51840905],\n", " [0.75881787, 0.24118213],\n", " [0.77597541, 0.22402459],\n", " [0.92120054, 0.07879946],\n", " [0.24360185, 0.75639815],\n", " [0.38132888, 0.61867112],\n", " [0.77597541, 0.22402459],\n", " [0.25256948, 0.74743052],\n", " [0.25256948, 0.74743052],\n", " [0.35891714, 0.64108286],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.77597541, 0.22402459],\n", " [0.80961123, 0.19038877],\n", " [0.43075031, 0.56924969],\n", " [0.60735332, 0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.60957787, 0.39042213],\n", " [0.12573608, 0.87426392],\n", " [0.60735332, 0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.23485257, 0.76514743],\n", " [0.43075031, 0.56924969],\n", " [0.38132888, 0.61867112],\n", " [0.13111688, 0.86888312],\n", " [0.90496496, 0.09503504],\n", " [0.12573608, 0.87426392],\n", " [0.13111688, 0.86888312],\n", " [0.10486335, 0.89513665],\n", " [0.38132888, 0.61867112],\n", " [0.88579894, 0.11420106],\n", " [0.38132888, 0.61867112],\n", " [0.10043541, 0.89956459],\n", " [0.38132888, 0.61867112],\n", " [0.38132888, 0.61867112],\n", " [0.13111688, 0.86888312],\n", " [0.88579894, 0.11420106],\n", " [0.4190028 , 0.5809972 ],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.59583071, 0.40416929],\n", " [0.88579894, 0.11420106],\n", " [0.8870884 , 0.1129116 ],\n", " [0.77597541, 0.22402459],\n", " [0.38132888, 0.61867112],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.21583994, 0.78416006],\n", " [0.88579894, 0.11420106],\n", " [0.90496496, 0.09503504],\n", " [0.88579894, 
0.11420106],\n", " [0.65505215, 0.34494785],\n", " [0.60735332, 0.39264668],\n", " [0.90496496, 0.09503504],\n", " [0.90496496, 0.09503504],\n", " [0.77597541, 0.22402459],\n", " [0.60735332, 0.39264668],\n", " [0.59583071, 0.40416929],\n", " [0.77597541, 0.22402459],\n", " [0.55981643, 0.44018357],\n", " [0.88579894, 0.11420106],\n", " [0.13111688, 0.86888312],\n", " [0.13111688, 0.86888312],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.90496496, 0.09503504],\n", " [0.43075031, 0.56924969],\n", " [0.37005301, 0.62994699],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.21583994, 0.78416006],\n", " [0.40734641, 0.59265359],\n", " [0.88579894, 0.11420106],\n", " [0.90074883, 0.09925117],\n", " [0.60735332, 0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.38132888, 0.61867112],\n", " [0.90496496, 0.09503504],\n", " [0.21583994, 0.78416006],\n", " [0.94379673, 0.05620327],\n", " [0.64410908, 0.35589092],\n", " [0.38132888, 0.61867112],\n", " [0.38132888, 0.61867112],\n", " [0.64410908, 0.35589092],\n", " [0.77597541, 0.22402459],\n", " [0.25256948, 0.74743052],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.32926956, 0.67073044],\n", " [0.38132888, 0.61867112],\n", " [0.90496496, 0.09503504],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.90074883, 0.09925117],\n", " [0.10946276, 0.89053724],\n", " [0.88579894, 0.11420106],\n", " [0.65505215, 0.34494785],\n", " [0.88579894, 0.11420106],\n", " [0.10946276, 0.89053724],\n", " [0.35891714, 0.64108286],\n", " [0.37005301, 0.62994699],\n", " [0.13111688, 0.86888312],\n", " [0.38132888, 0.61867112],\n", " [0.88579894, 0.11420106],\n", " [0.12573608, 0.87426392],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.67924126, 0.32075874],\n", " [0.77597541, 
0.22402459],\n", " [0.88579894, 0.11420106],\n", " [0.90496496, 0.09503504],\n", " [0.58420145, 0.41579855],\n", " [0.38132888, 0.61867112],\n", " [0.88579894, 0.11420106],\n", " [0.90496496, 0.09503504],\n", " [0.88579894, 0.11420106],\n", " [0.10946276, 0.89053724],\n", " [0.38132888, 0.61867112],\n", " [0.60735332, 0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.43075031, 0.56924969],\n", " [0.43075031, 0.56924969],\n", " [0.60735332, 0.39264668],\n", " [0.77597541, 0.22402459],\n", " [0.13111688, 0.86888312],\n", " [0.90496496, 0.09503504],\n", " [0.38132888, 0.61867112],\n", " [0.94379673, 0.05620327],\n", " [0.77597541, 0.22402459],\n", " [0.21583994, 0.78416006],\n", " [0.43075031, 0.56924969],\n", " [0.21583994, 0.78416006],\n", " [0.13111688, 0.86888312],\n", " [0.88579894, 0.11420106],\n", " [0.25256948, 0.74743052],\n", " [0.88579894, 0.11420106],\n", " [0.10946276, 0.89053724],\n", " [0.80208997, 0.19791003],\n", " [0.60735332, 0.39264668],\n", " [0.60735332, 0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.77597541, 0.22402459],\n", " [0.38132888, 0.61867112],\n", " [0.77597541, 0.22402459],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.63301335, 0.36698665],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.43075031, 0.56924969],\n", " [0.88579894, 0.11420106],\n", " [0.94379673, 0.05620327],\n", " [0.24360185, 0.75639815],\n", " [0.20781382, 0.79218618],\n", " [0.43075031, 0.56924969],\n", " [0.88579894, 0.11420106],\n", " [0.91393181, 0.08606819],\n", " [0.19242812, 0.80757188],\n", " [0.94379673, 0.05620327],\n", " [0.52083139, 0.47916861],\n", " [0.38132888, 0.61867112],\n", " [0.60735332, 0.39264668],\n", " [0.21583994, 0.78416006],\n", " [0.77597541, 0.22402459],\n", " [0.10486335, 0.89513665],\n", " [0.40734641, 0.59265359],\n", " [0.88579894, 
0.11420106],\n", " [0.24360185, 0.75639815],\n", " [0.88579894, 0.11420106],\n", " [0.21583994, 0.78416006],\n", " [0.13111688, 0.86888312],\n", " [0.88579894, 0.11420106],\n", " [0.92120054, 0.07879946],\n", " [0.77597541, 0.22402459],\n", " [0.17121611, 0.82878389],\n", " [0.35891714, 0.64108286],\n", " [0.88579894, 0.11420106],\n", " [0.59583071, 0.40416929],\n", " [0.88579894, 0.11420106],\n", " [0.13111688, 0.86888312],\n", " [0.92120054, 0.07879946],\n", " [0.11071774, 0.88928226],\n", " [0.21583994, 0.78416006],\n", " [0.38132888, 0.61867112],\n", " [0.88579894, 0.11420106],\n", " [0.60735332, 0.39264668],\n", " [0.77597541, 0.22402459],\n", " [0.88579894, 0.11420106],\n", " [0.64410908, 0.35589092],\n", " [0.90074883, 0.09925117],\n", " [0.59583071, 0.40416929],\n", " [0.77597541, 0.22402459],\n", " [0.88579894, 0.11420106],\n", " [0.80208997, 0.19791003],\n", " [0.38132888, 0.61867112],\n", " [0.38132888, 0.61867112],\n", " [0.48159095, 0.51840905],\n", " [0.38132888, 0.61867112],\n", " [0.38132888, 0.61867112],\n", " [0.88579894, 0.11420106],\n", " [0.65505215, 0.34494785],\n", " [0.88579894, 0.11420106],\n", " [0.60735332, 0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.13111688, 0.86888312],\n", " [0.65505215, 0.34494785],\n", " [0.10946276, 0.89053724],\n", " [0.88579894, 0.11420106],\n", " [0.38132888, 0.61867112],\n", " [0.92875611, 0.07124389],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.55981643, 0.44018357],\n", " [0.37005301, 0.62994699],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.88579894, 0.11420106],\n", " [0.87570526, 0.12429474],\n", " [0.35891714, 0.64108286],\n", " [0.90496496, 0.09503504],\n", " [0.60735332, 0.39264668],\n", " [0.92875611, 0.07124389],\n", " [0.95156933, 0.04843067],\n", " [0.65505215, 0.34494785],\n", " [0.77597541, 0.22402459],\n", " [0.77597541, 0.22402459],\n", " [0.77597541, 0.22402459],\n", " [0.60735332, 
0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.80961123, 0.19038877],\n", " [0.88579894, 0.11420106],\n", " [0.33710487, 0.66289513],\n", " [0.88579894, 0.11420106],\n", " [0.38132888, 0.61867112],\n", " [0.80961123, 0.19038877],\n", " [0.12573608, 0.87426392],\n", " [0.60735332, 0.39264668],\n", " [0.88579894, 0.11420106],\n", " [0.64410908, 0.35589092],\n", " [0.80208997, 0.19791003],\n", " [0.90074883, 0.09925117],\n", " [0.10946276, 0.89053724],\n", " [0.74286276, 0.25713724],\n", " [0.88579894, 0.11420106],\n", " [0.93187249, 0.06812751],\n", " [0.60735332, 0.39264668]])" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_test_prob" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0.11420106, 0.11420106, 0.61867112, 0.61867112, 0.11420106,\n", " 0.51840905, 0.24118213, 0.22402459, 0.07879946, 0.75639815,\n", " 0.61867112, 0.22402459, 0.74743052, 0.74743052, 0.64108286,\n", " 0.11420106, 0.11420106, 0.11420106, 0.22402459, 0.19038877,\n", " 0.56924969, 0.39264668, 0.11420106, 0.39042213, 0.87426392,\n", " 0.39264668, 0.11420106, 0.76514743, 0.56924969, 0.61867112,\n", " 0.86888312, 0.09503504, 0.87426392, 0.86888312, 0.89513665,\n", " 0.61867112, 0.11420106, 0.61867112, 0.89956459, 0.61867112,\n", " 0.61867112, 0.86888312, 0.11420106, 0.5809972 , 0.11420106,\n", " 0.11420106, 0.40416929, 0.11420106, 0.1129116 , 0.22402459,\n", " 0.61867112, 0.11420106, 0.11420106, 0.11420106, 0.78416006,\n", " 0.11420106, 0.09503504, 0.11420106, 0.34494785, 0.39264668,\n", " 0.09503504, 0.09503504, 0.22402459, 0.39264668, 0.40416929,\n", " 0.22402459, 0.44018357, 0.11420106, 0.86888312, 0.86888312,\n", " 0.11420106, 0.11420106, 0.09503504, 0.56924969, 0.62994699,\n", " 0.11420106, 0.11420106, 0.78416006, 0.59265359, 0.11420106,\n", " 0.09925117, 0.39264668, 0.11420106, 0.61867112, 0.09503504,\n", " 0.78416006, 0.05620327, 0.35589092, 0.61867112, 0.61867112,\n", 
" 0.35589092, 0.22402459, 0.74743052, 0.11420106, 0.11420106,\n", " 0.11420106, 0.11420106, 0.11420106, 0.67073044, 0.61867112,\n", " 0.09503504, 0.11420106, 0.11420106, 0.09925117, 0.89053724,\n", " 0.11420106, 0.34494785, 0.11420106, 0.89053724, 0.64108286,\n", " 0.62994699, 0.86888312, 0.61867112, 0.11420106, 0.87426392,\n", " 0.11420106, 0.11420106, 0.11420106, 0.32075874, 0.22402459,\n", " 0.11420106, 0.09503504, 0.41579855, 0.61867112, 0.11420106,\n", " 0.09503504, 0.11420106, 0.89053724, 0.61867112, 0.39264668,\n", " 0.11420106, 0.56924969, 0.56924969, 0.39264668, 0.22402459,\n", " 0.86888312, 0.09503504, 0.61867112, 0.05620327, 0.22402459,\n", " 0.78416006, 0.56924969, 0.78416006, 0.86888312, 0.11420106,\n", " 0.74743052, 0.11420106, 0.89053724, 0.19791003, 0.39264668,\n", " 0.39264668, 0.11420106, 0.22402459, 0.61867112, 0.22402459,\n", " 0.11420106, 0.11420106, 0.11420106, 0.11420106, 0.36698665,\n", " 0.11420106, 0.11420106, 0.11420106, 0.11420106, 0.56924969,\n", " 0.11420106, 0.05620327, 0.75639815, 0.79218618, 0.56924969,\n", " 0.11420106, 0.08606819, 0.80757188, 0.05620327, 0.47916861,\n", " 0.61867112, 0.39264668, 0.78416006, 0.22402459, 0.89513665,\n", " 0.59265359, 0.11420106, 0.75639815, 0.11420106, 0.78416006,\n", " 0.86888312, 0.11420106, 0.07879946, 0.22402459, 0.82878389,\n", " 0.64108286, 0.11420106, 0.40416929, 0.11420106, 0.86888312,\n", " 0.07879946, 0.88928226, 0.78416006, 0.61867112, 0.11420106,\n", " 0.39264668, 0.22402459, 0.11420106, 0.35589092, 0.09925117,\n", " 0.40416929, 0.22402459, 0.11420106, 0.19791003, 0.61867112,\n", " 0.61867112, 0.51840905, 0.61867112, 0.61867112, 0.11420106,\n", " 0.34494785, 0.11420106, 0.39264668, 0.11420106, 0.11420106,\n", " 0.86888312, 0.34494785, 0.89053724, 0.11420106, 0.61867112,\n", " 0.07124389, 0.11420106, 0.11420106, 0.44018357, 0.62994699,\n", " 0.11420106, 0.11420106, 0.11420106, 0.12429474, 0.64108286,\n", " 0.09503504, 0.39264668, 0.07124389, 0.04843067, 0.34494785,\n", " 0.22402459, 
0.22402459, 0.22402459, 0.39264668, 0.11420106,\n", " 0.19038877, 0.11420106, 0.66289513, 0.11420106, 0.61867112,\n", " 0.19038877, 0.87426392, 0.39264668, 0.11420106, 0.35589092,\n", " 0.19791003, 0.09925117, 0.89053724, 0.25713724, 0.11420106,\n", " 0.06812751, 0.39264668])" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "y_test_prob[:,1]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Receiver Operating Characteristic (ROC) curve\n", "\n", "Often we don't want to commit to a single threshold but rather evaluate the model across all thresholds. The ROC curve is a way to do this.\n", "\n", "At each threshold, it calculates two rates:\n", "\n", "- True Positive Rate (TPR) = True Positives / (True Positives + False Negatives)\n", "- False Positive Rate (FPR) = False Positives / (False Positives + True Negatives)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "AUC: 0.794\n" ] }, { "data": { "image/png": "\n", "text/plain": [ "
" ] }, "metadata": { "needs_background": "light" }, "output_type": "display_data" } ], "source": [ "from sklearn.metrics import roc_curve\n", "from sklearn.metrics import roc_auc_score\n", "from matplotlib import pyplot\n", "\n", "# calculate AUC\n", "probs = y_test_prob[:,1]\n", "auc = roc_auc_score(y_test, probs)\n", "print('AUC: %.3f' % auc)\n", "pyplot.plot([0, 1], [0, 1], linestyle='--')\n", "# plot the roc curve for the model\n", "fpr, tpr, thresholds = roc_curve(y_test, probs)\n", "pyplot.plot(fpr, tpr, marker='.')\n", "pyplot.xlabel(\"False Positive Rate\")\n", "pyplot.ylabel(\"True Positive Rate\")\n", "pyplot.show()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.2" } }, "nbformat": 4, "nbformat_minor": 2 }