{ "cells": [ { "cell_type": "markdown", "id": "1ba24d52", "metadata": {}, "source": [ "## 8.2.3" ] }, { "cell_type": "code", "execution_count": 1, "id": "9b6e3509", "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "id": "89cd3712", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "['crowdfunding_info', 'contact_info']" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Read an excel file \n", "crowdfunding_data = pd.ExcelFile(\"../crowdfunding.xlsx\")\n", "# Get sheet names\n", "crowdfunding_data.sheet_names" ] }, { "cell_type": "code", "execution_count": 3, "id": "5ec9289d", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cf_idcompany_nameblurbgoalpledgedoutcomebackers_countcountrycurrencylaunched_atdeadlinestaff_pickspotlightcategory & sub-category
0147Baldwin, Riley and JacksonPre-emptive tertiary standardization1000failed0CACAD15815736001614578400FalseFalsefood/food trucks
11621Odom IncManaged bottom-line architecture140014560successful158USUSD16115544001621918800FalseTruemusic/rock
21812Melton, Robinson and FritzFunction-based leadingedge pricing structure108400142523successful1425AUAUD16081848001640844000FalseFalsetechnology/web
32156Mcdonald, Gonzalez and RossVision-oriented fresh-thinking conglomeration42002477failed24USUSD16347924001642399200FalseFalsemusic/rock
41365Larson-LittleProactive foreground core76005265failed53USUSD16085304001629694800FalseFalsetheater/plays
52057Harris GroupOpen-source optimizing database760013195successful174DKDKK16076664001630213200FalseFalsetheater/plays
61894Ortiz, Coleman and MitchellOperative upward-trending algorithm52001090failed18GBGBP15961716001620709200FalseFalsefilm & video/documentary
72669Carter-GuzmanCentralized cohesive challenge450014741successful227DKDKK16086168001632200400FalseFalsetheater/plays
81114Nunez-RichardsExclusive attitude-oriented intranet11010021946live708DKDKK15863220001615356000FalseFalsetheater/plays
9970Rangel, Holt and JonesOpen-source fresh-thinking model62003208failed44USUSD16288308001630386000FalseFalsemusic/electric music
\n", "
" ], "text/plain": [ " cf_id company_name \\\n", "0 147 Baldwin, Riley and Jackson \n", "1 1621 Odom Inc \n", "2 1812 Melton, Robinson and Fritz \n", "3 2156 Mcdonald, Gonzalez and Ross \n", "4 1365 Larson-Little \n", "5 2057 Harris Group \n", "6 1894 Ortiz, Coleman and Mitchell \n", "7 2669 Carter-Guzman \n", "8 1114 Nunez-Richards \n", "9 970 Rangel, Holt and Jones \n", "\n", " blurb goal pledged outcome \\\n", "0 Pre-emptive tertiary standardization 100 0 failed \n", "1 Managed bottom-line architecture 1400 14560 successful \n", "2 Function-based leadingedge pricing structure 108400 142523 successful \n", "3 Vision-oriented fresh-thinking conglomeration 4200 2477 failed \n", "4 Proactive foreground core 7600 5265 failed \n", "5 Open-source optimizing database 7600 13195 successful \n", "6 Operative upward-trending algorithm 5200 1090 failed \n", "7 Centralized cohesive challenge 4500 14741 successful \n", "8 Exclusive attitude-oriented intranet 110100 21946 live \n", "9 Open-source fresh-thinking model 6200 3208 failed \n", "\n", " backers_count country currency launched_at deadline staff_pick \\\n", "0 0 CA CAD 1581573600 1614578400 False \n", "1 158 US USD 1611554400 1621918800 False \n", "2 1425 AU AUD 1608184800 1640844000 False \n", "3 24 US USD 1634792400 1642399200 False \n", "4 53 US USD 1608530400 1629694800 False \n", "5 174 DK DKK 1607666400 1630213200 False \n", "6 18 GB GBP 1596171600 1620709200 False \n", "7 227 DK DKK 1608616800 1632200400 False \n", "8 708 DK DKK 1586322000 1615356000 False \n", "9 44 US USD 1628830800 1630386000 False \n", "\n", " spotlight category & sub-category \n", "0 False food/food trucks \n", "1 True music/rock \n", "2 False technology/web \n", "3 False music/rock \n", "4 False theater/plays \n", "5 False theater/plays \n", "6 False film & video/documentary \n", "7 False theater/plays \n", "8 False theater/plays \n", "9 False music/electric music " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Transform sheets into dataframes\n", "crowdfunding_info_df = pd.read_excel(crowdfunding_data, sheet_name=\"crowdfunding_info\")\n", "crowdfunding_info_df.head(10)" ] }, { "cell_type": "code", "execution_count": 4, "id": "f91ec41c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0contact_info
1{\"contact_id\": 4661, \"name\": \"Cecilia Velasco\", \"email\": \"cecilia.velasco@rodrigues.fr\"}
2{\"contact_id\": 3765, \"name\": \"Mariana Ellis\", \"email\": \"mariana.ellis@rossi.org\"}
3{\"contact_id\": 4187, \"name\": \"Sofie Woods\", \"email\": \"sofie.woods@riviere.com\"}
4{\"contact_id\": 4941, \"name\": \"Jeanette Iannotti\", \"email\": \"jeanette.iannotti@yahoo.com\"}
5{\"contact_id\": 2199, \"name\": \"Samuel Sorgatz\", \"email\": \"samuel.sorgatz@gmail.com\"}
6{\"contact_id\": 5650, \"name\": \"Socorro Luna\", \"email\": \"socorro.luna@hotmail.com\"}
7{\"contact_id\": 5889, \"name\": \"Carolina Murray\", \"email\": \"carolina.murray@knight.com\"}
8{\"contact_id\": 4842, \"name\": \"Kayla Moon\", \"email\": \"kayla.moon@yahoo.de\"}
9{\"contact_id\": 3280, \"name\": \"Ariadna Geisel\", \"email\": \"ariadna.geisel@rangel.com\"}
\n", "
" ], "text/plain": [ " \n", "0 contact_info\n", "1 {\"contact_id\": 4661, \"name\": \"Cecilia Velasco\", \"email\": \"cecilia.velasco@rodrigues.fr\"}\n", "2 {\"contact_id\": 3765, \"name\": \"Mariana Ellis\", \"email\": \"mariana.ellis@rossi.org\"}\n", "3 {\"contact_id\": 4187, \"name\": \"Sofie Woods\", \"email\": \"sofie.woods@riviere.com\"}\n", "4 {\"contact_id\": 4941, \"name\": \"Jeanette Iannotti\", \"email\": \"jeanette.iannotti@yahoo.com\"}\n", "5 {\"contact_id\": 2199, \"name\": \"Samuel Sorgatz\", \"email\": \"samuel.sorgatz@gmail.com\"}\n", "6 {\"contact_id\": 5650, \"name\": \"Socorro Luna\", \"email\": \"socorro.luna@hotmail.com\"}\n", "7 {\"contact_id\": 5889, \"name\": \"Carolina Murray\", \"email\": \"carolina.murray@knight.com\"}\n", "8 {\"contact_id\": 4842, \"name\": \"Kayla Moon\", \"email\": \"kayla.moon@yahoo.de\"}\n", "9 {\"contact_id\": 3280, \"name\": \"Ariadna Geisel\", \"email\": \"ariadna.geisel@rangel.com\"}" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Increase the column width for next dataframe\n", "pd.set_option('max_colwidth', 400)\n", "contact_info_df = pd.read_excel(crowdfunding_data, sheet_name=\"contact_info\", header=2) # Skip 3 header rows (first is 0)\n", "contact_info_df.head(10)" ] }, { "cell_type": "markdown", "id": "c8fdacca", "metadata": {}, "source": [ "### Sample dataframes " ] }, { "cell_type": "code", "execution_count": 5, "id": "2b1c4747", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
991{\"contact_id\": 5500, \"name\": \"Vincentio Sanders\", \"email\": \"vincentio.sanders@voila.fr\"}
992{\"contact_id\": 3494, \"name\": \"Bettina Norbiato\", \"email\": \"bettina.norbiato@allen-lutz.org\"}
993{\"contact_id\": 3924, \"name\": \"Julio Renner\", \"email\": \"julio.renner@industrias.net\"}
994{\"contact_id\": 3521, \"name\": \"Jelena Stiffel\", \"email\": \"jelena.stiffel@trupp.de\"}
995{\"contact_id\": 5262, \"name\": \"Guadalupe Munoz\", \"email\": \"guadalupe.munoz@murray-hamilton.com.au\"}
996{\"contact_id\": 3684, \"name\": \"Whitney Noack\", \"email\": \"whitney.noack@laboratorios.org\"}
997{\"contact_id\": 5784, \"name\": \"Gelsomina Migliaccio\", \"email\": \"gelsomina.migliaccio@junk.com\"}
998{\"contact_id\": 1498, \"name\": \"Evangelista Pereira\", \"email\": \"evangelista.pereira@thompson-peterson.biz\"}
999{\"contact_id\": 6073, \"name\": \"Gareth Comolli\", \"email\": \"gareth.comolli@tiscali.fr\"}
1000{\"contact_id\": 4939, \"name\": \"Michelangelo Hess\", \"email\": \"michelangelo.hess@bouygtel.fr\"}
\n", "
" ], "text/plain": [ " \n", "991 {\"contact_id\": 5500, \"name\": \"Vincentio Sanders\", \"email\": \"vincentio.sanders@voila.fr\"}\n", "992 {\"contact_id\": 3494, \"name\": \"Bettina Norbiato\", \"email\": \"bettina.norbiato@allen-lutz.org\"}\n", "993 {\"contact_id\": 3924, \"name\": \"Julio Renner\", \"email\": \"julio.renner@industrias.net\"}\n", "994 {\"contact_id\": 3521, \"name\": \"Jelena Stiffel\", \"email\": \"jelena.stiffel@trupp.de\"}\n", "995 {\"contact_id\": 5262, \"name\": \"Guadalupe Munoz\", \"email\": \"guadalupe.munoz@murray-hamilton.com.au\"}\n", "996 {\"contact_id\": 3684, \"name\": \"Whitney Noack\", \"email\": \"whitney.noack@laboratorios.org\"}\n", "997 {\"contact_id\": 5784, \"name\": \"Gelsomina Migliaccio\", \"email\": \"gelsomina.migliaccio@junk.com\"}\n", "998 {\"contact_id\": 1498, \"name\": \"Evangelista Pereira\", \"email\": \"evangelista.pereira@thompson-peterson.biz\"}\n", "999 {\"contact_id\": 6073, \"name\": \"Gareth Comolli\", \"email\": \"gareth.comolli@tiscali.fr\"}\n", "1000 {\"contact_id\": 4939, \"name\": \"Michelangelo Hess\", \"email\": \"michelangelo.hess@bouygtel.fr\"}" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "contact_info_df.tail(10)" ] }, { "cell_type": "code", "execution_count": 6, "id": "eeba7067", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
129{\"contact_id\": 5501, \"name\": \"Venancio Stadelmann\", \"email\": \"venancio.stadelmann@gmx.de\"}
882{\"contact_id\": 3173, \"name\": \"Erin Weinhold\", \"email\": \"erin.weinhold@hotmail.fr\"}
393{\"contact_id\": 3365, \"name\": \"Alessio Bruneau\", \"email\": \"alessio.bruneau@hotmail.co.uk\"}
434{\"contact_id\": 3176, \"name\": \"Stephen Pratesi\", \"email\": \"stephen.pratesi@hotmail.com\"}
263{\"contact_id\": 2761, \"name\": \"Laetitia Gallet\", \"email\": \"laetitia.gallet@aubry.org\"}
116{\"contact_id\": 6026, \"name\": \"Laura Buckley\", \"email\": \"laura.buckley@ryan.org\"}
339{\"contact_id\": 4618, \"name\": \"Heather Zimmer\", \"email\": \"heather.zimmer@bluemel.de\"}
168{\"contact_id\": 4951, \"name\": \"Sergio Abbott\", \"email\": \"sergio.abbott@moore.net.au\"}
739{\"contact_id\": 6028, \"name\": \"Jean Berthelot\", \"email\": \"jean.berthelot@oscuro.it\"}
406{\"contact_id\": 6055, \"name\": \"Emmanuel Dominguez\", \"email\": \"emmanuel.dominguez@gmail.com\"}
\n", "
" ], "text/plain": [ " \n", "129 {\"contact_id\": 5501, \"name\": \"Venancio Stadelmann\", \"email\": \"venancio.stadelmann@gmx.de\"}\n", "882 {\"contact_id\": 3173, \"name\": \"Erin Weinhold\", \"email\": \"erin.weinhold@hotmail.fr\"}\n", "393 {\"contact_id\": 3365, \"name\": \"Alessio Bruneau\", \"email\": \"alessio.bruneau@hotmail.co.uk\"}\n", "434 {\"contact_id\": 3176, \"name\": \"Stephen Pratesi\", \"email\": \"stephen.pratesi@hotmail.com\"}\n", "263 {\"contact_id\": 2761, \"name\": \"Laetitia Gallet\", \"email\": \"laetitia.gallet@aubry.org\"}\n", "116 {\"contact_id\": 6026, \"name\": \"Laura Buckley\", \"email\": \"laura.buckley@ryan.org\"}\n", "339 {\"contact_id\": 4618, \"name\": \"Heather Zimmer\", \"email\": \"heather.zimmer@bluemel.de\"}\n", "168 {\"contact_id\": 4951, \"name\": \"Sergio Abbott\", \"email\": \"sergio.abbott@moore.net.au\"}\n", "739 {\"contact_id\": 6028, \"name\": \"Jean Berthelot\", \"email\": \"jean.berthelot@oscuro.it\"}\n", "406 {\"contact_id\": 6055, \"name\": \"Emmanuel Dominguez\", \"email\": \"emmanuel.dominguez@gmail.com\"}" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Run multiple times to get new random rows each time\n", "contact_info_df.sample(n=10)" ] }, { "cell_type": "code", "execution_count": 7, "id": "71e889c6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cf_idcompany_nameblurbgoalpledgedoutcomebackers_countcountrycurrencylaunched_atdeadlinestaff_pickspotlightcategory & sub-category
9901561Ortiz-RobertsDevolved foreground customer loyalty78006839failed64USUSD16232148001623992400FalseTruefilm & video/drama
9912632Ramirez LLCReduced reciprocal focus group980011091successful241USUSD16074936001622005200FalseTruemusic/rock
992439Morrow IncNetworked global migration310013223successful132USUSD15921108001612850400FalseTruefilm & video/drama
993461Erickson-RogersDe-engineered even-keeled definition98007608canceled75ITEUR16252884001625720400FalseTruephotography/photography books
9941419Leach, Rich and PriceImplemented bi-directional flexibility14110074073failed842USUSD16369560001638856800FalseTruepublishing/translations
9952986Manning-HamiltonVision-oriented scalable definition97300153216successful2043USUSD16092216001622350800FalseTruefood/food trucks
9962031Butler LLCFuture-proofed upward-trending migration66004814failed112USUSD16342740001638252000FalseFalsetheater/plays
9971627Ball LLCRight-sized full-range throughput76004603canceled139ITEUR16361748001639116000FalseFalsetheater/plays
9982175Taylor, Santiago and FloresPolarized composite customer loyalty6660037823failed374USUSD16021332001618117200FalseTruemusic/indie rock
9991788Hernandez, Norton and KelleyExpanded eco-centric policy11110062819canceled1122USUSD16093080001629262800FalseFalsefood/food trucks
\n", "
" ], "text/plain": [ " cf_id company_name \\\n", "990 1561 Ortiz-Roberts \n", "991 2632 Ramirez LLC \n", "992 439 Morrow Inc \n", "993 461 Erickson-Rogers \n", "994 1419 Leach, Rich and Price \n", "995 2986 Manning-Hamilton \n", "996 2031 Butler LLC \n", "997 1627 Ball LLC \n", "998 2175 Taylor, Santiago and Flores \n", "999 1788 Hernandez, Norton and Kelley \n", "\n", " blurb goal pledged outcome \\\n", "990 Devolved foreground customer loyalty 7800 6839 failed \n", "991 Reduced reciprocal focus group 9800 11091 successful \n", "992 Networked global migration 3100 13223 successful \n", "993 De-engineered even-keeled definition 9800 7608 canceled \n", "994 Implemented bi-directional flexibility 141100 74073 failed \n", "995 Vision-oriented scalable definition 97300 153216 successful \n", "996 Future-proofed upward-trending migration 6600 4814 failed \n", "997 Right-sized full-range throughput 7600 4603 canceled \n", "998 Polarized composite customer loyalty 66600 37823 failed \n", "999 Expanded eco-centric policy 111100 62819 canceled \n", "\n", " backers_count country currency launched_at deadline staff_pick \\\n", "990 64 US USD 1623214800 1623992400 False \n", "991 241 US USD 1607493600 1622005200 False \n", "992 132 US USD 1592110800 1612850400 False \n", "993 75 IT EUR 1625288400 1625720400 False \n", "994 842 US USD 1636956000 1638856800 False \n", "995 2043 US USD 1609221600 1622350800 False \n", "996 112 US USD 1634274000 1638252000 False \n", "997 139 IT EUR 1636174800 1639116000 False \n", "998 374 US USD 1602133200 1618117200 False \n", "999 1122 US USD 1609308000 1629262800 False \n", "\n", " spotlight category & sub-category \n", "990 True film & video/drama \n", "991 True music/rock \n", "992 True film & video/drama \n", "993 True photography/photography books \n", "994 True publishing/translations \n", "995 True food/food trucks \n", "996 False theater/plays \n", "997 False theater/plays \n", "998 True music/indie rock \n", "999 False food/food trucks " ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crowdfunding_info_df.tail(10)" ] }, { "cell_type": "code", "execution_count": 8, "id": "b498ea0c", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cf_idcompany_nameblurbgoalpledgedoutcomebackers_countcountrycurrencylaunched_atdeadlinestaff_pickspotlightcategory & sub-category
3522514Adams, Willis and SanchezExpanded hybrid hardware2800977failed33CACAD16193268001625461200FalseFalsetheater/plays
2732440Thomas and SonsRe-engineered heuristic forecast780010704successful282CACAD16044696001626325200FalseFalsetheater/plays
5931813Hale-HayesAmeliorated client-driven open system121600188288successful4006USUSD16098264001637992800FalseFalsefilm & video/animation
11621Odom IncManaged bottom-line architecture140014560successful158USUSD16115544001621918800FalseTruemusic/rock
9681755Gonzalez-WhiteOpen-architected disintermediate budgetary management24008117successful114USUSD16085304001635310800FalseFalsefood/food trucks
7521496Lowery GroupSharable motivating emulation58005362canceled114USUSD16087896001630213200FalseTruetheater/plays
1933195Calhoun, Rogers and LongProgressive discrete hub66003012failed65USUSD15877908001616389200TrueFalsemusic/indie rock
6292940Jackson, Martinez and RayMulti-tiered executive toolset8590055476failed750USUSD16175988001642572000FalseTruetheater/plays
3221373Hebert GroupVisionary asymmetric Graphical User Interface117900196377successful5168USUSD16160436001616389200FalseFalsetheater/plays
470470Grimes, Holland and SloanExtended dedicated archive360010289successful381USUSD16185492001634101200FalseFalsetechnology/wearables
\n", "
" ], "text/plain": [ " cf_id company_name \\\n", "352 2514 Adams, Willis and Sanchez \n", "273 2440 Thomas and Sons \n", "593 1813 Hale-Hayes \n", "1 1621 Odom Inc \n", "968 1755 Gonzalez-White \n", "752 1496 Lowery Group \n", "193 3195 Calhoun, Rogers and Long \n", "629 2940 Jackson, Martinez and Ray \n", "322 1373 Hebert Group \n", "470 470 Grimes, Holland and Sloan \n", "\n", " blurb goal pledged \\\n", "352 Expanded hybrid hardware 2800 977 \n", "273 Re-engineered heuristic forecast 7800 10704 \n", "593 Ameliorated client-driven open system 121600 188288 \n", "1 Managed bottom-line architecture 1400 14560 \n", "968 Open-architected disintermediate budgetary management 2400 8117 \n", "752 Sharable motivating emulation 5800 5362 \n", "193 Progressive discrete hub 6600 3012 \n", "629 Multi-tiered executive toolset 85900 55476 \n", "322 Visionary asymmetric Graphical User Interface 117900 196377 \n", "470 Extended dedicated archive 3600 10289 \n", "\n", " outcome backers_count country currency launched_at deadline \\\n", "352 failed 33 CA CAD 1619326800 1625461200 \n", "273 successful 282 CA CAD 1604469600 1626325200 \n", "593 successful 4006 US USD 1609826400 1637992800 \n", "1 successful 158 US USD 1611554400 1621918800 \n", "968 successful 114 US USD 1608530400 1635310800 \n", "752 canceled 114 US USD 1608789600 1630213200 \n", "193 failed 65 US USD 1587790800 1616389200 \n", "629 failed 750 US USD 1617598800 1642572000 \n", "322 successful 5168 US USD 1616043600 1616389200 \n", "470 successful 381 US USD 1618549200 1634101200 \n", "\n", " staff_pick spotlight category & sub-category \n", "352 False False theater/plays \n", "273 False False theater/plays \n", "593 False False film & video/animation \n", "1 False True music/rock \n", "968 False False food/food trucks \n", "752 False True theater/plays \n", "193 True False music/indie rock \n", "629 False True theater/plays \n", "322 False False theater/plays \n", "470 False False technology/wearables " ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crowdfunding_info_df.sample(n=10)" ] }, { "cell_type": "code", "execution_count": 9, "id": "cbfa1ca7", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 1000 entries, 0 to 999\n", "Data columns (total 14 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 cf_id 1000 non-null int64 \n", " 1 company_name 1000 non-null object\n", " 2 blurb 1000 non-null object\n", " 3 goal 1000 non-null int64 \n", " 4 pledged 1000 non-null int64 \n", " 5 outcome 1000 non-null object\n", " 6 backers_count 1000 non-null int64 \n", " 7 country 1000 non-null object\n", " 8 currency 1000 non-null object\n", " 9 launched_at 1000 non-null int64 \n", " 10 deadline 1000 non-null int64 \n", " 11 staff_pick 1000 non-null bool \n", " 12 spotlight 1000 non-null bool \n", " 13 category & sub-category 1000 non-null object\n", "dtypes: bool(2), int64(6), object(6)\n", "memory usage: 95.8+ KB\n" ] } ], "source": [ "# Get a dataframe summary\n", "crowdfunding_info_df.info()" ] }, { "cell_type": "code", "execution_count": 10, "id": "f0657349", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 1001 entries, 0 to 1000\n", "Data columns (total 1 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 1001 non-null object\n", "dtypes: object(1)\n", "memory usage: 7.9+ KB\n" ] } ], "source": [ "contact_info_df.info()" ] }, { "cell_type": "markdown", "id": "9b7b70f0", "metadata": {}, "source": [ "## 8.3.3" ] }, { "cell_type": "markdown", "id": "9e0402ee", "metadata": {}, "source": [ "### Inspect" ] }, { "cell_type": "code", "execution_count": 11, "id": "05499d8a", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['cf_id', 'company_name', 'blurb', 'goal', 'pledged', 'outcome',\n", " 'backers_count', 'country', 'currency', 'launched_at', 'deadline',\n", " 'staff_pick', 'spotlight', 'category & sub-category'],\n", " dtype='object')" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crowdfunding_info_df.columns" ] }, { "cell_type": "code", "execution_count": 12, "id": "4be2e667", "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array(['food/food trucks', 'music/rock', 'technology/web',\n", " 'theater/plays', 'film & video/documentary',\n", " 'music/electric music', 'film & video/drama', 'music/indie rock',\n", " 'technology/wearables', 'publishing/nonfiction',\n", " 'film & video/animation', 'games/video games',\n", " 'film & video/shorts', 'publishing/fiction',\n", " 'photography/photography books', 'publishing/radio & podcasts',\n", " 'music/metal', 'music/jazz', 'publishing/translations',\n", " 'film & video/television', 'games/mobile games',\n", " 'music/world music', 'film & video/science fiction',\n", " 'journalism/audio'], dtype=object)" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crowdfunding_info_df['category & sub-category'].unique()" ] }, { "cell_type": "code", "execution_count": 13, "id": "262a91ae", "metadata": {}, "outputs": [], "source": [ "# This column has 24 unique values" ] }, { "cell_type": "markdown", "id": "e736cb55", "metadata": {}, "source": [ "### Plan" ] }, { "cell_type": "code", "execution_count": 14, "id": "821d81bc", "metadata": {}, "outputs": [], "source": [ "# Split category and sub-category values using code rather than \"hard coding\" (puttin them in manually)" ] }, { "cell_type": "markdown", "id": "e8b82592", "metadata": {}, "source": [ "### Execute" ] }, { "cell_type": "code", "execution_count": 15, "id": "e94eee70", "metadata": {}, "outputs": [], "source": [ "# Use str.split method with \"/\" as delimiter" ] }, { "cell_type": "code", "execution_count": 16, "id": "f6a0fa00", "metadata": {}, "outputs": [], "source": [ "# Assign the category and subcategory values to category and subcategory columns.\n", "# Documentation: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.split.html\n", "crowdfunding_info_df[[\"category\", \"subcategory\"]] = crowdfunding_info_df[\"category & sub-category\"].str.split('/', n=1, expand=True)" ] }, { "cell_type": "code", "execution_count": 17, "id": "59391db9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cf_idcompany_nameblurbgoalpledgedoutcomebackers_countcountrycurrencylaunched_atdeadlinestaff_pickspotlightcategory & sub-categorycategorysubcategory
0147Baldwin, Riley and JacksonPre-emptive tertiary standardization1000failed0CACAD15815736001614578400FalseFalsefood/food trucksfoodfood trucks
11621Odom IncManaged bottom-line architecture140014560successful158USUSD16115544001621918800FalseTruemusic/rockmusicrock
21812Melton, Robinson and FritzFunction-based leadingedge pricing structure108400142523successful1425AUAUD16081848001640844000FalseFalsetechnology/webtechnologyweb
32156Mcdonald, Gonzalez and RossVision-oriented fresh-thinking conglomeration42002477failed24USUSD16347924001642399200FalseFalsemusic/rockmusicrock
41365Larson-LittleProactive foreground core76005265failed53USUSD16085304001629694800FalseFalsetheater/playstheaterplays
52057Harris GroupOpen-source optimizing database760013195successful174DKDKK16076664001630213200FalseFalsetheater/playstheaterplays
61894Ortiz, Coleman and MitchellOperative upward-trending algorithm52001090failed18GBGBP15961716001620709200FalseFalsefilm & video/documentaryfilm & videodocumentary
72669Carter-GuzmanCentralized cohesive challenge450014741successful227DKDKK16086168001632200400FalseFalsetheater/playstheaterplays
81114Nunez-RichardsExclusive attitude-oriented intranet11010021946live708DKDKK15863220001615356000FalseFalsetheater/playstheaterplays
9970Rangel, Holt and JonesOpen-source fresh-thinking model62003208failed44USUSD16288308001630386000FalseFalsemusic/electric musicmusicelectric music
\n", "
" ], "text/plain": [ " cf_id company_name \\\n", "0 147 Baldwin, Riley and Jackson \n", "1 1621 Odom Inc \n", "2 1812 Melton, Robinson and Fritz \n", "3 2156 Mcdonald, Gonzalez and Ross \n", "4 1365 Larson-Little \n", "5 2057 Harris Group \n", "6 1894 Ortiz, Coleman and Mitchell \n", "7 2669 Carter-Guzman \n", "8 1114 Nunez-Richards \n", "9 970 Rangel, Holt and Jones \n", "\n", " blurb goal pledged outcome \\\n", "0 Pre-emptive tertiary standardization 100 0 failed \n", "1 Managed bottom-line architecture 1400 14560 successful \n", "2 Function-based leadingedge pricing structure 108400 142523 successful \n", "3 Vision-oriented fresh-thinking conglomeration 4200 2477 failed \n", "4 Proactive foreground core 7600 5265 failed \n", "5 Open-source optimizing database 7600 13195 successful \n", "6 Operative upward-trending algorithm 5200 1090 failed \n", "7 Centralized cohesive challenge 4500 14741 successful \n", "8 Exclusive attitude-oriented intranet 110100 21946 live \n", "9 Open-source fresh-thinking model 6200 3208 failed \n", "\n", " backers_count country currency launched_at deadline staff_pick \\\n", "0 0 CA CAD 1581573600 1614578400 False \n", "1 158 US USD 1611554400 1621918800 False \n", "2 1425 AU AUD 1608184800 1640844000 False \n", "3 24 US USD 1634792400 1642399200 False \n", "4 53 US USD 1608530400 1629694800 False \n", "5 174 DK DKK 1607666400 1630213200 False \n", "6 18 GB GBP 1596171600 1620709200 False \n", "7 227 DK DKK 1608616800 1632200400 False \n", "8 708 DK DKK 1586322000 1615356000 False \n", "9 44 US USD 1628830800 1630386000 False \n", "\n", " spotlight category & sub-category category subcategory \n", "0 False food/food trucks food food trucks \n", "1 True music/rock music rock \n", "2 False technology/web technology web \n", "3 False music/rock music rock \n", "4 False theater/plays theater plays \n", "5 False theater/plays theater plays \n", "6 False film & video/documentary film & video documentary \n", "7 False theater/plays theater plays \n", "8 False theater/plays theater plays \n", "9 False music/electric music music electric music " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "crowdfunding_info_df.head(10)" ] }, { "cell_type": "markdown", "id": "b65827eb", "metadata": {}, "source": [ "### Assign unique id numbers to all categories and subcategories" ] }, { "cell_type": "code", "execution_count": 18, "id": "aa9015a3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['food' 'music' 'technology' 'theater' 'film & video' 'publishing' 'games'\n", " 'photography' 'journalism']\n", "['food trucks' 'rock' 'web' 'plays' 'documentary' 'electric music' 'drama'\n", " 'indie rock' 'wearables' 'nonfiction' 'animation' 'video games' 'shorts'\n", " 'fiction' 'photography books' 'radio & podcasts' 'metal' 'jazz'\n", " 'translations' 'television' 'mobile games' 'world music'\n", " 'science fiction' 'audio']\n" ] } ], "source": [ "print(crowdfunding_info_df[\"category\"].unique())\n", "print(crowdfunding_info_df[\"subcategory\"].unique())" ] }, { "cell_type": "code", "execution_count": 19, "id": "d3570cda", "metadata": {}, "outputs": [], "source": [ "# 9 categories\n", "# 24 subcategories" ] }, { "cell_type": "markdown", "id": "932720a1", "metadata": {}, "source": [ "### Create 2 new dataframes for category and subcategory" ] }, { "cell_type": "code", "execution_count": 20, "id": "208a4455", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['food' 'music' 'technology' 'theater' 'film & video' 'publishing' 'games'\n", " 'photography' 'journalism']\n", "\n", "['food trucks' 'rock' 'web' 'plays' 'documentary' 'electric music' 'drama'\n", " 'indie rock' 'wearables' 'nonfiction' 'animation' 'video games' 'shorts'\n", " 'fiction' 'photography books' 'radio & podcasts' 'metal' 'jazz'\n", " 'translations' 'television' 'mobile games' 'world music'\n", " 'science fiction' 'audio']\n" ] } ], "source": [ "# Get the unique categories and subcategories into different LISTS\n", "categories = crowdfunding_info_df[\"category\"].unique()\n", "subcategories = crowdfunding_info_df[\"subcategory\"].unique()\n", "print(categories)\n", "print()\n", "print(subcategories)" ] }, { "cell_type": "markdown", "id": "3d21300d", "metadata": {}, "source": [ "### Use numpy arrays to assign numbers to each list item" ] }, { "cell_type": "code", "execution_count": 22, "id": "b22c3705", "metadata": {}, "outputs": [], "source": [ "import numpy as np" ] }, { "cell_type": "code", "execution_count": 24, "id": "83c39a0e", "metadata": {}, "outputs": [], "source": [ "# Assign to variables two arrays of sequential numbers equal in lenght to their corresponding column\n", "category_ids = np.arange(1, 10)\n", "subcategory_ids = np.arange(1, 25)" ] }, { "cell_type": "markdown", "id": "d730cf36", "metadata": {}, "source": [ "### Note:\n", "In order to ensure that these category and subcategory id's are unique, including unique to each other, we will convert them to strings and add \"cat0\" to the beginning of each category id and \"scat0\" to the beginning of each subcategory id. This is neccessary because the first 9 numerals of category and subcategory id's are identical (1-9)." ] }, { "cell_type": "code", "execution_count": 25, "id": "83944cb3", "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "['cat01', 'cat02', 'cat03', 'cat04', 'cat05', 'cat06', 'cat07', 'cat08', 'cat09']\n", "\n", "['scat01', 'scat02', 'scat03', 'scat04', 'scat05', 'scat06', 'scat07', 'scat08', 'scat09', 'scat010', 'scat011', 'scat012', 'scat013', 'scat014', 'scat015', 'scat016', 'scat017', 'scat018', 'scat019', 'scat020', 'scat021', 'scat022', 'scat023', 'scat024']\n" ] } ], "source": [ "cat_ids = [\"cat0\" + str(cat_id) for cat_id in category_ids]\n", "\n", "scat_ids = [\"scat0\" + str(scat_id) for scat_id in subcategory_ids]\n", "\n", "print(cat_ids)\n", "print()\n", "print(scat_ids)" ] }, { "cell_type": "markdown", "id": "bb84dee1", "metadata": {}, "source": [ "### Create the DataFrames" ] }, { "cell_type": "code", "execution_count": 28, "id": "5ea5ed9d", "metadata": {}, "outputs": [], "source": [ "category_df = pd.DataFrame({\n", " \"category_id\": cat_ids,\n", " \"category\": categories\n", "})\n", "\n", "subcategory_df = pd.DataFrame({\n", " \"subcategory_id\": scat_ids,\n", " \"subcategory\": subcategories\n", "})" ] }, { "cell_type": "code", "execution_count": 29, "id": "db166cbe", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
category_idcategory
0cat01food
1cat02music
2cat03technology
3cat04theater
4cat05film & video
5cat06publishing
6cat07games
7cat08photography
8cat09journalism
\n", "
" ], "text/plain": [ " category_id category\n", "0 cat01 food\n", "1 cat02 music\n", "2 cat03 technology\n", "3 cat04 theater\n", "4 cat05 film & video\n", "5 cat06 publishing\n", "6 cat07 games\n", "7 cat08 photography\n", "8 cat09 journalism" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "category_df" ] }, { "cell_type": "code", "execution_count": 30, "id": "3c51e4b6", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
subcategory_idsubcategory
0scat01food trucks
1scat02rock
2scat03web
3scat04plays
4scat05documentary
5scat06electric music
6scat07drama
7scat08indie rock
8scat09wearables
9scat010nonfiction
10scat011animation
11scat012video games
12scat013shorts
13scat014fiction
14scat015photography books
15scat016radio & podcasts
16scat017metal
17scat018jazz
18scat019translations
19scat020television
20scat021mobile games
21scat022world music
22scat023science fiction
23scat024audio
\n", "
" ], "text/plain": [ " subcategory_id subcategory\n", "0 scat01 food trucks\n", "1 scat02 rock\n", "2 scat03 web\n", "3 scat04 plays\n", "4 scat05 documentary\n", "5 scat06 electric music\n", "6 scat07 drama\n", "7 scat08 indie rock\n", "8 scat09 wearables\n", "9 scat010 nonfiction\n", "10 scat011 animation\n", "11 scat012 video games\n", "12 scat013 shorts\n", "13 scat014 fiction\n", "14 scat015 photography books\n", "15 scat016 radio & podcasts\n", "16 scat017 metal\n", "17 scat018 jazz\n", "18 scat019 translations\n", "19 scat020 television\n", "20 scat021 mobile games\n", "21 scat022 world music\n", "22 scat023 science fiction\n", "23 scat024 audio" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "subcategory_df" ] }, { "cell_type": "code", "execution_count": 31, "id": "ad8c8643", "metadata": {}, "outputs": [], "source": [ "# Send dataframes to CSV files\n", "category_df.to_csv(\"Data/categories.csv\", index=False)\n", "subcategory_df.to_csv(\"Data/subcategories.csv\", index=False)" ] }, { "cell_type": "markdown", "id": "b604fc65", "metadata": {}, "source": [ "## 8.3.4\n", "Building campaing_df" ] }, { "cell_type": "code", "execution_count": 47, "id": "e05b4a33", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cf_idcompany_nameblurbgoalpledgedoutcomebackers_countcountrycurrencylaunched_atdeadlinestaff_pickspotlightcategory & sub-categorycategorysubcategory
0147Baldwin, Riley and JacksonPre-emptive tertiary standardization1000failed0CACAD15815736001614578400FalseFalsefood/food trucksfoodfood trucks
11621Odom IncManaged bottom-line architecture140014560successful158USUSD16115544001621918800FalseTruemusic/rockmusicrock
21812Melton, Robinson and FritzFunction-based leadingedge pricing structure108400142523successful1425AUAUD16081848001640844000FalseFalsetechnology/webtechnologyweb
32156Mcdonald, Gonzalez and RossVision-oriented fresh-thinking conglomeration42002477failed24USUSD16347924001642399200FalseFalsemusic/rockmusicrock
41365Larson-LittleProactive foreground core76005265failed53USUSD16085304001629694800FalseFalsetheater/playstheaterplays
\n", "
" ], "text/plain": [ " cf_id company_name \\\n", "0 147 Baldwin, Riley and Jackson \n", "1 1621 Odom Inc \n", "2 1812 Melton, Robinson and Fritz \n", "3 2156 Mcdonald, Gonzalez and Ross \n", "4 1365 Larson-Little \n", "\n", " blurb goal pledged outcome \\\n", "0 Pre-emptive tertiary standardization 100 0 failed \n", "1 Managed bottom-line architecture 1400 14560 successful \n", "2 Function-based leadingedge pricing structure 108400 142523 successful \n", "3 Vision-oriented fresh-thinking conglomeration 4200 2477 failed \n", "4 Proactive foreground core 7600 5265 failed \n", "\n", " backers_count country currency launched_at deadline staff_pick \\\n", "0 0 CA CAD 1581573600 1614578400 False \n", "1 158 US USD 1611554400 1621918800 False \n", "2 1425 AU AUD 1608184800 1640844000 False \n", "3 24 US USD 1634792400 1642399200 False \n", "4 53 US USD 1608530400 1629694800 False \n", "\n", " spotlight category & sub-category category subcategory \n", "0 False food/food trucks food food trucks \n", "1 True music/rock music rock \n", "2 False technology/web technology web \n", "3 False music/rock music rock \n", "4 False theater/plays theater plays " ] }, "execution_count": 47, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# create a copy with a new name to preserve original dataframe\n", "campaign_df = crowdfunding_info_df.copy()\n", "campaign_df.head()" ] }, { "cell_type": "code", "execution_count": 48, "id": "9b53fadd", "metadata": {}, "outputs": [], "source": [ "# Rename columns according to instructions\n", "campaign_df = campaign_df.rename(columns={\"blurb\":\"description\", \"launched_at\":\"launched_date\", \"deadline\":\"end_date\"})" ] }, { "cell_type": "code", "execution_count": 49, "id": "236ceb62", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cf_idcompany_namedescriptiongoalpledgedoutcomebackers_countcountrycurrencylaunched_dateend_datestaff_pickspotlightcategory & sub-categorycategorysubcategory
0147Baldwin, Riley and JacksonPre-emptive tertiary standardization100.00.0failed0CACAD15815736001614578400FalseFalsefood/food trucksfoodfood trucks
11621Odom IncManaged bottom-line architecture1400.014560.0successful158USUSD16115544001621918800FalseTruemusic/rockmusicrock
21812Melton, Robinson and FritzFunction-based leadingedge pricing structure108400.0142523.0successful1425AUAUD16081848001640844000FalseFalsetechnology/webtechnologyweb
32156Mcdonald, Gonzalez and RossVision-oriented fresh-thinking conglomeration4200.02477.0failed24USUSD16347924001642399200FalseFalsemusic/rockmusicrock
41365Larson-LittleProactive foreground core7600.05265.0failed53USUSD16085304001629694800FalseFalsetheater/playstheaterplays
52057Harris GroupOpen-source optimizing database7600.013195.0successful174DKDKK16076664001630213200FalseFalsetheater/playstheaterplays
61894Ortiz, Coleman and MitchellOperative upward-trending algorithm5200.01090.0failed18GBGBP15961716001620709200FalseFalsefilm & video/documentaryfilm & videodocumentary
72669Carter-GuzmanCentralized cohesive challenge4500.014741.0successful227DKDKK16086168001632200400FalseFalsetheater/playstheaterplays
81114Nunez-RichardsExclusive attitude-oriented intranet110100.021946.0live708DKDKK15863220001615356000FalseFalsetheater/playstheaterplays
9970Rangel, Holt and JonesOpen-source fresh-thinking model6200.03208.0failed44USUSD16288308001630386000FalseFalsemusic/electric musicmusicelectric music
\n", "
" ], "text/plain": [ " cf_id company_name \\\n", "0 147 Baldwin, Riley and Jackson \n", "1 1621 Odom Inc \n", "2 1812 Melton, Robinson and Fritz \n", "3 2156 Mcdonald, Gonzalez and Ross \n", "4 1365 Larson-Little \n", "5 2057 Harris Group \n", "6 1894 Ortiz, Coleman and Mitchell \n", "7 2669 Carter-Guzman \n", "8 1114 Nunez-Richards \n", "9 970 Rangel, Holt and Jones \n", "\n", " description goal pledged \\\n", "0 Pre-emptive tertiary standardization 100.0 0.0 \n", "1 Managed bottom-line architecture 1400.0 14560.0 \n", "2 Function-based leadingedge pricing structure 108400.0 142523.0 \n", "3 Vision-oriented fresh-thinking conglomeration 4200.0 2477.0 \n", "4 Proactive foreground core 7600.0 5265.0 \n", "5 Open-source optimizing database 7600.0 13195.0 \n", "6 Operative upward-trending algorithm 5200.0 1090.0 \n", "7 Centralized cohesive challenge 4500.0 14741.0 \n", "8 Exclusive attitude-oriented intranet 110100.0 21946.0 \n", "9 Open-source fresh-thinking model 6200.0 3208.0 \n", "\n", " outcome backers_count country currency launched_date end_date \\\n", "0 failed 0 CA CAD 1581573600 1614578400 \n", "1 successful 158 US USD 1611554400 1621918800 \n", "2 successful 1425 AU AUD 1608184800 1640844000 \n", "3 failed 24 US USD 1634792400 1642399200 \n", "4 failed 53 US USD 1608530400 1629694800 \n", "5 successful 174 DK DKK 1607666400 1630213200 \n", "6 failed 18 GB GBP 1596171600 1620709200 \n", "7 successful 227 DK DKK 1608616800 1632200400 \n", "8 live 708 DK DKK 1586322000 1615356000 \n", "9 failed 44 US USD 1628830800 1630386000 \n", "\n", " staff_pick spotlight category & sub-category category \\\n", "0 False False food/food trucks food \n", "1 False True music/rock music \n", "2 False False technology/web technology \n", "3 False False music/rock music \n", "4 False False theater/plays theater \n", "5 False False theater/plays theater \n", "6 False False film & video/documentary film & video \n", "7 False False theater/plays theater \n", "8 False False theater/plays theater \n", "9 False False music/electric music music \n", "\n", " subcategory \n", "0 food trucks \n", "1 rock \n", "2 web \n", "3 rock \n", "4 plays \n", "5 plays \n", "6 documentary \n", "7 plays \n", "8 plays \n", "9 electric music " ] }, "execution_count": 49, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Convert int64 to float for specified columns\n", "campaign_df[[\"goal\", \"pledged\"]] = campaign_df[[\"goal\", \"pledged\"]].astype(float)\n", "campaign_df.head(10)" ] }, { "cell_type": "markdown", "id": "cfaae929", "metadata": {}, "source": [ "### Convert epoch time to ISO using datetime module" ] }, { "cell_type": "code", "execution_count": 50, "id": "17a59941", "metadata": {}, "outputs": [], "source": [ "from datetime import datetime as dt" ] }, { "cell_type": "code", "execution_count": 52, "id": "2d198bf9", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
cf_idcompany_namedescriptiongoalpledgedoutcomebackers_countcountrycurrencylaunched_dateend_datestaff_pickspotlightcategory & sub-categorycategorysubcategory
0147Baldwin, Riley and JacksonPre-emptive tertiary standardization100.00.0failed0CACAD2020-02-132021-03-01FalseFalsefood/food trucksfoodfood trucks
11621Odom IncManaged bottom-line architecture1400.014560.0successful158USUSD2021-01-252021-05-25FalseTruemusic/rockmusicrock
21812Melton, Robinson and FritzFunction-based leadingedge pricing structure108400.0142523.0successful1425AUAUD2020-12-172021-12-30FalseFalsetechnology/webtechnologyweb
32156Mcdonald, Gonzalez and RossVision-oriented fresh-thinking conglomeration4200.02477.0failed24USUSD2021-10-212022-01-17FalseFalsemusic/rockmusicrock
41365Larson-LittleProactive foreground core7600.05265.0failed53USUSD2020-12-212021-08-23FalseFalsetheater/playstheaterplays
\n", "
" ], "text/plain": [ " cf_id company_name \\\n", "0 147 Baldwin, Riley and Jackson \n", "1 1621 Odom Inc \n", "2 1812 Melton, Robinson and Fritz \n", "3 2156 Mcdonald, Gonzalez and Ross \n", "4 1365 Larson-Little \n", "\n", " description goal pledged \\\n", "0 Pre-emptive tertiary standardization 100.0 0.0 \n", "1 Managed bottom-line architecture 1400.0 14560.0 \n", "2 Function-based leadingedge pricing structure 108400.0 142523.0 \n", "3 Vision-oriented fresh-thinking conglomeration 4200.0 2477.0 \n", "4 Proactive foreground core 7600.0 5265.0 \n", "\n", " outcome backers_count country currency launched_date end_date \\\n", "0 failed 0 CA CAD 2020-02-13 2021-03-01 \n", "1 successful 158 US USD 2021-01-25 2021-05-25 \n", "2 successful 1425 AU AUD 2020-12-17 2021-12-30 \n", "3 failed 24 US USD 2021-10-21 2022-01-17 \n", "4 failed 53 US USD 2020-12-21 2021-08-23 \n", "\n", " staff_pick spotlight category & sub-category category subcategory \n", "0 False False food/food trucks food food trucks \n", "1 False True music/rock music rock \n", "2 False False technology/web technology web \n", "3 False False music/rock music rock \n", "4 False False theater/plays theater plays " ] }, "execution_count": 52, "metadata": {}, "output_type": "execute_result" } ], "source": [ "campaign_df[\"launched_date\"] = pd.to_datetime(campaign_df[\"launched_date\"], unit='s').dt.strftime('%Y-%m-%d')\n", "campaign_df[\"end_date\"] = pd.to_datetime(campaign_df[\"end_date\"], unit='s').dt.strftime('%Y-%m-%d')\n", "campaign_df.head()" ] }, { "cell_type": "markdown", "id": "4000c7db", "metadata": {}, "source": [ "### Get cat and scat id #'s by merging those dataframes on common columns" ] }, { "cell_type": "code", "execution_count": 54, "id": "b17961ba", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
category_idcategorycf_idcompany_namedescriptiongoalpledgedoutcomebackers_countcountrycurrencylaunched_dateend_datestaff_pickspotlightcategory & sub-categorysubcategory
0cat01food147Baldwin, Riley and JacksonPre-emptive tertiary standardization100.00.0failed0CACAD2020-02-132021-03-01FalseFalsefood/food trucksfood trucks
1cat01food1175Werner-BryantVirtual uniform frame1800.07991.0successful222USUSD2020-06-202021-01-30FalseFalsefood/food trucksfood trucks
2cat01food873Stewart LLCCloned bi-directional architecture1300.012047.0successful113USUSD2020-11-292021-06-11FalseFalsefood/food trucksfood trucks
3cat01food2568Castillo-CareyCross-platform solution-oriented process improvement142400.021307.0failed296USUSD2020-05-052021-05-30FalseFalsefood/food trucksfood trucks
4cat01food1211Wright, Hartman and YuUser-friendly tertiary array3300.012437.0successful131USUSD2021-01-292021-02-13FalseFalsefood/food trucksfood trucks
\n", "
" ], "text/plain": [ " category_id category cf_id company_name \\\n", "0 cat01 food 147 Baldwin, Riley and Jackson \n", "1 cat01 food 1175 Werner-Bryant \n", "2 cat01 food 873 Stewart LLC \n", "3 cat01 food 2568 Castillo-Carey \n", "4 cat01 food 1211 Wright, Hartman and Yu \n", "\n", " description goal pledged \\\n", "0 Pre-emptive tertiary standardization 100.0 0.0 \n", "1 Virtual uniform frame 1800.0 7991.0 \n", "2 Cloned bi-directional architecture 1300.0 12047.0 \n", "3 Cross-platform solution-oriented process improvement 142400.0 21307.0 \n", "4 User-friendly tertiary array 3300.0 12437.0 \n", "\n", " outcome backers_count country currency launched_date end_date \\\n", "0 failed 0 CA CAD 2020-02-13 2021-03-01 \n", "1 successful 222 US USD 2020-06-20 2021-01-30 \n", "2 successful 113 US USD 2020-11-29 2021-06-11 \n", "3 failed 296 US USD 2020-05-05 2021-05-30 \n", "4 successful 131 US USD 2021-01-29 2021-02-13 \n", "\n", " staff_pick spotlight category & sub-category subcategory \n", "0 False False food/food trucks food trucks \n", "1 False False food/food trucks food trucks \n", "2 False False food/food trucks food trucks \n", "3 False False food/food trucks food trucks \n", "4 False False food/food trucks food trucks " ] }, "execution_count": 54, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": null, "id": "11f38a5b", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "PythonData", "language": "python", "name": "pythondata" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.12" } }, "nbformat": 4, "nbformat_minor": 5 }