{
"cells": [
{
"cell_type": "markdown",
"id": "1ba24d52",
"metadata": {},
"source": [
"## 8.2.3"
]
},
{
"cell_type": "code",
"execution_count": 1,
"id": "9b6e3509",
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "89cd3712",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"['crowdfunding_info', 'contact_info']"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Open the Excel workbook once so each sheet can be parsed from the same handle\n",
"crowdfunding_data = pd.ExcelFile(\"../crowdfunding.xlsx\")\n",
"# Show the names of the worksheets contained in the workbook\n",
"crowdfunding_data.sheet_names"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5ec9289d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cf_id | \n",
" company_name | \n",
" blurb | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_at | \n",
" deadline | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 147 | \n",
" Baldwin, Riley and Jackson | \n",
" Pre-emptive tertiary standardization | \n",
" 100 | \n",
" 0 | \n",
" failed | \n",
" 0 | \n",
" CA | \n",
" CAD | \n",
" 1581573600 | \n",
" 1614578400 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
"
\n",
" \n",
" 1 | \n",
" 1621 | \n",
" Odom Inc | \n",
" Managed bottom-line architecture | \n",
" 1400 | \n",
" 14560 | \n",
" successful | \n",
" 158 | \n",
" US | \n",
" USD | \n",
" 1611554400 | \n",
" 1621918800 | \n",
" False | \n",
" True | \n",
" music/rock | \n",
"
\n",
" \n",
" 2 | \n",
" 1812 | \n",
" Melton, Robinson and Fritz | \n",
" Function-based leadingedge pricing structure | \n",
" 108400 | \n",
" 142523 | \n",
" successful | \n",
" 1425 | \n",
" AU | \n",
" AUD | \n",
" 1608184800 | \n",
" 1640844000 | \n",
" False | \n",
" False | \n",
" technology/web | \n",
"
\n",
" \n",
" 3 | \n",
" 2156 | \n",
" Mcdonald, Gonzalez and Ross | \n",
" Vision-oriented fresh-thinking conglomeration | \n",
" 4200 | \n",
" 2477 | \n",
" failed | \n",
" 24 | \n",
" US | \n",
" USD | \n",
" 1634792400 | \n",
" 1642399200 | \n",
" False | \n",
" False | \n",
" music/rock | \n",
"
\n",
" \n",
" 4 | \n",
" 1365 | \n",
" Larson-Little | \n",
" Proactive foreground core | \n",
" 7600 | \n",
" 5265 | \n",
" failed | \n",
" 53 | \n",
" US | \n",
" USD | \n",
" 1608530400 | \n",
" 1629694800 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 5 | \n",
" 2057 | \n",
" Harris Group | \n",
" Open-source optimizing database | \n",
" 7600 | \n",
" 13195 | \n",
" successful | \n",
" 174 | \n",
" DK | \n",
" DKK | \n",
" 1607666400 | \n",
" 1630213200 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 6 | \n",
" 1894 | \n",
" Ortiz, Coleman and Mitchell | \n",
" Operative upward-trending algorithm | \n",
" 5200 | \n",
" 1090 | \n",
" failed | \n",
" 18 | \n",
" GB | \n",
" GBP | \n",
" 1596171600 | \n",
" 1620709200 | \n",
" False | \n",
" False | \n",
" film & video/documentary | \n",
"
\n",
" \n",
" 7 | \n",
" 2669 | \n",
" Carter-Guzman | \n",
" Centralized cohesive challenge | \n",
" 4500 | \n",
" 14741 | \n",
" successful | \n",
" 227 | \n",
" DK | \n",
" DKK | \n",
" 1608616800 | \n",
" 1632200400 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 8 | \n",
" 1114 | \n",
" Nunez-Richards | \n",
" Exclusive attitude-oriented intranet | \n",
" 110100 | \n",
" 21946 | \n",
" live | \n",
" 708 | \n",
" DK | \n",
" DKK | \n",
" 1586322000 | \n",
" 1615356000 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 9 | \n",
" 970 | \n",
" Rangel, Holt and Jones | \n",
" Open-source fresh-thinking model | \n",
" 6200 | \n",
" 3208 | \n",
" failed | \n",
" 44 | \n",
" US | \n",
" USD | \n",
" 1628830800 | \n",
" 1630386000 | \n",
" False | \n",
" False | \n",
" music/electric music | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cf_id company_name \\\n",
"0 147 Baldwin, Riley and Jackson \n",
"1 1621 Odom Inc \n",
"2 1812 Melton, Robinson and Fritz \n",
"3 2156 Mcdonald, Gonzalez and Ross \n",
"4 1365 Larson-Little \n",
"5 2057 Harris Group \n",
"6 1894 Ortiz, Coleman and Mitchell \n",
"7 2669 Carter-Guzman \n",
"8 1114 Nunez-Richards \n",
"9 970 Rangel, Holt and Jones \n",
"\n",
" blurb goal pledged outcome \\\n",
"0 Pre-emptive tertiary standardization 100 0 failed \n",
"1 Managed bottom-line architecture 1400 14560 successful \n",
"2 Function-based leadingedge pricing structure 108400 142523 successful \n",
"3 Vision-oriented fresh-thinking conglomeration 4200 2477 failed \n",
"4 Proactive foreground core 7600 5265 failed \n",
"5 Open-source optimizing database 7600 13195 successful \n",
"6 Operative upward-trending algorithm 5200 1090 failed \n",
"7 Centralized cohesive challenge 4500 14741 successful \n",
"8 Exclusive attitude-oriented intranet 110100 21946 live \n",
"9 Open-source fresh-thinking model 6200 3208 failed \n",
"\n",
" backers_count country currency launched_at deadline staff_pick \\\n",
"0 0 CA CAD 1581573600 1614578400 False \n",
"1 158 US USD 1611554400 1621918800 False \n",
"2 1425 AU AUD 1608184800 1640844000 False \n",
"3 24 US USD 1634792400 1642399200 False \n",
"4 53 US USD 1608530400 1629694800 False \n",
"5 174 DK DKK 1607666400 1630213200 False \n",
"6 18 GB GBP 1596171600 1620709200 False \n",
"7 227 DK DKK 1608616800 1632200400 False \n",
"8 708 DK DKK 1586322000 1615356000 False \n",
"9 44 US USD 1628830800 1630386000 False \n",
"\n",
" spotlight category & sub-category \n",
"0 False food/food trucks \n",
"1 True music/rock \n",
"2 False technology/web \n",
"3 False music/rock \n",
"4 False theater/plays \n",
"5 False theater/plays \n",
"6 False film & video/documentary \n",
"7 False theater/plays \n",
"8 False theater/plays \n",
"9 False music/electric music "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Parse the campaign worksheet into a dataframe and preview its first ten rows\n",
"crowdfunding_info_df = pd.read_excel(\n",
"    io=crowdfunding_data,\n",
"    sheet_name=\"crowdfunding_info\",\n",
")\n",
"crowdfunding_info_df.head(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "f91ec41c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" contact_info | \n",
"
\n",
" \n",
" 1 | \n",
" {\"contact_id\": 4661, \"name\": \"Cecilia Velasco\", \"email\": \"cecilia.velasco@rodrigues.fr\"} | \n",
"
\n",
" \n",
" 2 | \n",
" {\"contact_id\": 3765, \"name\": \"Mariana Ellis\", \"email\": \"mariana.ellis@rossi.org\"} | \n",
"
\n",
" \n",
" 3 | \n",
" {\"contact_id\": 4187, \"name\": \"Sofie Woods\", \"email\": \"sofie.woods@riviere.com\"} | \n",
"
\n",
" \n",
" 4 | \n",
" {\"contact_id\": 4941, \"name\": \"Jeanette Iannotti\", \"email\": \"jeanette.iannotti@yahoo.com\"} | \n",
"
\n",
" \n",
" 5 | \n",
" {\"contact_id\": 2199, \"name\": \"Samuel Sorgatz\", \"email\": \"samuel.sorgatz@gmail.com\"} | \n",
"
\n",
" \n",
" 6 | \n",
" {\"contact_id\": 5650, \"name\": \"Socorro Luna\", \"email\": \"socorro.luna@hotmail.com\"} | \n",
"
\n",
" \n",
" 7 | \n",
" {\"contact_id\": 5889, \"name\": \"Carolina Murray\", \"email\": \"carolina.murray@knight.com\"} | \n",
"
\n",
" \n",
" 8 | \n",
" {\"contact_id\": 4842, \"name\": \"Kayla Moon\", \"email\": \"kayla.moon@yahoo.de\"} | \n",
"
\n",
" \n",
" 9 | \n",
" {\"contact_id\": 3280, \"name\": \"Ariadna Geisel\", \"email\": \"ariadna.geisel@rangel.com\"} | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" \n",
"0 contact_info\n",
"1 {\"contact_id\": 4661, \"name\": \"Cecilia Velasco\", \"email\": \"cecilia.velasco@rodrigues.fr\"}\n",
"2 {\"contact_id\": 3765, \"name\": \"Mariana Ellis\", \"email\": \"mariana.ellis@rossi.org\"}\n",
"3 {\"contact_id\": 4187, \"name\": \"Sofie Woods\", \"email\": \"sofie.woods@riviere.com\"}\n",
"4 {\"contact_id\": 4941, \"name\": \"Jeanette Iannotti\", \"email\": \"jeanette.iannotti@yahoo.com\"}\n",
"5 {\"contact_id\": 2199, \"name\": \"Samuel Sorgatz\", \"email\": \"samuel.sorgatz@gmail.com\"}\n",
"6 {\"contact_id\": 5650, \"name\": \"Socorro Luna\", \"email\": \"socorro.luna@hotmail.com\"}\n",
"7 {\"contact_id\": 5889, \"name\": \"Carolina Murray\", \"email\": \"carolina.murray@knight.com\"}\n",
"8 {\"contact_id\": 4842, \"name\": \"Kayla Moon\", \"email\": \"kayla.moon@yahoo.de\"}\n",
"9 {\"contact_id\": 3280, \"name\": \"Ariadna Geisel\", \"email\": \"ariadna.geisel@rangel.com\"}"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Widen column display so each contact JSON string is shown in full\n",
"pd.set_option('display.max_colwidth', 400)\n",
"# header is 0-indexed: header=3 skips the three rows above the real header so\n",
"# \"contact_info\" becomes the column name instead of being loaded as data row 0\n",
"contact_info_df = pd.read_excel(crowdfunding_data, sheet_name=\"contact_info\", header=3)\n",
"contact_info_df.head(10)"
]
},
{
"cell_type": "markdown",
"id": "c8fdacca",
"metadata": {},
"source": [
"### Sample dataframes "
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2b1c4747",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 991 | \n",
" {\"contact_id\": 5500, \"name\": \"Vincentio Sanders\", \"email\": \"vincentio.sanders@voila.fr\"} | \n",
"
\n",
" \n",
" 992 | \n",
" {\"contact_id\": 3494, \"name\": \"Bettina Norbiato\", \"email\": \"bettina.norbiato@allen-lutz.org\"} | \n",
"
\n",
" \n",
" 993 | \n",
" {\"contact_id\": 3924, \"name\": \"Julio Renner\", \"email\": \"julio.renner@industrias.net\"} | \n",
"
\n",
" \n",
" 994 | \n",
" {\"contact_id\": 3521, \"name\": \"Jelena Stiffel\", \"email\": \"jelena.stiffel@trupp.de\"} | \n",
"
\n",
" \n",
" 995 | \n",
" {\"contact_id\": 5262, \"name\": \"Guadalupe Munoz\", \"email\": \"guadalupe.munoz@murray-hamilton.com.au\"} | \n",
"
\n",
" \n",
" 996 | \n",
" {\"contact_id\": 3684, \"name\": \"Whitney Noack\", \"email\": \"whitney.noack@laboratorios.org\"} | \n",
"
\n",
" \n",
" 997 | \n",
" {\"contact_id\": 5784, \"name\": \"Gelsomina Migliaccio\", \"email\": \"gelsomina.migliaccio@junk.com\"} | \n",
"
\n",
" \n",
" 998 | \n",
" {\"contact_id\": 1498, \"name\": \"Evangelista Pereira\", \"email\": \"evangelista.pereira@thompson-peterson.biz\"} | \n",
"
\n",
" \n",
" 999 | \n",
" {\"contact_id\": 6073, \"name\": \"Gareth Comolli\", \"email\": \"gareth.comolli@tiscali.fr\"} | \n",
"
\n",
" \n",
" 1000 | \n",
" {\"contact_id\": 4939, \"name\": \"Michelangelo Hess\", \"email\": \"michelangelo.hess@bouygtel.fr\"} | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" \n",
"991 {\"contact_id\": 5500, \"name\": \"Vincentio Sanders\", \"email\": \"vincentio.sanders@voila.fr\"}\n",
"992 {\"contact_id\": 3494, \"name\": \"Bettina Norbiato\", \"email\": \"bettina.norbiato@allen-lutz.org\"}\n",
"993 {\"contact_id\": 3924, \"name\": \"Julio Renner\", \"email\": \"julio.renner@industrias.net\"}\n",
"994 {\"contact_id\": 3521, \"name\": \"Jelena Stiffel\", \"email\": \"jelena.stiffel@trupp.de\"}\n",
"995 {\"contact_id\": 5262, \"name\": \"Guadalupe Munoz\", \"email\": \"guadalupe.munoz@murray-hamilton.com.au\"}\n",
"996 {\"contact_id\": 3684, \"name\": \"Whitney Noack\", \"email\": \"whitney.noack@laboratorios.org\"}\n",
"997 {\"contact_id\": 5784, \"name\": \"Gelsomina Migliaccio\", \"email\": \"gelsomina.migliaccio@junk.com\"}\n",
"998 {\"contact_id\": 1498, \"name\": \"Evangelista Pereira\", \"email\": \"evangelista.pereira@thompson-peterson.biz\"}\n",
"999 {\"contact_id\": 6073, \"name\": \"Gareth Comolli\", \"email\": \"gareth.comolli@tiscali.fr\"}\n",
"1000 {\"contact_id\": 4939, \"name\": \"Michelangelo Hess\", \"email\": \"michelangelo.hess@bouygtel.fr\"}"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the last ten rows of the contact sheet\n",
"contact_info_df.tail(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "eeba7067",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" 129 | \n",
" {\"contact_id\": 5501, \"name\": \"Venancio Stadelmann\", \"email\": \"venancio.stadelmann@gmx.de\"} | \n",
"
\n",
" \n",
" 882 | \n",
" {\"contact_id\": 3173, \"name\": \"Erin Weinhold\", \"email\": \"erin.weinhold@hotmail.fr\"} | \n",
"
\n",
" \n",
" 393 | \n",
" {\"contact_id\": 3365, \"name\": \"Alessio Bruneau\", \"email\": \"alessio.bruneau@hotmail.co.uk\"} | \n",
"
\n",
" \n",
" 434 | \n",
" {\"contact_id\": 3176, \"name\": \"Stephen Pratesi\", \"email\": \"stephen.pratesi@hotmail.com\"} | \n",
"
\n",
" \n",
" 263 | \n",
" {\"contact_id\": 2761, \"name\": \"Laetitia Gallet\", \"email\": \"laetitia.gallet@aubry.org\"} | \n",
"
\n",
" \n",
" 116 | \n",
" {\"contact_id\": 6026, \"name\": \"Laura Buckley\", \"email\": \"laura.buckley@ryan.org\"} | \n",
"
\n",
" \n",
" 339 | \n",
" {\"contact_id\": 4618, \"name\": \"Heather Zimmer\", \"email\": \"heather.zimmer@bluemel.de\"} | \n",
"
\n",
" \n",
" 168 | \n",
" {\"contact_id\": 4951, \"name\": \"Sergio Abbott\", \"email\": \"sergio.abbott@moore.net.au\"} | \n",
"
\n",
" \n",
" 739 | \n",
" {\"contact_id\": 6028, \"name\": \"Jean Berthelot\", \"email\": \"jean.berthelot@oscuro.it\"} | \n",
"
\n",
" \n",
" 406 | \n",
" {\"contact_id\": 6055, \"name\": \"Emmanuel Dominguez\", \"email\": \"emmanuel.dominguez@gmail.com\"} | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" \n",
"129 {\"contact_id\": 5501, \"name\": \"Venancio Stadelmann\", \"email\": \"venancio.stadelmann@gmx.de\"}\n",
"882 {\"contact_id\": 3173, \"name\": \"Erin Weinhold\", \"email\": \"erin.weinhold@hotmail.fr\"}\n",
"393 {\"contact_id\": 3365, \"name\": \"Alessio Bruneau\", \"email\": \"alessio.bruneau@hotmail.co.uk\"}\n",
"434 {\"contact_id\": 3176, \"name\": \"Stephen Pratesi\", \"email\": \"stephen.pratesi@hotmail.com\"}\n",
"263 {\"contact_id\": 2761, \"name\": \"Laetitia Gallet\", \"email\": \"laetitia.gallet@aubry.org\"}\n",
"116 {\"contact_id\": 6026, \"name\": \"Laura Buckley\", \"email\": \"laura.buckley@ryan.org\"}\n",
"339 {\"contact_id\": 4618, \"name\": \"Heather Zimmer\", \"email\": \"heather.zimmer@bluemel.de\"}\n",
"168 {\"contact_id\": 4951, \"name\": \"Sergio Abbott\", \"email\": \"sergio.abbott@moore.net.au\"}\n",
"739 {\"contact_id\": 6028, \"name\": \"Jean Berthelot\", \"email\": \"jean.berthelot@oscuro.it\"}\n",
"406 {\"contact_id\": 6055, \"name\": \"Emmanuel Dominguez\", \"email\": \"emmanuel.dominguez@gmail.com\"}"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# No seed is set on purpose: every execution draws a different set of ten rows\n",
"contact_info_df.sample(10)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "71e889c6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cf_id | \n",
" company_name | \n",
" blurb | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_at | \n",
" deadline | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
"
\n",
" \n",
" \n",
" \n",
" 990 | \n",
" 1561 | \n",
" Ortiz-Roberts | \n",
" Devolved foreground customer loyalty | \n",
" 7800 | \n",
" 6839 | \n",
" failed | \n",
" 64 | \n",
" US | \n",
" USD | \n",
" 1623214800 | \n",
" 1623992400 | \n",
" False | \n",
" True | \n",
" film & video/drama | \n",
"
\n",
" \n",
" 991 | \n",
" 2632 | \n",
" Ramirez LLC | \n",
" Reduced reciprocal focus group | \n",
" 9800 | \n",
" 11091 | \n",
" successful | \n",
" 241 | \n",
" US | \n",
" USD | \n",
" 1607493600 | \n",
" 1622005200 | \n",
" False | \n",
" True | \n",
" music/rock | \n",
"
\n",
" \n",
" 992 | \n",
" 439 | \n",
" Morrow Inc | \n",
" Networked global migration | \n",
" 3100 | \n",
" 13223 | \n",
" successful | \n",
" 132 | \n",
" US | \n",
" USD | \n",
" 1592110800 | \n",
" 1612850400 | \n",
" False | \n",
" True | \n",
" film & video/drama | \n",
"
\n",
" \n",
" 993 | \n",
" 461 | \n",
" Erickson-Rogers | \n",
" De-engineered even-keeled definition | \n",
" 9800 | \n",
" 7608 | \n",
" canceled | \n",
" 75 | \n",
" IT | \n",
" EUR | \n",
" 1625288400 | \n",
" 1625720400 | \n",
" False | \n",
" True | \n",
" photography/photography books | \n",
"
\n",
" \n",
" 994 | \n",
" 1419 | \n",
" Leach, Rich and Price | \n",
" Implemented bi-directional flexibility | \n",
" 141100 | \n",
" 74073 | \n",
" failed | \n",
" 842 | \n",
" US | \n",
" USD | \n",
" 1636956000 | \n",
" 1638856800 | \n",
" False | \n",
" True | \n",
" publishing/translations | \n",
"
\n",
" \n",
" 995 | \n",
" 2986 | \n",
" Manning-Hamilton | \n",
" Vision-oriented scalable definition | \n",
" 97300 | \n",
" 153216 | \n",
" successful | \n",
" 2043 | \n",
" US | \n",
" USD | \n",
" 1609221600 | \n",
" 1622350800 | \n",
" False | \n",
" True | \n",
" food/food trucks | \n",
"
\n",
" \n",
" 996 | \n",
" 2031 | \n",
" Butler LLC | \n",
" Future-proofed upward-trending migration | \n",
" 6600 | \n",
" 4814 | \n",
" failed | \n",
" 112 | \n",
" US | \n",
" USD | \n",
" 1634274000 | \n",
" 1638252000 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 997 | \n",
" 1627 | \n",
" Ball LLC | \n",
" Right-sized full-range throughput | \n",
" 7600 | \n",
" 4603 | \n",
" canceled | \n",
" 139 | \n",
" IT | \n",
" EUR | \n",
" 1636174800 | \n",
" 1639116000 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 998 | \n",
" 2175 | \n",
" Taylor, Santiago and Flores | \n",
" Polarized composite customer loyalty | \n",
" 66600 | \n",
" 37823 | \n",
" failed | \n",
" 374 | \n",
" US | \n",
" USD | \n",
" 1602133200 | \n",
" 1618117200 | \n",
" False | \n",
" True | \n",
" music/indie rock | \n",
"
\n",
" \n",
" 999 | \n",
" 1788 | \n",
" Hernandez, Norton and Kelley | \n",
" Expanded eco-centric policy | \n",
" 111100 | \n",
" 62819 | \n",
" canceled | \n",
" 1122 | \n",
" US | \n",
" USD | \n",
" 1609308000 | \n",
" 1629262800 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cf_id company_name \\\n",
"990 1561 Ortiz-Roberts \n",
"991 2632 Ramirez LLC \n",
"992 439 Morrow Inc \n",
"993 461 Erickson-Rogers \n",
"994 1419 Leach, Rich and Price \n",
"995 2986 Manning-Hamilton \n",
"996 2031 Butler LLC \n",
"997 1627 Ball LLC \n",
"998 2175 Taylor, Santiago and Flores \n",
"999 1788 Hernandez, Norton and Kelley \n",
"\n",
" blurb goal pledged outcome \\\n",
"990 Devolved foreground customer loyalty 7800 6839 failed \n",
"991 Reduced reciprocal focus group 9800 11091 successful \n",
"992 Networked global migration 3100 13223 successful \n",
"993 De-engineered even-keeled definition 9800 7608 canceled \n",
"994 Implemented bi-directional flexibility 141100 74073 failed \n",
"995 Vision-oriented scalable definition 97300 153216 successful \n",
"996 Future-proofed upward-trending migration 6600 4814 failed \n",
"997 Right-sized full-range throughput 7600 4603 canceled \n",
"998 Polarized composite customer loyalty 66600 37823 failed \n",
"999 Expanded eco-centric policy 111100 62819 canceled \n",
"\n",
" backers_count country currency launched_at deadline staff_pick \\\n",
"990 64 US USD 1623214800 1623992400 False \n",
"991 241 US USD 1607493600 1622005200 False \n",
"992 132 US USD 1592110800 1612850400 False \n",
"993 75 IT EUR 1625288400 1625720400 False \n",
"994 842 US USD 1636956000 1638856800 False \n",
"995 2043 US USD 1609221600 1622350800 False \n",
"996 112 US USD 1634274000 1638252000 False \n",
"997 139 IT EUR 1636174800 1639116000 False \n",
"998 374 US USD 1602133200 1618117200 False \n",
"999 1122 US USD 1609308000 1629262800 False \n",
"\n",
" spotlight category & sub-category \n",
"990 True film & video/drama \n",
"991 True music/rock \n",
"992 True film & video/drama \n",
"993 True photography/photography books \n",
"994 True publishing/translations \n",
"995 True food/food trucks \n",
"996 False theater/plays \n",
"997 False theater/plays \n",
"998 True music/indie rock \n",
"999 False food/food trucks "
]
},
"execution_count": 7,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Preview the last ten rows of the campaign sheet\n",
"crowdfunding_info_df.tail(n=10)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b498ea0c",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cf_id | \n",
" company_name | \n",
" blurb | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_at | \n",
" deadline | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
"
\n",
" \n",
" \n",
" \n",
" 352 | \n",
" 2514 | \n",
" Adams, Willis and Sanchez | \n",
" Expanded hybrid hardware | \n",
" 2800 | \n",
" 977 | \n",
" failed | \n",
" 33 | \n",
" CA | \n",
" CAD | \n",
" 1619326800 | \n",
" 1625461200 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 273 | \n",
" 2440 | \n",
" Thomas and Sons | \n",
" Re-engineered heuristic forecast | \n",
" 7800 | \n",
" 10704 | \n",
" successful | \n",
" 282 | \n",
" CA | \n",
" CAD | \n",
" 1604469600 | \n",
" 1626325200 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 593 | \n",
" 1813 | \n",
" Hale-Hayes | \n",
" Ameliorated client-driven open system | \n",
" 121600 | \n",
" 188288 | \n",
" successful | \n",
" 4006 | \n",
" US | \n",
" USD | \n",
" 1609826400 | \n",
" 1637992800 | \n",
" False | \n",
" False | \n",
" film & video/animation | \n",
"
\n",
" \n",
" 1 | \n",
" 1621 | \n",
" Odom Inc | \n",
" Managed bottom-line architecture | \n",
" 1400 | \n",
" 14560 | \n",
" successful | \n",
" 158 | \n",
" US | \n",
" USD | \n",
" 1611554400 | \n",
" 1621918800 | \n",
" False | \n",
" True | \n",
" music/rock | \n",
"
\n",
" \n",
" 968 | \n",
" 1755 | \n",
" Gonzalez-White | \n",
" Open-architected disintermediate budgetary management | \n",
" 2400 | \n",
" 8117 | \n",
" successful | \n",
" 114 | \n",
" US | \n",
" USD | \n",
" 1608530400 | \n",
" 1635310800 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
"
\n",
" \n",
" 752 | \n",
" 1496 | \n",
" Lowery Group | \n",
" Sharable motivating emulation | \n",
" 5800 | \n",
" 5362 | \n",
" canceled | \n",
" 114 | \n",
" US | \n",
" USD | \n",
" 1608789600 | \n",
" 1630213200 | \n",
" False | \n",
" True | \n",
" theater/plays | \n",
"
\n",
" \n",
" 193 | \n",
" 3195 | \n",
" Calhoun, Rogers and Long | \n",
" Progressive discrete hub | \n",
" 6600 | \n",
" 3012 | \n",
" failed | \n",
" 65 | \n",
" US | \n",
" USD | \n",
" 1587790800 | \n",
" 1616389200 | \n",
" True | \n",
" False | \n",
" music/indie rock | \n",
"
\n",
" \n",
" 629 | \n",
" 2940 | \n",
" Jackson, Martinez and Ray | \n",
" Multi-tiered executive toolset | \n",
" 85900 | \n",
" 55476 | \n",
" failed | \n",
" 750 | \n",
" US | \n",
" USD | \n",
" 1617598800 | \n",
" 1642572000 | \n",
" False | \n",
" True | \n",
" theater/plays | \n",
"
\n",
" \n",
" 322 | \n",
" 1373 | \n",
" Hebert Group | \n",
" Visionary asymmetric Graphical User Interface | \n",
" 117900 | \n",
" 196377 | \n",
" successful | \n",
" 5168 | \n",
" US | \n",
" USD | \n",
" 1616043600 | \n",
" 1616389200 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
"
\n",
" \n",
" 470 | \n",
" 470 | \n",
" Grimes, Holland and Sloan | \n",
" Extended dedicated archive | \n",
" 3600 | \n",
" 10289 | \n",
" successful | \n",
" 381 | \n",
" US | \n",
" USD | \n",
" 1618549200 | \n",
" 1634101200 | \n",
" False | \n",
" False | \n",
" technology/wearables | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cf_id company_name \\\n",
"352 2514 Adams, Willis and Sanchez \n",
"273 2440 Thomas and Sons \n",
"593 1813 Hale-Hayes \n",
"1 1621 Odom Inc \n",
"968 1755 Gonzalez-White \n",
"752 1496 Lowery Group \n",
"193 3195 Calhoun, Rogers and Long \n",
"629 2940 Jackson, Martinez and Ray \n",
"322 1373 Hebert Group \n",
"470 470 Grimes, Holland and Sloan \n",
"\n",
" blurb goal pledged \\\n",
"352 Expanded hybrid hardware 2800 977 \n",
"273 Re-engineered heuristic forecast 7800 10704 \n",
"593 Ameliorated client-driven open system 121600 188288 \n",
"1 Managed bottom-line architecture 1400 14560 \n",
"968 Open-architected disintermediate budgetary management 2400 8117 \n",
"752 Sharable motivating emulation 5800 5362 \n",
"193 Progressive discrete hub 6600 3012 \n",
"629 Multi-tiered executive toolset 85900 55476 \n",
"322 Visionary asymmetric Graphical User Interface 117900 196377 \n",
"470 Extended dedicated archive 3600 10289 \n",
"\n",
" outcome backers_count country currency launched_at deadline \\\n",
"352 failed 33 CA CAD 1619326800 1625461200 \n",
"273 successful 282 CA CAD 1604469600 1626325200 \n",
"593 successful 4006 US USD 1609826400 1637992800 \n",
"1 successful 158 US USD 1611554400 1621918800 \n",
"968 successful 114 US USD 1608530400 1635310800 \n",
"752 canceled 114 US USD 1608789600 1630213200 \n",
"193 failed 65 US USD 1587790800 1616389200 \n",
"629 failed 750 US USD 1617598800 1642572000 \n",
"322 successful 5168 US USD 1616043600 1616389200 \n",
"470 successful 381 US USD 1618549200 1634101200 \n",
"\n",
" staff_pick spotlight category & sub-category \n",
"352 False False theater/plays \n",
"273 False False theater/plays \n",
"593 False False film & video/animation \n",
"1 False True music/rock \n",
"968 False False food/food trucks \n",
"752 False True theater/plays \n",
"193 True False music/indie rock \n",
"629 False True theater/plays \n",
"322 False False theater/plays \n",
"470 False False technology/wearables "
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Draw ten random campaign rows (unseeded, so the selection changes per run)\n",
"crowdfunding_info_df.sample(10)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "cbfa1ca7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 1000 entries, 0 to 999\n",
"Data columns (total 14 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 cf_id 1000 non-null int64 \n",
" 1 company_name 1000 non-null object\n",
" 2 blurb 1000 non-null object\n",
" 3 goal 1000 non-null int64 \n",
" 4 pledged 1000 non-null int64 \n",
" 5 outcome 1000 non-null object\n",
" 6 backers_count 1000 non-null int64 \n",
" 7 country 1000 non-null object\n",
" 8 currency 1000 non-null object\n",
" 9 launched_at 1000 non-null int64 \n",
" 10 deadline 1000 non-null int64 \n",
" 11 staff_pick 1000 non-null bool \n",
" 12 spotlight 1000 non-null bool \n",
" 13 category & sub-category 1000 non-null object\n",
"dtypes: bool(2), int64(6), object(6)\n",
"memory usage: 95.8+ KB\n"
]
}
],
"source": [
"# Print column names, non-null counts, dtypes and memory usage for the campaign data\n",
"crowdfunding_info_df.info()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f0657349",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 1001 entries, 0 to 1000\n",
"Data columns (total 1 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 1001 non-null object\n",
"dtypes: object(1)\n",
"memory usage: 7.9+ KB\n"
]
}
],
"source": [
"# Print the same structural summary for the contact data\n",
"contact_info_df.info()"
]
},
{
"cell_type": "markdown",
"id": "9b7b70f0",
"metadata": {},
"source": [
"## 8.3.3"
]
},
{
"cell_type": "markdown",
"id": "9e0402ee",
"metadata": {},
"source": [
"### Inspect"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "05499d8a",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['cf_id', 'company_name', 'blurb', 'goal', 'pledged', 'outcome',\n",
" 'backers_count', 'country', 'currency', 'launched_at', 'deadline',\n",
" 'staff_pick', 'spotlight', 'category & sub-category'],\n",
" dtype='object')"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List the column labels of the campaign dataframe\n",
"crowdfunding_info_df.columns"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "4be2e667",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array(['food/food trucks', 'music/rock', 'technology/web',\n",
" 'theater/plays', 'film & video/documentary',\n",
" 'music/electric music', 'film & video/drama', 'music/indie rock',\n",
" 'technology/wearables', 'publishing/nonfiction',\n",
" 'film & video/animation', 'games/video games',\n",
" 'film & video/shorts', 'publishing/fiction',\n",
" 'photography/photography books', 'publishing/radio & podcasts',\n",
" 'music/metal', 'music/jazz', 'publishing/translations',\n",
" 'film & video/television', 'games/mobile games',\n",
" 'music/world music', 'film & video/science fiction',\n",
" 'journalism/audio'], dtype=object)"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# List the distinct combined \"category/sub-category\" labels\n",
"crowdfunding_info_df.loc[:, \"category & sub-category\"].unique()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "262a91ae",
"metadata": {},
"outputs": [],
"source": [
"# This column has 24 unique values"
]
},
{
"cell_type": "markdown",
"id": "e736cb55",
"metadata": {},
"source": [
"### Plan"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "821d81bc",
"metadata": {},
"outputs": [],
"source": [
"# Split category and sub-category values using code rather than \"hard coding\" (putting them in manually)"
]
},
{
"cell_type": "markdown",
"id": "e8b82592",
"metadata": {},
"source": [
"### Execute"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "e94eee70",
"metadata": {},
"outputs": [],
"source": [
"# Use str.split method with \"/\" as delimiter"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "f6a0fa00",
"metadata": {},
"outputs": [],
"source": [
"# Break \"category & sub-category\" at its first \"/\" and store the two parts\n",
"# in new \"category\" and \"subcategory\" columns.\n",
"# Reference: https://pandas.pydata.org/pandas-docs/stable/reference/api/pandas.Series.str.split.html\n",
"crowdfunding_info_df[[\"category\", \"subcategory\"]] = (\n",
"    crowdfunding_info_df[\"category & sub-category\"]\n",
"    .str.split(pat=\"/\", n=1, expand=True)\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "59391db9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cf_id | \n",
" company_name | \n",
" blurb | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_at | \n",
" deadline | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
" category | \n",
" subcategory | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 147 | \n",
" Baldwin, Riley and Jackson | \n",
" Pre-emptive tertiary standardization | \n",
" 100 | \n",
" 0 | \n",
" failed | \n",
" 0 | \n",
" CA | \n",
" CAD | \n",
" 1581573600 | \n",
" 1614578400 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food | \n",
" food trucks | \n",
"
\n",
" \n",
" 1 | \n",
" 1621 | \n",
" Odom Inc | \n",
" Managed bottom-line architecture | \n",
" 1400 | \n",
" 14560 | \n",
" successful | \n",
" 158 | \n",
" US | \n",
" USD | \n",
" 1611554400 | \n",
" 1621918800 | \n",
" False | \n",
" True | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 2 | \n",
" 1812 | \n",
" Melton, Robinson and Fritz | \n",
" Function-based leadingedge pricing structure | \n",
" 108400 | \n",
" 142523 | \n",
" successful | \n",
" 1425 | \n",
" AU | \n",
" AUD | \n",
" 1608184800 | \n",
" 1640844000 | \n",
" False | \n",
" False | \n",
" technology/web | \n",
" technology | \n",
" web | \n",
"
\n",
" \n",
" 3 | \n",
" 2156 | \n",
" Mcdonald, Gonzalez and Ross | \n",
" Vision-oriented fresh-thinking conglomeration | \n",
" 4200 | \n",
" 2477 | \n",
" failed | \n",
" 24 | \n",
" US | \n",
" USD | \n",
" 1634792400 | \n",
" 1642399200 | \n",
" False | \n",
" False | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 4 | \n",
" 1365 | \n",
" Larson-Little | \n",
" Proactive foreground core | \n",
" 7600 | \n",
" 5265 | \n",
" failed | \n",
" 53 | \n",
" US | \n",
" USD | \n",
" 1608530400 | \n",
" 1629694800 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 5 | \n",
" 2057 | \n",
" Harris Group | \n",
" Open-source optimizing database | \n",
" 7600 | \n",
" 13195 | \n",
" successful | \n",
" 174 | \n",
" DK | \n",
" DKK | \n",
" 1607666400 | \n",
" 1630213200 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 6 | \n",
" 1894 | \n",
" Ortiz, Coleman and Mitchell | \n",
" Operative upward-trending algorithm | \n",
" 5200 | \n",
" 1090 | \n",
" failed | \n",
" 18 | \n",
" GB | \n",
" GBP | \n",
" 1596171600 | \n",
" 1620709200 | \n",
" False | \n",
" False | \n",
" film & video/documentary | \n",
" film & video | \n",
" documentary | \n",
"
\n",
" \n",
" 7 | \n",
" 2669 | \n",
" Carter-Guzman | \n",
" Centralized cohesive challenge | \n",
" 4500 | \n",
" 14741 | \n",
" successful | \n",
" 227 | \n",
" DK | \n",
" DKK | \n",
" 1608616800 | \n",
" 1632200400 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 8 | \n",
" 1114 | \n",
" Nunez-Richards | \n",
" Exclusive attitude-oriented intranet | \n",
" 110100 | \n",
" 21946 | \n",
" live | \n",
" 708 | \n",
" DK | \n",
" DKK | \n",
" 1586322000 | \n",
" 1615356000 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 9 | \n",
" 970 | \n",
" Rangel, Holt and Jones | \n",
" Open-source fresh-thinking model | \n",
" 6200 | \n",
" 3208 | \n",
" failed | \n",
" 44 | \n",
" US | \n",
" USD | \n",
" 1628830800 | \n",
" 1630386000 | \n",
" False | \n",
" False | \n",
" music/electric music | \n",
" music | \n",
" electric music | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cf_id company_name \\\n",
"0 147 Baldwin, Riley and Jackson \n",
"1 1621 Odom Inc \n",
"2 1812 Melton, Robinson and Fritz \n",
"3 2156 Mcdonald, Gonzalez and Ross \n",
"4 1365 Larson-Little \n",
"5 2057 Harris Group \n",
"6 1894 Ortiz, Coleman and Mitchell \n",
"7 2669 Carter-Guzman \n",
"8 1114 Nunez-Richards \n",
"9 970 Rangel, Holt and Jones \n",
"\n",
" blurb goal pledged outcome \\\n",
"0 Pre-emptive tertiary standardization 100 0 failed \n",
"1 Managed bottom-line architecture 1400 14560 successful \n",
"2 Function-based leadingedge pricing structure 108400 142523 successful \n",
"3 Vision-oriented fresh-thinking conglomeration 4200 2477 failed \n",
"4 Proactive foreground core 7600 5265 failed \n",
"5 Open-source optimizing database 7600 13195 successful \n",
"6 Operative upward-trending algorithm 5200 1090 failed \n",
"7 Centralized cohesive challenge 4500 14741 successful \n",
"8 Exclusive attitude-oriented intranet 110100 21946 live \n",
"9 Open-source fresh-thinking model 6200 3208 failed \n",
"\n",
" backers_count country currency launched_at deadline staff_pick \\\n",
"0 0 CA CAD 1581573600 1614578400 False \n",
"1 158 US USD 1611554400 1621918800 False \n",
"2 1425 AU AUD 1608184800 1640844000 False \n",
"3 24 US USD 1634792400 1642399200 False \n",
"4 53 US USD 1608530400 1629694800 False \n",
"5 174 DK DKK 1607666400 1630213200 False \n",
"6 18 GB GBP 1596171600 1620709200 False \n",
"7 227 DK DKK 1608616800 1632200400 False \n",
"8 708 DK DKK 1586322000 1615356000 False \n",
"9 44 US USD 1628830800 1630386000 False \n",
"\n",
" spotlight category & sub-category category subcategory \n",
"0 False food/food trucks food food trucks \n",
"1 True music/rock music rock \n",
"2 False technology/web technology web \n",
"3 False music/rock music rock \n",
"4 False theater/plays theater plays \n",
"5 False theater/plays theater plays \n",
"6 False film & video/documentary film & video documentary \n",
"7 False theater/plays theater plays \n",
"8 False theater/plays theater plays \n",
"9 False music/electric music music electric music "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"crowdfunding_info_df.head(10)"
]
},
{
"cell_type": "markdown",
"id": "b65827eb",
"metadata": {},
"source": [
"### Assign unique id numbers to all categories and subcategories"
]
},
{
"cell_type": "code",
"execution_count": 18,
"id": "aa9015a3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['food' 'music' 'technology' 'theater' 'film & video' 'publishing' 'games'\n",
" 'photography' 'journalism']\n",
"['food trucks' 'rock' 'web' 'plays' 'documentary' 'electric music' 'drama'\n",
" 'indie rock' 'wearables' 'nonfiction' 'animation' 'video games' 'shorts'\n",
" 'fiction' 'photography books' 'radio & podcasts' 'metal' 'jazz'\n",
" 'translations' 'television' 'mobile games' 'world music'\n",
" 'science fiction' 'audio']\n"
]
}
],
"source": [
"print(crowdfunding_info_df[\"category\"].unique())\n",
"print(crowdfunding_info_df[\"subcategory\"].unique())"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "d3570cda",
"metadata": {},
"outputs": [],
"source": [
"# 9 categories\n",
"# 24 subcategories"
]
},
{
"cell_type": "markdown",
"id": "932720a1",
"metadata": {},
"source": [
"### Create 2 new dataframes for category and subcategory"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "208a4455",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['food' 'music' 'technology' 'theater' 'film & video' 'publishing' 'games'\n",
" 'photography' 'journalism']\n",
"\n",
"['food trucks' 'rock' 'web' 'plays' 'documentary' 'electric music' 'drama'\n",
" 'indie rock' 'wearables' 'nonfiction' 'animation' 'video games' 'shorts'\n",
" 'fiction' 'photography books' 'radio & podcasts' 'metal' 'jazz'\n",
" 'translations' 'television' 'mobile games' 'world music'\n",
" 'science fiction' 'audio']\n"
]
}
],
"source": [
"# Get the unique categories and subcategories into different LISTS\n",
"categories = crowdfunding_info_df[\"category\"].unique()\n",
"subcategories = crowdfunding_info_df[\"subcategory\"].unique()\n",
"print(categories)\n",
"print()\n",
"print(subcategories)"
]
},
{
"cell_type": "markdown",
"id": "3d21300d",
"metadata": {},
"source": [
"### Use numpy arrays to assign numbers to each list item"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "b22c3705",
"metadata": {},
"outputs": [],
"source": [
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "83c39a0e",
"metadata": {},
"outputs": [],
"source": [
"# Assign to variables two arrays of sequential numbers equal in lenght to their corresponding column\n",
"category_ids = np.arange(1, 10)\n",
"subcategory_ids = np.arange(1, 25)"
]
},
{
"cell_type": "markdown",
"id": "d730cf36",
"metadata": {},
"source": [
"### Note:\n",
"In order to ensure that these category and subcategory id's are unique, including unique to each other, we will convert them to strings and add \"cat0\" to the beginning of each category id and \"scat0\" to the beginning of each subcategory id. This is neccessary because the first 9 numerals of category and subcategory id's are identical (1-9)."
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "83944cb3",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"['cat01', 'cat02', 'cat03', 'cat04', 'cat05', 'cat06', 'cat07', 'cat08', 'cat09']\n",
"\n",
"['scat01', 'scat02', 'scat03', 'scat04', 'scat05', 'scat06', 'scat07', 'scat08', 'scat09', 'scat010', 'scat011', 'scat012', 'scat013', 'scat014', 'scat015', 'scat016', 'scat017', 'scat018', 'scat019', 'scat020', 'scat021', 'scat022', 'scat023', 'scat024']\n"
]
}
],
"source": [
"cat_ids = [\"cat0\" + str(cat_id) for cat_id in category_ids]\n",
"\n",
"scat_ids = [\"scat0\" + str(scat_id) for scat_id in subcategory_ids]\n",
"\n",
"print(cat_ids)\n",
"print()\n",
"print(scat_ids)"
]
},
{
"cell_type": "markdown",
"id": "bb84dee1",
"metadata": {},
"source": [
"### Create the DataFrames"
]
},
{
"cell_type": "code",
"execution_count": 28,
"id": "5ea5ed9d",
"metadata": {},
"outputs": [],
"source": [
"category_df = pd.DataFrame({\n",
" \"category_id\": cat_ids,\n",
" \"category\": categories\n",
"})\n",
"\n",
"subcategory_df = pd.DataFrame({\n",
" \"subcategory_id\": scat_ids,\n",
" \"subcategory\": subcategories\n",
"})"
]
},
{
"cell_type": "code",
"execution_count": 29,
"id": "db166cbe",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" category_id | \n",
" category | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" cat01 | \n",
" food | \n",
"
\n",
" \n",
" 1 | \n",
" cat02 | \n",
" music | \n",
"
\n",
" \n",
" 2 | \n",
" cat03 | \n",
" technology | \n",
"
\n",
" \n",
" 3 | \n",
" cat04 | \n",
" theater | \n",
"
\n",
" \n",
" 4 | \n",
" cat05 | \n",
" film & video | \n",
"
\n",
" \n",
" 5 | \n",
" cat06 | \n",
" publishing | \n",
"
\n",
" \n",
" 6 | \n",
" cat07 | \n",
" games | \n",
"
\n",
" \n",
" 7 | \n",
" cat08 | \n",
" photography | \n",
"
\n",
" \n",
" 8 | \n",
" cat09 | \n",
" journalism | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" category_id category\n",
"0 cat01 food\n",
"1 cat02 music\n",
"2 cat03 technology\n",
"3 cat04 theater\n",
"4 cat05 film & video\n",
"5 cat06 publishing\n",
"6 cat07 games\n",
"7 cat08 photography\n",
"8 cat09 journalism"
]
},
"execution_count": 29,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"category_df"
]
},
{
"cell_type": "code",
"execution_count": 30,
"id": "3c51e4b6",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" subcategory_id | \n",
" subcategory | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" scat01 | \n",
" food trucks | \n",
"
\n",
" \n",
" 1 | \n",
" scat02 | \n",
" rock | \n",
"
\n",
" \n",
" 2 | \n",
" scat03 | \n",
" web | \n",
"
\n",
" \n",
" 3 | \n",
" scat04 | \n",
" plays | \n",
"
\n",
" \n",
" 4 | \n",
" scat05 | \n",
" documentary | \n",
"
\n",
" \n",
" 5 | \n",
" scat06 | \n",
" electric music | \n",
"
\n",
" \n",
" 6 | \n",
" scat07 | \n",
" drama | \n",
"
\n",
" \n",
" 7 | \n",
" scat08 | \n",
" indie rock | \n",
"
\n",
" \n",
" 8 | \n",
" scat09 | \n",
" wearables | \n",
"
\n",
" \n",
" 9 | \n",
" scat010 | \n",
" nonfiction | \n",
"
\n",
" \n",
" 10 | \n",
" scat011 | \n",
" animation | \n",
"
\n",
" \n",
" 11 | \n",
" scat012 | \n",
" video games | \n",
"
\n",
" \n",
" 12 | \n",
" scat013 | \n",
" shorts | \n",
"
\n",
" \n",
" 13 | \n",
" scat014 | \n",
" fiction | \n",
"
\n",
" \n",
" 14 | \n",
" scat015 | \n",
" photography books | \n",
"
\n",
" \n",
" 15 | \n",
" scat016 | \n",
" radio & podcasts | \n",
"
\n",
" \n",
" 16 | \n",
" scat017 | \n",
" metal | \n",
"
\n",
" \n",
" 17 | \n",
" scat018 | \n",
" jazz | \n",
"
\n",
" \n",
" 18 | \n",
" scat019 | \n",
" translations | \n",
"
\n",
" \n",
" 19 | \n",
" scat020 | \n",
" television | \n",
"
\n",
" \n",
" 20 | \n",
" scat021 | \n",
" mobile games | \n",
"
\n",
" \n",
" 21 | \n",
" scat022 | \n",
" world music | \n",
"
\n",
" \n",
" 22 | \n",
" scat023 | \n",
" science fiction | \n",
"
\n",
" \n",
" 23 | \n",
" scat024 | \n",
" audio | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" subcategory_id subcategory\n",
"0 scat01 food trucks\n",
"1 scat02 rock\n",
"2 scat03 web\n",
"3 scat04 plays\n",
"4 scat05 documentary\n",
"5 scat06 electric music\n",
"6 scat07 drama\n",
"7 scat08 indie rock\n",
"8 scat09 wearables\n",
"9 scat010 nonfiction\n",
"10 scat011 animation\n",
"11 scat012 video games\n",
"12 scat013 shorts\n",
"13 scat014 fiction\n",
"14 scat015 photography books\n",
"15 scat016 radio & podcasts\n",
"16 scat017 metal\n",
"17 scat018 jazz\n",
"18 scat019 translations\n",
"19 scat020 television\n",
"20 scat021 mobile games\n",
"21 scat022 world music\n",
"22 scat023 science fiction\n",
"23 scat024 audio"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"subcategory_df"
]
},
{
"cell_type": "code",
"execution_count": 31,
"id": "ad8c8643",
"metadata": {},
"outputs": [],
"source": [
"# Send dataframes to CSV files\n",
"category_df.to_csv(\"Data/categories.csv\", index=False)\n",
"subcategory_df.to_csv(\"Data/subcategories.csv\", index=False)"
]
},
{
"cell_type": "markdown",
"id": "b604fc65",
"metadata": {},
"source": [
"## 8.3.4\n",
"Building campaing_df"
]
},
{
"cell_type": "code",
"execution_count": 47,
"id": "e05b4a33",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cf_id | \n",
" company_name | \n",
" blurb | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_at | \n",
" deadline | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
" category | \n",
" subcategory | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 147 | \n",
" Baldwin, Riley and Jackson | \n",
" Pre-emptive tertiary standardization | \n",
" 100 | \n",
" 0 | \n",
" failed | \n",
" 0 | \n",
" CA | \n",
" CAD | \n",
" 1581573600 | \n",
" 1614578400 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food | \n",
" food trucks | \n",
"
\n",
" \n",
" 1 | \n",
" 1621 | \n",
" Odom Inc | \n",
" Managed bottom-line architecture | \n",
" 1400 | \n",
" 14560 | \n",
" successful | \n",
" 158 | \n",
" US | \n",
" USD | \n",
" 1611554400 | \n",
" 1621918800 | \n",
" False | \n",
" True | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 2 | \n",
" 1812 | \n",
" Melton, Robinson and Fritz | \n",
" Function-based leadingedge pricing structure | \n",
" 108400 | \n",
" 142523 | \n",
" successful | \n",
" 1425 | \n",
" AU | \n",
" AUD | \n",
" 1608184800 | \n",
" 1640844000 | \n",
" False | \n",
" False | \n",
" technology/web | \n",
" technology | \n",
" web | \n",
"
\n",
" \n",
" 3 | \n",
" 2156 | \n",
" Mcdonald, Gonzalez and Ross | \n",
" Vision-oriented fresh-thinking conglomeration | \n",
" 4200 | \n",
" 2477 | \n",
" failed | \n",
" 24 | \n",
" US | \n",
" USD | \n",
" 1634792400 | \n",
" 1642399200 | \n",
" False | \n",
" False | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 4 | \n",
" 1365 | \n",
" Larson-Little | \n",
" Proactive foreground core | \n",
" 7600 | \n",
" 5265 | \n",
" failed | \n",
" 53 | \n",
" US | \n",
" USD | \n",
" 1608530400 | \n",
" 1629694800 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cf_id company_name \\\n",
"0 147 Baldwin, Riley and Jackson \n",
"1 1621 Odom Inc \n",
"2 1812 Melton, Robinson and Fritz \n",
"3 2156 Mcdonald, Gonzalez and Ross \n",
"4 1365 Larson-Little \n",
"\n",
" blurb goal pledged outcome \\\n",
"0 Pre-emptive tertiary standardization 100 0 failed \n",
"1 Managed bottom-line architecture 1400 14560 successful \n",
"2 Function-based leadingedge pricing structure 108400 142523 successful \n",
"3 Vision-oriented fresh-thinking conglomeration 4200 2477 failed \n",
"4 Proactive foreground core 7600 5265 failed \n",
"\n",
" backers_count country currency launched_at deadline staff_pick \\\n",
"0 0 CA CAD 1581573600 1614578400 False \n",
"1 158 US USD 1611554400 1621918800 False \n",
"2 1425 AU AUD 1608184800 1640844000 False \n",
"3 24 US USD 1634792400 1642399200 False \n",
"4 53 US USD 1608530400 1629694800 False \n",
"\n",
" spotlight category & sub-category category subcategory \n",
"0 False food/food trucks food food trucks \n",
"1 True music/rock music rock \n",
"2 False technology/web technology web \n",
"3 False music/rock music rock \n",
"4 False theater/plays theater plays "
]
},
"execution_count": 47,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# create a copy with a new name to preserve original dataframe\n",
"campaign_df = crowdfunding_info_df.copy()\n",
"campaign_df.head()"
]
},
{
"cell_type": "code",
"execution_count": 48,
"id": "9b53fadd",
"metadata": {},
"outputs": [],
"source": [
"# Rename columns according to instructions\n",
"campaign_df = campaign_df.rename(columns={\"blurb\":\"description\", \"launched_at\":\"launched_date\", \"deadline\":\"end_date\"})"
]
},
{
"cell_type": "code",
"execution_count": 49,
"id": "236ceb62",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cf_id | \n",
" company_name | \n",
" description | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_date | \n",
" end_date | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
" category | \n",
" subcategory | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 147 | \n",
" Baldwin, Riley and Jackson | \n",
" Pre-emptive tertiary standardization | \n",
" 100.0 | \n",
" 0.0 | \n",
" failed | \n",
" 0 | \n",
" CA | \n",
" CAD | \n",
" 1581573600 | \n",
" 1614578400 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food | \n",
" food trucks | \n",
"
\n",
" \n",
" 1 | \n",
" 1621 | \n",
" Odom Inc | \n",
" Managed bottom-line architecture | \n",
" 1400.0 | \n",
" 14560.0 | \n",
" successful | \n",
" 158 | \n",
" US | \n",
" USD | \n",
" 1611554400 | \n",
" 1621918800 | \n",
" False | \n",
" True | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 2 | \n",
" 1812 | \n",
" Melton, Robinson and Fritz | \n",
" Function-based leadingedge pricing structure | \n",
" 108400.0 | \n",
" 142523.0 | \n",
" successful | \n",
" 1425 | \n",
" AU | \n",
" AUD | \n",
" 1608184800 | \n",
" 1640844000 | \n",
" False | \n",
" False | \n",
" technology/web | \n",
" technology | \n",
" web | \n",
"
\n",
" \n",
" 3 | \n",
" 2156 | \n",
" Mcdonald, Gonzalez and Ross | \n",
" Vision-oriented fresh-thinking conglomeration | \n",
" 4200.0 | \n",
" 2477.0 | \n",
" failed | \n",
" 24 | \n",
" US | \n",
" USD | \n",
" 1634792400 | \n",
" 1642399200 | \n",
" False | \n",
" False | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 4 | \n",
" 1365 | \n",
" Larson-Little | \n",
" Proactive foreground core | \n",
" 7600.0 | \n",
" 5265.0 | \n",
" failed | \n",
" 53 | \n",
" US | \n",
" USD | \n",
" 1608530400 | \n",
" 1629694800 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 5 | \n",
" 2057 | \n",
" Harris Group | \n",
" Open-source optimizing database | \n",
" 7600.0 | \n",
" 13195.0 | \n",
" successful | \n",
" 174 | \n",
" DK | \n",
" DKK | \n",
" 1607666400 | \n",
" 1630213200 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 6 | \n",
" 1894 | \n",
" Ortiz, Coleman and Mitchell | \n",
" Operative upward-trending algorithm | \n",
" 5200.0 | \n",
" 1090.0 | \n",
" failed | \n",
" 18 | \n",
" GB | \n",
" GBP | \n",
" 1596171600 | \n",
" 1620709200 | \n",
" False | \n",
" False | \n",
" film & video/documentary | \n",
" film & video | \n",
" documentary | \n",
"
\n",
" \n",
" 7 | \n",
" 2669 | \n",
" Carter-Guzman | \n",
" Centralized cohesive challenge | \n",
" 4500.0 | \n",
" 14741.0 | \n",
" successful | \n",
" 227 | \n",
" DK | \n",
" DKK | \n",
" 1608616800 | \n",
" 1632200400 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 8 | \n",
" 1114 | \n",
" Nunez-Richards | \n",
" Exclusive attitude-oriented intranet | \n",
" 110100.0 | \n",
" 21946.0 | \n",
" live | \n",
" 708 | \n",
" DK | \n",
" DKK | \n",
" 1586322000 | \n",
" 1615356000 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
" 9 | \n",
" 970 | \n",
" Rangel, Holt and Jones | \n",
" Open-source fresh-thinking model | \n",
" 6200.0 | \n",
" 3208.0 | \n",
" failed | \n",
" 44 | \n",
" US | \n",
" USD | \n",
" 1628830800 | \n",
" 1630386000 | \n",
" False | \n",
" False | \n",
" music/electric music | \n",
" music | \n",
" electric music | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cf_id company_name \\\n",
"0 147 Baldwin, Riley and Jackson \n",
"1 1621 Odom Inc \n",
"2 1812 Melton, Robinson and Fritz \n",
"3 2156 Mcdonald, Gonzalez and Ross \n",
"4 1365 Larson-Little \n",
"5 2057 Harris Group \n",
"6 1894 Ortiz, Coleman and Mitchell \n",
"7 2669 Carter-Guzman \n",
"8 1114 Nunez-Richards \n",
"9 970 Rangel, Holt and Jones \n",
"\n",
" description goal pledged \\\n",
"0 Pre-emptive tertiary standardization 100.0 0.0 \n",
"1 Managed bottom-line architecture 1400.0 14560.0 \n",
"2 Function-based leadingedge pricing structure 108400.0 142523.0 \n",
"3 Vision-oriented fresh-thinking conglomeration 4200.0 2477.0 \n",
"4 Proactive foreground core 7600.0 5265.0 \n",
"5 Open-source optimizing database 7600.0 13195.0 \n",
"6 Operative upward-trending algorithm 5200.0 1090.0 \n",
"7 Centralized cohesive challenge 4500.0 14741.0 \n",
"8 Exclusive attitude-oriented intranet 110100.0 21946.0 \n",
"9 Open-source fresh-thinking model 6200.0 3208.0 \n",
"\n",
" outcome backers_count country currency launched_date end_date \\\n",
"0 failed 0 CA CAD 1581573600 1614578400 \n",
"1 successful 158 US USD 1611554400 1621918800 \n",
"2 successful 1425 AU AUD 1608184800 1640844000 \n",
"3 failed 24 US USD 1634792400 1642399200 \n",
"4 failed 53 US USD 1608530400 1629694800 \n",
"5 successful 174 DK DKK 1607666400 1630213200 \n",
"6 failed 18 GB GBP 1596171600 1620709200 \n",
"7 successful 227 DK DKK 1608616800 1632200400 \n",
"8 live 708 DK DKK 1586322000 1615356000 \n",
"9 failed 44 US USD 1628830800 1630386000 \n",
"\n",
" staff_pick spotlight category & sub-category category \\\n",
"0 False False food/food trucks food \n",
"1 False True music/rock music \n",
"2 False False technology/web technology \n",
"3 False False music/rock music \n",
"4 False False theater/plays theater \n",
"5 False False theater/plays theater \n",
"6 False False film & video/documentary film & video \n",
"7 False False theater/plays theater \n",
"8 False False theater/plays theater \n",
"9 False False music/electric music music \n",
"\n",
" subcategory \n",
"0 food trucks \n",
"1 rock \n",
"2 web \n",
"3 rock \n",
"4 plays \n",
"5 plays \n",
"6 documentary \n",
"7 plays \n",
"8 plays \n",
"9 electric music "
]
},
"execution_count": 49,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Convert int64 to float for specified columns\n",
"campaign_df[[\"goal\", \"pledged\"]] = campaign_df[[\"goal\", \"pledged\"]].astype(float)\n",
"campaign_df.head(10)"
]
},
{
"cell_type": "markdown",
"id": "cfaae929",
"metadata": {},
"source": [
"### Convert epoch time to ISO using datetime module"
]
},
{
"cell_type": "code",
"execution_count": 50,
"id": "17a59941",
"metadata": {},
"outputs": [],
"source": [
"from datetime import datetime as dt"
]
},
{
"cell_type": "code",
"execution_count": 52,
"id": "2d198bf9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" cf_id | \n",
" company_name | \n",
" description | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_date | \n",
" end_date | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
" category | \n",
" subcategory | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 147 | \n",
" Baldwin, Riley and Jackson | \n",
" Pre-emptive tertiary standardization | \n",
" 100.0 | \n",
" 0.0 | \n",
" failed | \n",
" 0 | \n",
" CA | \n",
" CAD | \n",
" 2020-02-13 | \n",
" 2021-03-01 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food | \n",
" food trucks | \n",
"
\n",
" \n",
" 1 | \n",
" 1621 | \n",
" Odom Inc | \n",
" Managed bottom-line architecture | \n",
" 1400.0 | \n",
" 14560.0 | \n",
" successful | \n",
" 158 | \n",
" US | \n",
" USD | \n",
" 2021-01-25 | \n",
" 2021-05-25 | \n",
" False | \n",
" True | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 2 | \n",
" 1812 | \n",
" Melton, Robinson and Fritz | \n",
" Function-based leadingedge pricing structure | \n",
" 108400.0 | \n",
" 142523.0 | \n",
" successful | \n",
" 1425 | \n",
" AU | \n",
" AUD | \n",
" 2020-12-17 | \n",
" 2021-12-30 | \n",
" False | \n",
" False | \n",
" technology/web | \n",
" technology | \n",
" web | \n",
"
\n",
" \n",
" 3 | \n",
" 2156 | \n",
" Mcdonald, Gonzalez and Ross | \n",
" Vision-oriented fresh-thinking conglomeration | \n",
" 4200.0 | \n",
" 2477.0 | \n",
" failed | \n",
" 24 | \n",
" US | \n",
" USD | \n",
" 2021-10-21 | \n",
" 2022-01-17 | \n",
" False | \n",
" False | \n",
" music/rock | \n",
" music | \n",
" rock | \n",
"
\n",
" \n",
" 4 | \n",
" 1365 | \n",
" Larson-Little | \n",
" Proactive foreground core | \n",
" 7600.0 | \n",
" 5265.0 | \n",
" failed | \n",
" 53 | \n",
" US | \n",
" USD | \n",
" 2020-12-21 | \n",
" 2021-08-23 | \n",
" False | \n",
" False | \n",
" theater/plays | \n",
" theater | \n",
" plays | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" cf_id company_name \\\n",
"0 147 Baldwin, Riley and Jackson \n",
"1 1621 Odom Inc \n",
"2 1812 Melton, Robinson and Fritz \n",
"3 2156 Mcdonald, Gonzalez and Ross \n",
"4 1365 Larson-Little \n",
"\n",
" description goal pledged \\\n",
"0 Pre-emptive tertiary standardization 100.0 0.0 \n",
"1 Managed bottom-line architecture 1400.0 14560.0 \n",
"2 Function-based leadingedge pricing structure 108400.0 142523.0 \n",
"3 Vision-oriented fresh-thinking conglomeration 4200.0 2477.0 \n",
"4 Proactive foreground core 7600.0 5265.0 \n",
"\n",
" outcome backers_count country currency launched_date end_date \\\n",
"0 failed 0 CA CAD 2020-02-13 2021-03-01 \n",
"1 successful 158 US USD 2021-01-25 2021-05-25 \n",
"2 successful 1425 AU AUD 2020-12-17 2021-12-30 \n",
"3 failed 24 US USD 2021-10-21 2022-01-17 \n",
"4 failed 53 US USD 2020-12-21 2021-08-23 \n",
"\n",
" staff_pick spotlight category & sub-category category subcategory \n",
"0 False False food/food trucks food food trucks \n",
"1 False True music/rock music rock \n",
"2 False False technology/web technology web \n",
"3 False False music/rock music rock \n",
"4 False False theater/plays theater plays "
]
},
"execution_count": 52,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"campaign_df[\"launched_date\"] = pd.to_datetime(campaign_df[\"launched_date\"], unit='s').dt.strftime('%Y-%m-%d')\n",
"campaign_df[\"end_date\"] = pd.to_datetime(campaign_df[\"end_date\"], unit='s').dt.strftime('%Y-%m-%d')\n",
"campaign_df.head()"
]
},
{
"cell_type": "markdown",
"id": "4000c7db",
"metadata": {},
"source": [
"### Get cat and scat id #'s by merging those dataframes on common columns"
]
},
{
"cell_type": "code",
"execution_count": 54,
"id": "b17961ba",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" category_id | \n",
" category | \n",
" cf_id | \n",
" company_name | \n",
" description | \n",
" goal | \n",
" pledged | \n",
" outcome | \n",
" backers_count | \n",
" country | \n",
" currency | \n",
" launched_date | \n",
" end_date | \n",
" staff_pick | \n",
" spotlight | \n",
" category & sub-category | \n",
" subcategory | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" cat01 | \n",
" food | \n",
" 147 | \n",
" Baldwin, Riley and Jackson | \n",
" Pre-emptive tertiary standardization | \n",
" 100.0 | \n",
" 0.0 | \n",
" failed | \n",
" 0 | \n",
" CA | \n",
" CAD | \n",
" 2020-02-13 | \n",
" 2021-03-01 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food trucks | \n",
"
\n",
" \n",
" 1 | \n",
" cat01 | \n",
" food | \n",
" 1175 | \n",
" Werner-Bryant | \n",
" Virtual uniform frame | \n",
" 1800.0 | \n",
" 7991.0 | \n",
" successful | \n",
" 222 | \n",
" US | \n",
" USD | \n",
" 2020-06-20 | \n",
" 2021-01-30 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food trucks | \n",
"
\n",
" \n",
" 2 | \n",
" cat01 | \n",
" food | \n",
" 873 | \n",
" Stewart LLC | \n",
" Cloned bi-directional architecture | \n",
" 1300.0 | \n",
" 12047.0 | \n",
" successful | \n",
" 113 | \n",
" US | \n",
" USD | \n",
" 2020-11-29 | \n",
" 2021-06-11 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food trucks | \n",
"
\n",
" \n",
" 3 | \n",
" cat01 | \n",
" food | \n",
" 2568 | \n",
" Castillo-Carey | \n",
" Cross-platform solution-oriented process improvement | \n",
" 142400.0 | \n",
" 21307.0 | \n",
" failed | \n",
" 296 | \n",
" US | \n",
" USD | \n",
" 2020-05-05 | \n",
" 2021-05-30 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food trucks | \n",
"
\n",
" \n",
" 4 | \n",
" cat01 | \n",
" food | \n",
" 1211 | \n",
" Wright, Hartman and Yu | \n",
" User-friendly tertiary array | \n",
" 3300.0 | \n",
" 12437.0 | \n",
" successful | \n",
" 131 | \n",
" US | \n",
" USD | \n",
" 2021-01-29 | \n",
" 2021-02-13 | \n",
" False | \n",
" False | \n",
" food/food trucks | \n",
" food trucks | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" category_id category cf_id company_name \\\n",
"0 cat01 food 147 Baldwin, Riley and Jackson \n",
"1 cat01 food 1175 Werner-Bryant \n",
"2 cat01 food 873 Stewart LLC \n",
"3 cat01 food 2568 Castillo-Carey \n",
"4 cat01 food 1211 Wright, Hartman and Yu \n",
"\n",
" description goal pledged \\\n",
"0 Pre-emptive tertiary standardization 100.0 0.0 \n",
"1 Virtual uniform frame 1800.0 7991.0 \n",
"2 Cloned bi-directional architecture 1300.0 12047.0 \n",
"3 Cross-platform solution-oriented process improvement 142400.0 21307.0 \n",
"4 User-friendly tertiary array 3300.0 12437.0 \n",
"\n",
" outcome backers_count country currency launched_date end_date \\\n",
"0 failed 0 CA CAD 2020-02-13 2021-03-01 \n",
"1 successful 222 US USD 2020-06-20 2021-01-30 \n",
"2 successful 113 US USD 2020-11-29 2021-06-11 \n",
"3 failed 296 US USD 2020-05-05 2021-05-30 \n",
"4 successful 131 US USD 2021-01-29 2021-02-13 \n",
"\n",
" staff_pick spotlight category & sub-category subcategory \n",
"0 False False food/food trucks food trucks \n",
"1 False False food/food trucks food trucks \n",
"2 False False food/food trucks food trucks \n",
"3 False False food/food trucks food trucks \n",
"4 False False food/food trucks food trucks "
]
},
"execution_count": 54,
"metadata": {},
"output_type": "execute_result"
}
],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "11f38a5b",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "PythonData",
"language": "python",
"name": "pythondata"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.9.12"
}
},
"nbformat": 4,
"nbformat_minor": 5
}