{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Frequencies of gender, age, country, and participation in Majors\n",
"\n",
"> [https://github.com/BMClab/covid19](https://github.com/BMClab/covid19) \n",
"> [Laboratory of Biomechanics and Motor Control](http://pesquisa.ufabc.edu.br/bmclab/) \n",
"> Federal University of ABC, Brazil\n",
"\n",
"**The data used in this Jupyter notebook are available on the Figshare repository https://doi.org/10.6084/m9.figshare.16620238.**"
]
},
{
"cell_type": "markdown",
"metadata": {
"toc": true
},
"source": [
"<h1>Contents<span class=\"tocSkip\"></span></h1>\n",
""
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Setup"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:16.509882Z",
"start_time": "2021-09-14T03:07:15.387256Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Last updated: 2021-09-14T00:07:16.488680-03:00\n",
"\n",
"Python implementation: CPython\n",
"Python version : 3.8.10\n",
"IPython version : 7.27.0\n",
"\n",
"Compiler : GCC 9.3.0\n",
"OS : Linux\n",
"Release : 5.11.0-34-generic\n",
"Machine : x86_64\n",
"Processor : x86_64\n",
"CPU cores : 12\n",
"Architecture: 64bit\n",
"\n",
"numpy : 1.20.3\n",
"plotly : 5.3.1\n",
"autopep8 : 1.5.6\n",
"matplotlib : 3.4.3\n",
"pycountry_convert: 0.7.2\n",
"seaborn : 0.11.2\n",
"json : 2.0.9\n",
"pandas : 1.3.3\n",
"sys : 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) \n",
"[GCC 9.3.0]\n",
"\n"
]
}
],
"source": [
"import sys, os\n",
"import numpy as np\n",
"import pandas as pd\n",
"import seaborn as sns\n",
"import matplotlib.pyplot as plt\n",
"%matplotlib inline\n",
"import plotly.express as px\n",
"pd.options.plotting.backend = \"plotly\"\n",
"from tqdm.notebook import tqdm\n",
"import pycountry_convert as pc\n",
"%load_ext watermark \n",
"\n",
"%watermark\n",
"%watermark --iversions"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Environment"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:16.517363Z",
"start_time": "2021-09-14T03:07:16.511348Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" "
],
"text/plain": [
"[(0.12156862745098039, 0.4666666666666667, 0.7058823529411765),\n",
" (1.0, 0.4980392156862745, 0.054901960784313725),\n",
" (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),\n",
" (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),\n",
" (0.5803921568627451, 0.403921568627451, 0.7411764705882353),\n",
" (0.5490196078431373, 0.33725490196078434, 0.29411764705882354),\n",
" (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),\n",
" (0.4980392156862745, 0.4980392156862745, 0.4980392156862745),\n",
" (0.7372549019607844, 0.7411764705882353, 0.13333333333333333),\n",
" (0.09019607843137255, 0.7450980392156863, 0.8117647058823529)]"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Folder with the dataset files, relative to this notebook\n",
"path2 = r'./../data/'\n",
"\n",
"# Show floats in pandas output with four significant digits\n",
"pd.options.display.float_format = lambda value: '%.4g' % value\n",
"\n",
"# Matplotlib font sizes, then the seaborn grid style with inward ticks on all four sides\n",
"font_sizes = {'font.size': 14, 'xtick.labelsize': 12, 'ytick.labelsize': 12}\n",
"plt.rcParams.update(font_sizes)\n",
"tick_style = {\n",
"    'xtick.bottom': True, 'xtick.top': True,\n",
"    'ytick.left': True, 'ytick.right': True,\n",
"    'xtick.direction': 'in', 'ytick.direction': 'in',\n",
"}\n",
"sns.set_style('whitegrid', rc=tick_style)\n",
"\n",
"# Current seaborn palette, displayed as the cell output\n",
"colors = sns.color_palette()\n",
"colors"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load dataset"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:17.554075Z",
"start_time": "2021-09-14T03:07:16.518354Z"
}
},
"outputs": [],
"source": [
"dataset_file = os.path.join(path2, 'run_ww_2019_d.parquet')\n",
"df = pd.read_parquet(dataset_file)\n",
"# Re-cast 'athlete' to category right after loading (workaround for a parquet issue)\n",
"df['athlete'] = df['athlete'].astype('category')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Subject's information"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.024808Z",
"start_time": "2021-09-14T03:07:17.555385Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" athlete \n",
" gender \n",
" age_group \n",
" country \n",
" major \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" F \n",
" 18 - 34 \n",
" United States \n",
" CHICAGO 2019 \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" M \n",
" 35 - 54 \n",
" Germany \n",
" BERLIN 2016 \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" M \n",
" 35 - 54 \n",
" United Kingdom \n",
" LONDON 2018,LONDON 2019 \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" M \n",
" 18 - 34 \n",
" United Kingdom \n",
" LONDON 2017 \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" M \n",
" 35 - 54 \n",
" United States \n",
" BOSTON 2017 \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 36407 \n",
" 37594 \n",
" M \n",
" 18 - 34 \n",
" United Kingdom \n",
" BERLIN 2017 \n",
" \n",
" \n",
" 36408 \n",
" 37595 \n",
" M \n",
" 18 - 34 \n",
" United States \n",
" BERLIN 2019,NEW YORK 2015 \n",
" \n",
" \n",
" 36409 \n",
" 37596 \n",
" M \n",
" 18 - 34 \n",
" United States \n",
" BOSTON 2017 \n",
" \n",
" \n",
" 36410 \n",
" 37597 \n",
" F \n",
" 18 - 34 \n",
" United States \n",
" BOSTON 2015 \n",
" \n",
" \n",
" 36411 \n",
" 37598 \n",
" M \n",
" 35 - 54 \n",
" China \n",
" TOKYO 2012 \n",
" \n",
" \n",
"
\n",
"
36412 rows × 5 columns
\n",
"
"
],
"text/plain": [
" athlete gender age_group country major\n",
"0 0 F 18 - 34 United States CHICAGO 2019\n",
"1 1 M 35 - 54 Germany BERLIN 2016\n",
"2 2 M 35 - 54 United Kingdom LONDON 2018,LONDON 2019\n",
"3 3 M 18 - 34 United Kingdom LONDON 2017\n",
"4 4 M 35 - 54 United States BOSTON 2017\n",
"... ... ... ... ... ...\n",
"36407 37594 M 18 - 34 United Kingdom BERLIN 2017\n",
"36408 37595 M 18 - 34 United States BERLIN 2019,NEW YORK 2015\n",
"36409 37596 M 18 - 34 United States BOSTON 2017\n",
"36410 37597 F 18 - 34 United States BOSTON 2015\n",
"36411 37598 M 35 - 54 China TOKYO 2012\n",
"\n",
"[36412 rows x 5 columns]"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Keep one row per athlete with the descriptive columns only, ordered by athlete id\n",
"subject_columns = ['athlete', 'gender', 'age_group', 'country', 'major']\n",
"df = df.loc[:, subject_columns]\n",
"df = df.drop_duplicates(subset='athlete')\n",
"df = df.sort_values('athlete').reset_index(drop=True)\n",
"display(df)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.032102Z",
"start_time": "2021-09-14T03:07:18.025629Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"RangeIndex: 36412 entries, 0 to 36411\n",
"Data columns (total 5 columns):\n",
" # Column Non-Null Count Dtype \n",
"--- ------ -------------- ----- \n",
" 0 athlete 36412 non-null category\n",
" 1 gender 36412 non-null category\n",
" 2 age_group 36412 non-null category\n",
" 3 country 36083 non-null category\n",
" 4 major 36412 non-null category\n",
"dtypes: category(5)\n",
"memory usage: 1.8 MB\n"
]
}
],
"source": [
"# Column dtypes, non-null counts and deep memory usage of the per-athlete table\n",
"df.info(memory_usage='deep')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Gender and age"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.062353Z",
"start_time": "2021-09-14T03:07:18.032960Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" age_group \n",
" 18 - 34 \n",
" 35 - 54 \n",
" 55 + \n",
" All \n",
" \n",
" \n",
" gender \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" F \n",
" 0.1061 \n",
" 0.1271 \n",
" 0.01088 \n",
" 0.2441 \n",
" \n",
" \n",
" M \n",
" 0.2301 \n",
" 0.4661 \n",
" 0.05968 \n",
" 0.7559 \n",
" \n",
" \n",
" All \n",
" 0.3362 \n",
" 0.5932 \n",
" 0.07055 \n",
" 1 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"age_group 18 - 34 35 - 54 55 + All\n",
"gender \n",
"F 0.1061 0.1271 0.01088 0.2441\n",
"M 0.2301 0.4661 0.05968 0.7559\n",
"All 0.3362 0.5932 0.07055 1"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Joint relative frequencies of gender and age group; the 'All' row/column hold the marginal shares\n",
"pd.crosstab(index=df['gender'], columns=df['age_group'], normalize='all', margins=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"76% of athletes are male, 34% are between 18 and 34 years old, 59% are between 35 and 54 and the remaining 7% are 55 or older."
]
},
{
"cell_type": "markdown",
"metadata": {
"ExecuteTime": {
"end_time": "2021-02-09T09:10:19.706839Z",
"start_time": "2021-02-09T09:10:19.702270Z"
}
},
"source": [
"## Location"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### By country\n",
"\n",
"Number of distinct countries, number of athletes with unknown country, and share of athletes per country:"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.070588Z",
"start_time": "2021-09-14T03:07:18.063172Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Number of countries: 130\n",
"Number of athletes with unknown country: 329 (0.9 %)\n"
]
}
],
"source": [
"# nunique() ignores NaN, so athletes with a missing country are not counted as an extra country\n",
"n_countries = df['country'].nunique()\n",
"# Boolean mask of athletes whose country is unknown; its mean is the unknown fraction\n",
"unknown_country = df['country'].isnull()\n",
"print('Number of countries: {}'.format(n_countries))\n",
"print('Number of athletes with unknown country: {} ({:.1f} %)'.format(unknown_country.sum(),\n",
"                                                                     100*unknown_country.mean()))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.079604Z",
"start_time": "2021-09-14T03:07:18.071737Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" country \n",
" percentage \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" United States \n",
" 38.1 \n",
" \n",
" \n",
" 1 \n",
" United Kingdom \n",
" 20.97 \n",
" \n",
" \n",
" 2 \n",
" Germany \n",
" 5.925 \n",
" \n",
" \n",
" 3 \n",
" Canada \n",
" 3.17 \n",
" \n",
" \n",
" 4 \n",
" France \n",
" 2.503 \n",
" \n",
" \n",
" 5 \n",
" Japan \n",
" 2.375 \n",
" \n",
" \n",
" 6 \n",
" Netherlands \n",
" 2.3 \n",
" \n",
" \n",
" 7 \n",
" Brazil \n",
" 1.807 \n",
" \n",
" \n",
" 8 \n",
" Australia \n",
" 1.602 \n",
" \n",
" \n",
" 9 \n",
" Italy \n",
" 1.508 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" country percentage\n",
"0 United States 38.1\n",
"1 United Kingdom 20.97\n",
"2 Germany 5.925\n",
"3 Canada 3.17\n",
"4 France 2.503\n",
"5 Japan 2.375\n",
"6 Netherlands 2.3\n",
"7 Brazil 1.807\n",
"8 Australia 1.602\n",
"9 Italy 1.508"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of athletes per country (in %), largest first; value_counts drops missing countries.\n",
"# The index and the values are named explicitly so the resulting column names do not depend\n",
"# on how the installed pandas version names the value_counts result.\n",
"cn_pct = (\n",
"    (100*df['country'].value_counts(normalize=True))\n",
"    .rename_axis('country')\n",
"    .reset_index(name='percentage')\n",
")\n",
"# Plain strings instead of a categorical, so country labels can be overwritten freely later\n",
"cn_pct['country'] = cn_pct['country'].astype('object')\n",
"cn_pct.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.345831Z",
"start_time": "2021-09-14T03:07:18.080441Z"
}
},
"outputs": [
{
"data": {
"text/html": [
" \n",
" "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"domain": {
"x": [
0,
1
],
"y": [
0,
1
]
},
"hovertemplate": "country=%{label} percentage=%{value} ",
"labels": [
"United States",
"United Kingdom",
"Germany",
"Canada",
"France",
"Japan",
"Netherlands",
"Brazil",
"Australia",
"Italy",
"Spain",
"Norway",
"Mexico",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries",
"Other countries"
],
"legendgroup": "",
"name": "",
"showlegend": true,
"textinfo": "percent+label",
"textposition": "outside",
"type": "pie",
"values": [
38.10104481334701,
20.965551644818888,
5.925227946678492,
3.1704680874650117,
2.502563534074218,
2.375079677410415,
2.300252196325139,
1.8069450987999887,
1.601862372862567,
1.5076351744588865,
1.4965496217055123,
1.4217221406202367,
1.0254136296871104,
0.9699858659202395,
0.9228722667183992,
0.8868442202699331,
0.870215891139872,
0.8591303383864978,
0.8369592328797495,
0.8258736801263753,
0.6789901061441677,
0.6263337305656402,
0.598619848682205,
0.4295651691932489,
0.4295651691932489,
0.42402239281656184,
0.4073940636865006,
0.3990798991214699,
0.3103954770944766,
0.2993099243411025,
0.2826815952110412,
0.2743674306460106,
0.24388216057423165,
0.235567996009201,
0.22448244325582686,
0.19953994956073495,
0.19953994956073495,
0.19676856137239143,
0.16905467948895603,
0.1551977385472383,
0.14411218579386414,
0.14411218579386414,
0.14411218579386414,
0.1413407976055206,
0.13856940941717708,
0.13856940941717708,
0.13579802122883353,
0.13025524485214643,
0.10254136296871104,
0.08868442202699332,
0.08591303383864977,
0.08591303383864977,
0.08037025746196269,
0.07205609289693207,
0.07205609289693207,
0.06928470470858854,
0.06097054014355791,
0.06097054014355791,
0.058199151955214364,
0.05265637557852729,
0.04988498739018374,
0.0471135992018402,
0.04434221101349666,
0.04157082282515312,
0.036028046448466036,
0.036028046448466036,
0.036028046448466036,
0.036028046448466036,
0.033256658260122494,
0.030485270071778956,
0.030485270071778956,
0.02217110550674833,
0.02217110550674833,
0.01939971731840479,
0.01939971731840479,
0.01939971731840479,
0.016628329130061247,
0.016628329130061247,
0.016628329130061247,
0.013856940941717705,
0.013856940941717705,
0.013856940941717705,
0.011085552753374165,
0.011085552753374165,
0.011085552753374165,
0.008314164565030624,
0.008314164565030624,
0.008314164565030624,
0.008314164565030624,
0.008314164565030624,
0.008314164565030624,
0.008314164565030624,
0.008314164565030624,
0.008314164565030624,
0.005542776376687083,
0.005542776376687083,
0.005542776376687083,
0.005542776376687083,
0.005542776376687083,
0.005542776376687083,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413,
0.0027713881883435413
]
}
],
"layout": {
"legend": {
"tracegroupgap": 0
},
"showlegend": false,
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Countries of the athletes"
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Work on a copy so cn_pct keeps the real country names\n",
"y = cn_pct.copy(deep=True)\n",
"# Relabel every country below 1 % of the athletes as 'Other countries'\n",
"y.loc[y['percentage'] < 1, 'country'] = 'Other countries'\n",
"# No `labels` argument: plotly express expects a dict of display names there, not a string\n",
"fig = px.pie(y, values='percentage', names='country',\n",
"             title='Countries of the athletes')\n",
"fig.update_traces(textposition='outside', textinfo='percent+label')\n",
"fig.update_layout(showlegend=False)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### By continent"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.437521Z",
"start_time": "2021-09-14T03:07:18.348553Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" athlete \n",
" gender \n",
" age_group \n",
" country \n",
" major \n",
" continent \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" F \n",
" 18 - 34 \n",
" United States \n",
" CHICAGO 2019 \n",
" NA \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" M \n",
" 35 - 54 \n",
" Germany \n",
" BERLIN 2016 \n",
" EU \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" M \n",
" 35 - 54 \n",
" United Kingdom \n",
" LONDON 2018,LONDON 2019 \n",
" EU \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" M \n",
" 18 - 34 \n",
" United Kingdom \n",
" LONDON 2017 \n",
" EU \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" M \n",
" 35 - 54 \n",
" United States \n",
" BOSTON 2017 \n",
" NA \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 36407 \n",
" 37594 \n",
" M \n",
" 18 - 34 \n",
" United Kingdom \n",
" BERLIN 2017 \n",
" EU \n",
" \n",
" \n",
" 36408 \n",
" 37595 \n",
" M \n",
" 18 - 34 \n",
" United States \n",
" BERLIN 2019,NEW YORK 2015 \n",
" NA \n",
" \n",
" \n",
" 36409 \n",
" 37596 \n",
" M \n",
" 18 - 34 \n",
" United States \n",
" BOSTON 2017 \n",
" NA \n",
" \n",
" \n",
" 36410 \n",
" 37597 \n",
" F \n",
" 18 - 34 \n",
" United States \n",
" BOSTON 2015 \n",
" NA \n",
" \n",
" \n",
" 36411 \n",
" 37598 \n",
" M \n",
" 35 - 54 \n",
" China \n",
" TOKYO 2012 \n",
" AS \n",
" \n",
" \n",
"
\n",
"
36412 rows × 6 columns
\n",
"
"
],
"text/plain": [
" athlete gender age_group country major \\\n",
"0 0 F 18 - 34 United States CHICAGO 2019 \n",
"1 1 M 35 - 54 Germany BERLIN 2016 \n",
"2 2 M 35 - 54 United Kingdom LONDON 2018,LONDON 2019 \n",
"3 3 M 18 - 34 United Kingdom LONDON 2017 \n",
"4 4 M 35 - 54 United States BOSTON 2017 \n",
"... ... ... ... ... ... \n",
"36407 37594 M 18 - 34 United Kingdom BERLIN 2017 \n",
"36408 37595 M 18 - 34 United States BERLIN 2019,NEW YORK 2015 \n",
"36409 37596 M 18 - 34 United States BOSTON 2017 \n",
"36410 37597 F 18 - 34 United States BOSTON 2015 \n",
"36411 37598 M 35 - 54 China TOKYO 2012 \n",
"\n",
" continent \n",
"0 NA \n",
"1 EU \n",
"2 EU \n",
"3 EU \n",
"4 NA \n",
"... ... \n",
"36407 EU \n",
"36408 NA \n",
"36409 NA \n",
"36410 NA \n",
"36411 AS \n",
"\n",
"[36412 rows x 6 columns]"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def continent_of(country):\n",
"    \"\"\"Return the two-letter continent code for a country name.\n",
"\n",
"    Kosovo is assigned 'EU' by hand, and the alpha-2 code 'TL' is replaced by 'TP'\n",
"    before the continent lookup; every other name goes through pycountry_convert.\n",
"    \"\"\"\n",
"    if country == 'Kosovo':\n",
"        # NOTE(review): presumably pycountry_convert cannot resolve Kosovo - confirm\n",
"        return 'EU'\n",
"    country_code = pc.country_name_to_country_alpha2(country)\n",
"    if country_code == 'TL':\n",
"        # NOTE(review): presumably the continent table only knows the older code 'TP' - confirm\n",
"        country_code = 'TP'\n",
"    return pc.country_alpha2_to_continent_code(country_code)\n",
"\n",
"\n",
"# Resolve each distinct country once, then map all rows in a single pass.\n",
"# Athletes with unknown country are absent from the mapping and keep a missing continent.\n",
"continent_by_country = {country: continent_of(country)\n",
"                        for country in df['country'].dropna().unique()}\n",
"df['continent'] = df['country'].astype('object').map(continent_by_country).astype('category')\n",
"df"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.445360Z",
"start_time": "2021-09-14T03:07:18.438438Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" continent \n",
" percentage \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" EU \n",
" 43.6 \n",
" \n",
" \n",
" 1 \n",
" NA \n",
" 43.24 \n",
" \n",
" \n",
" 2 \n",
" AS \n",
" 7.765 \n",
" \n",
" \n",
" 3 \n",
" SA \n",
" 3.079 \n",
" \n",
" \n",
" 4 \n",
" OC \n",
" 1.774 \n",
" \n",
" \n",
" 5 \n",
" AF \n",
" 0.5487 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" continent percentage\n",
"0 EU 43.6\n",
"1 NA 43.24\n",
"2 AS 7.765\n",
"3 SA 3.079\n",
"4 OC 1.774\n",
"5 AF 0.5487"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Share of athletes per continent (in %), largest first; value_counts drops missing continents.\n",
"# The index and the values are named explicitly so the resulting column names do not depend\n",
"# on how the installed pandas version names the value_counts result.\n",
"ct_pct = (\n",
"    (100*df['continent'].value_counts(normalize=True))\n",
"    .rename_axis('continent')\n",
"    .reset_index(name='percentage')\n",
")\n",
"ct_pct"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.475013Z",
"start_time": "2021-09-14T03:07:18.446142Z"
}
},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"domain": {
"x": [
0,
1
],
"y": [
0,
1
]
},
"hovertemplate": "continent=%{label} percentage=%{value} ",
"labels": [
"EU",
"NA",
"AS",
"SA",
"OC",
"AF"
],
"legendgroup": "",
"name": "",
"showlegend": true,
"textinfo": "percent+label",
"textposition": "outside",
"type": "pie",
"values": [
43.596707590832246,
43.236427126347586,
7.765429703738603,
3.0790122772496744,
1.7736884405398663,
0.5487348612920212
]
}
],
"layout": {
"height": 400,
"legend": {
"tracegroupgap": 0
},
"showlegend": false,
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Continents of the athletes"
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Pie chart of the athletes' continents. Each slice is annotated outside the pie\n",
"# with its label and percentage, so the legend is switched off.\n",
"fig = px.pie(ct_pct, values='percentage', names='continent',\n",
"             title='Continents of the athletes', height=400)\n",
"fig.update_traces(textposition='outside', textinfo='percent+label')\n",
"fig.update_layout(showlegend=False)\n",
"fig.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Majors"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.638492Z",
"start_time": "2021-09-14T03:07:18.475846Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" athlete \n",
" gender \n",
" age_group \n",
" country \n",
" major \n",
" continent \n",
" major_y \n",
" major_n \n",
" major_s \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" 0 \n",
" F \n",
" 18 - 34 \n",
" United States \n",
" CHICAGO 2019 \n",
" NA \n",
" 2019 \n",
" 1 \n",
" [CHICAGO] \n",
" \n",
" \n",
" 1 \n",
" 1 \n",
" M \n",
" 35 - 54 \n",
" Germany \n",
" BERLIN 2016 \n",
" EU \n",
" 2016 \n",
" 1 \n",
" [BERLIN] \n",
" \n",
" \n",
" 2 \n",
" 2 \n",
" M \n",
" 35 - 54 \n",
" United Kingdom \n",
" LONDON 2018,LONDON 2019 \n",
" EU \n",
" 2019 \n",
" 2 \n",
" [LONDON, LONDON] \n",
" \n",
" \n",
" 3 \n",
" 3 \n",
" M \n",
" 18 - 34 \n",
" United Kingdom \n",
" LONDON 2017 \n",
" EU \n",
" 2017 \n",
" 1 \n",
" [LONDON] \n",
" \n",
" \n",
" 4 \n",
" 4 \n",
" M \n",
" 35 - 54 \n",
" United States \n",
" BOSTON 2017 \n",
" NA \n",
" 2017 \n",
" 1 \n",
" [BOSTON] \n",
" \n",
" \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" ... \n",
" \n",
" \n",
" 36407 \n",
" 37594 \n",
" M \n",
" 18 - 34 \n",
" United Kingdom \n",
" BERLIN 2017 \n",
" EU \n",
" 2017 \n",
" 1 \n",
" [BERLIN] \n",
" \n",
" \n",
" 36408 \n",
" 37595 \n",
" M \n",
" 18 - 34 \n",
" United States \n",
" BERLIN 2019,NEW YORK 2015 \n",
" NA \n",
" 2019 \n",
" 2 \n",
" [BERLIN, NEW YORK] \n",
" \n",
" \n",
" 36409 \n",
" 37596 \n",
" M \n",
" 18 - 34 \n",
" United States \n",
" BOSTON 2017 \n",
" NA \n",
" 2017 \n",
" 1 \n",
" [BOSTON] \n",
" \n",
" \n",
" 36410 \n",
" 37597 \n",
" F \n",
" 18 - 34 \n",
" United States \n",
" BOSTON 2015 \n",
" NA \n",
" 2015 \n",
" 1 \n",
" [BOSTON] \n",
" \n",
" \n",
" 36411 \n",
" 37598 \n",
" M \n",
" 35 - 54 \n",
" China \n",
" TOKYO 2012 \n",
" AS \n",
" 2012 \n",
" 1 \n",
" [TOKYO] \n",
" \n",
" \n",
"
\n",
"
36412 rows × 9 columns
\n",
"
"
],
"text/plain": [
" athlete gender age_group country major \\\n",
"0 0 F 18 - 34 United States CHICAGO 2019 \n",
"1 1 M 35 - 54 Germany BERLIN 2016 \n",
"2 2 M 35 - 54 United Kingdom LONDON 2018,LONDON 2019 \n",
"3 3 M 18 - 34 United Kingdom LONDON 2017 \n",
"4 4 M 35 - 54 United States BOSTON 2017 \n",
"... ... ... ... ... ... \n",
"36407 37594 M 18 - 34 United Kingdom BERLIN 2017 \n",
"36408 37595 M 18 - 34 United States BERLIN 2019,NEW YORK 2015 \n",
"36409 37596 M 18 - 34 United States BOSTON 2017 \n",
"36410 37597 F 18 - 34 United States BOSTON 2015 \n",
"36411 37598 M 35 - 54 China TOKYO 2012 \n",
"\n",
" continent major_y major_n major_s \n",
"0 NA 2019 1 [CHICAGO] \n",
"1 EU 2016 1 [BERLIN] \n",
"2 EU 2019 2 [LONDON, LONDON] \n",
"3 EU 2017 1 [LONDON] \n",
"4 NA 2017 1 [BOSTON] \n",
"... ... ... ... ... \n",
"36407 EU 2017 1 [BERLIN] \n",
"36408 NA 2019 2 [BERLIN, NEW YORK] \n",
"36409 NA 2017 1 [BOSTON] \n",
"36410 NA 2015 1 [BOSTON] \n",
"36411 AS 2012 1 [TOKYO] \n",
"\n",
"[36412 rows x 9 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def latest_year(years):\n",
"    \"\"\"Return the largest value among the digit strings in `years`, as an integer.\"\"\"\n",
"    return np.array(years, dtype=int).max()\n",
"\n",
"\n",
"# `major` holds comma-separated 'CITY YEAR' entries (e.g. 'LONDON 2018,LONDON 2019').\n",
"# major_y: latest year among the Majors listed for the athlete.\n",
"df['major_y'] = df['major'].str.findall(r'[0-9]+').apply(latest_year)\n",
"# major_n: number of Majors listed for the athlete.\n",
"df['major_n'] = df['major'].str.split(',').str.len()\n",
"# major_s: list of city names; raw string so `\\d` is a regex digit class, not a string escape.\n",
"df['major_s'] = df['major'].str.replace(r' \\d+', '', regex=True).str.split(',')\n",
"df"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Frequency"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.654304Z",
"start_time": "2021-09-14T03:07:18.639412Z"
},
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" major \n",
" percentage \n",
" \n",
" \n",
" \n",
" \n",
" 0 \n",
" BOSTON \n",
" 27.26 \n",
" \n",
" \n",
" 1 \n",
" BERLIN \n",
" 26.5 \n",
" \n",
" \n",
" 2 \n",
" NEW YORK \n",
" 16.27 \n",
" \n",
" \n",
" 3 \n",
" LONDON \n",
" 16.09 \n",
" \n",
" \n",
" 4 \n",
" CHICAGO \n",
" 8.199 \n",
" \n",
" \n",
" 5 \n",
" TOKYO \n",
" 5.686 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" major percentage\n",
"0 BOSTON 27.26\n",
"1 BERLIN 26.5\n",
"2 NEW YORK 16.27\n",
"3 LONDON 16.09\n",
"4 CHICAGO 8.199\n",
"5 TOKYO 5.686"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Percentage of all listed Major participations that correspond to each city.\n",
"mj_pct = 100*df['major_s'].explode().astype('category').value_counts(normalize=True)\n",
"# Name the index and the values explicitly rather than renaming the default column\n",
"# names produced by `reset_index`, which differ between pandas versions.\n",
"mj_pct = mj_pct.rename_axis('major').reset_index(name='percentage')\n",
"mj_pct"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.683906Z",
"start_time": "2021-09-14T03:07:18.655225Z"
}
},
"outputs": [
{
"data": {
"application/vnd.plotly.v1+json": {
"config": {
"plotlyServerURL": "https://plot.ly"
},
"data": [
{
"domain": {
"x": [
0,
1
],
"y": [
0,
1
]
},
"hovertemplate": "major=%{label} percentage=%{value} ",
"labels": [
"BOSTON",
"BERLIN",
"NEW YORK",
"LONDON",
"CHICAGO",
"TOKYO"
],
"legendgroup": "",
"name": "",
"showlegend": true,
"textinfo": "percent+label",
"textposition": "outside",
"type": "pie",
"values": [
27.263863787691427,
26.49814515361933,
16.26557595358128,
16.08722534005517,
8.199372205840389,
5.685817559212404
]
}
],
"layout": {
"height": 400,
"legend": {
"tracegroupgap": 0
},
"showlegend": false,
"template": {
"data": {
"bar": [
{
"error_x": {
"color": "#2a3f5f"
},
"error_y": {
"color": "#2a3f5f"
},
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "bar"
}
],
"barpolar": [
{
"marker": {
"line": {
"color": "#E5ECF6",
"width": 0.5
},
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "barpolar"
}
],
"carpet": [
{
"aaxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"baxis": {
"endlinecolor": "#2a3f5f",
"gridcolor": "white",
"linecolor": "white",
"minorgridcolor": "white",
"startlinecolor": "#2a3f5f"
},
"type": "carpet"
}
],
"choropleth": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "choropleth"
}
],
"contour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "contour"
}
],
"contourcarpet": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "contourcarpet"
}
],
"heatmap": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmap"
}
],
"heatmapgl": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "heatmapgl"
}
],
"histogram": [
{
"marker": {
"pattern": {
"fillmode": "overlay",
"size": 10,
"solidity": 0.2
}
},
"type": "histogram"
}
],
"histogram2d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2d"
}
],
"histogram2dcontour": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "histogram2dcontour"
}
],
"mesh3d": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"type": "mesh3d"
}
],
"parcoords": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "parcoords"
}
],
"pie": [
{
"automargin": true,
"type": "pie"
}
],
"scatter": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter"
}
],
"scatter3d": [
{
"line": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatter3d"
}
],
"scattercarpet": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattercarpet"
}
],
"scattergeo": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergeo"
}
],
"scattergl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattergl"
}
],
"scattermapbox": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scattermapbox"
}
],
"scatterpolar": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolar"
}
],
"scatterpolargl": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterpolargl"
}
],
"scatterternary": [
{
"marker": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"type": "scatterternary"
}
],
"surface": [
{
"colorbar": {
"outlinewidth": 0,
"ticks": ""
},
"colorscale": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"type": "surface"
}
],
"table": [
{
"cells": {
"fill": {
"color": "#EBF0F8"
},
"line": {
"color": "white"
}
},
"header": {
"fill": {
"color": "#C8D4E3"
},
"line": {
"color": "white"
}
},
"type": "table"
}
]
},
"layout": {
"annotationdefaults": {
"arrowcolor": "#2a3f5f",
"arrowhead": 0,
"arrowwidth": 1
},
"autotypenumbers": "strict",
"coloraxis": {
"colorbar": {
"outlinewidth": 0,
"ticks": ""
}
},
"colorscale": {
"diverging": [
[
0,
"#8e0152"
],
[
0.1,
"#c51b7d"
],
[
0.2,
"#de77ae"
],
[
0.3,
"#f1b6da"
],
[
0.4,
"#fde0ef"
],
[
0.5,
"#f7f7f7"
],
[
0.6,
"#e6f5d0"
],
[
0.7,
"#b8e186"
],
[
0.8,
"#7fbc41"
],
[
0.9,
"#4d9221"
],
[
1,
"#276419"
]
],
"sequential": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
],
"sequentialminus": [
[
0,
"#0d0887"
],
[
0.1111111111111111,
"#46039f"
],
[
0.2222222222222222,
"#7201a8"
],
[
0.3333333333333333,
"#9c179e"
],
[
0.4444444444444444,
"#bd3786"
],
[
0.5555555555555556,
"#d8576b"
],
[
0.6666666666666666,
"#ed7953"
],
[
0.7777777777777778,
"#fb9f3a"
],
[
0.8888888888888888,
"#fdca26"
],
[
1,
"#f0f921"
]
]
},
"colorway": [
"#636efa",
"#EF553B",
"#00cc96",
"#ab63fa",
"#FFA15A",
"#19d3f3",
"#FF6692",
"#B6E880",
"#FF97FF",
"#FECB52"
],
"font": {
"color": "#2a3f5f"
},
"geo": {
"bgcolor": "white",
"lakecolor": "white",
"landcolor": "#E5ECF6",
"showlakes": true,
"showland": true,
"subunitcolor": "white"
},
"hoverlabel": {
"align": "left"
},
"hovermode": "closest",
"mapbox": {
"style": "light"
},
"paper_bgcolor": "white",
"plot_bgcolor": "#E5ECF6",
"polar": {
"angularaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"radialaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"scene": {
"xaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"yaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
},
"zaxis": {
"backgroundcolor": "#E5ECF6",
"gridcolor": "white",
"gridwidth": 2,
"linecolor": "white",
"showbackground": true,
"ticks": "",
"zerolinecolor": "white"
}
},
"shapedefaults": {
"line": {
"color": "#2a3f5f"
}
},
"ternary": {
"aaxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"baxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
},
"bgcolor": "#E5ECF6",
"caxis": {
"gridcolor": "white",
"linecolor": "white",
"ticks": ""
}
},
"title": {
"x": 0.05
},
"xaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
},
"yaxis": {
"automargin": true,
"gridcolor": "white",
"linecolor": "white",
"ticks": "",
"title": {
"standoff": 15
},
"zerolinecolor": "white",
"zerolinewidth": 2
}
}
},
"title": {
"text": "Frequency of Majors"
}
}
},
"text/html": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# Pie chart of the share of each Major. Each slice is annotated outside the pie\n",
"# with its label and percentage, so the legend is switched off.\n",
"fig = px.pie(mj_pct, values='percentage', names='major',\n",
"             title='Frequency of Majors', height=400)\n",
"fig.update_traces(textposition='outside', textinfo='percent+label')\n",
"fig.update_layout(showlegend=False)\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.690096Z",
"start_time": "2021-09-14T03:07:18.684955Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Majors in America: 51.7\n",
"Majors in Europe: 42.6\n",
"Majors in Asia: 5.7\n"
]
}
],
"source": [
"def fun(x, y):\n",
"    \"\"\"Sum the `percentage` of the rows of `x` whose `major` is listed in `y`.\"\"\"\n",
"    selected = x['major'].isin(y)\n",
"    return x[selected]['percentage'].values.sum()\n",
"\n",
"\n",
"# One (message template, cities) pair per region, printed in this order.\n",
"regions = [('Majors in America: {:.1f}', ['BOSTON', 'NEW YORK', 'CHICAGO']),\n",
"           ('Majors in Europe: {:.1f}', ['BERLIN', 'LONDON']),\n",
"           ('Majors in Asia: {:.1f}', ['TOKYO'])]\n",
"for template, cities in regions:\n",
"    print(template.format(fun(mj_pct, cities)))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Number and year of Majors"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.725355Z",
"start_time": "2021-09-14T03:07:18.691138Z"
}
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" major_n \n",
" 1 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 5 \n",
" 6 \n",
" 7 \n",
" 8 \n",
" All \n",
" \n",
" \n",
" major_y \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" \n",
" 2005 \n",
" 0.002746 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0.002746 \n",
" \n",
" \n",
" 2008 \n",
" 0.01099 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0.01099 \n",
" \n",
" \n",
" 2009 \n",
" 0.04669 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0.04669 \n",
" \n",
" \n",
" 2010 \n",
" 0.1181 \n",
" 0.002746 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0.1208 \n",
" \n",
" \n",
" 2011 \n",
" 0.2142 \n",
" 0.002746 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0.217 \n",
" \n",
" \n",
" 2012 \n",
" 0.6152 \n",
" 0.01099 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0.6262 \n",
" \n",
" \n",
" 2013 \n",
" 1.123 \n",
" 0.09063 \n",
" 0.01373 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 1.228 \n",
" \n",
" \n",
" 2014 \n",
" 1.999 \n",
" 0.1785 \n",
" 0.01922 \n",
" 0.005493 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 2.203 \n",
" \n",
" \n",
" 2015 \n",
" 5.251 \n",
" 0.4147 \n",
" 0.04943 \n",
" 0.002746 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 5.718 \n",
" \n",
" \n",
" 2016 \n",
" 7.418 \n",
" 0.5795 \n",
" 0.08514 \n",
" 0.005493 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 8.088 \n",
" \n",
" \n",
" 2017 \n",
" 12.25 \n",
" 1.112 \n",
" 0.173 \n",
" 0.06317 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 13.6 \n",
" \n",
" \n",
" 2018 \n",
" 21.96 \n",
" 2.153 \n",
" 0.2691 \n",
" 0.07415 \n",
" 0.01648 \n",
" 0.005493 \n",
" 0.002746 \n",
" 0.002746 \n",
" 24.49 \n",
" \n",
" \n",
" 2019 \n",
" 36.54 \n",
" 5.46 \n",
" 1.206 \n",
" 0.2801 \n",
" 0.06866 \n",
" 0.02197 \n",
" 0.002746 \n",
" 0.002746 \n",
" 43.58 \n",
" \n",
" \n",
" 2020 \n",
" 0.05767 \n",
" 0.01099 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0 \n",
" 0.06866 \n",
" \n",
" \n",
" All \n",
" 87.61 \n",
" 10.02 \n",
" 1.815 \n",
" 0.4312 \n",
" 0.08514 \n",
" 0.02746 \n",
" 0.005493 \n",
" 0.005493 \n",
" 100 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
"major_n 1 2 3 4 5 6 7 8 \\\n",
"major_y \n",
"2005 0.002746 0 0 0 0 0 0 0 \n",
"2008 0.01099 0 0 0 0 0 0 0 \n",
"2009 0.04669 0 0 0 0 0 0 0 \n",
"2010 0.1181 0.002746 0 0 0 0 0 0 \n",
"2011 0.2142 0.002746 0 0 0 0 0 0 \n",
"2012 0.6152 0.01099 0 0 0 0 0 0 \n",
"2013 1.123 0.09063 0.01373 0 0 0 0 0 \n",
"2014 1.999 0.1785 0.01922 0.005493 0 0 0 0 \n",
"2015 5.251 0.4147 0.04943 0.002746 0 0 0 0 \n",
"2016 7.418 0.5795 0.08514 0.005493 0 0 0 0 \n",
"2017 12.25 1.112 0.173 0.06317 0 0 0 0 \n",
"2018 21.96 2.153 0.2691 0.07415 0.01648 0.005493 0.002746 0.002746 \n",
"2019 36.54 5.46 1.206 0.2801 0.06866 0.02197 0.002746 0.002746 \n",
"2020 0.05767 0.01099 0 0 0 0 0 0 \n",
"All 87.61 10.02 1.815 0.4312 0.08514 0.02746 0.005493 0.005493 \n",
"\n",
"major_n All \n",
"major_y \n",
"2005 0.002746 \n",
"2008 0.01099 \n",
"2009 0.04669 \n",
"2010 0.1208 \n",
"2011 0.217 \n",
"2012 0.6262 \n",
"2013 1.228 \n",
"2014 2.203 \n",
"2015 5.718 \n",
"2016 8.088 \n",
"2017 13.6 \n",
"2018 24.49 \n",
"2019 43.58 \n",
"2020 0.06866 \n",
"All 100 "
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Joint distribution of `major_y` (rows) and `major_n` (columns) as a percentage\n",
"# of all athletes, including the row and column totals ('All').\n",
"joint = pd.crosstab(index=df['major_y'], columns=df['major_n'],\n",
"                    normalize='all', margins=True)\n",
"joint*100"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"ExecuteTime": {
"end_time": "2021-09-14T03:07:18.742856Z",
"start_time": "2021-09-14T03:07:18.726243Z"
}
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cumulative frequency of year of Majors:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 2019 \n",
" 2018 \n",
" 2017 \n",
" 2016 \n",
" 2015 \n",
" 2014 \n",
" 2013 \n",
" 2012 \n",
" 2011 \n",
" 2010 \n",
" 2020 \n",
" 2009 \n",
" 2008 \n",
" 2005 \n",
" \n",
" \n",
" \n",
" \n",
" major_y \n",
" 1.587e+06 \n",
" 8.916e+05 \n",
" 4.952e+05 \n",
" 2.945e+05 \n",
" 2.082e+05 \n",
" 8.02e+04 \n",
" 4.47e+04 \n",
" 2.28e+04 \n",
" 7900 \n",
" 4400 \n",
" 2500 \n",
" 1700 \n",
" 400 \n",
" 100 \n",
" \n",
" \n",
" major_y \n",
" 43.58 \n",
" 68.07 \n",
" 81.67 \n",
" 89.76 \n",
" 95.48 \n",
" 97.68 \n",
" 98.91 \n",
" 99.53 \n",
" 99.75 \n",
" 99.87 \n",
" 99.94 \n",
" 99.99 \n",
" 100 \n",
" 100 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 2019 2018 2017 2016 2015 2014 2013 \\\n",
"major_y 1.587e+06 8.916e+05 4.952e+05 2.945e+05 2.082e+05 8.02e+04 4.47e+04 \n",
"major_y 43.58 68.07 81.67 89.76 95.48 97.68 98.91 \n",
"\n",
" 2012 2011 2010 2020 2009 2008 2005 \n",
"major_y 2.28e+04 7900 4400 2500 1700 400 100 \n",
"major_y 99.53 99.75 99.87 99.94 99.99 100 100 "
]
},
"metadata": {},
"output_type": "display_data"
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Cumulative frequency of number of Majors:\n"
]
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" \n",
" 1 \n",
" 2 \n",
" 3 \n",
" 4 \n",
" 5 \n",
" 6 \n",
" 7 \n",
" 8 \n",
" \n",
" \n",
" \n",
" \n",
" major_n \n",
" 3.19e+06 \n",
" 3.647e+05 \n",
" 6.61e+04 \n",
" 1.57e+04 \n",
" 3100 \n",
" 1000 \n",
" 200 \n",
" 200 \n",
" \n",
" \n",
" major_n \n",
" 87.61 \n",
" 97.63 \n",
" 99.45 \n",
" 99.88 \n",
" 99.96 \n",
" 99.99 \n",
" 99.99 \n",
" 100 \n",
" \n",
" \n",
"
\n",
"
"
],
"text/plain": [
" 1 2 3 4 5 6 7 8\n",
"major_n 3.19e+06 3.647e+05 6.61e+04 1.57e+04 3100 1000 200 200\n",
"major_n 87.61 97.63 99.45 99.88 99.96 99.99 99.99 100"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"def cumulative_frequency(values):\n",
"    \"\"\"Tabulate counts and cumulative percentages of the distinct entries of `values`.\n",
"\n",
"    Entries are ordered as returned by `value_counts` (most frequent first). The\n",
"    result has one column per distinct entry and two rows: 'count' (number of\n",
"    occurrences) and 'cumulative %' (running share of the counted entries, in percent).\n",
"    \"\"\"\n",
"    counts = values.value_counts()\n",
"    # Scale only the cumulative share to percent; the counts stay as raw counts.\n",
"    cumulative = 100*values.value_counts(normalize=True).cumsum()\n",
"    return pd.concat((counts.rename('count'), cumulative.rename('cumulative %')), axis=1).T\n",
"\n",
"\n",
"print('Cumulative frequency of year of Majors:')\n",
"display(cumulative_frequency(df['major_y']))\n",
"print('Cumulative frequency of number of Majors:')\n",
"display(cumulative_frequency(df['major_n']))"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"About 98% of the athletes ran at most two Majors, and for about 98% of the athletes the most recent Major was between 2014 and 2019."
]
}
],
"metadata": {
"hide_input": false,
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.12"
},
"nbTranslate": {
"displayLangs": [
"*"
],
"hotkey": "alt-t",
"langInMainMenu": true,
"sourceLang": "en",
"targetLang": "fr",
"useGoogleTranslate": true
},
"toc": {
"base_numbering": 1,
"nav_menu": {},
"number_sections": true,
"sideBar": true,
"skip_h1_title": true,
"title_cell": "Contents",
"title_sidebar": "Contents",
"toc_cell": true,
"toc_position": {},
"toc_section_display": true,
"toc_window_display": false
},
"varInspector": {
"cols": {
"lenName": 16,
"lenType": 16,
"lenVar": 40
},
"kernels_config": {
"python": {
"delete_cmd_postfix": "",
"delete_cmd_prefix": "del ",
"library": "var_list.py",
"varRefreshCmd": "print(var_dic_list())"
},
"r": {
"delete_cmd_postfix": ") ",
"delete_cmd_prefix": "rm(",
"library": "var_list.r",
"varRefreshCmd": "cat(var_dic_list()) "
}
},
"position": {
"height": "280px",
"left": "793px",
"right": "20px",
"top": "121px",
"width": "470px"
},
"types_to_exclude": [
"module",
"function",
"builtin_function_or_method",
"instance",
"_Feature"
],
"window_display": false
}
},
"nbformat": 4,
"nbformat_minor": 4
}