{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Frequencies of gender, age, country, and participation in Majors\n", "\n", "> [https://github.com/BMClab/covid19](https://github.com/BMClab/covid19) \n", "> [Laboratory of Biomechanics and Motor Control](http://pesquisa.ufabc.edu.br/bmclab/) \n", "> Federal University of ABC, Brazil\n", "\n", "**The data used in this Jupyter notebook are available on the Figshare repository https://doi.org/10.6084/m9.figshare.16620238.**" ] }, { "cell_type": "markdown", "metadata": { "toc": true }, "source": [ "

Contents

\n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Setup" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:16.509882Z", "start_time": "2021-09-14T03:07:15.387256Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Last updated: 2021-09-14T00:07:16.488680-03:00\n", "\n", "Python implementation: CPython\n", "Python version : 3.8.10\n", "IPython version : 7.27.0\n", "\n", "Compiler : GCC 9.3.0\n", "OS : Linux\n", "Release : 5.11.0-34-generic\n", "Machine : x86_64\n", "Processor : x86_64\n", "CPU cores : 12\n", "Architecture: 64bit\n", "\n", "numpy : 1.20.3\n", "plotly : 5.3.1\n", "autopep8 : 1.5.6\n", "matplotlib : 3.4.3\n", "pycountry_convert: 0.7.2\n", "seaborn : 0.11.2\n", "json : 2.0.9\n", "pandas : 1.3.3\n", "sys : 3.8.10 | packaged by conda-forge | (default, May 11 2021, 07:01:05) \n", "[GCC 9.3.0]\n", "\n" ] } ], "source": [ "import sys, os\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "import plotly.express as px\n", "pd.options.plotting.backend = \"plotly\"\n", "from tqdm.notebook import tqdm\n", "import pycountry_convert as pc\n", "%load_ext watermark \n", "\n", "%watermark\n", "%watermark --iversions" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Environment" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:16.517363Z", "start_time": "2021-09-14T03:07:16.511348Z" } }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "[(0.12156862745098039, 0.4666666666666667, 0.7058823529411765),\n", " (1.0, 0.4980392156862745, 0.054901960784313725),\n", " (0.17254901960784313, 0.6274509803921569, 0.17254901960784313),\n", " (0.8392156862745098, 0.15294117647058825, 0.1568627450980392),\n", " (0.5803921568627451, 0.403921568627451, 0.7411764705882353),\n", " (0.5490196078431373, 0.33725490196078434, 0.29411764705882354),\n", " (0.8901960784313725, 0.4666666666666667, 0.7607843137254902),\n", " (0.4980392156862745, 0.4980392156862745, 0.4980392156862745),\n", " (0.7372549019607844, 0.7411764705882353, 0.13333333333333333),\n", " (0.09019607843137255, 0.7450980392156863, 0.8117647058823529)]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "path2 = r'./../data/'\n", "\n", "pd.set_option('display.float_format', lambda x: '%.4g' % x)\n", "plt.rcParams.update({'font.size': 14, 'xtick.labelsize': 12, 'ytick.labelsize': 12})\n", "sns.set_style('whitegrid', rc={'xtick.bottom': True, 'xtick.top': True, 'ytick.left': True,\n", " 'ytick.right': True, 'xtick.direction': 'in', 'ytick.direction': 'in'})\n", "colors = sns.color_palette()\n", "colors" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Load dataset" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:17.554075Z", "start_time": "2021-09-14T03:07:16.518354Z" } }, "outputs": [], "source": [ "df = pd.read_parquet(os.path.join(path2, 'run_ww_2019_d.parquet'))\n", "df['athlete'] = df['athlete'].astype('category') # bug in parquet" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Subject's information" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.024808Z", "start_time": "2021-09-14T03:07:17.555385Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
athletegenderage_groupcountrymajor
00F18 - 34United StatesCHICAGO 2019
11M35 - 54GermanyBERLIN 2016
22M35 - 54United KingdomLONDON 2018,LONDON 2019
33M18 - 34United KingdomLONDON 2017
44M35 - 54United StatesBOSTON 2017
..................
3640737594M18 - 34United KingdomBERLIN 2017
3640837595M18 - 34United StatesBERLIN 2019,NEW YORK 2015
3640937596M18 - 34United StatesBOSTON 2017
3641037597F18 - 34United StatesBOSTON 2015
3641137598M35 - 54ChinaTOKYO 2012
\n", "

36412 rows × 5 columns

\n", "
" ], "text/plain": [ " athlete gender age_group country major\n", "0 0 F 18 - 34 United States CHICAGO 2019\n", "1 1 M 35 - 54 Germany BERLIN 2016\n", "2 2 M 35 - 54 United Kingdom LONDON 2018,LONDON 2019\n", "3 3 M 18 - 34 United Kingdom LONDON 2017\n", "4 4 M 35 - 54 United States BOSTON 2017\n", "... ... ... ... ... ...\n", "36407 37594 M 18 - 34 United Kingdom BERLIN 2017\n", "36408 37595 M 18 - 34 United States BERLIN 2019,NEW YORK 2015\n", "36409 37596 M 18 - 34 United States BOSTON 2017\n", "36410 37597 F 18 - 34 United States BOSTON 2015\n", "36411 37598 M 35 - 54 China TOKYO 2012\n", "\n", "[36412 rows x 5 columns]" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "df = df[['athlete', 'gender', 'age_group', 'country', 'major']\n", " ].drop_duplicates(subset='athlete').sort_values('athlete').reset_index(drop=True)\n", "display(df)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.032102Z", "start_time": "2021-09-14T03:07:18.025629Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "\n", "RangeIndex: 36412 entries, 0 to 36411\n", "Data columns (total 5 columns):\n", " # Column Non-Null Count Dtype \n", "--- ------ -------------- ----- \n", " 0 athlete 36412 non-null category\n", " 1 gender 36412 non-null category\n", " 2 age_group 36412 non-null category\n", " 3 country 36083 non-null category\n", " 4 major 36412 non-null category\n", "dtypes: category(5)\n", "memory usage: 1.8 MB\n" ] } ], "source": [ "df.info(memory_usage='deep')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Gender and age" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.062353Z", "start_time": "2021-09-14T03:07:18.032960Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
age_group18 - 3435 - 5455 +All
gender
F0.10610.12710.010880.2441
M0.23010.46610.059680.7559
All0.33620.59320.070551
\n", "
" ], "text/plain": [ "age_group 18 - 34 35 - 54 55 + All\n", "gender \n", "F 0.1061 0.1271 0.01088 0.2441\n", "M 0.2301 0.4661 0.05968 0.7559\n", "All 0.3362 0.5932 0.07055 1" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(df['gender'], df['age_group'], margins=True, normalize='all')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "76% of athletes are male, 34% are between 18 and 34 years old, 59% are between 35 and 54 and the remaining 7% are 55 or older." ] }, { "cell_type": "markdown", "metadata": { "ExecuteTime": { "end_time": "2021-02-09T09:10:19.706839Z", "start_time": "2021-02-09T09:10:19.702270Z" } }, "source": [ "## Location" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### By country\n", "\n", "Fix some names in country:" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.070588Z", "start_time": "2021-09-14T03:07:18.063172Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Number of countries: 130\n", "Number of athletes with unknown country: 329 (0.9 %)\n" ] } ], "source": [ "print('Number of countries: {}'.format(df['country'].unique().size))\n", "print('Number of athletes with unknown country: {} ({:.1f} %)'.format(df['country'].isnull().sum(),\n", " df['country'].isnull().sum()/df['country'].size*100))" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.079604Z", "start_time": "2021-09-14T03:07:18.071737Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
countrypercentage
0United States38.1
1United Kingdom20.97
2Germany5.925
3Canada3.17
4France2.503
5Japan2.375
6Netherlands2.3
7Brazil1.807
8Australia1.602
9Italy1.508
\n", "
" ], "text/plain": [ " country percentage\n", "0 United States 38.1\n", "1 United Kingdom 20.97\n", "2 Germany 5.925\n", "3 Canada 3.17\n", "4 France 2.503\n", "5 Japan 2.375\n", "6 Netherlands 2.3\n", "7 Brazil 1.807\n", "8 Australia 1.602\n", "9 Italy 1.508" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cn_pct = 100*df['country'].value_counts(normalize=True)\n", "cn_pct = cn_pct.to_frame().reset_index()\n", "cn_pct = cn_pct.rename(columns={'index':'country', 'country':'percentage'})\n", "cn_pct['country'] = cn_pct['country'].astype('object')\n", "cn_pct.head(10)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.345831Z", "start_time": "2021-09-14T03:07:18.080441Z" } }, "outputs": [ { "data": { "text/html": [ " \n", " " ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "domain": { "x": [ 0, 1 ], "y": [ 0, 1 ] }, "hovertemplate": "country=%{label}
percentage=%{value}", "labels": [ "United States", "United Kingdom", "Germany", "Canada", "France", "Japan", "Netherlands", "Brazil", "Australia", "Italy", "Spain", "Norway", "Mexico", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries", "Other countries" ], "legendgroup": "", "name": "", "showlegend": true, "textinfo": "percent+label", "textposition": "outside", "type": "pie", "values": [ 38.10104481334701, 20.965551644818888, 5.925227946678492, 3.1704680874650117, 2.502563534074218, 2.375079677410415, 2.300252196325139, 1.8069450987999887, 1.601862372862567, 1.5076351744588865, 1.4965496217055123, 1.4217221406202367, 1.0254136296871104, 0.9699858659202395, 0.9228722667183992, 0.8868442202699331, 0.870215891139872, 0.8591303383864978, 0.8369592328797495, 0.8258736801263753, 0.6789901061441677, 0.6263337305656402, 0.598619848682205, 0.4295651691932489, 0.4295651691932489, 0.42402239281656184, 0.4073940636865006, 0.3990798991214699, 0.3103954770944766, 0.2993099243411025, 0.2826815952110412, 0.2743674306460106, 0.24388216057423165, 0.235567996009201, 0.22448244325582686, 0.19953994956073495, 0.19953994956073495, 0.19676856137239143, 0.16905467948895603, 0.1551977385472383, 0.14411218579386414, 0.14411218579386414, 0.14411218579386414, 0.1413407976055206, 0.13856940941717708, 0.13856940941717708, 0.13579802122883353, 0.13025524485214643, 0.10254136296871104, 0.08868442202699332, 0.08591303383864977, 0.08591303383864977, 0.08037025746196269, 0.07205609289693207, 0.07205609289693207, 0.06928470470858854, 0.06097054014355791, 0.06097054014355791, 0.058199151955214364, 0.05265637557852729, 0.04988498739018374, 0.0471135992018402, 0.04434221101349666, 0.04157082282515312, 0.036028046448466036, 0.036028046448466036, 0.036028046448466036, 0.036028046448466036, 0.033256658260122494, 0.030485270071778956, 0.030485270071778956, 0.02217110550674833, 0.02217110550674833, 0.01939971731840479, 0.01939971731840479, 0.01939971731840479, 0.016628329130061247, 0.016628329130061247, 0.016628329130061247, 0.013856940941717705, 0.013856940941717705, 0.013856940941717705, 0.011085552753374165, 0.011085552753374165, 0.011085552753374165, 0.008314164565030624, 0.008314164565030624, 0.008314164565030624, 0.008314164565030624, 0.008314164565030624, 0.008314164565030624, 0.008314164565030624, 0.008314164565030624, 0.008314164565030624, 0.005542776376687083, 0.005542776376687083, 0.005542776376687083, 0.005542776376687083, 0.005542776376687083, 0.005542776376687083, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413, 0.0027713881883435413 ] } ], "layout": { "legend": { "tracegroupgap": 0 }, "showlegend": false, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Countries of the athletes" } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "y = cn_pct.copy(deep=True)\n", "y.loc[y['percentage'] < 1, 'country'] = 'Other countries'\n", "fig = px.pie(y, values='percentage', names='country', labels='percentage',\n", " title='Countries of the athletes')\n", "fig.update_traces(textposition='outside', textinfo='percent+label')\n", "fig.update_layout(showlegend=False)\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### By continent" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.437521Z", "start_time": "2021-09-14T03:07:18.348553Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
athletegenderage_groupcountrymajorcontinent
00F18 - 34United StatesCHICAGO 2019NA
11M35 - 54GermanyBERLIN 2016EU
22M35 - 54United KingdomLONDON 2018,LONDON 2019EU
33M18 - 34United KingdomLONDON 2017EU
44M35 - 54United StatesBOSTON 2017NA
.....................
3640737594M18 - 34United KingdomBERLIN 2017EU
3640837595M18 - 34United StatesBERLIN 2019,NEW YORK 2015NA
3640937596M18 - 34United StatesBOSTON 2017NA
3641037597F18 - 34United StatesBOSTON 2015NA
3641137598M35 - 54ChinaTOKYO 2012AS
\n", "

36412 rows × 6 columns

\n", "
" ], "text/plain": [ " athlete gender age_group country major \\\n", "0 0 F 18 - 34 United States CHICAGO 2019 \n", "1 1 M 35 - 54 Germany BERLIN 2016 \n", "2 2 M 35 - 54 United Kingdom LONDON 2018,LONDON 2019 \n", "3 3 M 18 - 34 United Kingdom LONDON 2017 \n", "4 4 M 35 - 54 United States BOSTON 2017 \n", "... ... ... ... ... ... \n", "36407 37594 M 18 - 34 United Kingdom BERLIN 2017 \n", "36408 37595 M 18 - 34 United States BERLIN 2019,NEW YORK 2015 \n", "36409 37596 M 18 - 34 United States BOSTON 2017 \n", "36410 37597 F 18 - 34 United States BOSTON 2015 \n", "36411 37598 M 35 - 54 China TOKYO 2012 \n", "\n", " continent \n", "0 NA \n", "1 EU \n", "2 EU \n", "3 EU \n", "4 NA \n", "... ... \n", "36407 EU \n", "36408 NA \n", "36409 NA \n", "36410 NA \n", "36411 AS \n", "\n", "[36412 rows x 6 columns]" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df['continent'] = np.nan\n", "for country in df['country'].unique():\n", " if pd.isnull(country):\n", " country_code, continent_name = np.nan, np.nan\n", " elif country == 'Kosovo':\n", " country_code = 'XK'\n", " continent_name = 'EU'\n", " else: \n", " country_code = pc.country_name_to_country_alpha2(country)\n", " if country_code == 'TL':\n", " country_code = 'TP'\n", " continent_name = pc.country_alpha2_to_continent_code(country_code)\n", " if not pd.isnull(country):\n", " df.loc[df['country'] == country, 'continent'] = continent_name\n", "df['continent'] = df['continent'].astype('category')\n", "df" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.445360Z", "start_time": "2021-09-14T03:07:18.438438Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
continentpercentage
0EU43.6
1NA43.24
2AS7.765
3SA3.079
4OC1.774
5AF0.5487
\n", "
" ], "text/plain": [ " continent percentage\n", "0 EU 43.6\n", "1 NA 43.24\n", "2 AS 7.765\n", "3 SA 3.079\n", "4 OC 1.774\n", "5 AF 0.5487" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ct_pct = 100*df['continent'].value_counts(normalize=True)\n", "ct_pct = ct_pct.to_frame().reset_index()\n", "ct_pct = ct_pct.rename(columns={'index':'continent', 'continent':'percentage'})\n", "ct_pct" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.475013Z", "start_time": "2021-09-14T03:07:18.446142Z" } }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "domain": { "x": [ 0, 1 ], "y": [ 0, 1 ] }, "hovertemplate": "continent=%{label}
percentage=%{value}", "labels": [ "EU", "NA", "AS", "SA", "OC", "AF" ], "legendgroup": "", "name": "", "showlegend": true, "textinfo": "percent+label", "textposition": "outside", "type": "pie", "values": [ 43.596707590832246, 43.236427126347586, 7.765429703738603, 3.0790122772496744, 1.7736884405398663, 0.5487348612920212 ] } ], "layout": { "height": 400, "legend": { "tracegroupgap": 0 }, "showlegend": false, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Continents of the athletes" } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = px.pie(ct_pct, values='percentage', names='continent', labels='percentage',\n", " title='Continents of the athletes', height=400)\n", "fig.update_traces(textposition='outside', textinfo='percent+label')\n", "fig.update_layout(showlegend=False)\n", "fig.show()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Majors" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.638492Z", "start_time": "2021-09-14T03:07:18.475846Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
athletegenderage_groupcountrymajorcontinentmajor_ymajor_nmajor_s
00F18 - 34United StatesCHICAGO 2019NA20191[CHICAGO]
11M35 - 54GermanyBERLIN 2016EU20161[BERLIN]
22M35 - 54United KingdomLONDON 2018,LONDON 2019EU20192[LONDON, LONDON]
33M18 - 34United KingdomLONDON 2017EU20171[LONDON]
44M35 - 54United StatesBOSTON 2017NA20171[BOSTON]
..............................
3640737594M18 - 34United KingdomBERLIN 2017EU20171[BERLIN]
3640837595M18 - 34United StatesBERLIN 2019,NEW YORK 2015NA20192[BERLIN, NEW YORK]
3640937596M18 - 34United StatesBOSTON 2017NA20171[BOSTON]
3641037597F18 - 34United StatesBOSTON 2015NA20151[BOSTON]
3641137598M35 - 54ChinaTOKYO 2012AS20121[TOKYO]
\n", "

36412 rows × 9 columns

\n", "
" ], "text/plain": [ " athlete gender age_group country major \\\n", "0 0 F 18 - 34 United States CHICAGO 2019 \n", "1 1 M 35 - 54 Germany BERLIN 2016 \n", "2 2 M 35 - 54 United Kingdom LONDON 2018,LONDON 2019 \n", "3 3 M 18 - 34 United Kingdom LONDON 2017 \n", "4 4 M 35 - 54 United States BOSTON 2017 \n", "... ... ... ... ... ... \n", "36407 37594 M 18 - 34 United Kingdom BERLIN 2017 \n", "36408 37595 M 18 - 34 United States BERLIN 2019,NEW YORK 2015 \n", "36409 37596 M 18 - 34 United States BOSTON 2017 \n", "36410 37597 F 18 - 34 United States BOSTON 2015 \n", "36411 37598 M 35 - 54 China TOKYO 2012 \n", "\n", " continent major_y major_n major_s \n", "0 NA 2019 1 [CHICAGO] \n", "1 EU 2016 1 [BERLIN] \n", "2 EU 2019 2 [LONDON, LONDON] \n", "3 EU 2017 1 [LONDON] \n", "4 NA 2017 1 [BOSTON] \n", "... ... ... ... ... \n", "36407 EU 2017 1 [BERLIN] \n", "36408 NA 2019 2 [BERLIN, NEW YORK] \n", "36409 NA 2017 1 [BOSTON] \n", "36410 NA 2015 1 [BOSTON] \n", "36411 AS 2012 1 [TOKYO] \n", "\n", "[36412 rows x 9 columns]" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "f = lambda x: np.array(x, dtype=int).max()\n", "df['major_y'] = df['major'].str.findall(r'[0-9]+').apply(f)\n", "df['major_n'] = df['major'].str.split(',').apply(lambda x: len(x))\n", "df['major_s'] = df['major'].str.replace(' \\d+', '', regex=True).str.split(',')\n", "df" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "### Frequency" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.654304Z", "start_time": "2021-09-14T03:07:18.639412Z" }, "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
majorpercentage
0BOSTON27.26
1BERLIN26.5
2NEW YORK16.27
3LONDON16.09
4CHICAGO8.199
5TOKYO5.686
\n", "
" ], "text/plain": [ " major percentage\n", "0 BOSTON 27.26\n", "1 BERLIN 26.5\n", "2 NEW YORK 16.27\n", "3 LONDON 16.09\n", "4 CHICAGO 8.199\n", "5 TOKYO 5.686" ] }, "execution_count": 14, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mj_pct = 100*df['major_s'].explode().astype('category').value_counts(normalize=True)\n", "mj_pct = mj_pct.to_frame().reset_index()\n", "mj_pct = mj_pct.rename(columns={'index':'major', 'major_s':'percentage'})\n", "mj_pct" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.683906Z", "start_time": "2021-09-14T03:07:18.655225Z" } }, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "config": { "plotlyServerURL": "https://plot.ly" }, "data": [ { "domain": { "x": [ 0, 1 ], "y": [ 0, 1 ] }, "hovertemplate": "major=%{label}
percentage=%{value}", "labels": [ "BOSTON", "BERLIN", "NEW YORK", "LONDON", "CHICAGO", "TOKYO" ], "legendgroup": "", "name": "", "showlegend": true, "textinfo": "percent+label", "textposition": "outside", "type": "pie", "values": [ 27.263863787691427, 26.49814515361933, 16.26557595358128, 16.08722534005517, 8.199372205840389, 5.685817559212404 ] } ], "layout": { "height": 400, "legend": { "tracegroupgap": 0 }, "showlegend": false, "template": { "data": { "bar": [ { "error_x": { "color": "#2a3f5f" }, "error_y": { "color": "#2a3f5f" }, "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "bar" } ], "barpolar": [ { "marker": { "line": { "color": "#E5ECF6", "width": 0.5 }, "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "barpolar" } ], "carpet": [ { "aaxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "baxis": { "endlinecolor": "#2a3f5f", "gridcolor": "white", "linecolor": "white", "minorgridcolor": "white", "startlinecolor": "#2a3f5f" }, "type": "carpet" } ], "choropleth": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "choropleth" } ], "contour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "contour" } ], "contourcarpet": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "contourcarpet" } ], "heatmap": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmap" } ], "heatmapgl": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "heatmapgl" } ], "histogram": [ { "marker": { "pattern": { "fillmode": "overlay", "size": 10, "solidity": 0.2 } }, "type": "histogram" } ], "histogram2d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2d" } ], "histogram2dcontour": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "histogram2dcontour" } ], "mesh3d": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "type": "mesh3d" } ], "parcoords": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "parcoords" } ], "pie": [ { "automargin": true, "type": "pie" } ], "scatter": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter" } ], "scatter3d": [ { "line": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatter3d" } ], "scattercarpet": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattercarpet" } ], "scattergeo": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergeo" } ], "scattergl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattergl" } ], "scattermapbox": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scattermapbox" } ], "scatterpolar": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolar" } ], "scatterpolargl": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterpolargl" } ], "scatterternary": [ { "marker": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "type": "scatterternary" } ], "surface": [ { "colorbar": { "outlinewidth": 0, "ticks": "" }, "colorscale": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "type": "surface" } ], "table": [ { "cells": { "fill": { "color": "#EBF0F8" }, "line": { "color": "white" } }, "header": { "fill": { "color": "#C8D4E3" }, "line": { "color": "white" } }, "type": "table" } ] }, "layout": { "annotationdefaults": { "arrowcolor": "#2a3f5f", "arrowhead": 0, "arrowwidth": 1 }, "autotypenumbers": "strict", "coloraxis": { "colorbar": { "outlinewidth": 0, "ticks": "" } }, "colorscale": { "diverging": [ [ 0, "#8e0152" ], [ 0.1, "#c51b7d" ], [ 0.2, "#de77ae" ], [ 0.3, "#f1b6da" ], [ 0.4, "#fde0ef" ], [ 0.5, "#f7f7f7" ], [ 0.6, "#e6f5d0" ], [ 0.7, "#b8e186" ], [ 0.8, "#7fbc41" ], [ 0.9, "#4d9221" ], [ 1, "#276419" ] ], "sequential": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ], "sequentialminus": [ [ 0, "#0d0887" ], [ 0.1111111111111111, "#46039f" ], [ 0.2222222222222222, "#7201a8" ], [ 0.3333333333333333, "#9c179e" ], [ 0.4444444444444444, "#bd3786" ], [ 0.5555555555555556, "#d8576b" ], [ 0.6666666666666666, "#ed7953" ], [ 0.7777777777777778, "#fb9f3a" ], [ 0.8888888888888888, "#fdca26" ], [ 1, "#f0f921" ] ] }, "colorway": [ "#636efa", "#EF553B", "#00cc96", "#ab63fa", "#FFA15A", "#19d3f3", "#FF6692", "#B6E880", "#FF97FF", "#FECB52" ], "font": { "color": "#2a3f5f" }, "geo": { "bgcolor": "white", "lakecolor": "white", "landcolor": "#E5ECF6", "showlakes": true, "showland": true, "subunitcolor": "white" }, "hoverlabel": { "align": "left" }, "hovermode": "closest", "mapbox": { "style": "light" }, "paper_bgcolor": "white", "plot_bgcolor": "#E5ECF6", "polar": { "angularaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "radialaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "scene": { "xaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "yaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" }, "zaxis": { "backgroundcolor": "#E5ECF6", "gridcolor": "white", "gridwidth": 2, "linecolor": "white", "showbackground": true, "ticks": "", "zerolinecolor": "white" } }, "shapedefaults": { "line": { "color": "#2a3f5f" } }, "ternary": { "aaxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "baxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" }, "bgcolor": "#E5ECF6", "caxis": { "gridcolor": "white", "linecolor": "white", "ticks": "" } }, "title": { "x": 0.05 }, "xaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 }, "yaxis": { "automargin": true, "gridcolor": "white", "linecolor": "white", "ticks": "", "title": { "standoff": 15 }, "zerolinecolor": "white", "zerolinewidth": 2 } } }, "title": { "text": "Frequency of Majors" } } }, "text/html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig = px.pie(mj_pct, values='percentage', names='major', labels='percentage',\n", " title='Frequency of Majors', height=400)\n", "fig.update_traces(textposition='outside', textinfo='percent+label')\n", "fig.update_layout(showlegend=False)\n", "fig.show()" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.690096Z", "start_time": "2021-09-14T03:07:18.684955Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Majors in America: 51.7\n", "Majors in Europe: 42.6\n", "Majors in Asia: 5.7\n" ] } ], "source": [ "fun = lambda x, y: x[x['major'].isin(y)]['percentage'].values.sum()\n", "print('Majors in America: {:.1f}'.format(fun(mj_pct, ['BOSTON', 'NEW YORK', 'CHICAGO'])))\n", "print('Majors in Europe: {:.1f}'.format(fun(mj_pct, ['BERLIN', 'LONDON'])))\n", "print('Majors in Asia: {:.1f}'.format(fun(mj_pct, ['TOKYO'])))" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Number and year of Majors" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.725355Z", "start_time": "2021-09-14T03:07:18.691138Z" } }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
major_n12345678All
major_y
20050.00274600000000.002746
20080.0109900000000.01099
20090.0466900000000.04669
20100.11810.0027460000000.1208
20110.21420.0027460000000.217
20120.61520.010990000000.6262
20131.1230.090630.01373000001.228
20141.9990.17850.019220.00549300002.203
20155.2510.41470.049430.00274600005.718
20167.4180.57950.085140.00549300008.088
201712.251.1120.1730.06317000013.6
201821.962.1530.26910.074150.016480.0054930.0027460.00274624.49
201936.545.461.2060.28010.068660.021970.0027460.00274643.58
20200.057670.010990000000.06866
All87.6110.021.8150.43120.085140.027460.0054930.005493100
\n", "
" ], "text/plain": [ "major_n 1 2 3 4 5 6 7 8 \\\n", "major_y \n", "2005 0.002746 0 0 0 0 0 0 0 \n", "2008 0.01099 0 0 0 0 0 0 0 \n", "2009 0.04669 0 0 0 0 0 0 0 \n", "2010 0.1181 0.002746 0 0 0 0 0 0 \n", "2011 0.2142 0.002746 0 0 0 0 0 0 \n", "2012 0.6152 0.01099 0 0 0 0 0 0 \n", "2013 1.123 0.09063 0.01373 0 0 0 0 0 \n", "2014 1.999 0.1785 0.01922 0.005493 0 0 0 0 \n", "2015 5.251 0.4147 0.04943 0.002746 0 0 0 0 \n", "2016 7.418 0.5795 0.08514 0.005493 0 0 0 0 \n", "2017 12.25 1.112 0.173 0.06317 0 0 0 0 \n", "2018 21.96 2.153 0.2691 0.07415 0.01648 0.005493 0.002746 0.002746 \n", "2019 36.54 5.46 1.206 0.2801 0.06866 0.02197 0.002746 0.002746 \n", "2020 0.05767 0.01099 0 0 0 0 0 0 \n", "All 87.61 10.02 1.815 0.4312 0.08514 0.02746 0.005493 0.005493 \n", "\n", "major_n All \n", "major_y \n", "2005 0.002746 \n", "2008 0.01099 \n", "2009 0.04669 \n", "2010 0.1208 \n", "2011 0.217 \n", "2012 0.6262 \n", "2013 1.228 \n", "2014 2.203 \n", "2015 5.718 \n", "2016 8.088 \n", "2017 13.6 \n", "2018 24.49 \n", "2019 43.58 \n", "2020 0.06866 \n", "All 100 " ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.crosstab(df['major_y'], df['major_n'], margins=True, normalize='all')*100" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "ExecuteTime": { "end_time": "2021-09-14T03:07:18.742856Z", "start_time": "2021-09-14T03:07:18.726243Z" } }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cumulative frequency of year of Majors:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
20192018201720162015201420132012201120102020200920082005
major_y1.587e+068.916e+054.952e+052.945e+052.082e+058.02e+044.47e+042.28e+047900440025001700400100
major_y43.5868.0781.6789.7695.4897.6898.9199.5399.7599.8799.9499.99100100
\n", "
" ], "text/plain": [ " 2019 2018 2017 2016 2015 2014 2013 \\\n", "major_y 1.587e+06 8.916e+05 4.952e+05 2.945e+05 2.082e+05 8.02e+04 4.47e+04 \n", "major_y 43.58 68.07 81.67 89.76 95.48 97.68 98.91 \n", "\n", " 2012 2011 2010 2020 2009 2008 2005 \n", "major_y 2.28e+04 7900 4400 2500 1700 400 100 \n", "major_y 99.53 99.75 99.87 99.94 99.99 100 100 " ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Cumulative frequency of number of Majors:\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
12345678
major_n3.19e+063.647e+056.61e+041.57e+0431001000200200
major_n87.6197.6399.4599.8899.9699.9999.99100
\n", "
" ], "text/plain": [ " 1 2 3 4 5 6 7 8\n", "major_n 3.19e+06 3.647e+05 6.61e+04 1.57e+04 3100 1000 200 200\n", "major_n 87.61 97.63 99.45 99.88 99.96 99.99 99.99 100" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "print('Cumulative frequency of year of Majors:')\n", "display(pd.concat((df['major_y'].value_counts(),\n", " df['major_y'].value_counts(normalize=True).cumsum()), axis=1).T*100)\n", "print('Cumulative frequency of number of Majors:')\n", "display(pd.concat((df['major_n'].value_counts(),\n", " df['major_n'].value_counts(normalize=True).cumsum()), axis=1).T*100)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "98% of the athletes ran in up to two Majors and 98% of the athletes ran a Major between the years 2014 and 2019." ] } ], "metadata": { "hide_input": false, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.8.12" }, "nbTranslate": { "displayLangs": [ "*" ], "hotkey": "alt-t", "langInMainMenu": true, "sourceLang": "en", "targetLang": "fr", "useGoogleTranslate": true }, "toc": { "base_numbering": 1, "nav_menu": {}, "number_sections": true, "sideBar": true, "skip_h1_title": true, "title_cell": "Contents", "title_sidebar": "Contents", "toc_cell": true, "toc_position": {}, "toc_section_display": true, "toc_window_display": false }, "varInspector": { "cols": { "lenName": 16, "lenType": 16, "lenVar": 40 }, "kernels_config": { "python": { "delete_cmd_postfix": "", "delete_cmd_prefix": "del ", "library": "var_list.py", "varRefreshCmd": "print(var_dic_list())" }, "r": { "delete_cmd_postfix": ") ", "delete_cmd_prefix": "rm(", "library": "var_list.r", "varRefreshCmd": "cat(var_dic_list()) " } }, "position": { "height": "280px", "left": "793px", "right": "20px", "top": "121px", "width": "470px" }, "types_to_exclude": [ "module", "function", "builtin_function_or_method", "instance", "_Feature" ], "window_display": false } }, "nbformat": 4, "nbformat_minor": 4 }