{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "##Medals for participating countries at the International Olympiad in Informatics, 1989-2015##" ] }, { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Read data from IOI [homepage](http://stats.ioinformatics.org/countries/):" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456
0NaNCountry▲IOI HostMedalsNaNNaNNaN
1GSBTotalNaNNaNNaN
2NaNAlbaniaNaN0000
3?Argentina1993282232
4?ArmeniaNaN142025
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6\n", "0 NaN Country▲ IOI Host Medals NaN NaN NaN\n", "1 G S B Total NaN NaN NaN\n", "2 NaN Albania NaN 0 0 0 0\n", "3 ? Argentina 1993 2 8 22 32\n", "4 ? Armenia NaN 1 4 20 25" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "url=r'http://stats.ioinformatics.org/countries/'\n", "dfs = pd.read_html(url)#returns a list of tables\n", "df=dfs[0]\n", "df.head()#inspect header" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Re-read skipping rows 0,1:" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456
0NaNAlbaniaNaN0000
1?Argentina1993282232
2?ArmeniaNaN142025
3NaNAustralia20136143050
4NaNAustriaNaN352230
\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6\n", "0 NaN Albania NaN 0 0 0 0\n", "1 ? Argentina 1993 2 8 22 32\n", "2 ? Armenia NaN 1 4 20 25\n", "3 NaN Australia 2013 6 14 30 50\n", "4 NaN Austria NaN 3 5 22 30" ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "url=r'http://stats.ioinformatics.org/countries/'\n", "dfs = pd.read_html(url, skiprows=[0,1])\n", "df=dfs[0]\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Int64Index([0, 1, 2, 3, 4, 5, 6], dtype='int64')\n" ] } ], "source": [ "print df.columns" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Delete columns 0, 2:" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false }, "outputs": [], "source": [ "df.drop([0,2], axis=1, inplace=True)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ " 1 3 4 5 6\n", "0 Albania 0 0 0 0\n", "1 Argentina 2 8 22 32\n", "2 Armenia 1 4 20 25\n", "3 Australia 6 14 30 50\n", "4 Austria 3 5 22 30\n" ] } ], "source": [ "print df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryGoldSilverBronzeTotal
0Albania0000
1Argentina282232
2Armenia142025
3Australia6143050
4Austria352230
\n", "
" ], "text/plain": [ " Country Gold Silver Bronze Total\n", "0 Albania 0 0 0 0\n", "1 Argentina 2 8 22 32\n", "2 Armenia 1 4 20 25\n", "3 Australia 6 14 30 50\n", "4 Austria 3 5 22 30" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfn=df.rename(columns = {1: 'Country', 3:'Gold', 4: 'Silver', 5: 'Bronze', 6:'Total'})\n", "dfn.head()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryGoldSilverBronzeTotal
0Argentina282232
1Armenia142025
2Australia6143050
3Austria352230
4Azerbaijan0145
\n", "
" ], "text/plain": [ " Country Gold Silver Bronze Total\n", "0 Argentina 2 8 22 32\n", "1 Armenia 1 4 20 25\n", "2 Australia 6 14 30 50\n", "3 Austria 3 5 22 30\n", "4 Azerbaijan 0 1 4 5" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfn = dfn[(dfn[['Total']] != 0).all(axis=1)]#delete countries with no medal\n", "L= len(dfn)\n", "dfn= dfn.set_index(np.arange(L))\n", "dfn.head()\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "List the complete table:" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryGoldSilverBronzeTotal
0Argentina282232
1Armenia142025
2Australia6143050
3Austria352230
4Azerbaijan0145
5Bangladesh0134
6Belarus14323379
7Belgium0268
8Bosnia and Herzegovina0156
9Brazil292738
10Bulgaria23393193
11Canada10223365
12Chile0011
13China722312107
14Colombia02911
15Croatia11333276
16Cuba062632
17Cyprus0022
18Czech Republic13223671
19Czechoslovakia3429
20Denmark361928
21Egypt041216
22Estonia5173052
23Finland5213359
24France272736
25Georgia273140
26German Democratic Republic0303
27Germany14263676
28Greece042125
29Hong Kong4213863
30Hungary11274179
31India1112739
32Indonesia2182949
33Iran19472288
34Ireland141217
35Israel5222552
36Italy2142440
37Japan1617841
38Kazakhstan2142440
39Kyrgyzstan0033
40Latvia6233968
41Lithuania2253966
42Luxembourg011415
43Macau041014
44Macedonia011314
45Malaysia0235
46Malta0022
47Mexico131317
48Moldova022224
49Mongolia0022
50Netherlands3153654
51New Zealand011415
52Norway0257
53Philippines0011
54Poland34362797
55Portugal0055
56Republic of Korea33332692
57Romania28422595
58Russia52321296
59Serbia0111425
60Serbia and Montenegro2151734
61Singapore8273469
62Slovakia24372384
63Slovenia042226
64South Africa4133451
65Soviet Union3418
66Spain041216
67Sri Lanka371626
68Sweden13222762
69Switzerland072532
70Syria0235
71Taiwan17422281
72Tajikistan0044
73Thailand15324188
74Trinidad and Tobago0033
75Tunisia0011
76Turkey2203759
77Turkmenistan1045
78Ukraine7224473
79United Kingdom5162950
80United States of America42331590
81Venezuela0011
82Vietnam10364591
83Yugoslavia1315
" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.display import HTML\n", "h = HTML(dfn.to_html());h\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Shorten a few country names:" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false }, "outputs": [], "source": [ "dfn.loc[8, 'Country']='BosniaHer'\n", "dfn.loc[18,'Country']='CzechRep'\n", "dfn.loc[19, 'Country']='Czechoslov'\n", "dfn.loc[26, 'Country']='GermanDemR'\n", "dfn.loc[29,'Country']='HongKong'\n", "dfn.loc[51,'Country']='NewZealand'\n", "dfn.loc[56, 'Country']='RepKorea'\n", "dfn.loc[60, 'Country']='SerbiaMoN'\n", "dfn.loc[64, 'Country']='SouthAfrica'\n", "dfn.loc[65, 'Country']='SovUnion'\n", "dfn.loc[67, 'Country']='SriLanka'\n", "dfn.loc[74, 'Country']='TrinidadTo'\n", "dfn.loc[79, 'Country']='UnKingdom'\n", "dfn.loc[80, 'Country']='USA'\n" ] }, { "cell_type": "code", "execution_count": 11, "metadata": { "collapsed": false }, "outputs": [], "source": [ "#Check data again\n", "#h = HTML(dfn.to_html());h" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Convert data from the last 4 columns to int:" ] }, { "cell_type": "code", "execution_count": 12, "metadata": { "collapsed": false }, "outputs": [], "source": [ "for c in ['Gold', 'Silver', 'Bronze', 'Total']:\n", " dfn[c]=dfn[c].astype(int)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Sort data increasingly with respect to the total number of medals:" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CountryGoldSilverBronzeTotal
0Philippines0011
1Venezuela0011
2Tunisia0011
3Chile0011
4Mongolia0022
\n", "
" ], "text/plain": [ " Country Gold Silver Bronze Total\n", "0 Philippines 0 0 1 1\n", "1 Venezuela 0 0 1 1\n", "2 Tunisia 0 0 1 1\n", "3 Chile 0 0 1 1\n", "4 Mongolia 0 0 2 2" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dfs=dfn.sort(columns='Total')\n", "L=len(dfs)\n", "dfs= dfs.set_index(np.arange(L))\n", "dfs.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Set colors for Gold, Silver and Bronze medal:" ] }, { "cell_type": "code", "execution_count": 14, "metadata": { "collapsed": false }, "outputs": [], "source": [ "colors=['#D9D919', '#C0C0C0', '#A67D3D'] #bright gold, silver, bronze" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false }, "outputs": [], "source": [ "import plotly.plotly as py\n", "from plotly.graph_objs import *\n", "\n", "py.sign_in(\"empet\", \"shzbvu3hbh\")" ] }, { "cell_type": "code", "execution_count": 16, "metadata": { "collapsed": false }, "outputs": [], "source": [ "Co=dfs['Country'].values.tolist()\n", "Go=dfs['Gold'].values.tolist()\n", "Si=dfs['Silver'].values.tolist()\n", "Bro=dfs['Bronze'].values.tolist()\n", "To=dfs['Total'].values.tolist()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "107\n" ] } ], "source": [ "print max(To)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "" ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "anno_text=\"Source: [1]\"\n", "\n", "trace1= Bar(\n", " x=Go,\n", " y=Co,\n", " orientation='h', \n", " marker=Marker(color=colors[0]),\n", " name='gold'\n", " )\n", "trace2= Bar(\n", " x=Si,\n", " y=Co,\n", " orientation='h', \n", " marker=Marker(color=colors[1]),\n", " name='silver'\n", " )\n", "trace3= Bar(\n", " x=Bro,\n", " y=Co,\n", " orientation='h', \n", " marker=Marker(color=colors[2]),\n", " name='bronze'\n", " )\n", "data = Data([trace1, trace2, trace3])\n", "m_title='Unofficial country rankings according to the total number of medals at IOI
1989-2015'\n", "layout = Layout(\n", " title=m_title,\n", " font=Font(family='Raleway, sans-serif'\n", " \n", " ),\n", " showlegend=False,\n", " autosize=False,\n", " width=800,\n", " height=1000,\n", " xaxis=XAxis(\n", " range=[0, 108],\n", " gridwidth=2,\n", " showgrid=True,\n", " zeroline=True,\n", " showline=True,\n", " mirror=True,\n", " side='top'\n", " ),\n", " \n", " \n", " yaxis=YAxis(\n", " #title='Country',\n", " showgrid=False,\n", " zeroline=True,\n", " showline=True,\n", " autotick=True,\n", " nticks=0,\n", " ticks='outside',\n", " showticklabels=True,\n", " tick0=0,\n", " dtick=1,\n", " ticklen=10,\n", " mirror=True\n", " ),\n", " \n", " \n", " margin=Margin(\n", " l=80,\n", " r=80,\n", " b=80,\n", " t=100,\n", " pad=2\n", " ),\n", " annotations=Annotations([\n", " Annotation(\n", " showarrow=False, \n", " text=anno_text, \n", " xref='paper', \n", " yref='paper', \n", " x=0, \n", " y=-0.075, \n", " xanchor='left', \n", " yanchor='bottom', \n", " font=Font(\n", " size=14 \n", " ) \n", " )\n", " ]), \n", " \n", " bargap=0.18,\n", " bargroupgap=0,\n", " hovermode='y',\n", " dragmode='zoom',\n", " barmode='stack',\n", " \n", ")\n", "fig = Figure(data=data, layout=layout)\n", "py.iplot(fig, filename='IOI-Rankings', height=1005)" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "\n", "\n" ], "text/plain": [ "" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "from IPython.core.display import HTML\n", "def css_styling():\n", " styles = open(\"./custom.css\", \"r\").read()\n", " return HTML(styles)\n", "css_styling()" ] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }