{ "cells": [
 { "cell_type": "code", "execution_count": 41, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import pandas as pd, numpy as np" ] },
 { "cell_type": "code", "execution_count": 42, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "#load list of all circuits and years with races from wikipedia\n",
  "# pd.read_html returns a list with one DataFrame per HTML table on the page;\n",
  "# keep that list under its own name so the selection cell below can be re-run safely.\n",
  "WRC_RALLIES_URL='https://en.wikipedia.org/wiki/List_of_World_Rally_Championship_rallies'\n",
  "wrc_tables=pd.read_html(WRC_RALLIES_URL, header=0)" ] },
 { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# The rally list is the second table returned for the page (index 1).\n",
  "# NOTE(review): this index depends on the page layout at scrape time - re-check it if the page changes.\n",
  "# Only these four columns are read by the cells below.\n",
  "df=wrc_tables[1][['Rally','Headquarters','WRC years','Location']]" ] },
 { "cell_type": "code", "execution_count": 47, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [
  "{'races': u'1980\\u20131981, 1983\\u20131994, 1996\\u20132009, 2011\\u20132017', 'place': u'Villa Carlos Paz, Argentina', 'name': u'Rally Argentina', 'coord': (-31.4207828, -64.4992141)}\n",
  "{'races': u'1989\\u20131993, 1995\\u20132006, 2009, 2011, 2013\\u20132017', 'place': u'Coffs Harbour, Australia', 'name': u'Rally Australia', 'coord': (-30.298612, 153.1093922)}\n",
  "{'races': u'1973', 'place': u'Baden, Austria', 'name': u'\\xd6sterreichische Alpenfahrt', 'coord': (48.00214, 16.23091)}\n",
  "{'races': u'1981\\u20131982', 'place': u'S\\xe3o Paulo, Brazil', 'name': u'Rallye do Brasil', 'coord': (-23.5505199, -46.63330939999999)}\n",
  "{'races': u'2010', 'place': u'Borovets, Bulgaria', 'name': u'Rally Bulgaria', 'coord': (42.27066749999999, 23.6056165)}\n",
  "{'races': u'1974', 'place': u'Smiths Falls, Canada', 'name': u'Rally of the Rideau Lakes', 'coord': (44.903748, -76.02161889999999)}\n",
  "{'races': u'1977\\u20131979', 'place': u'Montreal, Canada', 'name': u'Crit\\xe9rium du Qu\\xe9bec', 'coord': (45.5016889, -73.567256)}\n",
  "{'races': u'1999', 'place': u'Beijing, China', 'name': u'Rally China', 'coord': (39.90419989999999, 116.4073963)}\n",
  "{'races': u'1978\\u20131992', 'place': u\"Abidjan, C\\xf4te d'Ivoire\", 'name': 
u\"Rallye C\\xf4te d'Ivoire\", 'coord': (5.3599517, -4.0082563)}\n", "{'races': u'2000\\u20132006, 2009', 'place': u'Limassol, Cyprus', 'name': u'Cyprus Rally', 'coord': (34.7071301, 33.0226174)}\n", "{'races': u'1973\\u20131994, 1996\\u20132017', 'place': u'Jyv\\xe4skyl\\xe4, Finland', 'name': u'Rally Finland (formerly 1000 Lakes Rally)', 'coord': (62.2426034, 25.7472567)}\n", "{'races': u'1973\\u20131995, 1997\\u20132008, 2015\\u20132017', 'place': u'Bastia, France', 'name': u'Tour de Corse \\u2013 Rallye de France', 'coord': (42.697283, 9.450880999999999)}\n", "{'races': u'2010\\u20132014', 'place': u'Strasbourg, France', 'name': u'Rallye de France Alsace', 'coord': (48.5734053, 7.752111299999999)}\n", "{'races': u'2002\\u20132008, 2010\\u20132017', 'place': u'Trier, Germany', 'name': u'Rallye Deutschland', 'coord': (49.749992, 6.6371433)}\n", "{'races': u'1973, 1975\\u20131994, 1996\\u20132009, 2011\\u20132013', 'place': u'Loutraki, Greece', 'name': u'Acropolis Rally', 'coord': (37.9759033, 22.9774589)}\n", "{'races': u'1996\\u20131997', 'place': u'Makassar, Indonesia', 'name': u'Rally Indonesia', 'coord': (-5.147665099999999, 119.4327314)}\n", "{'races': u'2007, 2009', 'place': u'Sligo, Ireland/ Northern Ireland', 'name': u'Rally Ireland', 'coord': (54.591714, -5.910673999999999)}\n", "{'races': u'1973\\u20131994, 1996\\u20132003', 'place': u'Sanremo, Italy', 'name': u'Rallye Sanremo', 'coord': (43.81596709999999, 7.7760567)}\n", "{'races': u'2004\\u20132009, 2011\\u20132017', 'place': u'Alghero, Italy', 'name': u\"Rally d'Italia Sardegna\", 'coord': (40.5579517, 8.319294900000001)}\n", "{'races': u'2004\\u20132008, 2010', 'place': u'Sapporo, Japan', 'name': u'Rally Japan', 'coord': (43.0620958, 141.3543763)}\n", "{'races': u'2008, 2010\\u20132011', 'place': u'Amman, Jordan', 'name': u'Jordan Rally', 'coord': (31.9453666, 35.9283716)}\n", "{'races': u'1973\\u20131994, 1996\\u20132002', 'place': u'Nairobi, Kenya', 'name': u'Safari Rally', 'coord': (-1.2920659, 
36.8219462)}\n", "{'races': u'2004\\u20132008, 2010\\u20132017', 'place': u'Leon, Mexico', 'name': u'Rally Mexico', 'coord': (21.1250077, -101.6859605)}\n", "{'races': u'1973, 1975\\u20131995, 1997\\u20132008, 2012\\u20132017', 'place': 'Gap, France', 'name': u'Monte Carlo Rally', 'coord': (44.559638, 6.079758)}\n", "{'races': u'1973, 1975\\u20131976', 'place': u'Casablanca, Morocco', 'name': u'Rallye du Maroc', 'coord': (33.5731104, -7.589843399999999)}\n", "{'races': u'1977, 1979\\u20131980, 1982\\u20131995, 1997\\u20132008, 2010, 2012', 'place': u'Auckland, New Zealand', 'name': u'Rally New Zealand', 'coord': (-36.8484597, 174.7633315)}\n", "{'races': u'2007, 2009', 'place': u'Hamar, Norway', 'name': u'Rally Norway', 'coord': (60.7945331, 11.0679977)}\n", "{'races': u'1973, 2009, 2014\\u20132017', 'place': u'Miko\\u0142ajki, Poland', 'name': u'Rally Poland', 'coord': (53.8027021, 21.5706044)}\n", "{'races': u'1973\\u20131995, 1997\\u20132001, 2007, 2009\\u20132017', 'place': u'Matosinhos, Portugal', 'name': u'Rally de Portugal ', 'coord': (41.1844362, -8.696277499999999)}\n", "{'races': u'1991\\u20131993, 1995\\u20132017 ', 'place': u'Salou, Spain', 'name': u'Rally de Espa\\xf1a', 'coord': (41.0777465, 1.1315926)}\n", "{'races': u'1973, 1975\\u20131989, 1991\\u20131993, 1995\\u20132008, 2010\\u20132017', 'place': u'Karlstad, Sweden', 'name': u'Rally Sweden', 'coord': (59.4021806, 13.5114978)}\n", "{'races': u'2003\\u20132006, 2008, 2010, 2018', 'place': u'Istanbul, Turkey', 'name': u'Rally of Turkey', 'coord': (41.0082376, 28.9783589)}\n", "{'races': u'1973\\u20131995, 1997\\u20132017', 'place': u'Deeside, United Kingdom', 'name': u'Wales Rally GB', 'coord': (53.1997351, -3.0329723)}\n", "{'races': u'1973\\u20131974', 'place': u'Detroit, United States', 'name': u'Press-on-Regardless Rally', 'coord': (42.331427, -83.0457538)}\n", "{'races': u'1986\\u20131988', 'place': u'Tacoma, United States', 'name': u'Olympus Rally', 'coord': (47.2528768, -122.4442906)}\n" ] } 
], "source": [
  "#geocode circuit names and create list with circuits\n",
  "import os\n",
  "\n",
  "from pygeocoder import Geocoder\n",
  "\n",
  "\n",
  "def strip_footnote(text):\n",
  "    \"\"\"Return `text` cut off before its first '[' (drops footnote markers such as '[1]').\n",
  "\n",
  "    Text without a '[' is returned unchanged.\n",
  "    \"\"\"\n",
  "    return text.split('[', 1)[0]\n",
  "\n",
  "\n",
  "# SECURITY: the Google API key used to be hard-coded in this cell. It is now read from\n",
  "# the GOOGLE_API_KEY environment variable (KeyError if it is not set). The key that was\n",
  "# committed in earlier revisions of this notebook should be treated as leaked and revoked.\n",
  "apikey = os.environ['GOOGLE_API_KEY']\n",
  "geocoder = Geocoder(apikey)  # built once; the original rebuilt it for every row\n",
  "\n",
  "circs = []\n",
  "for _, row in df.iterrows():\n",
  "    headquarters = strip_footnote(row['Headquarters'])\n",
  "    if headquarters == 'Gap':\n",
  "        # Special case kept from the original: the Location column is ignored for Gap\n",
  "        # and 'Gap, France' is geocoded instead.\n",
  "        place = 'Gap, France'\n",
  "    else:\n",
  "        place = headquarters + ', ' + row['Location']\n",
  "    circ = {\n",
  "        'name': strip_footnote(row['Rally']),\n",
  "        'races': strip_footnote(row['WRC years']),\n",
  "        'place': place,\n",
  "        # one network request per rally\n",
  "        'coord': geocoder.geocode(place).coordinates,\n",
  "    }\n",
  "    circs.append(circ)\n",
  "    print(circ)"
 ] },
 { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "# Build calendar: {season year: [indices into circs of the rallies held that year]}.\n",
  "import re\n",
  "\n",
  "FIRST_SEASON, LAST_SEASON = 1973, 2018\n",
  "calendar = {year: [] for year in range(FIRST_SEASON, LAST_SEASON + 1)}\n",
  "\n",
  "# Matches a single year ('2009') or an inclusive span ('2011-2017').\n",
  "year_span = re.compile(r'(\\d{4})(?:\\s*-\\s*(\\d{4}))?')\n",
  "\n",
  "for idx, circ in enumerate(circs):\n",
  "    # 'races' already had its footnote markers stripped when circs was built;\n",
  "    # turn the en dash into a plain '-' so spans match the pattern above.\n",
  "    races = circ['races'].replace(u'\\u2013', '-')\n",
  "    for start, end in year_span.findall(races):\n",
  "        # `end` is '' for a single year, so the range then covers just `start`.\n",
  "        for year in range(int(start), int(end or start) + 1):\n",
  "            # A year outside FIRST_SEASON..LAST_SEASON raises KeyError, as in the original.\n",
  "            calendar[year].append(idx)"
 ] },
 { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true }, "outputs": [], "source": [
  "#save data\n",
  "import json\n",
  "\n",
  "# `with` closes each file even if serialisation fails part-way.\n",
  "with open('calendar_wrc2018.json', 'w') as out:\n",
  "    json.dump(calendar, out)\n",
  "with open('circs_wrc2018.json', 'w') as out:\n",
  "    json.dump(circs, out)"
 ] },
 { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }
 ],
 "metadata": {
  "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" },
  "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.14" }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}