{ "cells": [ { "cell_type": "code", "execution_count": 72, "metadata": {}, "outputs": [], "source": [ "import requests, pandas as pd, numpy as np, json\n", "from requests import session\n", "from bs4 import BeautifulSoup" ] }, { "cell_type": "code", "execution_count": 89, "metadata": {}, "outputs": [], "source": [ "url='http://osm-subway.maps.me/'" ] }, { "cell_type": "code", "execution_count": 90, "metadata": {}, "outputs": [], "source": [ "response = requests.get(url)\n", "soup = BeautifulSoup(response.content)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": {}, "outputs": [], "source": [ "links=[i['href'] for i in soup.findAll('a')[1:-3]]" ] }, { "cell_type": "code", "execution_count": 181, "metadata": {}, "outputs": [], "source": [ "systems={}" ] }, { "cell_type": "code", "execution_count": 182, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "algeria.html\n", "egypt.html\n", "ethiopia.html\n", "mauritius.html\n", "nigeria.html\n", "south-africa.html\n", "tunisia.html\n", "argentina.html\n", "brazil.html\n", "canada.html\n", "chile.html\n", "colombia.html\n", "dominican-republic.html\n", "mexico.html\n", "panama.html\n", "peru.html\n", "usa.html\n", "venezuela.html\n", "armenia.html\n", "azerbaijan.html\n", "china.html\n", "india.html\n", "indonesia.html\n", "iran.html\n", "japan.html\n", "kazakhstan.html\n", "malaysia.html\n", "north-korea.html\n", "philippines.html\n", "qatar.html\n", "saudi-arabia.html\n", "singapore.html\n", "south-korea.html\n", "taiwan.html\n", "thailand.html\n", "united-arab-emirates.html\n", "uzbekistan.html\n", "australia.html\n", "austria.html\n", "belarus.html\n", "belgium.html\n", "bulgaria.html\n", "czech-republic.html\n", "denmark.html\n", "estonia.html\n", "finland.html\n", "france.html\n", "georgia.html\n", "germany.html\n", "greece.html\n", "hungary.html\n", "ireland.html\n", "italy.html\n", "netherlands.html\n", "norway.html\n", "poland.html\n", "portugal.html\n", "romania.html\n", "russia.html\n", "spain.html\n", "sweden.html\n", "switzerland.html\n", "turkey.html\n", "uk.html\n", "ukraine.html\n" ] } ], "source": [ "for link in links:\n", " response = requests.get(url+link)\n", " soup = BeautifulSoup(response.content)\n", " print(link)\n", " names=[' '.join(i.text.split(' ')[:-2]) for i in soup.find_all('td',{'class':'bold'})]\n", " ids=[i['href'].split('.yaml')[0] for i in soup.findAll('a',{'class':'hlink'})[::2]]\n", " for i in range(len(names)):\n", " systems[ids[i]]=names[i]" ] }, { "cell_type": "code", "execution_count": 183, "metadata": {}, "outputs": [], "source": [ "metros=json.loads(open('metrosy.json','r').read())" ] }, { "cell_type": "code", "execution_count": 184, "metadata": {}, "outputs": [], "source": [ "metrokeys={i.split('/')[-2]:i for i in metros.keys()}" ] }, { "cell_type": "code", "execution_count": 185, "metadata": {}, "outputs": [], "source": [ "for metro in metros:\n", " metros[metro]['lr']=[]" ] }, { "cell_type": "code", "execution_count": 186, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "algiers appended to https://www.metrolinemap.com/metro/algiers/\n", "cairo appended to https://www.metrolinemap.com/metro/cairo/\n", "ERROR addis_ababa\n", "ERROR mauritius\n", "ERROR abuja\n", "ERROR cape_town\n", "ERROR durban\n", "ERROR eastern_cape\n", "ERROR johannesburg_-_gautrain\n", "ERROR johannesburg_-_metrorail\n", "ERROR tunis\n", "buenos_aires appended to https://www.metrolinemap.com/metro/buenos-aires/\n", "ERROR mendoza\n", "ERROR baixada_santista\n", "belo_horizonte appended to https://www.metrolinemap.com/metro/belo-horizonte/\n", "ERROR braslia\n", "ERROR fortaleza\n", "ERROR macei\n", "ERROR natal\n", "porto_alegre appended to https://www.metrolinemap.com/metro/porto-alegre/\n", "recife appended to https://www.metrolinemap.com/metro/recife/\n", "rio_de_janeiro appended to https://www.metrolinemap.com/metro/rio-de-janeiro/\n", "rio_de_janeiro_-_supervia appended to https://www.metrolinemap.com/metro/rio-de-janeiro/\n", "salvador appended to https://www.metrolinemap.com/metro/salvador/\n", "ERROR so_paulo\n", "ERROR so_paulo_-_cptm\n", "ERROR calgary\n", "ERROR edmonton\n", "montreal appended to https://www.metrolinemap.com/metro/montreal/\n", "ERROR ottawa\n", "toronto appended to https://www.metrolinemap.com/metro/toronto/\n", "vancouver appended to https://www.metrolinemap.com/metro/vancouver/\n", "santiago appended to https://www.metrolinemap.com/metro/santiago/\n", "ERROR medelln\n", "santo_domingo appended to https://www.metrolinemap.com/metro/santo-domingo/\n", "ERROR guadalajara\n", "mexico_city appended to https://www.metrolinemap.com/metro/mexico-city/\n", "monterrey appended to https://www.metrolinemap.com/metro/monterrey/\n", "panama_city appended to https://www.metrolinemap.com/metro/panama-city/\n", "lima appended to https://www.metrolinemap.com/metro/lima/\n", "atlanta appended to https://www.metrolinemap.com/metro/atlanta/\n", "baltimore appended to https://www.metrolinemap.com/metro/baltimore/\n", "boston appended to https://www.metrolinemap.com/metro/boston/\n", "ERROR charlotte\n", "chicago appended to https://www.metrolinemap.com/metro/chicago/\n", "cleveland appended to https://www.metrolinemap.com/metro/cleveland/\n", "ERROR denver\n", "ERROR las_vegas\n", "los_angeles appended to https://www.metrolinemap.com/metro/los-angeles/\n", "miami appended to https://www.metrolinemap.com/metro/miami/\n", "ERROR minneapolis-st_paul\n", "ERROR new_york_-_jfk_airport\n", "new_york_city appended to https://www.metrolinemap.com/metro/new-york-city/\n", "new_york_city_-_new_jersey appended to https://www.metrolinemap.com/metro/new-york-city/\n", "new_york_city_-_path appended to https://www.metrolinemap.com/metro/new-york-city/\n", "new_york_city_-_staten_island appended to https://www.metrolinemap.com/metro/new-york-city/\n", "ERROR newark\n", "ERROR norfolk\n", "philadelphia appended to https://www.metrolinemap.com/metro/philadelphia/\n", "philadelphia_-_patco appended to https://www.metrolinemap.com/metro/philadelphia/\n", "philadelphia_-_snjrg appended to https://www.metrolinemap.com/metro/philadelphia/\n", "ERROR sacramento\n", "ERROR san_diego\n", "san_francisco appended to https://www.metrolinemap.com/metro/san-francisco/\n", "ERROR san_jos-santa_clara\n", "ERROR san_juan_puerto_rico\n", "washington_dc appended to https://www.metrolinemap.com/metro/washington/\n", "caracas appended to https://www.metrolinemap.com/metro/caracas/\n", "ERROR maracaibo\n", "ERROR valencia_carabobo\n", "yerevan appended to https://www.metrolinemap.com/metro/yerevan/\n", "baku appended to https://www.metrolinemap.com/metro/baku/\n", "beijing appended to https://www.metrolinemap.com/metro/beijing/\n", "changchun appended to https://www.metrolinemap.com/metro/changchun/\n", "changsha appended to https://www.metrolinemap.com/metro/changsha/\n", "ERROR changzhou\n", "chengdu appended to https://www.metrolinemap.com/metro/chengdu/\n", "chongqing appended to https://www.metrolinemap.com/metro/chongqing/\n", "dalian appended to https://www.metrolinemap.com/metro/dalian/\n", "dongguan appended to https://www.metrolinemap.com/metro/dongguan/\n", "fuzhou appended to https://www.metrolinemap.com/metro/fuzhou/\n", "guangzhou appended to https://www.metrolinemap.com/metro/guangzhou/\n", "guiyang appended to https://www.metrolinemap.com/metro/guiyang/\n", "hangzhou appended to https://www.metrolinemap.com/metro/hangzhou/\n", "harbin appended to https://www.metrolinemap.com/metro/harbin/\n", "hefei appended to https://www.metrolinemap.com/metro/hefei/\n", "ERROR hohhot\n", "hong_kong appended to https://www.metrolinemap.com/metro/hong-kong/\n", "jinan appended to https://www.metrolinemap.com/metro/jinan/\n", "kunming appended to https://www.metrolinemap.com/metro/kunming/\n", "ERROR lanzhou\n", "ERROR macau\n", "nanchang appended to https://www.metrolinemap.com/metro/nanchang/\n", "nanjing appended to https://www.metrolinemap.com/metro/nanjing/\n", "nanning appended to https://www.metrolinemap.com/metro/nanning/\n", "ningbo appended to https://www.metrolinemap.com/metro/ningbo/\n", "qingdao appended to https://www.metrolinemap.com/metro/qingdao/\n", "shanghai appended to https://www.metrolinemap.com/metro/shanghai/\n", "shanghai_-_s-train appended to https://www.metrolinemap.com/metro/shanghai/\n", "shenyang appended to https://www.metrolinemap.com/metro/shenyang/\n", "shenzhen appended to https://www.metrolinemap.com/metro/shenzhen/\n", "shijiazhuang appended to https://www.metrolinemap.com/metro/shijiazhuang/\n", "suzhou appended to https://www.metrolinemap.com/metro/suzhou/\n", "tianjin appended to https://www.metrolinemap.com/metro/tianjin/\n", "wenzhou appended to https://www.metrolinemap.com/metro/wenzhou/\n", "wuhan appended to https://www.metrolinemap.com/metro/wuhan/\n", "wuxi appended to https://www.metrolinemap.com/metro/wuxi/\n", "xian appended to https://www.metrolinemap.com/metro/xian/\n", "xiamen appended to https://www.metrolinemap.com/metro/xiamen/\n", "ERROR xuzhou\n", "zhengzhou appended to https://www.metrolinemap.com/metro/zhengzhou/\n", "ERROR rmqi\n", "ahmedabad appended to https://www.metrolinemap.com/metro/ahmedabad/\n", "bangalore appended to https://www.metrolinemap.com/metro/bangalore/\n", "chennai appended to https://www.metrolinemap.com/metro/chennai/\n", "delhi appended to https://www.metrolinemap.com/metro/delhi/\n", "hyderabad appended to https://www.metrolinemap.com/metro/hyderabad/\n", "jaipur appended to https://www.metrolinemap.com/metro/jaipur/\n", "kochi appended to https://www.metrolinemap.com/metro/kochi/\n", "kolkata appended to https://www.metrolinemap.com/metro/kolkata/\n", "lucknow appended to https://www.metrolinemap.com/metro/lucknow/\n", "mumbai appended to https://www.metrolinemap.com/metro/mumbai/\n", "nagpur appended to https://www.metrolinemap.com/metro/nagpur/\n", "noida appended to https://www.metrolinemap.com/metro/noida/\n", "jakarta appended to https://www.metrolinemap.com/metro/jakarta/\n", "jakarta_lrt appended to https://www.metrolinemap.com/metro/jakarta/\n", "ERROR palembang\n", "isfahan appended to https://www.metrolinemap.com/metro/isfahan/\n", "mashhad appended to https://www.metrolinemap.com/metro/mashhad/\n", "shiraz appended to https://www.metrolinemap.com/metro/shiraz/\n", "tabriz appended to https://www.metrolinemap.com/metro/tabriz/\n", "tehran appended to https://www.metrolinemap.com/metro/tehran/\n", "ERROR chiba\n", "ERROR chiba_-_toyo\n", "ERROR chiba_-_yukarigaoka\n", "fukuoka appended to https://www.metrolinemap.com/metro/fukuoka/\n", "hiroshima appended to https://www.metrolinemap.com/metro/hiroshima/\n", "ERROR kamakura-enoshima\n", "ERROR kanazawa\n", "ERROR kitakyushu\n", "kobe appended to https://www.metrolinemap.com/metro/kobe/\n", "kobe_new_transit appended to https://www.metrolinemap.com/metro/kobe/\n", "kyoto appended to https://www.metrolinemap.com/metro/kyoto/\n", "ERROR matsuyama\n", "nagoya appended to https://www.metrolinemap.com/metro/nagoya/\n", "ERROR okinawa\n", "osaka appended to https://www.metrolinemap.com/metro/osaka/\n", "osaka_-_rapit appended to https://www.metrolinemap.com/metro/osaka/\n", "ERROR saitama\n", "ERROR saitama_-_ina\n", "sapporo appended to https://www.metrolinemap.com/metro/sapporo/\n", "sendai appended to https://www.metrolinemap.com/metro/sendai/\n", "tokyo appended to https://www.metrolinemap.com/metro/tokyo/\n", "tokyo_-_haneda_monorail appended to https://www.metrolinemap.com/metro/tokyo/\n", "tokyo_-_nippori-toneri_liner appended to https://www.metrolinemap.com/metro/tokyo/\n", "tokyo_-_rinkai appended to https://www.metrolinemap.com/metro/tokyo/\n", "tokyo_-_skyliner appended to https://www.metrolinemap.com/metro/tokyo/\n", "tokyo_-_tama_monorail appended to https://www.metrolinemap.com/metro/tokyo/\n", "tokyo_-_toei appended to https://www.metrolinemap.com/metro/tokyo/\n", "tokyo_-_yurikamome appended to https://www.metrolinemap.com/metro/tokyo/\n", "yokohama appended to https://www.metrolinemap.com/metro/yokohama/\n", "yokohama_-_denentoshi appended to https://www.metrolinemap.com/metro/yokohama/\n", "yokohama_-_minatomirai appended to https://www.metrolinemap.com/metro/yokohama/\n", "almaty appended to https://www.metrolinemap.com/metro/almaty/\n", "kuala_lumpur appended to https://www.metrolinemap.com/metro/kuala-lumpur/\n", "pyongyang appended to https://www.metrolinemap.com/metro/pyongyang/\n", "manila appended to https://www.metrolinemap.com/metro/manila/\n", "doha appended to https://www.metrolinemap.com/metro/doha/\n", "mecca appended to https://www.metrolinemap.com/metro/mecca/\n", "singapore appended to https://www.metrolinemap.com/metro/singapore/\n", "busan appended to https://www.metrolinemap.com/metro/busan/\n", "daegu appended to https://www.metrolinemap.com/metro/daegu/\n", "daejeon appended to https://www.metrolinemap.com/metro/daejeon/\n", "gwangju appended to https://www.metrolinemap.com/metro/gwangju/\n", "ERROR incheon\n", "ERROR incheon_-_airport\n", "seoul appended to https://www.metrolinemap.com/metro/seoul/\n", "seoul_-_neotrans appended to https://www.metrolinemap.com/metro/seoul/\n", "kaohsiung appended to https://www.metrolinemap.com/metro/kaohsiung/\n", "taipei appended to https://www.metrolinemap.com/metro/taipei/\n", "taoyuan appended to https://www.metrolinemap.com/metro/taoyuan/\n", "bangkok appended to https://www.metrolinemap.com/metro/bangkok/\n", "bangkok_-_airport appended to https://www.metrolinemap.com/metro/bangkok/\n", "bangkok_-_skytrain appended to https://www.metrolinemap.com/metro/bangkok/\n", "dubai appended to https://www.metrolinemap.com/metro/dubai/\n", "tashkent appended to https://www.metrolinemap.com/metro/tashkent/\n", "ERROR canberra\n", "sydney appended to https://www.metrolinemap.com/metro/sydney/\n", "vienna appended to https://www.metrolinemap.com/metro/vienna/\n", "vienna_s-bahn appended to https://www.metrolinemap.com/metro/vienna/\n", "minsk appended to https://www.metrolinemap.com/metro/minsk/\n", "brussels appended to https://www.metrolinemap.com/metro/brussels/\n", "sofia appended to https://www.metrolinemap.com/metro/sofia/\n", "prague appended to https://www.metrolinemap.com/metro/prague/\n", "copenhagen appended to https://www.metrolinemap.com/metro/copenhagen/\n", "ERROR tallinn\n", "helsinki appended to https://www.metrolinemap.com/metro/helsinki/\n", "lille appended to https://www.metrolinemap.com/metro/lille/\n", "lyon appended to https://www.metrolinemap.com/metro/lyon/\n", "marseille appended to https://www.metrolinemap.com/metro/marseille/\n", "paris appended to https://www.metrolinemap.com/metro/paris/\n", "paris_rer appended to https://www.metrolinemap.com/metro/paris/\n", "rennes appended to https://www.metrolinemap.com/metro/rennes/\n", "toulouse appended to https://www.metrolinemap.com/metro/toulouse/\n", "tbilisi appended to https://www.metrolinemap.com/metro/tbilisi/\n", "berlin appended to https://www.metrolinemap.com/metro/berlin/\n", "berlin_s-bahn appended to https://www.metrolinemap.com/metro/berlin/\n", "ERROR bremen_s-bahn\n", "ERROR dresden_s-bahn\n", "hamburg appended to https://www.metrolinemap.com/metro/hamburg/\n", "hamburg_s-bahn appended to https://www.metrolinemap.com/metro/hamburg/\n", "ERROR hannover_s-bahn\n", "ERROR leipzig_s-bahn\n", "ERROR magdeburg_s-bahn\n", "munich appended to https://www.metrolinemap.com/metro/munich/\n", "munich_s-bahn appended to https://www.metrolinemap.com/metro/munich/\n", "nuremberg appended to https://www.metrolinemap.com/metro/nuremberg/\n", "nuremberg_s-bahn appended to https://www.metrolinemap.com/metro/nuremberg/\n", "ERROR rhein-ruhr\n", "ERROR stuttgart_s-bahn\n", "athens appended to https://www.metrolinemap.com/metro/athens/\n", "budapest appended to https://www.metrolinemap.com/metro/budapest/\n", "budapest_hev appended to https://www.metrolinemap.com/metro/budapest/\n", "ERROR dublin\n", "brescia appended to https://www.metrolinemap.com/metro/brescia/\n", "catania appended to https://www.metrolinemap.com/metro/catania/\n", "genoa appended to https://www.metrolinemap.com/metro/genoa/\n", "milan appended to https://www.metrolinemap.com/metro/milan/\n", "naples appended to https://www.metrolinemap.com/metro/naples/\n", "naples_-_aversa appended to https://www.metrolinemap.com/metro/naples/\n", "rome appended to https://www.metrolinemap.com/metro/rome/\n", "rome_-_leonardo_express appended to https://www.metrolinemap.com/metro/rome/\n", "turin appended to https://www.metrolinemap.com/metro/turin/\n", "amsterdam appended to https://www.metrolinemap.com/metro/amsterdam/\n", "rotterdam appended to https://www.metrolinemap.com/metro/rotterdam/\n", "oslo appended to https://www.metrolinemap.com/metro/oslo/\n", "warsaw appended to https://www.metrolinemap.com/metro/warsaw/\n", "lisbon appended to https://www.metrolinemap.com/metro/lisbon/\n", "ERROR porto\n", "bucharest appended to https://www.metrolinemap.com/metro/bucharest/\n", "kazan appended to https://www.metrolinemap.com/metro/kazan/\n", "moscow appended to https://www.metrolinemap.com/metro/moscow/\n", "moscow_-_aeroexpress appended to https://www.metrolinemap.com/metro/moscow/\n", "nizhny_novgorod appended to https://www.metrolinemap.com/metro/nizhny-novgorod/\n", "novosibirsk appended to https://www.metrolinemap.com/metro/novosibirsk/\n", "saint_petersburg appended to https://www.metrolinemap.com/metro/saint-petersburg/\n", "samara appended to https://www.metrolinemap.com/metro/samara/\n", "ERROR volgograd\n", "yekaterinburg appended to https://www.metrolinemap.com/metro/yekaterinburg/\n", "barcelona appended to https://www.metrolinemap.com/metro/barcelona/\n", "bilbao appended to https://www.metrolinemap.com/metro/bilbao/\n", "madrid appended to https://www.metrolinemap.com/metro/madrid/\n", "ERROR mlaga\n", "ERROR palma\n", "ERROR seville\n", "ERROR valencia\n", "stockholm appended to https://www.metrolinemap.com/metro/stockholm/\n", "ERROR geneva\n", "lausanne appended to https://www.metrolinemap.com/metro/lausanne/\n", "adana appended to https://www.metrolinemap.com/metro/adana/\n", "ankara appended to https://www.metrolinemap.com/metro/ankara/\n", "bursa appended to https://www.metrolinemap.com/metro/bursa/\n", "istanbul appended to https://www.metrolinemap.com/metro/istanbul/\n", "izmir appended to https://www.metrolinemap.com/metro/izmir/\n", "glasgow appended to https://www.metrolinemap.com/metro/glasgow/\n", "london appended to https://www.metrolinemap.com/metro/london/\n", "newcastle appended to https://www.metrolinemap.com/metro/newcastle/\n", "dnipro appended to https://www.metrolinemap.com/metro/dnipro/\n", "kharkiv appended to https://www.metrolinemap.com/metro/kharkiv/\n", "kiev appended to https://www.metrolinemap.com/metro/kiev/\n" ] } ], "source": [ "for s in systems:\n", " #s=s.replace('_','-')\n", " if s in metrokeys:\n", " metros[metrokeys[s]]['lr'].append({'name':systems[s],'id':s})\n", " print(s,'appended to',metrokeys[s])\n", " elif s.split('_-_')[0] in metrokeys:\n", " metros[metrokeys[s.split('_-_')[0]]]['lr'].append({'name':systems[s],'id':s})\n", " print(s,'appended to',metrokeys[s.split('_-_')[0]])\n", " elif s.split('_')[0] in metrokeys:\n", " metros[metrokeys[s.split('_')[0]]]['lr'].append({'name':systems[s],'id':s})\n", " print(s,'appended to',metrokeys[s.split('_')[0]])\n", " elif s.replace('_','-') in metrokeys:\n", " metros[metrokeys[s.replace('_','-')]]['lr'].append({'name':systems[s],'id':s})\n", " print(s,'appended to',metrokeys[s.replace('_','-')])\n", " elif s.split('_-_')[0].replace('_','-') in metrokeys:\n", " metros[metrokeys[s.split('_-_')[0].replace('_','-')]]['lr'].append({'name':systems[s],'id':s})\n", " print(s,'appended to',metrokeys[s.split('_-_')[0].replace('_','-')])\n", " else:\n", " print('ERROR',s) " ] }, { "cell_type": "code", "execution_count": 187, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "6443467" ] }, "execution_count": 187, "metadata": {}, "output_type": "execute_result" } ], "source": [ "open('metrosx.json','w').write(json.dumps(metros))" ] }, { "cell_type": "code", "execution_count": 188, "metadata": {}, "outputs": [], "source": [ "import zipfile" ] }, { "cell_type": "code", "execution_count": 189, "metadata": {}, "outputs": [], "source": [ "zipfile.ZipFile('metrosx.zip', \"w\", zipfile.ZIP_DEFLATED).write('metrosx.json')" ] }, { "cell_type": "code", "execution_count": 190, "metadata": {}, "outputs": [], "source": [ "metro=metros['https://www.metrolinemap.com/metro/budapest/']" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.7.4" } }, "nbformat": 4, "nbformat_minor": 4 }