{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Kitas in Hannover #\n",
"\n",
"Welche Kitas gibt es in Hannover überhaupt und wo befinden sie sich?\n",
"\n",
"http://www.hannover.de/Leben-in-der-Region-Hannover/Soziales/Kinder-Jugendliche/Kinderbetreuung/Kindertagesst%C3%A4tten-in-der-Stadt-Hannover/Kitas-in-Hannover-nach-Tr%C3%A4gern/Kitas-der-Landeshauptstadt-Hannover\n",
"\n",
"### Webcrawling in Python ###"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"ExecuteTime": {
"end_time": "2016-03-11T22:04:08.813288",
"start_time": "2016-03-11T22:04:08.715023"
},
"collapsed": false
},
"outputs": [],
"source": [
"import urllib.request\n",
"import urllib.parse\n",
"from bs4 import BeautifulSoup\n",
"from geopy.geocoders import Nominatim\n",
"\n",
"# Nominatim's usage policy asks for an application-specific User-Agent, and\n",
"# recent geopy releases refuse to run with the library's default one.\n",
"geolocator = Nominatim(user_agent=\"kitas-in-hannover-notebook\")\n",
"\n",
"def parse_page(thePage):\n",
"    \"\"\"Collect and geocode the entries listed on one result page.\n",
"\n",
"    Every ``<div class=\"line_view\">`` in ``thePage`` is treated as one entry:\n",
"    the ``title`` attribute of its first link is split on \" - \" and the first\n",
"    child of the link's ``div > p`` is used as the address to geocode.\n",
"\n",
"    Args:\n",
"        thePage: Parsed page (BeautifulSoup object) to scan.\n",
"\n",
"    Returns:\n",
"        A list of ``(title_part_0, title_part_1, location)`` tuples, one per\n",
"        entry for which the module-level ``geolocator`` returned a result.\n",
"        Entries without a geocoding result are reported on stdout and\n",
"        skipped. If a title contains no \" - \", the whole title is used for\n",
"        both title parts instead of raising ``IndexError``.\n",
"    \"\"\"\n",
"    results = []\n",
"    for div in thePage.find_all(\"div\", class_=\"line_view\"):\n",
"        name = div.a['title']\n",
"        parts = name.split(\" - \")\n",
"        # Titles without the \" - \" separator yield a single part; reuse it\n",
"        # so the second tuple field is still a usable label.\n",
"        label = parts[1] if len(parts) > 1 else parts[0]\n",
"        address = div.a.div.p.contents[0].strip()\n",
"\n",
"        location = geolocator.geocode(address)\n",
"        if location is None:\n",
"            print(\"*** NO RESULT FOR: '\" + address + \"' ***\")\n",
"            continue\n",
"        results.append((parts[0], label, location))\n",
"\n",
"    return results"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"ExecuteTime": {
"end_time": "2016-03-11T22:06:03.485257",
"start_time": "2016-03-11T22:05:44.766616"
},
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"*** NO RESULT FOR: 'Rötngenstraße 10, 30163 Hannover' ***\n",
"*** NO RESULT FOR: 'Gronostraße 9 C-E, 30459 Hannover' ***\n",
"*** NO RESULT FOR: 'Isernhagener Straße 82, 30165 Hannover' ***\n",
"Found 43 Kitas.\n"
]
}
],
"source": [
"baseurl = \"http://www.hannover.de\"\n",
"url = baseurl + \"/Leben-in-der-Region-Hannover/Soziales/Kinder-Jugendliche/Kinderbetreuung/Kindertagesst%C3%A4tten-in-der-Stadt-Hannover/Kitas-in-Hannover-nach-Tr%C3%A4gern/Kitas-der-Landeshauptstadt-Hannover\"\n",
"\n",
"kita_infos = []\n",
"nextPage = True\n",
"while(nextPage):\n",
" dom = urllib.request.urlopen(url).read()\n",
" soup = BeautifulSoup(dom, 'html.parser')\n",
" results = parse_page(soup)\n",
" kita_infos.extend(results)\n",
" \n",
" next = soup.find_all(\"span\", class_=\"next\")\n",
" link = next[0].a\n",
" \n",
" if link is not None:\n",
" text = link.get(\"href\", None)\n",
" url = baseurl + urllib.parse.quote(text)\n",
" else:\n",
" nextPage = False\n",
"\n",
"print('Found ' + str(len(kita_infos)) + ' Kitas.')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Kartenvisualisierung in Python ###"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"ExecuteTime": {
"end_time": "2016-03-11T22:06:13.869510",
"start_time": "2016-03-11T22:06:11.134060"
},
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
""
],
"text/plain": [
""
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"from IPython.display import HTML\n",
"import folium\n",
"\n",
"def embed_map(map, path=\"map.html\"):\n",
"    \"\"\"Save a map to an HTML file and return an iframe that embeds that file.\n",
"\n",
"    Args:\n",
"        map: Object exposing ``save(outfile=...)`` (here a folium map).\n",
"        path: File the map is written to; also used as the iframe source.\n",
"            NOTE(review): assumes the notebook frontend serves ``path``\n",
"            relative to the notebook's location - confirm for your setup.\n",
"\n",
"    Returns:\n",
"        An ``IPython.display.HTML`` object wrapping an ``<iframe>`` whose\n",
"        ``src`` is ``path``.\n",
"    \"\"\"\n",
"    map.save(outfile=path)\n",
"    # The template has to reference {path}; with an empty template the\n",
"    # format() call discards it and the cell renders nothing.\n",
"    return HTML('<iframe src=\"{path}\" style=\"width: 100%; height: 510px; border: none\"></iframe>'.format(path=path))\n",
"\n",
"# Base map; `location` is the initial view centre, `zoom_start` the initial zoom.\n",
"ha_map = folium.Map(zoom_start=12, location=[52.3770, 9.7404])\n",
"\n",
"# One info-sign marker per collected entry: field 2 holds the geocoded\n",
"# location, field 1 the text shown in the popup.\n",
"for entry in kita_infos:\n",
"    label, place = entry[1], entry[2]\n",
"    marker = folium.Marker(\n",
"        [place.latitude, place.longitude],\n",
"        popup=label,\n",
"        icon=folium.Icon(icon='info-sign'),\n",
"    )\n",
"    marker.add_to(ha_map)\n",
"\n",
"# Last expression of the cell, so its return value becomes the cell output.\n",
"embed_map(ha_map, \"ha_map.html\")"
]
}
],
"metadata": {
"gist": {
"data": {
"description": "KitasInHannover.ipynb",
"public": false
},
"id": ""
},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.1"
},
"latex_envs": {
"bibliofile": "biblio.bib",
"cite_by": "apalike",
"current_citInitial": 1,
"eqLabelWithNumbers": true,
"eqNumInitial": 0
}
},
"nbformat": 4,
"nbformat_minor": 0
}