{ "cells": [ { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "3\n", "Av. Dn. Ana Costa\n", "Av. Francisco Nóbrega Barbosa\n", "Av. Francisco Nóbrega Barbosa => Avenida Francisco Nóbrega Barbosa\n", "Complexo Viário Engenheiro Job Shuji Nogami\n", "R. Bergamo\n", "R. Bergamo => Rua Bergamo\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ "C:\\Anaconda\\lib\\site-packages\\IPython\\kernel\\__main__.py:72: UnicodeWarning: Unicode equal comparison failed to convert both arguments to Unicode - interpreting them as being unequal\n" ] } ], "source": [ "#!/usr/bin/env python\n", "#-*- coding: utf-8 -*-\n", "#audit.py\n", "import xml.etree.cElementTree as ET\n", "from collections import defaultdict\n", "import re\n", "import pprint\n", "\n", "#arquivo = \"/media/vagner/Seagate Expansion Drive/500Gb/DiskExternoVagner/Cursos/CientistaDados/Modulo_4/DW-OSM-RMSP/sample.osm\"\n", "arquivo = r'F:\\500Gb\\DiskExternoVagner\\Cursos\\CientistaDados\\Modulo_4\\DW-OSM-RMSP\\sample.osm'\n", "\n", "osm_file = open(arquivo, \"r\")\n", "\n", "street_type_re = re.compile(r'^\\S+\\.?',re.IGNORECASE)\n", "street_types = defaultdict(set)\n", "\n", "\n", "\n", "expected = [\"Alameda\", \"Avenida\", \"Estrada\", \"Largo\", u\"Praça\", \"Rodovia\", \"Rua\", \"Travessa\", \"Via\",\n", " u\"Calçadão\", \"Rodoanel\", \"Passeio\", u\"Complexo Viário\", \"Marginal\"]\n", "\n", "mapping = { \"Av\": \"Avenida\",\n", " \"Av.\": \"Avenida\",\n", " \"R\": \"Rua\",\n", " \"R.\": \"Rua\", \n", " \"rua\": \"Rua\",\n", " \"r.\": \"Rua\",\n", " \"r\": \"Rua\",\n", " \"Pr.\": u\"Praça\",\n", " \"PR.\": u\"Praça\",\n", " \"Complexo viário\": u\"Complexo Viário\",\n", " \"Complexo viario\": u\"Complexo Viário\"\n", " }\n", "\n", "def audit_street_type(street_types, street_name):\n", " \"\"\" Utiliza expressão regular para verificar o ínicio do campo \"addr:street\" identificando assim o Tipo de \n", " Via (Rua, Alamenda, etc)\n", " \"\"\"\n", " #print street_name\n", " m = street_type_re.search(street_name)\n", " if m:\n", " street_type = m.group()\n", " #print street_type \n", " if street_type not in expected:\n", " street_types[street_type].add(street_name)\n", " \n", "def is_street_name(elem):\n", " \"\"\" Verifica se o tag é um atributo de endereço de via (addr:street). Retorna um Boolean (True se atributo\n", " de endereço de via) \n", " \"\"\"\n", " return (elem.attrib['k'] == \"addr:street\")\n", "\n", "def audit():\n", " \"\"\" Realiza análise baseada no evento \"tag de ínicio\"; quando este ocorre o elemento em questão é \n", " identificado e se for do tipo \"way\", o método \"iter\" é chamado; este método realiza uma iteração de todas as \n", " subtags (tags aninhadas) do tipo \"tag\".\n", " \"\"\"\n", " for event, elem in ET.iterparse(osm_file, events=(\"start\",)):\n", " if elem.tag == \"way\":\n", " for tag in elem.iter(\"tag\"):\n", " if is_street_name(tag):\n", " audit_street_type(street_types, tag.attrib['v'])\n", " #pprint.pprint(dict(street_types))\n", " return street_types\n", "\n", "def update_name(name, mapping):\n", " #print name\n", " m = street_type_re.search(name)\n", " other_street_types=[]\n", " if m:\n", " street_type = m.group()\n", " if street_type in mapping.keys():\n", " name = re.sub(street_type_re, mapping[street_type], name)\n", " else:\n", " other_street_types.append(street_type)\n", " return name\n", "\n", "def test():\n", " st_types = audit()\n", " print (len(st_types.keys()))\n", " #pprint.pprint(dict(st_types))\n", " \n", " for st_type, ways in st_types.iteritems():\n", " for name in ways:\n", " better_name = update_name(name, mapping)\n", " if name != better_name:\n", " print name, \"=>\", better_name\n", " name= better_name\n", " \n", "if __name__ == '__main__':\n", " test()\n", " osm_file.close()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.12" } }, "nbformat": 4, "nbformat_minor": 1 }