{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Batch geocoding voter addresses with the US Census Geocoder\n",
    "\n",
    "Census Geocoding Services API reference: https://geocoding.geo.census.gov/geocoder/Geocoding_Services_API.pdf"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import geopandas as gpd\n",
    "from shapely.geometry import Point\n",
    "import requests\n",
    "import re, csv"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Number of voter records sent to the geocoder per request. The API document\n",
    "# linked above states the per-file record limit of the batch endpoint.\n",
    "CHUNK_SIZE = 1000"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Geocoding helper"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# geocode each spreadsheet\n",
    "def censusGeocode(file, output):\n",
    "    \"\"\"Send an address file to the Census batch geocoder and save the reply as CSV.\n",
    "\n",
    "    Parameters\n",
    "    ----------\n",
    "    file : str\n",
    "        Path of the address file uploaded as the `addressFile` form field.\n",
    "    output : str\n",
    "        Path of the CSV file to write; an existing file is overwritten.\n",
    "\n",
    "    Raises\n",
    "    ------\n",
    "    requests.HTTPError\n",
    "        If the service answers with a 4xx/5xx status.\n",
    "    \"\"\"\n",
    "    url = 'https://geocoding.geo.census.gov/geocoder/geographies/addressbatch?form'\n",
    "    payload = {'benchmark': 'Public_AR_Current',\n",
    "               'vintage': 'Current_Current'}\n",
    "    # Binary mode plus a context manager: the bytes are uploaded untouched and\n",
    "    # the handle is closed as soon as the request has been sent\n",
    "    with open(file, 'rb') as addressFile:\n",
    "        response = requests.post(url, files={'addressFile': addressFile}, data=payload)\n",
    "    # Stop on an HTTP error instead of saving the error page as results\n",
    "    response.raise_for_status()\n",
    "    # Quotes are removed and every comma starts a new column, so any quoted\n",
    "    # field that contains commas is spread over several columns\n",
    "    rows = [[cell.strip() for cell in line.split(',')]\n",
    "            for line in response.text.replace('\"', '').split('\\n')\n",
    "            if line.strip()]  # skip blank lines (e.g. after the final newline)\n",
    "    with open(output, 'w', newline='') as geocodeOutput:\n",
    "        csv.writer(geocodeOutput, delimiter=',').writerows(rows)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load the voter records"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "dfAll = pd.read_csv('./data/NCSBE/ncvoter_Wake.zip')\n",
    "numRecs = dfAll.shape[0]"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Geocode in chunks\n",
    "\n",
    "Each chunk is written to `Addresses.csv`, geocoded, and saved as `GeoCode_<end position>.csv`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for fromID in range(0, numRecs, CHUNK_SIZE):\n",
    "    toID = fromID + CHUNK_SIZE\n",
    "    #Status\n",
    "    print(toID, end=\";\")\n",
    "    #Get the chunk of values, dropping records with any missing field\n",
    "    address_set = dfAll.iloc[fromID:toID, 1:-2].dropna(how='any')\n",
    "    #Write out the address file; the DataFrame index becomes the first column\n",
    "    address_set.to_csv('Addresses.csv', index=True, header=False)\n",
    "    #Name the output after the (exclusive) end position of the chunk\n",
    "    outFile = 'GeoCode_{}.csv'.format(toID)\n",
    "    #Geocode\n",
    "    censusGeocode('Addresses.csv', outFile)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Load results into a GeoDataFrame"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Convert output to dataframe\n",
    "names = ['address','county','state','zip','match','match_type','address_2','county_2','state_2',\n",
    "         'zip_2','longitude','latitude','side','block10','st_fips','co_fips','census_2','census_3']\n",
    "#Read every column as text except the two coordinate columns\n",
    "dtypes = {name: 'str' for name in names}\n",
    "dtypes.update(longitude='float', latitude='float')\n",
    "#Output of the first chunk written by the geocoding loop\n",
    "df = pd.read_csv('GeoCode_{}.csv'.format(CHUNK_SIZE),header=None,names=names,dtype=dtypes)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Build one point per row from the coordinate columns (x = longitude, y = latitude)\n",
    "geom = [Point(x,y) for x, y in zip(df.longitude,df.latitude)]"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "#Uses geom from the previous cell, so the cells must run in this order\n",
    "gdf = gpd.GeoDataFrame(df, geometry=geom)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}