{ "cells": [ { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import quandl to retrieve data\n", "# Pandas and matplotlib standard imports for data analysis\n", "import quandl\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "# Use Plotly for better graphs\n", "import plotly.offline as plotly\n", "import plotly.graph_objs as go\n", "\n", "# Offline Plotting\n", "plotly.init_notebook_mode(connected=True)" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "# Constants\n", "\n", "# Populate your own quandl API Token\n", "# Populate your own mapbox API Token\n", "quandl.ApiConfig.api_key = {YOUR API KEY}\n", "mapbox_access_token = {YOUR API KEY}\n", "\n", "# Start date and end date should be the same\n", "start_date = '2018-06-30'\n", "end_date = '2018-06-30'\n", "\n", "# Specify Zillow Indicator we want to graph\n", "indicator = 'MRPFAH'\n", "\n", "# Specify the county and state we want to generate the map for\n", "county = 'Alameda'\n", "state = 'CA'" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "# We are going to fetch the data for each zip code in the county and add that to a single dataframe\n", "def get_city_data(cities, indicator):\n", " df_array = []\n", " for city, code in cities.items():\n", " quandl_db_code = f'ZILLOW/Z{code}_{indicator}'\n", " try:\n", " data = quandl.get(quandl_db_code, start_date=start_date, end_date=end_date)\n", " data.rename(index=str, columns={'Value': city}, inplace=True)\n", " data.index = pd.to_datetime(data.index, format='%Y/%m/%d')\n", " df_array.append(data)\n", " except:\n", " print(f'Error fetching for: {quandl_db_code}')\n", " ret = pd.concat(df_array, axis=1)\n", " return ret" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "# Here we are going to figure out what zip codes are in the specified county\n", "# We will also use the logitude / latitude data for plotting\n", "\n", "ZIP_CODE_DTYPE = {'zip_code': str}\n", "ZIP_CODES = pd.read_csv('zip_codes_states.csv', dtype=ZIP_CODE_DTYPE)\n", "county_df = ZIP_CODES['county'] == county\n", "state_df = ZIP_CODES['state'] == state\n", "data = ZIP_CODES[county_df & state_df]" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [], "source": [ "data.index = data['zip_code']" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Error fetching for: ZILLOW/Z94502_MRPFAH\n", "Error fetching for: ZILLOW/Z94537_MRPFAH\n", "Error fetching for: ZILLOW/Z94540_MRPFAH\n", "Error fetching for: ZILLOW/Z94542_MRPFAH\n", "Error fetching for: ZILLOW/Z94543_MRPFAH\n", "Error fetching for: ZILLOW/Z94544_MRPFAH\n", "Error fetching for: ZILLOW/Z94545_MRPFAH\n", "Error fetching for: ZILLOW/Z94552_MRPFAH\n", "Error fetching for: ZILLOW/Z94557_MRPFAH\n", "Error fetching for: ZILLOW/Z94577_MRPFAH\n", "Error fetching for: ZILLOW/Z94578_MRPFAH\n", "Error fetching for: ZILLOW/Z94579_MRPFAH\n", "Error fetching for: ZILLOW/Z94580_MRPFAH\n", "Error fetching for: ZILLOW/Z94586_MRPFAH\n", "Error fetching for: ZILLOW/Z94601_MRPFAH\n", "Error fetching for: ZILLOW/Z94602_MRPFAH\n", "Error fetching for: ZILLOW/Z94603_MRPFAH\n", "Error fetching for: ZILLOW/Z94604_MRPFAH\n", "Error fetching for: ZILLOW/Z94606_MRPFAH\n", "Error fetching for: ZILLOW/Z94609_MRPFAH\n", "Error fetching for: ZILLOW/Z94610_MRPFAH\n", "Error fetching for: ZILLOW/Z94612_MRPFAH\n", "Error fetching for: ZILLOW/Z94613_MRPFAH\n", "Error fetching for: ZILLOW/Z94614_MRPFAH\n", "Error fetching for: ZILLOW/Z94615_MRPFAH\n", "Error fetching for: ZILLOW/Z94617_MRPFAH\n", "Error fetching for: ZILLOW/Z94618_MRPFAH\n", "Error fetching for: ZILLOW/Z94619_MRPFAH\n", "Error fetching for: ZILLOW/Z94620_MRPFAH\n", "Error fetching for: ZILLOW/Z94621_MRPFAH\n", "Error fetching for: ZILLOW/Z94622_MRPFAH\n", "Error fetching for: ZILLOW/Z94623_MRPFAH\n", "Error fetching for: ZILLOW/Z94624_MRPFAH\n", "Error fetching for: ZILLOW/Z94625_MRPFAH\n", "Error fetching for: ZILLOW/Z94626_MRPFAH\n", "Error fetching for: ZILLOW/Z94627_MRPFAH\n", "Error fetching for: ZILLOW/Z94643_MRPFAH\n", "Error fetching for: ZILLOW/Z94649_MRPFAH\n", "Error fetching for: ZILLOW/Z94659_MRPFAH\n", "Error fetching for: ZILLOW/Z94660_MRPFAH\n", "Error fetching for: ZILLOW/Z94661_MRPFAH\n", "Error fetching for: ZILLOW/Z94662_MRPFAH\n", "Error fetching for: ZILLOW/Z94666_MRPFAH\n", "Error fetching for: ZILLOW/Z94701_MRPFAH\n", "Error fetching for: ZILLOW/Z94702_MRPFAH\n", "Error fetching for: ZILLOW/Z94703_MRPFAH\n", "Error fetching for: ZILLOW/Z94704_MRPFAH\n", "Error fetching for: ZILLOW/Z94705_MRPFAH\n", "Error fetching for: ZILLOW/Z94706_MRPFAH\n", "Error fetching for: ZILLOW/Z94707_MRPFAH\n", "Error fetching for: ZILLOW/Z94708_MRPFAH\n", "Error fetching for: ZILLOW/Z94709_MRPFAH\n", "Error fetching for: ZILLOW/Z94710_MRPFAH\n", "Error fetching for: ZILLOW/Z94712_MRPFAH\n", "Error fetching for: ZILLOW/Z94720_MRPFAH\n" ] } ], "source": [ "# We will call our function to fetch the data from quandl\n", "# There will be errors for data we cannot retrieve\n", "\n", "real_estate_data = get_city_data(data['zip_code'], indicator)" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [], "source": [ "# Here we are just doing some transformation and massaging of the data we get back\n", "# If you want more information please run the notebook on your own\n", "\n", "transformed_real_estate_data = real_estate_data.transpose()\n", "column_name = f'{indicator} {end_date}'" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [], "source": [ "transformed_real_estate_data[column_name] = transformed_real_estate_data[end_date].astype('float')" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "concat_data = pd.concat([data, transformed_real_estate_data[column_name]], axis=1, sort=True)" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [], "source": [ "concat_data.dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "concat_data['text'] = concat_data['zip_code'] + ' ' + concat_data[column_name].astype(str)" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "df = concat_data" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
zip_codelatitudelongitudecitystatecountyMRPFAH 2018-06-30text
945019450137.770563-122.264779AlamedaCAAlameda2.69811594501 2.6981150835926
945369453637.565285-121.982721FremontCAAlameda2.39726094536 2.3972602739726
945389453837.509453-121.958320FremontCAAlameda2.46090894538 2.4609079743008
945399453937.520339-121.912568FremontCAAlameda2.29729794539 2.2972972972973
945419454137.675130-121.974120HaywardCAAlameda1.96692594541 1.9669247009149
\n", "
" ], "text/plain": [ " zip_code latitude longitude city state county \\\n", "94501 94501 37.770563 -122.264779 Alameda CA Alameda \n", "94536 94536 37.565285 -121.982721 Fremont CA Alameda \n", "94538 94538 37.509453 -121.958320 Fremont CA Alameda \n", "94539 94539 37.520339 -121.912568 Fremont CA Alameda \n", "94541 94541 37.675130 -121.974120 Hayward CA Alameda \n", "\n", " MRPFAH 2018-06-30 text \n", "94501 2.698115 94501 2.6981150835926 \n", "94536 2.397260 94536 2.3972602739726 \n", "94538 2.460908 94538 2.4609079743008 \n", "94539 2.297297 94539 2.2972972972973 \n", "94541 1.966925 94541 1.9669247009149 " ] }, "execution_count": 108, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Here is our final result dataframe\n", "# We can see for each zip code we have the longitude and latitude data\n", "# As well as the Zillow Indicator and text label \n", "\n", "df.head()" ] }, { "cell_type": "code", "execution_count": 109, "metadata": {}, "outputs": [], "source": [ "# Now it's just a matter of plotting using the Plotly Mapbox graph\n", "\n", "plot_data = [\n", " go.Scattermapbox(\n", " lon = df['longitude'],\n", " lat = df['latitude'],\n", " mode='markers',\n", " marker=dict(\n", " reversescale = True,\n", " autocolorscale = False,\n", " size=17,\n", " opacity=0.85,\n", " colorscale = 'YlOrRd',\n", " cmin = df[column_name].min(),\n", " color = df[column_name],\n", " cmax = df[column_name].max(),\n", " colorbar=dict(\n", " title=\"Price\"\n", " )\n", " ),\n", " text = df['text'],\n", " hoverinfo= 'text'\n", " )]\n", "\n", "layout = go.Layout(\n", " title= f'{indicator} in {county}, {state} {end_date}',\n", " autosize=True,\n", " hovermode='closest',\n", " showlegend=False,\n", " mapbox=dict(\n", " accesstoken=mapbox_access_token,\n", " bearing=0,\n", " center=dict(\n", " lat=df.iloc[0]['latitude'],\n", " lon=df.iloc[0]['longitude']\n", " ),\n", " pitch=0,\n", " zoom=8,\n", " style='light'\n", " ),\n", ")" ] }, { "cell_type": "code", "execution_count": 110, "metadata": {}, "outputs": [ { "data": { "application/vnd.plotly.v1+json": { "data": [ { "hoverinfo": "text", "lat": [ 37.770563, 37.565284999999996, 37.509453, 37.520339, 37.675129999999996, 37.731229, 37.676781, 37.680181, 37.570681, 37.534102000000004, 37.646081, 37.714402, 37.589084, 37.688244, 37.759322, 37.807967, 37.833876000000004, 37.847076 ], "lon": [ -122.264779, -121.982721, -121.95832, -121.912568, -121.97412, -122.066373, -121.91605, -121.921498, -122.063323, -122.025352, -121.862128, -121.906329, -121.97362, -121.89874099999999, -122.156626, -122.29878000000001, -122.29548, -122.222278 ], "marker": { "autocolorscale": false, "cmax": 3.3942558746736, "cmin": 1.9166311170699, "color": [ 2.6981150835926, 2.3972602739726, 2.4609079743008, 2.2972972972973, 1.9669247009149, 2.3003801251922, 1.9166311170699, 2.0896548693876, 2.1818181818182, 2.1113074204947, 2.0644823830725, 2.0768048316115, 2.2691761363636, 2.2038129165554, 2.0760510155881, 3.1673551377641, 3.3942558746736, 2.6666666666667 ], "colorbar": { "title": "Price" }, "colorscale": "YlOrRd", "opacity": 0.85, "reversescale": true, "size": 17 }, "mode": "markers", "text": [ "94501 2.6981150835926", "94536 2.3972602739726", "94538 2.4609079743008", "94539 2.2972972972973", "94541 1.9669247009149", "94546 2.3003801251922", "94550 1.9166311170699", "94551 2.0896548693876", "94555 2.1818181818182", "94560 2.1113074204947", "94566 2.0644823830725", "94568 2.0768048316115", "94587 2.2691761363636", "94588 2.2038129165554", "94605 2.0760510155881", "94607 3.1673551377641", "94608 3.3942558746736", "94611 2.6666666666667" ], "type": "scattermapbox", "uid": "025ae432-99e5-11e8-9d35-0242ac130002" } ], "layout": { "autosize": true, "hovermode": "closest", "mapbox": { "accesstoken": "pk.eyJ1IjoiYWNoaGFicmEyIiwiYSI6ImNqanhnNjRtaTFkbW8zcnJyeGptOTdoa2QifQ.IN8rFO4lk5Tq290mvT0DyA", "bearing": 0, "center": { "lat": 37.770563, "lon": -122.264779 }, "pitch": 0, "style": "light", "zoom": 8 }, "showlegend": false, "title": "MRPFAH in Alameda, CA 2018-06-30" } }, "text/html": [ "
" ], "text/vnd.plotly.v1+html": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Finally calling our plot function after declaring the layout\n", "\n", "fig = dict( data=plot_data, layout=layout )\n", "plotly.iplot( fig, filename=f'{indicator} {county} {state} {end_date}')" ] }, { "cell_type": "code", "execution_count": 111, "metadata": {}, "outputs": [], "source": [ "# Output to div for blog post\n", "# plotly.plot(fig, filename=f'{indicator} {county} {state} {end_date}', include_plotlyjs=False, output_type='div')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }