{ "cells": [ { "cell_type": "code", "execution_count": 96, "metadata": {}, "outputs": [ { "data": { "text/html": [ "" ], "text/vnd.plotly.v1+html": [ "" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "# Import quandl to retrieve data\n", "# Pandas and matplotlib standard imports for data analysis\n", "import quandl\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "%matplotlib inline\n", "\n", "# Use Plotly for better graphs\n", "import plotly.offline as plotly\n", "import plotly.graph_objs as go\n", "\n", "# Offline Plotting\n", "plotly.init_notebook_mode(connected=True)" ] }, { "cell_type": "code", "execution_count": 97, "metadata": {}, "outputs": [], "source": [ "# Constants\n", "\n", "# Populate your own quandl API Token\n", "# Populate your own mapbox API Token\n", "quandl.ApiConfig.api_key = {YOUR API KEY}\n", "mapbox_access_token = {YOUR API KEY}\n", "\n", "# Start date and end date should be the same\n", "start_date = '2018-06-30'\n", "end_date = '2018-06-30'\n", "\n", "# Specify Zillow Indicator we want to graph\n", "indicator = 'MRPFAH'\n", "\n", "# Specify the county and state we want to generate the map for\n", "county = 'Alameda'\n", "state = 'CA'" ] }, { "cell_type": "code", "execution_count": 98, "metadata": {}, "outputs": [], "source": [ "# We are going to fetch the data for each zip code in the county and add that to a single dataframe\n", "def get_city_data(cities, indicator):\n", " df_array = []\n", " for city, code in cities.items():\n", " quandl_db_code = f'ZILLOW/Z{code}_{indicator}'\n", " try:\n", " data = quandl.get(quandl_db_code, start_date=start_date, end_date=end_date)\n", " data.rename(index=str, columns={'Value': city}, inplace=True)\n", " data.index = pd.to_datetime(data.index, format='%Y/%m/%d')\n", " df_array.append(data)\n", " except:\n", " print(f'Error fetching for: {quandl_db_code}')\n", " ret = pd.concat(df_array, axis=1)\n", " return ret" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [], "source": [ "# Here we are going to figure out what zip codes are in the specified county\n", "# We will also use the logitude / latitude data for plotting\n", "\n", "ZIP_CODE_DTYPE = {'zip_code': str}\n", "ZIP_CODES = pd.read_csv('zip_codes_states.csv', dtype=ZIP_CODE_DTYPE)\n", "county_df = ZIP_CODES['county'] == county\n", "state_df = ZIP_CODES['state'] == state\n", "data = ZIP_CODES[county_df & state_df]" ] }, { "cell_type": "code", "execution_count": 100, "metadata": {}, "outputs": [], "source": [ "data.index = data['zip_code']" ] }, { "cell_type": "code", "execution_count": 101, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Error fetching for: ZILLOW/Z94502_MRPFAH\n", "Error fetching for: ZILLOW/Z94537_MRPFAH\n", "Error fetching for: ZILLOW/Z94540_MRPFAH\n", "Error fetching for: ZILLOW/Z94542_MRPFAH\n", "Error fetching for: ZILLOW/Z94543_MRPFAH\n", "Error fetching for: ZILLOW/Z94544_MRPFAH\n", "Error fetching for: ZILLOW/Z94545_MRPFAH\n", "Error fetching for: ZILLOW/Z94552_MRPFAH\n", "Error fetching for: ZILLOW/Z94557_MRPFAH\n", "Error fetching for: ZILLOW/Z94577_MRPFAH\n", "Error fetching for: ZILLOW/Z94578_MRPFAH\n", "Error fetching for: ZILLOW/Z94579_MRPFAH\n", "Error fetching for: ZILLOW/Z94580_MRPFAH\n", "Error fetching for: ZILLOW/Z94586_MRPFAH\n", "Error fetching for: ZILLOW/Z94601_MRPFAH\n", "Error fetching for: ZILLOW/Z94602_MRPFAH\n", "Error fetching for: ZILLOW/Z94603_MRPFAH\n", "Error fetching for: ZILLOW/Z94604_MRPFAH\n", "Error fetching for: ZILLOW/Z94606_MRPFAH\n", "Error fetching for: ZILLOW/Z94609_MRPFAH\n", "Error fetching for: ZILLOW/Z94610_MRPFAH\n", "Error fetching for: ZILLOW/Z94612_MRPFAH\n", "Error fetching for: ZILLOW/Z94613_MRPFAH\n", "Error fetching for: ZILLOW/Z94614_MRPFAH\n", "Error fetching for: ZILLOW/Z94615_MRPFAH\n", "Error fetching for: ZILLOW/Z94617_MRPFAH\n", "Error fetching for: ZILLOW/Z94618_MRPFAH\n", "Error fetching for: ZILLOW/Z94619_MRPFAH\n", "Error fetching for: ZILLOW/Z94620_MRPFAH\n", "Error fetching for: ZILLOW/Z94621_MRPFAH\n", "Error fetching for: ZILLOW/Z94622_MRPFAH\n", "Error fetching for: ZILLOW/Z94623_MRPFAH\n", "Error fetching for: ZILLOW/Z94624_MRPFAH\n", "Error fetching for: ZILLOW/Z94625_MRPFAH\n", "Error fetching for: ZILLOW/Z94626_MRPFAH\n", "Error fetching for: ZILLOW/Z94627_MRPFAH\n", "Error fetching for: ZILLOW/Z94643_MRPFAH\n", "Error fetching for: ZILLOW/Z94649_MRPFAH\n", "Error fetching for: ZILLOW/Z94659_MRPFAH\n", "Error fetching for: ZILLOW/Z94660_MRPFAH\n", "Error fetching for: ZILLOW/Z94661_MRPFAH\n", "Error fetching for: ZILLOW/Z94662_MRPFAH\n", "Error fetching for: ZILLOW/Z94666_MRPFAH\n", "Error fetching for: ZILLOW/Z94701_MRPFAH\n", "Error fetching for: ZILLOW/Z94702_MRPFAH\n", "Error fetching for: ZILLOW/Z94703_MRPFAH\n", "Error fetching for: ZILLOW/Z94704_MRPFAH\n", "Error fetching for: ZILLOW/Z94705_MRPFAH\n", "Error fetching for: ZILLOW/Z94706_MRPFAH\n", "Error fetching for: ZILLOW/Z94707_MRPFAH\n", "Error fetching for: ZILLOW/Z94708_MRPFAH\n", "Error fetching for: ZILLOW/Z94709_MRPFAH\n", "Error fetching for: ZILLOW/Z94710_MRPFAH\n", "Error fetching for: ZILLOW/Z94712_MRPFAH\n", "Error fetching for: ZILLOW/Z94720_MRPFAH\n" ] } ], "source": [ "# We will call our function to fetch the data from quandl\n", "# There will be errors for data we cannot retrieve\n", "\n", "real_estate_data = get_city_data(data['zip_code'], indicator)" ] }, { "cell_type": "code", "execution_count": 102, "metadata": {}, "outputs": [], "source": [ "# Here we are just doing some transformation and massaging of the data we get back\n", "# If you want more information please run the notebook on your own\n", "\n", "transformed_real_estate_data = real_estate_data.transpose()\n", "column_name = f'{indicator} {end_date}'" ] }, { "cell_type": "code", "execution_count": 103, "metadata": {}, "outputs": [], "source": [ "transformed_real_estate_data[column_name] = transformed_real_estate_data[end_date].astype('float')" ] }, { "cell_type": "code", "execution_count": 104, "metadata": {}, "outputs": [], "source": [ "concat_data = pd.concat([data, transformed_real_estate_data[column_name]], axis=1, sort=True)" ] }, { "cell_type": "code", "execution_count": 105, "metadata": {}, "outputs": [], "source": [ "concat_data.dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [], "source": [ "concat_data['text'] = concat_data['zip_code'] + ' ' + concat_data[column_name].astype(str)" ] }, { "cell_type": "code", "execution_count": 107, "metadata": {}, "outputs": [], "source": [ "df = concat_data" ] }, { "cell_type": "code", "execution_count": 108, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", " | zip_code | \n", "latitude | \n", "longitude | \n", "city | \n", "state | \n", "county | \n", "MRPFAH 2018-06-30 | \n", "text | \n", "
---|---|---|---|---|---|---|---|---|
94501 | \n", "94501 | \n", "37.770563 | \n", "-122.264779 | \n", "Alameda | \n", "CA | \n", "Alameda | \n", "2.698115 | \n", "94501 2.6981150835926 | \n", "
94536 | \n", "94536 | \n", "37.565285 | \n", "-121.982721 | \n", "Fremont | \n", "CA | \n", "Alameda | \n", "2.397260 | \n", "94536 2.3972602739726 | \n", "
94538 | \n", "94538 | \n", "37.509453 | \n", "-121.958320 | \n", "Fremont | \n", "CA | \n", "Alameda | \n", "2.460908 | \n", "94538 2.4609079743008 | \n", "
94539 | \n", "94539 | \n", "37.520339 | \n", "-121.912568 | \n", "Fremont | \n", "CA | \n", "Alameda | \n", "2.297297 | \n", "94539 2.2972972972973 | \n", "
94541 | \n", "94541 | \n", "37.675130 | \n", "-121.974120 | \n", "Hayward | \n", "CA | \n", "Alameda | \n", "1.966925 | \n", "94541 1.9669247009149 | \n", "