{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualizing the 2016 General Election Polls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from __future__ import print_function\n",
    "\n",
    "import os\n",
    "import subprocess\n",
    "import sys\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "from ipywidgets import VBox, HBox\n",
    "\n",
    "codes = pd.read_csv(os.path.abspath('../data_files/state_codes.csv'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "try:\n",
    "    from pollster import Pollster\n",
    "except ImportError:\n",
    "    print('Pollster not found. Installing Pollster..')\n",
    "    try:\n",
    "        # Install with the kernel's own interpreter so the package lands in the\n",
    "        # environment this notebook runs in, not whichever `pip` is first on PATH.\n",
    "        subprocess.check_call([sys.executable, '-m', 'pip', 'install', 'pollster==0.1.6'])\n",
    "    except (subprocess.CalledProcessError, OSError):\n",
    "        print(\"The pip installation failed. Please manually install Pollster and re-run this notebook.\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_candidate_data(question):\n",
    "    \"\"\"Read the candidate shares out of one Pollster question.\n",
    "\n",
    "    Only the first entry of ``question['subpopulations']`` is used.\n",
    "\n",
    "    Returns\n",
    "    -------\n",
    "    tuple\n",
    "        ``(clinton, trump, other, undecided)``. A share with no matching response\n",
    "        stays at 0.; ``other`` is the sum of every response that is neither\n",
    "        Clinton, Trump nor 'Undecided'.\n",
    "    \"\"\"\n",
    "    clinton, trump, undecided, other = 0., 0., 0., 0.\n",
    "    subpopulations = question['subpopulations']\n",
    "    if not subpopulations:\n",
    "        # Nothing was reported for this question.\n",
    "        return clinton, trump, other, undecided\n",
    "    for candidate in subpopulations[0]['responses']:\n",
    "        if candidate['last_name'] == 'Clinton':\n",
    "            clinton = candidate['value']\n",
    "        elif candidate['last_name'] == 'Trump':\n",
    "            trump = candidate['value']\n",
    "        elif candidate['choice'] == 'Undecided':\n",
    "            undecided = candidate['value']\n",
    "        else:\n",
    "            # One question can list several third-party / 'Other' responses;\n",
    "            # add them up instead of keeping only the last one seen.\n",
    "            other += candidate['value']\n",
    "    return clinton, trump, other, undecided\n",
    "\n",
    "\n",
    "def get_row(question, partisan='Nonpartisan', end_date='2016-06-21'):\n",
    "    \"\"\"Build the ``data`` row for a 2016 presidential question.\n",
    "\n",
    "    Returns a one-element list holding the row dict, or None when the question's\n",
    "    topic does not contain both '2016' and 'Presidential'.\n",
    "    \"\"\"\n",
    "    # NOTE(review): an earlier revision compared the topic against '2016-president';\n",
    "    # confirm which form the API actually returns.\n",
    "    # A missing topic counts as \"no match\" instead of raising TypeError on `in`.\n",
    "    topic = question['topic'] or ''\n",
    "    if '2016' not in topic or 'Presidential' not in topic:\n",
    "        return None\n",
    "    hillary, donald, other, undecided = get_candidate_data(question)\n",
    "    return [{'Name': question['name'], 'Partisan': partisan, 'State': question['state'],\n",
    "             'Date': np.datetime64(end_date), 'Trump': donald, 'Clinton': hillary, 'Other': other,\n",
    "             'Undecided': undecided}]\n",
    "\n",
    "\n",
    "def analyze_polls(polls):\n",
    "    \"\"\"Append one row per matching question in ``polls`` to the global ``data`` frame.\n",
    "\n",
    "    Each poll must expose ``questions``, ``partisan`` and ``end_date``.\n",
    "    \"\"\"\n",
    "    global data\n",
    "    rows = []\n",
    "    for poll in polls:\n",
    "        for question in poll.questions:\n",
    "            resp = get_row(question, partisan=poll.partisan, end_date=poll.end_date)\n",
    "            if resp is not None:\n",
    "                rows.extend(resp)\n",
    "    if not rows:\n",
    "        return\n",
    "    # DataFrame.append was removed in pandas 2.0 and re-copied the frame for every\n",
    "    # row; build the new rows once and concatenate a single time instead.\n",
    "    new_rows = pd.DataFrame(rows, columns=data.columns)\n",
    "    data = new_rows if data.empty else pd.concat([data, new_rows], ignore_index=True)\n",
    "    return"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# Only a missing package is reported as an installation problem; network or parsing\n",
    "# failures below surface with their own traceback instead of being relabelled.\n",
    "try:\n",
    "    from pollster import Pollster\n",
    "except ImportError:\n",
    "    raise ValueError('Please install Pollster and run the functions above')\n",
    "\n",
    "pollster = Pollster()\n",
    "\n",
    "# Getting data from Pollster. This might take a second.\n",
    "raw_data = pollster.charts(topic='2016-president')\n",
    "\n",
    "data = pd.DataFrame(columns=['Name', 'Partisan', 'State', 'Date', 'Trump', 'Clinton', 'Other',\n",
    "                             'Undecided'])\n",
    "\n",
    "for chart in raw_data:\n",
    "    analyze_polls(chart.polls())"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_state_data(code):\n",
    "    \"\"\"Return the polls for the state with FIPS ``code``, sorted by date.\n",
    "\n",
    "    Only rows where both Trump and Clinton have a positive share are kept, so the\n",
    "    result may be an empty frame. Returns None when ``code`` is not listed in\n",
    "    ``codes`` or when ``data`` holds no row at all for that state.\n",
    "    \"\"\"\n",
    "    usps = codes.loc[codes['FIPS'] == code, 'USPS'].values\n",
    "    if len(usps) == 0:\n",
    "        # The map can report ids that are missing from the state table.\n",
    "        return None\n",
    "    in_state = data['State'] == usps[0]\n",
    "    if not in_state.any():\n",
    "        return None\n",
    "    usable = in_state & (data['Trump'] > 0.) & (data['Clinton'] > 0.)\n",
    "    return data[usable].sort_values(by='Date')\n",
    "\n",
    "\n",
    "def get_state_party(code):\n",
    "    \"\"\"Return 'Republican' or 'Democrat' from the most recent usable poll of a state.\n",
    "\n",
    "    Returns None when the state has no usable poll. A tie goes to 'Democrat'.\n",
    "    \"\"\"\n",
    "    polls = get_state_data(code)\n",
    "    if polls is None or polls.shape[0] == 0:\n",
    "        return None\n",
    "    latest = polls.iloc[-1]\n",
    "    return 'Republican' if latest['Trump'] > latest['Clinton'] else 'Democrat'\n",
    "\n",
    "\n",
    "def get_color_data():\n",
    "    \"\"\"Map every FIPS code in ``codes`` to its leading party (or None).\"\"\"\n",
    "    return {fips: get_state_party(fips) for fips in codes['FIPS']}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "from bqplot import (AlbersUSA, Axis, Bars, ColorAxis, DateScale, Figure, LinearScale, Lines, Map,\n",
    "                    OrdinalColorScale, OrdinalScale, Scatter, topo_load)\n",
    "from ipywidgets import Layout"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "dt_x = DateScale()\n",
    "sc_y = LinearScale()\n",
    "\n",
    "time_series = Lines(scales={'x': dt_x, 'y': sc_y}, colors=['#E91D0E', '#2aa1ec'], marker='circle')\n",
    "\n",
    "ax_x = Axis(scale=dt_x, label='Date')\n",
    "ax_y = Axis(scale=sc_y, orientation='vertical', label='Percentage')\n",
    "\n",
    "ts_fig = Figure(marks=[time_series], axes=[ax_x, ax_y], title='General Election - State Polls',\n",
    "                layout=Layout(min_width='650px', min_height='400px'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "sc_geo = AlbersUSA()\n",
    "sc_c1 = OrdinalColorScale(domain=['Democrat', 'Republican'], colors=['#2aa1ec', '#E91D0E'])\n",
    "\n",
    "color_data = get_color_data()\n",
    "\n",
    "map_styles = {'color': color_data,\n",
    "              'scales': {'projection': sc_geo, 'color': sc_c1}, 'colors': {'default_color': 'Grey'}}\n",
    "\n",
    "axis = ColorAxis(scale=sc_c1)\n",
    "\n",
    "states_map = Map(map_data=topo_load('map_data/USStatesMap.json'), tooltip=ts_fig, **map_styles)\n",
    "map_fig = Figure(marks=[states_map], axes=[axis], title='General Election Polls - State Wise')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def hover_callback(name, value):\n",
    "    \"\"\"Redraw the tooltip time series for the state under the cursor.\n",
    "\n",
    "    ``value['data']['id']`` is used as the state's FIPS code.\n",
    "    \"\"\"\n",
    "    fips = value['data']['id']\n",
    "    polls = get_state_data(fips)\n",
    "    if polls is None or polls.shape[0] == 0:\n",
    "        # No usable polls: blank the lines and drop the previous state's title.\n",
    "        time_series.y = [0.]\n",
    "        ts_fig.title = 'General Election - State Polls'\n",
    "        return\n",
    "    time_series.x = polls['Date'].values.astype(np.datetime64)\n",
    "    time_series.y = [polls['Trump'].values, polls['Clinton'].values]\n",
    "    state_name = codes[codes['FIPS'] == fips]['Name'].values[0]\n",
    "    ts_fig.title = str(state_name) + ' Polls - Presidential Election'"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "states_map.on_hover(hover_callback)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": false
   },
   "outputs": [],
   "source": [
    "national = data[(data['State']=='US') & (data['Trump'] > 0.) & (data['Clinton'] > 0.)].sort_values(by='Date')\n",
    "national_dates = national['Date'].values.astype(np.datetime64)\n",
    "\n",
    "# Separate names from the state-chart scales and axes above, so re-running either\n",
    "# cell never rebinds the objects the other figure was built from.\n",
    "national_dt_x = DateScale()\n",
    "national_sc_y = LinearScale()\n",
    "\n",
    "clinton_scatter = Scatter(x=national_dates, y=national['Clinton'],\n",
    "                          scales={'x': national_dt_x, 'y': national_sc_y},\n",
    "                          colors=['#2aa1ec'])\n",
    "\n",
    "trump_scatter = Scatter(x=national_dates, y=national['Trump'],\n",
    "                        scales={'x': national_dt_x, 'y': national_sc_y},\n",
    "                        colors=['#E91D0E'])\n",
    "\n",
    "national_ax_x = Axis(scale=national_dt_x, label='Date', tick_format='%b-%Y', num_ticks=8)\n",
    "national_ax_y = Axis(scale=national_sc_y, orientation='vertical', label='Percentage')\n",
    "\n",
    "scat_fig = Figure(marks=[clinton_scatter, trump_scatter], axes=[national_ax_x, national_ax_y],\n",
    "                  title='General Election - National Polls')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Hover on the map to visualize the poll data for that state."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "VBox([map_fig, scat_fig])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Visualizing the County Results of the 2008 Elections"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "county_data = pd.read_csv(os.path.abspath('../data_files/2008-election-results.csv'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "# 'McCain' is the fallback label: any comparison that is not strictly true\n",
    "# (ties, missing values) leaves the county with it.\n",
    "winner = np.where((county_data['Obama'] > county_data['McCain']).values, 'Obama', 'McCain')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "sc_geo_county = AlbersUSA()\n",
    "sc_c1_county = OrdinalColorScale(domain=['McCain', 'Obama'], colors=['Red', 'DeepSkyBlue'])\n",
    "\n",
    "color_data_county = dict(zip(county_data['FIPS'].values.astype(int), list(winner)))\n",
    "\n",
    "map_styles_county = {'color': color_data_county,\n",
    "                     'scales': {'projection': sc_geo_county, 'color': sc_c1_county},\n",
    "                     'colors': {'default_color': 'Grey'}}\n",
    "\n",
    "axis_county = ColorAxis(scale=sc_c1_county)\n",
    "\n",
    "county_map = Map(map_data=topo_load('map_data/USCountiesMap.json'), **map_styles_county)\n",
    "county_fig = Figure(marks=[county_map], axes=[axis_county], title='US Elections 2008 - Example',\n",
    "                    layout=Layout(min_width='800px', min_height='550px'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "names_sc = OrdinalScale(domain=['Obama', 'McCain'])\n",
    "vote_sc_y = LinearScale(min=0, max=100.)\n",
    "\n",
    "names_ax = Axis(scale=names_sc, label='Candidate')\n",
    "vote_ax = Axis(scale=vote_sc_y, orientation='vertical', label='Percentage')\n",
    "\n",
    "vote_bars = Bars(scales={'x': names_sc, 'y': vote_sc_y}, colors=['#2aa1ec', '#E91D0E'])\n",
    "\n",
    "bar_fig = Figure(marks=[vote_bars], axes=[names_ax, vote_ax], title='Vote Margin',\n",
    "                 layout=Layout(min_width='600px', min_height='400px'))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def county_hover(name, value):\n",
    "    \"\"\"Show the Obama / McCain vote percentages of the hovered county in the tooltip.\n",
    "\n",
    "    ``value['data']['id']`` is matched against ``county_data['FIPS']``; when no row\n",
    "    matches, the bars are zeroed and the title is cleared.\n",
    "    \"\"\"\n",
    "    votes = county_data[county_data['FIPS'] == value['data']['id']]\n",
    "    if votes.shape[0] == 0:\n",
    "        bar_fig.title = ''\n",
    "        vote_bars.y = [0., 0.]\n",
    "        return\n",
    "    dem_vote = float(votes['Obama %'].values[0])\n",
    "    rep_vote = float(votes['McCain %'].values[0])\n",
    "    vote_bars.x, vote_bars.y = ['Obama', 'McCain'], [dem_vote, rep_vote]\n",
    "    bar_fig.title = 'Vote % - ' + value['data']['name']\n",
    "\n",
    "\n",
    "county_map.on_hover(county_hover)\n",
    "county_map.tooltip = bar_fig"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "#### Hover on the map to visualize the voting percentage for each candidate in that county."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "scrolled": true
   },
   "outputs": [],
   "source": [
    "county_fig"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.7.0"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 1
}