{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 2016 presidential primary poll aggregator\n",
    "\n",
    "Fetches primary polls from the HuffPost Pollster API, pools the polls inside a trailing window into a recency-weighted *superpoll*, and runs a Monte Carlo simulation to estimate how often each candidate finishes first.\n",
    "\n",
    "The printed results are stored outputs; re-running the calls at the bottom needs network access to the API."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "import collections\n",
    "import datetime\n",
    "\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "import requests\n",
    "\n",
    "API_ENDPOINT = \"http://elections.huffingtonpost.com/pollster/api/polls\"\n",
    "PAGE_SIZE = 10  # a page holding fewer polls than this is treated as the last one\n",
    "REQUEST_TIMEOUT = 30  # seconds; keeps a stalled request from hanging the notebook\n",
    "\n",
    "np.random.seed(2016)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_all_results(state='US', party='gop', start_date='2015-6-1'):\n",
    "    \"\"\"Yield one record per named candidate response in each matching poll.\n",
    "\n",
    "    Requests successive pages from API_ENDPOINT until a page comes back with\n",
    "    fewer than PAGE_SIZE polls.\n",
    "\n",
    "    Args:\n",
    "        state: value sent as the API's 'state' parameter.\n",
    "        party: slug inserted into the topic name, e.g. 'gop' or 'dem'.\n",
    "        start_date: value sent as the API's 'after' parameter.\n",
    "\n",
    "    Yields:\n",
    "        dict with the keys 'poll', 'date', 'filter', 'obs', 'candidate' and 'mean'.\n",
    "\n",
    "    Raises:\n",
    "        requests.HTTPError: if the API responds with an error status.\n",
    "    \"\"\"\n",
    "    topic = '2016-president-{}-primary'.format(party)\n",
    "    params = {'state': state, 'after': start_date, 'topic': topic}\n",
    "    page = 1\n",
    "    while True:\n",
    "        params['page'] = page\n",
    "        reply = requests.get(API_ENDPOINT, params=params, timeout=REQUEST_TIMEOUT)\n",
    "        # Surface HTTP errors here rather than trying to read an error body as polls.\n",
    "        reply.raise_for_status()\n",
    "        page_results = reply.json()\n",
    "        for poll in page_results:\n",
    "            # next() gets a default: a bare StopIteration inside a generator would end\n",
    "            # it silently on older Pythons and raise RuntimeError on 3.7+ (PEP 479).\n",
    "            subpops = next((question['subpopulations']\n",
    "                            for question in poll['questions']\n",
    "                            if question['topic'] == topic), None)\n",
    "            if not subpops:\n",
    "                continue  # no question on this topic, or no subpopulation listed for it\n",
    "            subpop = subpops[0]\n",
    "            for response in subpop['responses']:\n",
    "                # Only responses carrying a first name are kept.\n",
    "                if response['first_name']:\n",
    "                    yield {'poll': poll['id'],\n",
    "                           'date': poll['end_date'],\n",
    "                           'filter': subpop['name'].lower(),\n",
    "                           'obs': subpop['observations'],\n",
    "                           'candidate': '{} {}'.format(response['first_name'], response['last_name']),\n",
    "                           'mean': response['value']}\n",
    "\n",
    "        if len(page_results) < PAGE_SIZE:\n",
    "            break\n",
    "        page += 1"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "collapsed": true
   },
   "outputs": [],
   "source": [
    "def get_polls(state='US', party='gop', start_date='2015-6-1'):\n",
    "    \"\"\"Collect get_all_results() into a DataFrame whose 'date' column is datetime.\n",
    "\n",
    "    The columns are fixed up front so that an empty result still has a 'date'\n",
    "    column instead of raising KeyError.\n",
    "    \"\"\"\n",
    "    columns = ['poll', 'date', 'filter', 'obs', 'candidate', 'mean']\n",
    "    records = list(get_all_results(state=state, party=party, start_date=start_date))\n",
    "    polls = pd.DataFrame(records, columns=columns)\n",
    "    polls['date'] = pd.to_datetime(polls['date'])\n",
    "    return polls"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def get_distribution_for_date(polls, target_date=None, window=30):\n",
    "    \"\"\"Pool recent polls into one recency-weighted estimate per candidate.\n",
    "\n",
    "    Args:\n",
    "        polls: DataFrame with 'date', 'candidate', 'obs' and 'mean' columns,\n",
    "            where 'mean' is a percentage (it is divided by 100 here).\n",
    "        target_date: datetime to evaluate at; defaults to the current time.\n",
    "        window: number of trailing days of polls to include.\n",
    "\n",
    "    Returns:\n",
    "        DataFrame indexed by candidate with 'mean' (pooled share as a fraction)\n",
    "        and 'std' (sqrt(p * (1 - p) / n)) columns; rows whose mean is not\n",
    "        greater than zero are dropped.\n",
    "    \"\"\"\n",
    "    if target_date is None:\n",
    "        target_date = datetime.datetime.today()\n",
    "    recent = polls[\n",
    "        (polls['date'] <= target_date)\n",
    "        & (polls['date'] > target_date - datetime.timedelta(window))\n",
    "    ]\n",
    "    # Inverse-square decay: a poll ending on target_date weighs 1, one day earlier 1/4, ...\n",
    "    age_in_days = (target_date - recent['date']) / np.timedelta64(1, 'D')\n",
    "    weights = 1 / np.square(age_in_days + 1)\n",
    "    weighted = recent[['candidate']].copy()\n",
    "    weighted['n'] = weights * recent['obs']\n",
    "    weighted['votes'] = recent['mean'] / 100 * recent['obs'] * weights\n",
    "    weighted = weighted.groupby('candidate').sum()\n",
    "    weighted['mean'] = weighted['votes'] / weighted['n']\n",
    "    weighted['std'] = np.sqrt((weighted['mean'] * (1 - weighted['mean'])) / weighted['n'])\n",
    "    return weighted[['mean', 'std']].query('mean > 0').copy()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def run_simulation(dists, trials=10000):\n",
    "    \"\"\"Estimate how often each candidate draws the highest share.\n",
    "\n",
    "    Args:\n",
    "        dists: DataFrame indexed by candidate with 'mean' and 'std' columns.\n",
    "        trials: number of simulated contests.\n",
    "\n",
    "    Returns:\n",
    "        Series mapping each candidate that won at least one trial to the\n",
    "        fraction of trials won; empty when dists has no rows.\n",
    "    \"\"\"\n",
    "    if dists.empty:\n",
    "        # No candidates: there is nothing to take the maximum over.\n",
    "        return pd.Series(dtype=float)\n",
    "    # One vectorised draw of shape (trials, candidates) instead of a Python loop.\n",
    "    draws = np.random.normal(dists['mean'].values, dists['std'].values,\n",
    "                             size=(trials, len(dists)))\n",
    "    runs = pd.DataFrame(draws, columns=dists.index)\n",
    "    results = pd.Series(collections.Counter(runs.idxmax(axis=1)))\n",
    "    return results / results.sum()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "collapsed": false
   },
   "outputs": [],
   "source": [
    "def predict(state='us', party='gop', window=30, trials=10000, target_date=None):\n",
    "    \"\"\"Fetch polls, then print the pooled superpoll and the simulated win shares.\n",
    "\n",
    "    Args:\n",
    "        state: passed through to get_polls.\n",
    "        party: passed through to get_polls.\n",
    "        window: trailing days of polls to pool.\n",
    "        trials: number of simulated contests.\n",
    "        target_date: datetime to evaluate at; defaults to the current time.\n",
    "    \"\"\"\n",
    "    polls = get_polls(state=state, party=party)\n",
    "    dists = get_distribution_for_date(polls, window=window, target_date=target_date)\n",
    "    if dists.empty:\n",
    "        print('No polls found for state={!r}, party={!r} within the {}-day window.'.format(\n",
    "            state, party, window))\n",
    "        return\n",
    "    as_percent = '{:.1%}'.format\n",
    "    print('Superpoll Results:')\n",
    "    # Column-wise Series.map rather than DataFrame.applymap, which newer pandas deprecates.\n",
    "    print(dists.sort_values('mean', ascending=False).apply(lambda column: column.map(as_percent)))\n",
    "    print()\n",
    "    print('Simulation Results:')\n",
    "    print(run_simulation(dists, trials=trials).sort_values(ascending=False).map(as_percent))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Superpoll Results:\n",
      "                 mean   std\n",
      "candidate                  \n",
      "Donald Trump    28.2%  2.0%\n",
      "Ted Cruz        23.6%  1.8%\n",
      "Marco Rubio     17.4%  1.6%\n",
      "Ben Carson       7.6%  1.2%\n",
      "Rand Paul        4.7%  0.9%\n",
      "Jeb Bush         4.1%  0.9%\n",
      "Mike Huckabee    3.3%  0.8%\n",
      "John Kasich      2.8%  0.7%\n",
      "Carly Fiorina    2.4%  0.7%\n",
      "Chris Christie   2.1%  0.6%\n",
      "Rick Santorum    1.3%  0.5%\n",
      "Jim Gilmore      0.1%  0.2%\n",
      "\n",
      "Simulation Results:\n",
      "Donald Trump    96.0%\n",
      "Ted Cruz         4.0%\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "target_date = datetime.datetime(2016, 2, 1)\n",
    "predict(state='ia', party='gop', target_date=target_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Superpoll Results:\n",
      "                  mean   std\n",
      "candidate                   \n",
      "Hillary Clinton  47.4%  2.4%\n",
      "Bernie Sanders   46.0%  2.4%\n",
      "Martin O'Malley   3.6%  0.9%\n",
      "\n",
      "Simulation Results:\n",
      "Hillary Clinton    66.0%\n",
      "Bernie Sanders     34.0%\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "predict(state='ia', party='dem', target_date=target_date)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Superpoll Results:\n",
      "                 mean   std\n",
      "candidate                  \n",
      "Donald Trump    27.5%  2.1%\n",
      "Ted Cruz        23.1%  2.0%\n",
      "Marco Rubio     18.1%  1.9%\n",
      "Ben Carson       7.5%  1.3%\n",
      "Rand Paul        5.1%  1.1%\n",
      "Jeb Bush         4.1%  0.9%\n",
      "Mike Huckabee    3.5%  0.9%\n",
      "John Kasich      2.8%  0.8%\n",
      "Carly Fiorina    2.5%  0.7%\n",
      "Chris Christie   2.0%  0.7%\n",
      "Rick Santorum    1.3%  0.5%\n",
      "\n",
      "Simulation Results:\n",
      "Donald Trump    93.6%\n",
      "Ted Cruz         6.4%\n",
      "Marco Rubio      0.0%\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "predict(state='ia', party='gop', target_date=target_date, window=4)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {
    "collapsed": false
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Superpoll Results:\n",
      "                  mean   std\n",
      "candidate                   \n",
      "Hillary Clinton  47.0%  2.7%\n",
      "Bernie Sanders   46.9%  2.7%\n",
      "Martin O'Malley   3.2%  1.0%\n",
      "\n",
      "Simulation Results:\n",
      "Hillary Clinton    51.0%\n",
      "Bernie Sanders     49.0%\n",
      "dtype: object\n"
     ]
    }
   ],
   "source": [
    "predict(state='ia', party='dem', target_date=target_date, window=4)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.5.1"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}