{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "Created by [SmirkyGraphs](http://smirkygraphs.github.io/). Code: [GitHub](https://github.com/SmirkyGraphs/Python-Notebooks). Source: [RI BOE](http://www.elections.ri.gov/elections/preresults/).\n", "
" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Cleaning Voter Turnout " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "using pandas to get voter turnout for each race and by precinct" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "# NOTE \n", "# Replaced BALLOT TOTAL with page 1 ballot totals\n", "# before 2016 in these precincts with multiple pages \n", "# prior to 2016 each page was counted as 1 ballot\n", "# leading to well over 100% turnout which I don't\n", "# think was true in reality.\n", "\n", "import pandas as pd\n", "import glob as glob" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "files = glob.glob('./data/raw/*.xls')\n", "\n", "rename = {'City/Town':'Precinct Name', 'Precinct': 'Precinct Name', 'Election Day': 'E Day', \n", " 'Mail Ballots': 'Mail', 'Contest': 'Office Title', 'Contest Title': 'Office Title', \n", " 'Candidate Name': 'Candidate', 'Precinct No.': 'Precinct #'}\n", "\n", "cols = ['Precinct Name', 'Precinct #', 'Total', 'E Day', 'Mail', 'Office Title']" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "contest_frame = []\n", "turnout_frame = []\n", "\n", "for f in files:\n", " df = pd.read_excel(f, converters={'Precinct #': '{:0>4}'.format})\n", " df = df.rename(columns=rename)\n", " df = df[cols]\n", " \n", " # cleaning data\n", "\n", " df['Precinct Name'] = df['Precinct Name'].str.strip()\n", " \n", " df = df[~df['Precinct Name'].str.contains('Limited')]\n", " df = df[~df['Precinct Name'].str.contains('Federal')]\n", " df = df[~df['Precinct Name'].str.contains('President')]\n", " \n", " df = df[~df['Office Title'].str.contains('Page')]\n", "\n", " # registered voters by precinct\n", "\n", " registered_voters = df[df['Office Title'] == 'REGISTERED VOTERS - TOTAL']\n", "\n", " columns = ['E Day', 'Mail', 'Office Title']\n", "\n", " registered_voters = registered_voters.drop(columns=columns)\n", " registered_voters = registered_voters.rename(columns = {'Total': 'registered_voters'})\n", " registered_voters = registered_voters.reset_index(drop=True)\n", "\n", " # ballots cast by precinct\n", "\n", " ballots_cast = df[df['Office Title'] == 'BALLOTS CAST - TOTAL']\n", "\n", " columns = ['E Day', 'Mail', 'Office Title']\n", "\n", " ballots_cast = ballots_cast.drop(columns=columns)\n", " ballots_cast = ballots_cast.rename(columns = {'Total': 'ballots_cast'})\n", " ballots_cast = ballots_cast.reset_index(drop=True)\n", " \n", " # mail in voters by precinct\n", "\n", " mail_in = df[df['Office Title'] == 'BALLOTS CAST - TOTAL']\n", "\n", " columns = ['E Day', 'Total', 'Office Title']\n", "\n", " mail_in = mail_in.drop(columns=columns)\n", " mail_in = mail_in.rename(columns = {'Mail': 'mail_in_ballots'})\n", " mail_in = mail_in.reset_index(drop=True)\n", "\n", " # merging the 2 datasets\n", "\n", " voter_table = registered_voters.merge(ballots_cast)\n", " voter_table = voter_table.merge(mail_in)\n", "\n", " # getting total votes by precinct\n", "\n", " contest_votes = df.groupby(['Office Title', 'Precinct Name'])['Total'].sum()\n", " contest_votes = pd.DataFrame(contest_votes)\n", " contest_votes = contest_votes.reset_index()\n", "\n", " # combining all datasets and getting turnout\n", "\n", " contest_votes = contest_votes.merge(voter_table, how='left', on='Precinct Name')\n", " contest_votes = contest_votes.groupby(['Office Title'])['registered_voters', 'ballots_cast', 'mail_in_ballots'].sum()\n", " contest_votes['turnout'] = contest_votes['ballots_cast']/contest_votes['registered_voters']\n", " \n", " voter_table['turnout'] = voter_table['ballots_cast']/voter_table['registered_voters']\n", " voter_table['percent_mail'] = voter_table['mail_in_ballots']/voter_table['ballots_cast']\n", " \n", " # adding tag of year/cycle\n", " \n", " contest_votes['election_type_year'] = f[7:-4]\n", " voter_table['election_type_year'] = f[7:-4]\n", " \n", " # adding looped cleaned data to frame lists\n", " \n", " contest_frame.append(contest_votes)\n", " turnout_frame.append(voter_table)\n", " \n", " \n", "\n", "contest_frame = pd.concat(contest_frame, sort=False)\n", "turnout_frame = pd.concat(turnout_frame, sort=False)\n", "\n", "turnout_frame.to_csv('./data/clean/precinct_turnout.csv', index=False)\n", "contest_frame.to_csv('./data/clean/contest_turnout.csv', index=False)" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.5" } }, "nbformat": 4, "nbformat_minor": 2 }