{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Extract Voting Data for County\n", "Read the [NC SBE](https://www.ncsbe.gov/data-stats/other-election-related-data ) voter [registration and voter history datasets and extracts relevant fields for Wake Co. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['county_id', 'county_desc', 'voter_reg_num', 'status_cd',\n", " 'voter_status_desc', 'reason_cd', 'voter_status_reason_desc',\n", " 'absent_ind', 'name_prefx_cd', 'last_name', 'first_name', 'middle_name',\n", " 'name_suffix_lbl', 'res_street_address', 'res_city_desc', 'state_cd',\n", " 'zip_code', 'mail_addr1', 'mail_addr2', 'mail_addr3', 'mail_addr4',\n", " 'mail_city', 'mail_state', 'mail_zipcode', 'full_phone_number',\n", " 'race_code', 'ethnic_code', 'party_cd', 'gender_code', 'birth_age',\n", " 'birth_state', 'drivers_lic', 'registr_dt', 'precinct_abbrv',\n", " 'precinct_desc', 'municipality_abbrv', 'municipality_desc',\n", " 'ward_abbrv', 'ward_desc', 'cong_dist_abbrv', 'super_court_abbrv',\n", " 'judic_dist_abbrv', 'nc_senate_abbrv', 'nc_house_abbrv',\n", " 'county_commiss_abbrv', 'county_commiss_desc', 'township_abbrv',\n", " 'township_desc', 'school_dist_abbrv', 'school_dist_desc',\n", " 'fire_dist_abbrv', 'fire_dist_desc', 'water_dist_abbrv',\n", " 'water_dist_desc', 'sewer_dist_abbrv', 'sewer_dist_desc',\n", " 'sanit_dist_abbrv', 'sanit_dist_desc', 'rescue_dist_abbrv',\n", " 'rescue_dist_desc', 'munic_dist_abbrv', 'munic_dist_desc',\n", " 'dist_1_abbrv', 'dist_1_desc', 'dist_2_abbrv', 'dist_2_desc',\n", " 'confidential_ind', 'birth_year', 'ncid', 'vtd_abbrv', 'vtd_desc'],\n", " dtype='object')" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Explore data: reveal columns\n", "dfSample = pd.read_csv('./data/NCSBE/ncvoter_Statewide.txt',sep='\\t',nrows=10,\n", " dtype='str',\n", " encoding = \"ISO-8859-1\")\n", "dfSample.columns" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "birth_state DC\n", "drivers_lic Y\n", "registr_dt 02/23/2018\n", "precinct_abbrv 10N\n", "precinct_desc NORTH MELVILLE\n", "municipality_abbrv MEB\n", "municipality_desc MEBANE\n", "ward_abbrv NaN\n", "ward_desc NaN\n", "cong_dist_abbrv 06\n", "super_court_abbrv 15A\n", "judic_dist_abbrv 15A\n", "nc_senate_abbrv 24\n", "nc_house_abbrv 063\n", "county_commiss_abbrv NaN\n", "county_commiss_desc NaN\n", "township_abbrv NaN\n", "township_desc NaN\n", "school_dist_abbrv NaN\n", "school_dist_desc NaN\n", "fire_dist_abbrv NaN\n", "fire_dist_desc NaN\n", "water_dist_abbrv NaN\n", "water_dist_desc NaN\n", "sewer_dist_abbrv NaN\n", "sewer_dist_desc NaN\n", "sanit_dist_abbrv NaN\n", "sanit_dist_desc NaN\n", "rescue_dist_abbrv NaN\n", "rescue_dist_desc NaN\n", "munic_dist_abbrv MEB\n", "munic_dist_desc MEBANE\n", "dist_1_abbrv 17\n", "dist_1_desc 17TH PROSECUTORIAL\n", "dist_2_abbrv \n", "dist_2_desc \n", "confidential_ind N\n", "birth_year 1978\n", "ncid AA201627\n", "vtd_abbrv 10N\n", "vtd_desc 10N\n", "Name: 1, dtype: object" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "#Explore data: reveal values\n", "dfSample.iloc[1,30:]" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "#Get all the data\n", "dfAll = pd.read_csv('./data/NCSBE/ncvoter_Statewide.txt',\n", " usecols=['county_desc','voter_reg_num','res_street_address','res_city_desc',\n", " 'state_cd','zip_code','race_code','ethnic_code','gender_code','party_cd','ncid'],\n", " sep='\\t',\n", " dtype='str',\n", " encoding = \"ISO-8859-1\")" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [], "source": [ "#Select Wake records\n", "dfWake = dfAll[dfAll['county_desc'] == \"WAKE\"].reindex()\n", "dfWake.drop('county_desc',axis=1,inplace=True)\n", "dfWake.set_index('voter_reg_num',inplace=True)\n", "dfWake.dropna(how='any',inplace=True,\n", " subset=['res_street_address','res_city_desc','state_cd','zip_code'])" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "#Add an address \n", "dfWake['address'] = (dfWake['res_street_address'] + \" \" \n", " + dfWake['res_city_desc'] + \" \" \n", " + dfWake['state_cd'] + \" \"\n", " + dfWake['zip_code'])" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "#Write to a file\n", "dfWake.to_csv('./data/NCSBE/ncvoter_Wake.csv',index=True,index_label='voter_reg_num')" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.8" } }, "nbformat": 4, "nbformat_minor": 2 }