{ "cells": [ { "cell_type": "markdown", "id": "af9d0bd0", "metadata": {}, "source": [ "# Compiling, homogenizing, and quality-controlling King County bike-related citation records\n", "\n", "##### Ethan C. Campbell, for Central Seattle Greenways / Helmet Law Working Group\n", "\n", "For questions, contact me at ethanchenbell@gmail.com.\n", "\n", "For analysis of the data compiled below, refer to the companion notebook on my GitHub: [***csg_analyze_king_county_bike_citations.ipynb***](https://github.com/ethan-campbell/Miscellaneous/blob/master/csg_analyze_king_county_bike_citations.ipynb)." ] }, { "cell_type": "markdown", "id": "c3fdf788", "metadata": {}, "source": [ "#### Import packages and set file system" ] }, { "cell_type": "code", "execution_count": 333, "id": "76d408bd", "metadata": {}, "outputs": [], "source": [ "%matplotlib inline\n", "from numpy import *\n", "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import matplotlib.dates as mdates\n", "import matplotlib as mpl\n", "# mpl.rcParams['figure.dpi'] = 300 # turn on for higher-quality figure export\n", "from datetime import datetime, timedelta\n", "import platform\n", "import warnings\n", "import sys\n", "\n", "from IPython.core.display import display, HTML\n", "# display(HTML(\"\"))\n", "\n", "# choose root directory for data files\n", "if platform.system() == 'Darwin':\n", " data_dir = '/Users/Ethan/Documents/Finances and records/2020-06-30 - Helmet Law Working Group/Data/'\n", "elif platform.system() == 'Linux':\n", " data_dir = '/dat1/ethancc/CSG/'\n", " \n", "# filepaths\n", "aoc_dir = data_dir + '2021-06-22 - WA State Administrative Office of the Courts data/'" ] }, { "cell_type": "markdown", "id": "b338682a", "metadata": {}, "source": [ "#### Load and merge bike citation records from WA State Administrative Office of the Courts (AOC)" ] }, { "cell_type": "code", "execution_count": 334, "id": "55a9ae68", "metadata": {}, "outputs": [], "source": [ "# load bike infraction data from WA State Administrative Office of the Courts\n", "# note: \"KCDC\" = King County District Court\n", "# \"KCMC\" / \"MCs\" = municipal courts in King County\n", "kcdc_citations = pd.read_excel(aoc_dir + '210419-000051_KCDC_Bicycle_Viols - updated.xlsx',header=2,skipfooter=1)\n", "kcmc_citations = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=3,sheet_name='Bicycle Charges')\n", "kcmc_lea_data = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=2,sheet_name='LEA')\n", "kcmc_lfo_data = pd.read_excel(aoc_dir + '210419-000051_KCMC_Bicycle_Viols - updated.xlsx',header=2,sheet_name='LFOs')\n", "\n", "# merge in data on officers, badge numbers, and accounts receivables (AR) charges (all provided for municipal courts only)\n", "kcmc_lea_data = kcmc_lea_data.drop(columns=['Court Name','Case Token','Case LEA Name'])\n", "kcmc_citations = kcmc_citations.merge(kcmc_lea_data,how='left',on='Case Number')\n", "kcmc_lfo_data = kcmc_lfo_data.drop(columns=['Court Name','Case Token'])\n", "kcmc_citations = kcmc_citations.merge(kcmc_lfo_data,how='left',on='Case Number')\n", "\n", "# standardize column names\n", "kcdc_citations = kcdc_citations.rename(columns={'Case Key':'Case Key (KCDC) or Token (MCs)',\n", " 'Case Type Code & Desc':'Case Type',\n", " 'Case LEA':'Law Enforcement Agency',\n", " 'Charge Violation Date':'Violation Date',\n", " 'Charge Law Number':'Law Code',\n", " 'Charge Law':'Law Description',\n", " 'Charge Result':'Disposition',\n", " 'Charge Result Date':'Disposition Date',\n", " 'Case Participant Actor Name':'Defendant Name',\n", " 'Actor Person Gender':'Defendant Gender',\n", " 'Actor Person Race':'Defendant Race',\n", " 'Actor Person Ethnicity':'Defendant Ethnicity'})\n", "kcmc_citations = kcmc_citations.rename(columns={'Case Token':'Case Key (KCDC) or Token (MCs)',\n", " 'Case LEA Name':'Law Enforcement Agency',\n", " 'Case Citation Charge Date':'Violation Date',\n", " 'Charge Law Number':'Law Code',\n", " 'Charge':'Law Description',\n", " 'Charge Disposition':'Disposition',\n", " 'Charge Disposition Date':'Disposition Date',\n", " 'Person Name':'Defendant Name',\n", " 'Person Gender':'Defendant Gender',\n", " 'Person Race':'Defendant Race',\n", " 'Person Ethnicity':'Defendant Ethnicity',\n", " 'Official Name':'Officer Name',\n", " 'Official Officer Badge Number':'Officer Badge Number'})\n", "\n", "# redact identifying defendant data and unneeded columns\n", "# NOTE: keep only Federal Way Municipal Court defendant names to allow checking for duplicate citations with separate FWMC data\n", "kcdc_citations = kcdc_citations.drop(columns=['#Age at Viol Dt','Case Participant Type Code'])\n", "kcmc_citations = kcmc_citations.drop(columns=['#Age at Viol Dt','Specific Participant Type Code'])\n", "kcdc_citations['Defendant Name'] = NaN\n", "kcmc_citations.loc[kcmc_citations['Court Name'] != 'FEDERAL WAY MUNICIPAL COURT','Defendant Name'] = NaN\n", "\n", "# merge KCDC and MC data\n", "kc_citations = pd.concat([kcmc_citations,kcdc_citations],ignore_index=True)\n", "\n", "# flag all as originating from query for helmet and non-helmet bike violations\n", "kc_citations['Originating Query'] = 'All bike violations (requested KCHC 9.10.010 and 9.15.010, RCW sections, WAC sections, and local code chapters)'" ] }, { "cell_type": "markdown", "id": "2ad4a51f", "metadata": {}, "source": [ "#### Remove records that are not bike-related" ] }, { "cell_type": "code", "execution_count": 335, "id": "ce3a197c", "metadata": {}, "outputs": [], "source": [ "# remove one citation under King County Code 14A.40.010d (requiring motorized foot scooter users to wear a helmet)\n", "kc_citations = kc_citations[kc_citations['Law Code'] != 'kcc14a.40.010.d']\n", "\n", "# remove three concurrent citations for single person by North Bend Police Department, two of which are \"bicycle\" citations\n", "# but the third of which is under NBMC 10.20.030 (requiring motorized foot scooter users to wear a helmet)\n", "kc_citations = kc_citations[kc_citations['Case Number'] != '4Z0454185']\n", "\n", "# remove one citation under Kent Municipal Code 9.44.050 (requiring motorized foot scooter users to wear a helmet)\n", "kc_citations = kc_citations[kc_citations['Law Code'] != '9.44.050']" ] }, { "cell_type": "markdown", "id": "acfa25dc", "metadata": {}, "source": [ "#### Standardize and homogenize data" ] }, { "cell_type": "code", "execution_count": 336, "id": "c6b54470", "metadata": {}, "outputs": [], "source": [ "kc_citations['Court Name'] = kc_citations['Court Name'].str.title()\n", "kc_citations.replace({'Court Name':{'King District Court - Kcdc':'King County District Court',\n", " 'Kcdc Auburn Courthouse':'King County District Court',\n", " 'Seatac Municipal Court':'SeaTac Municipal Court'}},inplace=True)\n", "kc_citations.replace({'Case Type':{'IN - Infraction':'Infraction',\n", " 'CR - Criminal':'Criminal',\n", " \"Defendant's race\":'Infraction',\n", " 'Limited to the defendant':'Infraction',\n", " 'Calculated by days between defendant birth date and charge violation date':'Infraction',\n", " 'defendant gender':'Infraction'}},inplace=True)\n", "kc_citations['Law Enforcement Agency'] = kc_citations['Law Enforcement Agency'].str.title()\n", "kc_citations.replace({'Law Enforcement Agency':{'Maple Valley Police Department':\"King County Sheriff's Office / Maple Valley Police Department\",\n", " 'Burien Police Department':\"King County Sheriff's Office / Burien Police Department\",\n", " 'Covington Police Department':\"King County Sheriff's Office / Covington Police Department\",\n", " 'Kenmore City Of':\"King County Sheriff's Office / Kenmore Police Department\",\n", " 'Woodinville Police Department':\"King County Sheriff's Office / Woodinville Police Department\",\n", " 'Sammamish Police Department':\"King County Sheriff's Office / Sammamish Police Department\",\n", " 'King Co Police Department':\"King County Sheriff's Office\",\n", " 'Lake Forest Park Police Dept':'Lake Forest Park Police Department',\n", " 'Port Of Seattle Police Department':'Port of Seattle Police Department',\n", " 'Seatac Police Department':\"King County Sheriff's Office / SeaTac Police Department\",\n", " 'Shoreline City Of':\"King County Sheriff's Office / Shoreline Police Department\",\n", " 'University Of Washington Police':'University Of Washington Police Department',\n", " 'Wa St Patrol':'Washington State Patrol'}},inplace=True)\n", "kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'North Bend Police Department',\n", " kc_citations['Violation Date'] <= datetime(2014,3,8)),\n", " 'Law Enforcement Agency'] = \"King County Sheriff's Office / North Bend Police Department\"\n", "kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'North Bend Police Department',\n", " kc_citations['Violation Date'] > datetime(2014,3,8)),\n", " 'Law Enforcement Agency'] = \"Snoqualmie/North Bend Police Department\"\n", "kc_citations.loc[logical_and(kc_citations['Law Enforcement Agency'] == 'Snoqualmie Police Department',\n", " kc_citations['Violation Date'] > datetime(2014,3,8)),\n", " 'Law Enforcement Agency'] = \"Snoqualmie/North Bend Police Department\"\n", "\n", "kc_citations['Law Description'] = kc_citations['Law Description'].str.title()\n", "kc_citations.replace({'Law Description':{'Bicycle Helmet Regulations':'Bicycle Helmet Required',\n", " 'Bicycle Helmet Violation':'Bicycle Helmet Required',\n", " 'Bicycle Helmets':'Bicycle Helmet Required',\n", " 'Fail To Wear Bicycle Helmet':'Bicycle Helmet Required',\n", " 'Helmet Required':'Bicycle Helmet Required',\n", " 'Helmet Required Riding Bicycle':'Bicycle Helmet Required',\n", " 'Helmet Required When Riding Bicyle':'Bicycle Helmet Required',\n", " 'No Bicycle Helmet':'Bicycle Helmet Required',\n", " 'No Bicyle Helmet':'Bicycle Helmet Required',\n", " 'Require Bicycle Helmet Guardian Of':'Bicycle Helmet Required (Guardian of Minor)',\n", " 'Requirement Re Bicycle Helmet':'Bicycle Helmet Required',\n", " 'Riding Bicycle W/O Helmet':'Bicycle Helmet Required',\n", " 'Riding Bicycle W/Out Helmet':'Bicycle Helmet Required',\n", " 'Riding Bicycle Without Helmet':'Bicycle Helmet Required',\n", " 'Bicycle Fail To Yield':'Bicycle Failure to Yield',\n", " 'Bicycle Leaving Curb X-Walk Spd Zn':'Bicycle Leaving Curb or Crosswalk in Speed Zone',\n", " 'No Bicycle Lights As Required':'Bicycle Lights Required',\n", " 'Bicycle Riding On Sidewalk':'Bicycle Riding on Sidewalk',\n", " 'Bicycle Riding On Sidewalks':'Bicycle Riding on Sidewalk',\n", " 'Defective Equipment-Bicycle':'Bicycle Defective Equipment',\n", " 'Bicycle-Fail To Stop At Stop Sign':'Bicycle Failure to Stop at Stop Sign',\n", " 'Bicycle-Negligence':'Bicycle Negligence',\n", " 'Bicycles Obedience To Traffic Dev':'Bicycle Obedience to Traffic Control Devices',\n", " 'Bicycles Ride On Right Side':'Riding Bicycle on Right Side',\n", " 'Bike On Sidewalk In Business Dist':'Bicycle on Sidewalk in Business District',\n", " 'Carrying Articles On Bicycle':'Carrying Articles on Bicycle',\n", " 'Fld To Give Hand Signals/Bike':'Bicycle Failure to Give Hand Signals',\n", " 'Improper Operation Of Bicycle':'Improper Operation of Bicycle',\n", " 'No Bicycles On Skate Park':'No Bicycles on Skate Park',\n", " 'Ride Bicycle On Transit Property':'Riding Bicycle on Transit Property',\n", " 'Riding Bicycle On Sidewalk/Path':'Riding Bicycle on Sidewalk or Path',\n", " 'Riding Bicycles On Right Side':'Riding Bicycle on Right Side',\n", " 'Speeding On Bicycle Trail':'Speeding on Bicycle Trail',\n", " 'Unlawful Riding On Bicycle':'Unlawful Riding on Bicycle',\n", " 'Violating Laws While Riding Bike':'Violating Laws While Riding Bicycle'}},inplace=True)\n", "kc_citations.loc[logical_and(kc_citations['Law Code'] == '46.61.780',\n", " kc_citations['Law Description'] == 'Bicycle Violation'),\n", " 'Law Description'] = 'Bicycle Defective Equipment'\n", "kc_citations.replace({'Disposition':{'Dismissed W/Prejudice':'Dismissed with Prejudice',\n", " 'Dismissed W/O Prejudice':'Dismissed without Prejudice',\n", " 'Dismissed After Deferral':'Dismissed after Deferral',\n", " 'Committed After Deferral':'Committed after Deferral'}},inplace=True)\n", "kc_citations.replace({'Defendant Race':{'White (Caucasian)':'White',\n", " 'Black (African American)':'Black',\n", " 'Asian':'Asian or Pacific Islander',\n", " 'Unknown':'Unknown/Other',\n", " 'Multiracial':'Unknown/Other'}},inplace=True)\n", "kc_citations.replace({'Defendant Ethnicity':{'Hispanic':'Hispanic/Latino',\n", " 'Hispanic or Latino':'Hispanic/Latino',\n", " 'Non-Hispanic':'Not Hispanic/Latino',\n", " 'Not Hispanic or Latino':'Not Hispanic/Latino'}},inplace=True)\n", "kc_citations['Officer Name'] = kc_citations['Officer Name'].str.title()\n", "combined_officer_strings = [name for name in kc_citations['Officer Name'].value_counts().index if ' ' in name]\n", "fixed_officer_strings = [name.split(' ')[0] for name in kc_citations['Officer Name'].value_counts().index if ' ' in name]\n", "for ofc_idx, ofc_name in enumerate(combined_officer_strings):\n", " kc_citations.replace({'Officer Name':{ofc_name:fixed_officer_strings[ofc_idx]}},inplace=True)\n", "kc_citations.replace({'Law Code':{'D.10.12.010(1)':'Des Moines Municipal Code 10.12.010 or 10.12.010(1)',\n", " 'ECM1804':'[Enumclaw - no match found] \"ECM 1804\"',\n", " 'ECM1807':'[Enumclaw - no match found] \"ECM 1807\"',\n", " 'MVO.8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',\n", " 'M8.15.030':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',\n", " 'FWRC8.25.060':'Federal Way Revised Code 8.25.060',\n", " 'FWC15.196':'Federal Way Revised Code 8.25.060',\n", " 'IQM10.44.025':'Issaquah Municipal Code 10.44.025',\n", " 'SNL10.30.050':'Snoqualmie Municipal Code 10.30.050',\n", " 'K9.41.030':'Kent City Code 9.41.030',\n", " '9.41.030':'Kent City Code 9.41.030',\n", " 'LFM9.18.020':'Lake Forest Park Municipal Code 9.18.020',\n", " '10.54.080':'Pacific Municipal Code 10.54.080',\n", " 'P.10.54.080':'Pacific Municipal Code 10.54.080',\n", " 'R6-26(3)(A)':'Renton Municipal Code 6-26-3(A) or 6-26-7',\n", " 'R6-26-7':'Renton Municipal Code 6-26-3(A) or 6-26-7',\n", " '10.56.185':'Auburn City Code 10.56.185',\n", " 'amc 10.56.185':'Auburn City Code 10.56.185',\n", " 'amc10.56.185':'Auburn City Code 10.56.185',\n", " 'ke9.05.450':'Kenmore Municipal Code 9.05.450 (partially adopts KCHC Title 9 by reference)',\n", " 'be11.60.090':'Bellevue City Code 11.60.090',\n", " 'bu8.37.050':'Burien Municipal Code 8.37.050 or 8.37.050.1',\n", " 'bu8.37.050.1':'Burien Municipal Code 8.37.050 or 8.37.050.1',\n", " 'KCHC9.10.010A':'King County Health Code 9.10.010A or 9.15.010',\n", " '9.10.010.a':'King County Health Code 9.10.010A or 9.15.010',\n", " '9.15.010':'King County Health Code 9.10.010A or 9.15.010',\n", " '9.10.010.b':'King County Health Code 9.10.010B'}},inplace=True)\n", "kc_citations.replace({'Law Code':{'46.61.780':'RCW 46.61.780',\n", " '46.61.755':'RCW 46.61.755',\n", " '46.61.770':'RCW 46.61.770',\n", " '46.61.758':'RCW 46.61.758',\n", " '46.61.775':'RCW 46.61.775',\n", " '46.61.760':'RCW 46.61.760',\n", " '46.61.235.2B':'RCW 46.61.235.2B',\n", " '46.61.235.2b':'RCW 46.61.235.2B',\n", " '46.61.235.5-2b':'RCW 46.61.235.2B/5',\n", " '308.330.545':'WAC 308-330-545',\n", " '308.330.555':'WAC 308-330-555',\n", " '308-330-555':'WAC 308-330-555',\n", " 'KCC7.12.295.A':'King County Code 7.12.295 or 7.12.295.A',\n", " '28.96.010.a4':'King County Code 28.96.010.A4',\n", " 'amc 10.56.110':'Auburn City Code 10.56.110',\n", " 'amc10.56.110':'Auburn City Code 10.56.110',\n", " '10.56.110':'Auburn City Code 10.56.110',\n", " '10.56.120':'Auburn City Code 10.56.120',\n", " '10.56.140':'Auburn City Code 10.56.140',\n", " 'amc 10.56.140':'Auburn City Code 10.56.140',\n", " 'LFMORD798.2D':'Lake Forest Park Ordinance 798.2D',\n", " 'LFM798.2B':'Lake Forest Park Ordinance 798.2B',\n", " 'FWRC8.25.020.1':'Federal Way Revised Code 8.52.010.1',\n", " 'smc11.44.120':'Seattle Municipal Code 11.44.120',\n", " 'BDM8.28.040':'Black Diamond Municipal Code 8.28.040'}},inplace=True)" ] }, { "cell_type": "markdown", "id": "cd6160cd", "metadata": {}, "source": [ "#### Load and merge Des Moines Municipal Court records" ] }, { "cell_type": "code", "execution_count": 337, "id": "fd4f1450", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Court NameCase NumberViolation DateLaw CodeLaw DescriptionOfficer NameOfficer Badge NumberDefendant RaceDefendant Gender
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Court Name, Case Number, Violation Date, Law Code, Law Description, Officer Name, Officer Badge Number, Defendant Race, Defendant Gender]\n", "Index: []" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "dmmc_citations = pd.read_excel(data_dir + '2020-07-21 - Des Moines Municipal Court helmet infraction data - transcribed.xlsx',\n", " header=0)\n", "\n", "# identify records not already present in existing data\n", "new_indices = [idx for idx, case_num in enumerate(dmmc_citations['Case Number']) \\\n", " if case_num not in kc_citations['Case Number'].values]\n", "dmmc_citations = dmmc_citations.loc[new_indices]\n", "display(dmmc_citations)\n", "\n", "# note: no new records to merge" ] }, { "cell_type": "markdown", "id": "9543ce4d", "metadata": {}, "source": [ "#### Load and merge Renton Municipal Court records" ] }, { "cell_type": "code", "execution_count": 338, "id": "f102d21e", "metadata": {}, "outputs": [], "source": [ "rmc_citations = pd.read_excel(data_dir + '2020-07-22 - Renton Municipal Court helmet infraction data (2007-08-20 to 2020-06-30).xlsx',\n", " header=4)\n", "\n", "# redact identifying defendant data and unneeded columns\n", "rmc_citations = rmc_citations.drop(columns=['Case Def Name'])\n", "\n", "# identify records not already present in existing data\n", "new_indices = [idx for idx, case_num in enumerate(rmc_citations['Case #']) \\\n", " if case_num not in kc_citations['Case Number'].values]\n", "rmc_citations = rmc_citations.loc[new_indices]\n", "\n", "# display(rmc_citations)" ] }, { "cell_type": "code", "execution_count": 339, "id": "f1772b4b", "metadata": {}, "outputs": [], "source": [ "# drop columns with missing/poor/unneeded data\n", "rmc_citations = rmc_citations.drop(columns=['Unnamed: 2','Type'])\n", "\n", "# standardize column names\n", "rmc_citations = rmc_citations.rename(columns={'Filed Date':'Case File Date',\n", " 'Case #':'Case Number',\n", " 'LEA':'Law Enforcement Agency',\n", " 'Official Officer Badge Number':'Officer Badge Number',\n", " 'Viol Date':'Violation Date',\n", " 'Person Gender Code':'Defendant Gender',\n", " 'Person Race Code':'Defendant Race',\n", " 'Person Ethnicity Code':'Defendant Ethnicity',\n", " 'Dispo Date':'Disposition Date',\n", " 'Dispo':'Disposition'})\n", "\n", "# standardize data\n", "rmc_citations.replace({'Law Enforcement Agency':{'RPD':'Renton Police Department'},\n", " 'Defendant Gender':{'M':'Male'},\n", " 'Defendant Race':{'W':'White'},\n", " 'Defendant Ethnicity':{'U':'Unknown'},\n", " 'Disposition':{'DO':'Unknown (\"DO\")',\n", " 'C':'Committed'}},inplace=True)\n", "\n", "# add missing columns\n", "rmc_citations['Court Name'] = 'Renton Municipal Court (via PDR)'\n", "rmc_citations['Case Type'] = 'Infraction'\n", "rmc_citations['Law Code'] = 'Renton Municipal Code 6-26-3(A) or 6-26-7'\n", "rmc_citations['Law Description'] = 'Bicycle Helmet Required'\n", "rmc_citations['Originating Query'] = 'Helmet violations only (requested RMC 6-26)'\n", "\n", "# add to rest of data\n", "kc_citations = pd.merge(kc_citations,rmc_citations,how='outer')" ] }, { "cell_type": "markdown", "id": "d6f77c2b", "metadata": {}, "source": [ "#### Load and merge King County Sheriff's Office records\n", "\n", "Note: missing cities, towns, and jurisdictions (i.e., unincorporated King County) were filled in by looking up given addresses and cross-streets on Google Maps and the [Unincorporated King County Community Service Areas](https://kingcounty.gov/~/media/depts/local-services/community-service-areas/map-all-areas-detail.ashx?la=en) map prior to loading the spreadsheet below." ] }, { "cell_type": "code", "execution_count": 340, "id": "3994045f", "metadata": {}, "outputs": [], "source": [ "kcso_citations = pd.read_excel(data_dir + \"2020-07-27 - King County Sheriff's Office helmet infraction data (2010-03-16 to 2019-08-22) - missing jurisdictions filled in.xlsx\",\n", " header=0)\n", "\n", "# redact identifying defendant data and unneeded columns\n", "kcso_citations = kcso_citations.drop(columns=['Age','Height','Weight','Eye Color','Hair Color',\n", " 'Officer Notes','Case Number',\n", " 'Primary Trafficway','Block Number','Reference Trafficway'])" ] }, { "cell_type": "code", "execution_count": 341, "id": "140393cb", "metadata": {}, "outputs": [], "source": [ "# standardize column names\n", "kcso_citations['Violation Date'] = kcso_citations['Date / Time of Incident'].dt.date\n", "kcso_citations = kcso_citations.rename(columns={'Date / Time of Incident':'Violation Datetime',\n", " 'Ticket Number':'Case Number',\n", " 'Citation Type':'Case Type',\n", " 'Violation Code':'Law Code',\n", " 'Violation Title':'Law Description',\n", " 'Agency':'Law Enforcement Agency',\n", " 'Badge':'Officer Badge Number',\n", " 'Officer':'Officer Name',\n", " 'Sex':'Defendant Gender',\n", " 'Race':'Defendant Race'})\n", "\n", "# split records representing multiple citations into two separate lines\n", "kcso_citations = (kcso_citations.set_index([col_name for col_name in kcso_citations.keys() \\\n", " if col_name not in ['Law Code','Law Description']]) \n", " .apply(lambda col: col.str.split(';').explode())\n", " .reset_index()\n", " .reindex(kcso_citations.columns, axis=1))\n", "\n", "# standardize data\n", "kcso_citations.replace({'Law Enforcement Agency':{'Kenmore PD':\"King County Sheriff's Office / Kenmore Police Department\",\n", " 'Covington PD':\"King County Sheriff's Office / Covington Police Department\",\n", " 'Maple Valley PD':\"King County Sheriff's Office / Maple Valley Police Department\",\n", " 'Newcastle PD':\"King County Sheriff's Office / Newcastle Police Department\",\n", " 'Metro Transit Police':\"King County Sheriff's Office / King County Metro Transit Police\",\n", " 'Shoreline PD':\"King County Sheriff's Office / Shoreline Police Department\",\n", " 'Woodinville PD':\"King County Sheriff's Office / Woodinville Police Department\",\n", " 'Sammamish PD':\"King County Sheriff's Office / Woodinville Police Department\"},\n", " 'City':{'Vashon Island':'Unincorporated King County',\n", " 'Vashon':'Unincorporated King County',\n", " 'White Center':'Unincorporated King County',\n", " 'Queen Anne':'Maple Valley'},\n", " 'Defendant Gender':{'M':'Male','F':'Female'},\n", " 'Defendant Race':{'Unknown':'Unknown/Other',\n", " 'American Indian':'American Indian or Alaskan Native'},\n", " 'Case Type':{'Infraction Non-traffic':'Infraction Non-Traffic'}},inplace=True)\n", "kcso_citations.loc[kcso_citations['Defendant Race'] == 'Hispanic','Defendant Ethnicity'] = 'Hispanic/Latino'\n", "kcso_citations.loc[kcso_citations['Defendant Race'] != 'Hispanic','Defendant Ethnicity'] = 'Unknown'\n", "kcso_citations['Law Description'] = kcso_citations['Law Description'].str.title()\n", "kcso_citations.replace({'Law Description':{'Requirement Re Bicycle Helmet':'Bicycle Helmet Required',\n", " 'Bicycle Helmet Violation':'Bicycle Helmet Required',\n", " 'Bicycle Helmets':'Bicycle Helmet Required',\n", " 'Helmet Required':'Bicycle Helmet Required',\n", " 'Fail To Wear Bicycle Helmet':'Bicycle Helmet Required',\n", " ' Fail To Wear Bicycle Helmet':'Bicycle Helmet Required',\n", " 'Ride Bike W/O Helmet':'Bicycle Helmet Required',\n", " 'Require Bicycle Helmet Guardian Of':'Bicycle Helmet Required (Guardian of Minor)',\n", " ' Trail Use Violation':'Trail Use Violation',\n", " 'Pedestrian On Roadway Unlawfully':'Pedestrian on Roadway Unlawfully'}},inplace=True)\n", "kcso_citations.replace({'Law Code':{'9.10.010.A':'King County Health Code 9.10.010A or 9.15.010',\n", " '9.15.010':'King County Health Code 9.10.010A or 9.15.010',\n", " '9.10.010.B':'King County Health Code 9.10.010B',\n", " ' KCC.7.12.295':'King County Code 7.12.295 or 7.12.295.A',\n", " 'KE9.05.450':'Kenmore Municipal Code 9.05.450 (partially adopts KCHC Title 9 by reference)',\n", " 'BE11.60.090':'Bellevue City Code 11.60.090',\n", " ' BE11.60.090':'Bellevue City Code 11.60.090',\n", " 'M8.15.030':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',\n", " 'MVO.8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',\n", " 'M8.15.030A':'Maple Valley Municipal Code 8.15.030 or 8.15.030A',\n", " '46.61.250.1':'RCW 46.61.250.1'}},inplace=True)" ] }, { "cell_type": "code", "execution_count": 342, "id": "e6c5a039", "metadata": {}, "outputs": [], "source": [ "# identify records already present in existing data\n", "# extract columns to add to (or check with) existing data\n", "existing_indices = [idx for idx, case_num in enumerate(kcso_citations['Case Number']) \\\n", " if case_num in kc_citations['Case Number'].values]\n", "existing_kcso_citations = kcso_citations.loc[existing_indices][['Case Number','Law Enforcement Agency','City',\n", " 'Officer Badge Number','Officer Name']]\n", "\n", "# merge into rest of data (overwrite existing columns)\n", "for idx in existing_kcso_citations.index:\n", " for col_name in existing_kcso_citations.keys()[1:]:\n", " kc_citations.loc[kc_citations['Case Number'] == existing_kcso_citations.loc[idx,'Case Number'],col_name] = \\\n", " existing_kcso_citations.loc[idx,col_name]" ] }, { "cell_type": "code", "execution_count": 343, "id": "05f3d628", "metadata": {}, "outputs": [], "source": [ "# identify records not already present in existing data\n", "new_indices = [idx for idx, case_num in enumerate(kcso_citations['Case Number']) \\\n", " if case_num not in kc_citations['Case Number'].values]\n", "new_kcso_citations = kcso_citations.loc[new_indices]\n", "\n", "# add missing columns\n", "new_kcso_citations['Court Name'] = \"Unknown (King County Sheriff's Office records)\"\n", "new_kcso_citations['Disposition'] = 'Unknown'\n", "new_kcso_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010)'\n", "\n", "# add to rest of data\n", "kc_citations = pd.merge(kc_citations,new_kcso_citations,how='outer')" ] }, { "cell_type": "markdown", "id": "3b216538", "metadata": {}, "source": [ "#### Load and merge SeaTac Municipal Court records" ] }, { "cell_type": "code", "execution_count": 344, "id": "a46b480d", "metadata": {}, "outputs": [], "source": [ "stmc_citations = pd.read_excel(data_dir + '2020-08-06 - SeaTac Municipal Court helmet infraction data - transcribed copy.xlsx',\n", " header=0)\n", "\n", "# redact identifying defendant data and unneeded columns\n", "stmc_citations = stmc_citations.drop(columns=['Defendant Name'])\n", "\n", "# identify records not already present in existing data\n", "new_indices = [idx for idx, case_num in enumerate(stmc_citations['Case Number']) \\\n", " if case_num not in kc_citations['Case Number'].values]\n", "stmc_citations = stmc_citations.loc[new_indices]\n", "\n", "# display(stmc_citations)" ] }, { "cell_type": "code", "execution_count": 345, "id": "f50f0533", "metadata": {}, "outputs": [], "source": [ "# approximate violation date as case file date\n", "stmc_citations['Violation Date'] = stmc_citations['Case File Date'].copy()\n", "\n", "# add missing columns\n", "stmc_citations['Court Name'] = 'SeaTac Municipal Court (via PDR)'\n", "stmc_citations['Law Enforcement Agency'] = \"King County Sheriff's Office / SeaTac Police Department\"\n", "stmc_citations['Case Type'] = 'Infraction'\n", "stmc_citations['Disposition'] = 'Unknown'\n", "stmc_citations['Defendant Gender'] = 'Unknown'\n", "stmc_citations['Defendant Race'] = 'Unknown/Other'\n", "stmc_citations['Defendant Ethnicity'] = 'Unknown'\n", "stmc_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010, STMC 9.30.020 and 9.30.060)'\n", "\n", "# add to rest of data\n", "kc_citations = pd.merge(kc_citations,stmc_citations,how='outer')" ] }, { "cell_type": "markdown", "id": "1113fe4c", "metadata": {}, "source": [ "#### Load and merge Federal Way Municipal Court records" ] }, { "cell_type": "code", "execution_count": 346, "id": "7237ee07", "metadata": {}, "outputs": [], "source": [ "fwmc_citations = pd.read_excel(data_dir + '2020-08-05 - Federal Way Municipal Court helmet infraction data (2010-06-14 to 2014-06-08).xlsx',\n", " header=0,parse_dates=[0])\n", "\n", "# match missing citation numbers where possible; otherwise, assign unique citation numbers with \"Unknown_FWMC\" in their names\n", "unknown_counter = 1\n", "for idx in fwmc_citations.index:\n", " if fwmc_citations.loc[idx,'Citation Number'] == 'Unknown':\n", " citation_match = logical_and.reduce((kc_citations['Court Name'] == 'Federal Way Municipal Court',\n", " kc_citations['Violation Date'] == fwmc_citations.loc[idx,'Date Issued'],\n", " kc_citations['Defendant Name'].str.split(',').str[0] \\\n", " == fwmc_citations.loc[idx,'Defendant'].split(',')[0]))\n", " if sum(citation_match) > 0:\n", " fwmc_citations.loc[idx,'Citation Number'] = kc_citations.loc[where(citation_match),'Case Number'].values\n", " else:\n", " fwmc_citations.loc[idx,'Citation Number'] = f'Unknown_FWMC_Assigned_{unknown_counter:03}'\n", " unknown_counter += 1\n", "\n", "# redact identifying defendant data and unneeded columns\n", "fwmc_citations = fwmc_citations.drop(columns=['Defendant'])\n", "\n", "# also redact identifying defendant data from AOC records\n", "kc_citations = kc_citations.drop(columns=['Defendant Name'])\n", "\n", "# identify records not already present in existing data\n", "new_indices = [idx for idx, case_num in enumerate(fwmc_citations['Citation Number']) \\\n", " if case_num not in kc_citations['Case Number'].values]\n", "fwmc_citations = fwmc_citations.loc[new_indices]\n", "\n", "# display(fwmc_citations)" ] }, { "cell_type": "code", "execution_count": 347, "id": "fa9a7c75", "metadata": {}, "outputs": [], "source": [ "# drop columns with missing/poor/unneeded data\n", "fwmc_citations = fwmc_citations.drop(columns=['Race/Gender','Citation Location','Officer','Unnamed: 8'])\n", "\n", "# standardize column names\n", "fwmc_citations = fwmc_citations.rename(columns={'Date Issued':'Violation Date',\n", " 'Offense Description':'Law Description',\n", " 'Citation Number':'Case Number',\n", " 'Badge #':'Officer Badge Number'})\n", "\n", "# standardize data\n", "fwmc_citations.replace({'Law Description':{'RIDING BICYCLE W/O HELMET':'Bicycle Helmet Required'}},inplace=True)\n", "\n", "# add missing columns\n", "fwmc_citations['Court Name'] = 'Federal Way Municipal Court (via PDR)'\n", "fwmc_citations['Law Enforcement Agency'] = 'Federal Way Police Department'\n", "fwmc_citations['Case Type'] = 'Infraction'\n", "fwmc_citations['Law Code'] = 'Federal Way Revised Code 8.25.060'\n", "fwmc_citations['Disposition'] = 'Unknown'\n", "fwmc_citations['Defendant Gender'] = 'Unknown'\n", "fwmc_citations['Defendant Race'] = 'Unknown/Other'\n", "fwmc_citations['Defendant Ethnicity'] = 'Unknown'\n", "fwmc_citations['Originating Query'] = 'Helmet violations only (requested FWRC 8.25.060)'\n", "\n", "# add to rest of data\n", "kc_citations = pd.merge(kc_citations,fwmc_citations,how='outer')" ] }, { "cell_type": "markdown", "id": "6ab60ac6", "metadata": {}, "source": [ "#### Load and merge Kent Municipal Court records" ] }, { "cell_type": "code", "execution_count": 348, "id": "eb3c6054", "metadata": {}, "outputs": [], "source": [ "kmc_citations = pd.read_excel(data_dir + '2020-07-28 - Kent Municipal Court helmet infraction data (2007-10-30 to 2020-06-15).xlsx',\n", " header=4,parse_dates=[0,6,12])\n", "\n", "# nullify invalid date entries\n", "kmc_citations.loc[kmc_citations['Dispo Date'] < datetime(1990,1,1),'Dispo Date'] = NaN\n", "\n", "# fill duplicate rows with valid data from above or below, then remove duplicates\n", "kmc_citations['Official Officer Badge Number'].fillna(method='ffill',axis=0,limit=1,inplace=True)\n", "kmc_citations['Person Race'].fillna(method='bfill',axis=0,limit=1,inplace=True)\n", "kmc_citations['Person Gender'].fillna(method='bfill',axis=0,limit=1,inplace=True)\n", "kmc_citations = kmc_citations.drop_duplicates().reset_index(drop=True)\n", "\n", "# redact identifying defendant data and unneeded columns\n", "kmc_citations.drop(columns=['Case Def Name','Unnamed: 2','Unnamed: 9','Unnamed: 11'],inplace=True)\n", "\n", "# identify records not already present in existing data\n", "new_indices = [idx for idx, case_num in enumerate(kmc_citations['Case #']) \\\n", " if case_num not in kc_citations['Case Number'].values]\n", "kmc_citations = kmc_citations.loc[new_indices]" ] }, { "cell_type": "code", "execution_count": 349, "id": "eca78179", "metadata": {}, "outputs": [], "source": [ "# standardize column names\n", "kmc_citations = kmc_citations.rename(columns={'Filed Date':'Case File Date',\n", " 'Case #':'Case Number',\n", " 'Type':'Case Type',\n", " 'LEA':'Law Enforcement Agency',\n", " 'Official Officer Badge Number':'Officer Badge Number',\n", " 'Viol Date':'Violation Date',\n", " 'Person Race':'Defendant Race',\n", " 'Person Gender':'Defendant Gender',\n", " 'Dispo Date':'Disposition Date',\n", " 'Dispo':'Disposition'})\n", "\n", "# standardize data\n", "kmc_citations.replace({'Case Type':{'IN':'Infraction',\n", " 'IT':'Infraction Traffic'},\n", " 'Law Enforcement Agency':{'KNP':'Kent Police Department'},\n", " 'Defendant Race':{'Asian':'Asian or Pacific Islander'},\n", " 'Disposition':{'C':'Committed',\n", " 'P':'Paid',\n", " 'D':'Dismissed'}},inplace=True)\n", "\n", "# add missing columns\n", "kmc_citations['Court Name'] = 'Kent Municipal Court (via PDR)'\n", "kmc_citations['Law Code'] = 'Kent City Code 9.41.030'\n", "kmc_citations['Law Description'] = 'Bicycle Helmet Required'\n", "kmc_citations['Defendant Ethnicity'] = 'Unknown'\n", "kmc_citations['Originating Query'] = 'Helmet violations only (requested KCHC 9.10.010 and 9.15.010, KCC 9.41.030 and 9.41.070)'\n", "\n", "# add to rest of data\n", "kc_citations = pd.merge(kc_citations,kmc_citations,how='outer')" ] }, { "cell_type": "markdown", "id": "727b0c48", "metadata": {}, "source": [ "#### In all previously loaded records, split officer name into first/middle/last" ] }, { "cell_type": "code", "execution_count": 350, "id": "46c4ce8a", "metadata": {}, "outputs": [], "source": [ "for row_idx in kc_citations.index:\n", " ofc_name = kc_citations.loc[row_idx,'Officer Name']\n", " if type(ofc_name) == float: # NaN value\n", " pass\n", " else:\n", " ofc_name = ofc_name.replace('.',' ')\n", " if ',' in ofc_name: # assume single comma and order: last, first (middle)\n", " last, first_middle = ofc_name.split(',')\n", " first_middle = first_middle.split()\n", " if len(first_middle) == 1:\n", " first = first_middle[0]\n", " middle = NaN\n", " elif len(first_middle) == 2:\n", " if len(first_middle[1]) == 1:\n", " first, middle = first_middle\n", " else:\n", " first = first_middle[0] + ' ' + first_middle[1]\n", " middle = NaN\n", " else:\n", " print('ERROR: found first + middle name with over 2 components')\n", " else: # assume order: first (middle) last\n", " components = ofc_name.split()\n", " if len(components) == 2:\n", " first, last = components\n", " middle = NaN\n", " elif len(components) == 3:\n", " if len(components[1]) == 1:\n", " first, middle, last = components\n", " else: # ambiguous if 2nd component is part of first or last name; assume two-part first name\n", " first = components[0] + ' ' + components[1]\n", " last = components[2]\n", " middle = NaN\n", " elif len(components) == 4:\n", " if len(components[1]) == 1:\n", " first, middle, last1, last2 = components\n", " last = last1 + ' ' + last2\n", " elif len(components[2]) == 1:\n", " first1, first2, middle, last = components\n", " first = first1 + ' ' + first2\n", " else: # ambiguous\n", " first1, first2, last1, last2 = components\n", " first = first1 + ' ' + first2 \n", " last = last1 + ' ' + last2\n", " middle = NaN\n", " else:\n", " print('ERROR: found name with 1 component or over 4 components')\n", "\n", " kc_citations.loc[row_idx,'Officer First Name'] = first\n", " kc_citations.loc[row_idx,'Officer Middle Name'] = middle\n", " kc_citations.loc[row_idx,'Officer Last Name'] = last\n", " \n", "# remove original, inconsistently-formatted name column\n", "kc_citations.drop(columns=['Officer Name'],inplace=True)" ] }, { "cell_type": "markdown", "id": "e7451b1d", "metadata": {}, "source": [ "#### Load and merge Seattle Municipal Court records (batches #1 and #2)" ] }, { "cell_type": "code", "execution_count": 351, "id": "854fb0b6", "metadata": {}, "outputs": [], "source": [ "# load Seattle Municipal Court infraction records from 2003-2016 (incomplete due to purge schedule)\n", "smc_citations_2003_2016 = pd.read_excel(data_dir + '2020-07-20 - Seattle Municipal Court bicycle violations (2003-01-01 to 2016-12-31).xlsx',\n", " index_col=None,parse_dates=[0,1])\n", "\n", "# load Seattle Municipal Court infraction records from 2017-2020\n", "smc_citations_2017_2020 = pd.read_excel(data_dir + '2020-07-16 - Seattle Municipal Court bicycle violations (2017-01-01 to 2020-06-30).xlsx',\n", " index_col=None,parse_dates=[0,1])\n", "\n", "# fix misnamed columns in records from 2003-2016\n", "temp_officer_number = smc_citations_2003_2016['Officer First Name'].copy()\n", "temp_first_name = smc_citations_2003_2016['Officer Middle Initial'].copy()\n", "temp_middle_initial = smc_citations_2003_2016['Officer Last Name'].copy()\n", "temp_last_name = smc_citations_2003_2016['Officer Number'].copy()\n", "smc_citations_2003_2016['Officer First Name'] = temp_first_name\n", "smc_citations_2003_2016['Officer Middle Initial'] = temp_middle_initial\n", "smc_citations_2003_2016['Officer Last Name'] = temp_last_name\n", "smc_citations_2003_2016['Officer Number'] = temp_officer_number\n", "\n", "# merge old and new data\n", "smc_citations = pd.concat([smc_citations_2003_2016,smc_citations_2017_2020]).copy().reset_index(drop=True)\n", "\n", "# remove duplicate entries (same defendant, same violation)\n", "# but retain different violations (e.g. helmet and light violations) issued to same defendant under same citation number\n", "# NOTE: this will impact analyses of officers, since it seems most (if not all) duplicates list two different officers\n", "smc_citations = smc_citations.drop_duplicates(subset=['Citation Number','Violation Code'],keep='first').reset_index(drop=True)\n", "\n", "# drop unneeded columns\n", "smc_citations.drop(columns=['Violation Location'],inplace=True)\n", "\n", "# load Seattle Municipal Court infraction records from 2003-2021\n", "# (including charge amounts and status, plus more defendant data)\n", "smc_batch2_pt1 = pd.read_excel(data_dir + '2021-01-28 - Seattle Municipal Court bicycle violations (2003-01-01 to 2016-12-31).xlsx',\n", " index_col=None,parse_dates=[12,13])\n", "smc_batch2_pt2 = pd.read_excel(data_dir + '2021-02-16 - Seattle Municipal Court bicycle violations (2017-01-01 to 2021-02-16).xlsx',\n", " index_col=None,parse_dates=[12,13])\n", "smc_citations_batch2 = pd.concat([smc_batch2_pt1,smc_batch2_pt2]).copy().reset_index().drop(columns='index')\n", "smc_citations_batch2.rename(columns={'Race Code':'Defendant Race','Gender Code':'Defendant Gender'},inplace=True)\n", "\n", "# drop unneeded columns and standardize column names\n", "smc_citations_batch2.drop(columns=['Case Number','City','State','Zip Code',\n", " 'Defendant First Name','Defendant Last Name',\n", " 'Violation Location','In Collection','Obligation Number'],inplace=True)\n", "\n", "# remove duplicate entries (same defendant, same violation) – SEE DETAILS ABOVE IN SIMILAR LINE\n", "smc_citations_batch2 = smc_citations_batch2.drop_duplicates(subset=['Citation Number','Violation Code'],keep='first').reset_index(drop=True)\n", "\n", "# fix financial charge information, based on information from Gary Ireland, SMC PIO:\n", "# \"Instances where the Obligation Amount Outstanding is either 0 or less than the difference\n", "# between the Assessed and Paid amounts, is caused by waiver or write-off by a judge for\n", "# one reason or another.\"\n", "# this appears to be accurate for all citations issued (\"Violation Date\") in 2011 onwards;\n", "# however, inspection of the data shows nearly all records prior to 2011 have a zero value for\n", "# \"Obligation Amount Outstanding,\" which is highly suspicious. thus, we infer:\n", "# - prior to 2011: AR Ordered Amount = \"Amount Assessed\"\n", "# AR Adjusted Amount = NaN (no data available)\n", "# - from 2011 onwards: AR Ordered Amount = \"Amount Assessed\" (NOTE: this doesn't seem to include late/default penalties)\n", "# AR Adjusted Amount = MIN(\"Amount Assessed\", \"Amount Paid\"+\"Amount Outstanding\")\n", "smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year < 2011,'AR Adjusted Amount'] = NaN\n", "smc_citations_batch2['TEMP - paid plus outstanding'] \\\n", " = smc_citations_batch2['Obligation Amount Paid'] + \\\n", " smc_citations_batch2['Obligation Amount Outstanding']\n", "smc_citations_batch2['TEMP - inferred adjusted amount'] \\\n", " = smc_citations_batch2[['Obligation Amount Assessed','TEMP - paid plus outstanding']].min(axis=1)\n", "smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year >= 2011,'AR Adjusted Amount'] \\\n", " = smc_citations_batch2.loc[smc_citations_batch2['Violation Date'].dt.year >= 2011,'TEMP - inferred adjusted amount']\n", "smc_citations_batch2.drop(columns=['TEMP - paid plus outstanding',\n", " 'TEMP - inferred adjusted amount'],inplace=True)\n", "smc_citations_batch2 \\\n", " = smc_citations_batch2.rename(columns={'Obligation Amount Assessed':'AR Ordered Amount',\n", " 'Obligation Amount Paid':'AR Paid Amount',\n", " 'Obligation Amount Outstanding':'AR Due Amount'})\n", "\n", "# merge two batches of SMC records\n", "# NOTE: see below for \"fix\" to merge duplicate rows with slightly different missing (NaN) columns\n", "smc_citations = pd.merge(smc_citations,smc_citations_batch2,how='outer')\n", "\n", "# merge date and time columns\n", "smc_citations['Violation Datetime'] = smc_citations['Violation Date'] \\\n", " + pd.to_timedelta(floor(smc_citations['Violation Time']/100),'h') \\\n", " + pd.to_timedelta(smc_citations['Violation Time'] - 100*floor(smc_citations['Violation Time']/100),'m')\n", "\n", "# drop unneeded columns\n", "smc_citations.drop(columns=['Violation Time'],inplace=True)\n", "\n", "# handle entries missing Defendant Race information\n", "smc_citations['Defendant Race'] = smc_citations['Defendant Race'].fillna('U')\n", "\n", "# clean up officer badge numbers\n", "smc_citations['Officer Number'] = smc_citations['Officer Number'].replace(9999,NaN)\n", "\n", "# fix previous merge by merging duplicate rows with slightly different missing (NaN) columns\n", "smc_citations = smc_citations.groupby(['Citation Number','Violation Code'])\\\n", " .apply(lambda x: x.fillna(method='ffill').\\\n", " fillna(method='bfill').\\\n", " drop_duplicates()).\\\n", " reset_index(drop=True)\n", "\n", "# display(smc_citations)" ] }, { "cell_type": "code", "execution_count": 352, "id": "5dd07734", "metadata": {}, "outputs": [], "source": [ "# standardize column names\n", "smc_citations = smc_citations.rename(columns={'Filing Date':'Case File Date',\n", " 'Citation Number':'Case Number',\n", " 'Violation Code':'Law Code',\n", " 'Violation Description':'Law Description',\n", " 'Officer Middle Initial':'Officer Middle Name',\n", " 'Officer Number':'Officer Badge Number'})\n", "\n", "# standardize data\n", "smc_citations['Law Description'] = smc_citations['Law Description'].str.title()\n", "smc_citations['Officer First Name'] = smc_citations['Officer First Name'].str.title()\n", "smc_citations['Officer Last Name'] = smc_citations['Officer Last Name'].str.title()\n", "smc_citations['Law Code'] = [old_str.replace('SMC','Seattle Municipal Code ') if old_str[:3] == 'SMC' else old_str \\\n", " for old_str in smc_citations['Law Code']]\n", "smc_citations['Law Code'] = ['Seattle Municipal Code ' + old_str if old_str[:5] == '11.44' else old_str \\\n", " for old_str in smc_citations['Law Code']]\n", "smc_citations.replace({'Law Code':{'R9.15.010':'King County Health Code 9.10.010A or 9.15.010',\n", " 'KCHC9.10.010':'King County Health Code 9.10.010A or 9.15.010', # assuming section A\n", " 'H9.10.010':'King County Health Code 9.10.010A or 9.15.010'}, # assuming section A\n", " 'Law Description':{'Rights And Duties Of Riders':'Bicycle Rider Rights/Duties Violation',\n", " 'Bike Rider Rights/Duties Violation':'Bicycle Rider Rights/Duties Violation',\n", " 'Bike, Lamps/Reflectors':'Bicycle No/Improper Lamps/Reflectors',\n", " 'Bike, R/W Xwalk':'Bicycle Yield Right of Way in Crosswalk',\n", " 'Bike R/W In Crosswalk':'Bicycle Yield Right of Way in Crosswalk',\n", " 'Bike, Ride On Road':'Bicycle Riding on Roadways Rules Violation',\n", " 'Bike Riding On Road Rules Viol':'Bicycle Riding on Roadways Rules Violation',\n", " 'Bike No/Improper Lamps/Reflectors':'Bicycle No/Improper Lamps/Reflectors',\n", " 'Bike Unsafe Pass On Right':'Bicycle Unsafe Pass on Right',\n", " 'Bike Yield R/W On Public Path':'Bicycle Riding on Sidewalk/Public Path Rules Violation',\n", " 'Bike, Yield R/W':'Bicycle Riding on Sidewalk/Public Path Rules Violation',\n", " 'Bike, Pass, Right':'Bicycle Unsafe Pass on Right',\n", " 'Bike No/Improper Hand Signals':'Bicycle No/Improper Hand Signals',\n", " 'Bike, Hand Signals':'Bicycle No/Improper Hand Signals',\n", " 'Bike, No Brakes':'Bicycle No/Improper Brakes',\n", " 'Bike No/Improper Brakes':'Bicycle No/Improper Brakes',\n", " 'Bike, Control':'Bicycle Control One Hand Minimum Required',\n", " \"Bike Control One Hand Min Req'D\":'Bicycle Control One Hand Minimum Required',\n", " 'Bike, Clinging To Veh':'Bicycle Clinging to Vehicle',\n", " 'Bike Clinging/Attaching To Vehicle':'Bicycle Clinging to Vehicle',\n", " 'Bike, Excess Passengers':'Bicycle Excess Passengers',\n", " 'Bike, Fail To Ride On Seat':'Bicycle Failure to Ride on Seat',\n", " 'Bike, Responsibility - Child/Parent':'Bicycle Parent of Child Responsibility',\n", " 'Bike Ride >2 Abreast Road/Sidewalk':'Bicycle Riding More Than 2 Abreast'},\n", " 'Defendant Race':{'W':'White','B':'Black','A':'Asian or Pacific Islander',\n", " 'I':'American Indian or Alaskan Native','U':'Unknown/Other'},\n", " 'Defendant Gender':{'M':'Male','F':'Female','U':'Unknown'}},inplace=True)\n", "\n", "# add missing columns\n", "smc_citations['Court Name'] = 'Seattle Municipal Court (via PDR)'\n", "smc_citations['Case Type'] = 'Infraction'\n", "smc_citations['Law Enforcement Agency'] = 'Seattle Police Department'\n", "smc_citations['Disposition'] = 'Unknown'\n", "smc_citations['Defendant Ethnicity'] = 'Unknown'\n", "smc_citations['Originating Query'] = 'All bike violations (requested KCHC 9.10.010 and 9.15.010, SMC 11.44)'\n", "\n", "# for third time, remove duplicate entries (same defendant, same violation) – SEE DETAILS ABOVE IN SIMILAR LINE\n", "# this is necessary because \"Violation Code\" column was not yet homogenized above\n", "smc_citations = smc_citations.drop_duplicates(subset=['Case Number','Law Code'],keep='first').reset_index(drop=True)\n", "\n", "# identify records not already present in existing data\n", "# (NOTE: for SMC data, all records are not already present, so this is a formality)\n", "new_indices = [idx for idx, case_num in enumerate(smc_citations['Case Number']) \\\n", " if case_num not in kc_citations['Case Number'].values]\n", "smc_citations = smc_citations.loc[new_indices]\n", "\n", "# add to rest of data\n", "kc_citations = pd.merge(kc_citations,smc_citations,how='outer')" ] }, { "cell_type": "markdown", "id": "5e260127", "metadata": {}, "source": [ "#### Assign towns/cities/jurisdictions for each record" ] }, { "cell_type": "code", "execution_count": 353, "id": "dfb4864a", "metadata": {}, "outputs": [], "source": [ "def assign_city(city_name,lea_is=None,lea_contains=None,law_contains=None,court_contains=None):\n", " # boolean mask for rows without a town/city/jurisdiction assignment\n", " city_unassigned_mask = kc_citations['City'].apply(lambda x: not isinstance(x, str))\n", " \n", " # assign town/city/jurisdiction based on LEA and other criteria\n", " if lea_is is not None:\n", " lea_mask = kc_citations['Law Enforcement Agency'] == lea_is\n", " elif lea_contains is not None:\n", " lea_mask = kc_citations['Law Enforcement Agency'].str.contains(lea_contains)\n", " \n", " if law_contains is not None:\n", " assign_mask = logical_and.reduce((city_unassigned_mask,lea_mask,\n", " kc_citations['Law Code'].str.contains(law_contains)))\n", " else:\n", " assign_mask = logical_and(city_unassigned_mask,lea_mask)\n", " \n", " if court_contains is not None:\n", " assign_mask = logical_and(assign_mask,kc_citations['Court Name'].str.contains(court_contains))\n", " \n", " kc_citations.loc[assign_mask,'City'] = city_name" ] }, { "cell_type": "code", "execution_count": 354, "id": "5a112d6b", "metadata": {}, "outputs": [], "source": [ "# near-certain assignments made based on law enforcement agency, court, and local code referenced\n", "assign_city('Pacific',lea_is='Algona Police Department',law_contains='Pacific Municipal Code')\n", "assign_city('Algona',lea_is='Algona Police Department')\n", "assign_city('Auburn',lea_is='Auburn Police Department')\n", "assign_city('Bellevue',lea_is='Bellevue Police Department')\n", "assign_city('Black Diamond',lea_is='Black Diamond Police Department')\n", "assign_city('Bothell',lea_is='Bothell Police Department')\n", "assign_city('Burien',lea_contains='Burien Police Department')\n", "assign_city('Covington',lea_contains='Covington Police Department')\n", "assign_city('Des Moines',lea_is='Des Moines Police Department')\n", "assign_city('Enumclaw',lea_is='Enumclaw Police Department')\n", "assign_city('Federal Way',lea_is='Federal Way Police Department')\n", "assign_city('Issaquah',lea_is='Issaquah Police Department')\n", "assign_city('Kent',lea_is='Kent Police Department')\n", "assign_city('Kirkland',lea_is='Kirkland Police Department')\n", "assign_city('Lake Forest Park',lea_is='Lake Forest Park Police Department')\n", "assign_city('Mercer Island',lea_is='Mercer Island Police Department')\n", "assign_city('Pacific',lea_is='Pacific Police Department')\n", "assign_city('Redmond',lea_is='Redmond Police Department')\n", "assign_city('Renton',lea_is='Renton Police Department')\n", "assign_city('Sammamish',lea_contains='Sammamish Police Department')\n", "assign_city('SeaTac',lea_contains='SeaTac Police Department')\n", "assign_city('Seattle',lea_is='Seattle Police Department')\n", "assign_city('Seattle',lea_is='University Of Washington Police Department')\n", "assign_city('Seattle',lea_is='Port of Seattle Police Department')\n", "assign_city('Tukwila',lea_is='Tukwila Police Department')\n", "assign_city('Kirkland',lea_contains='Woodinville Police Department',court_contains='Kirkland Municipal Court')\n", "assign_city('Woodinville',lea_contains='Woodinville Police Department')\n", "assign_city('North Bend',lea_is=\"King County Sheriff's Office / North Bend Police Department\")\n", "assign_city('Issaquah',lea_is='Snoqualmie/North Bend Police Department',law_contains='Issaquah Municipal Code')\n", "assign_city('Snoqualmie',lea_is='Snoqualmie/North Bend Police Department',law_contains='Snoqualmie Municipal Code')\n", "assign_city('Kenmore',lea_contains='Kenmore Police Department')\n", "assign_city('Shoreline',lea_contains='Shoreline Police Department')\n", "assign_city('Maple Valley',lea_contains='Maple Valley Police Department')\n", "assign_city('Maple Valley',lea_is=\"King County Sheriff's Office\",law_contains='Maple Valley Municipal Code')\n", "\n", "# ambiguous assignments\n", "assign_city('Snoqualmie',lea_is='Snoqualmie/North Bend Police Department') # 6 citations (could be Snoqualmie or North Bend)\n", "assign_city('Unincorporated King County',lea_is='Washington State Patrol') # 10 citations without any city ties (all within KCDC jurisdiction)\n", "assign_city('Unincorporated King County',lea_is=\"King County Sheriff's Office\") # 81 citations without any city ties (all within KCDC jurisdiction)\n", "\n", "# [to save] lookup code (similar to count_values but does not exclude NaN values)\n", "# kc_citations.loc[kc_citations['Law Enforcement Agency'].str.contains('Maple Valley')]\\\n", "# .groupby(['Court Name','Law Enforcement Agency','Law Code','City'],dropna=False,as_index=False).size()" ] }, { "cell_type": "markdown", "id": "60202c2b", "metadata": {}, "source": [ "#### Export compiled, homogenized, quality-controlled citation records" ] }, { "cell_type": "code", "execution_count": 355, "id": "e89a2a57", "metadata": {}, "outputs": [], "source": [ "kc_citations.to_excel(data_dir + '2021-10-18 - compiled King County bike citation records.xlsx')" ] } ], "metadata": { "kernelspec": { "display_name": "PyCharm (Miscellaneous)", "language": "python", "name": "pycharm-bf1ba4a2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.13" } }, "nbformat": 4, "nbformat_minor": 5 }