{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [] }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" } }, "cells": [ { "cell_type": "code", "execution_count": null, "metadata": { "id": "b0frxjOjX3vv" }, "outputs": [], "source": [ "# Authorize access to Google Drive\n", "from google.colab import drive, auth\n", "drive.mount('/content/drive',force_remount=True)\n", "\n", "import numpy as np\n", "import pandas as pd\n", "from datetime import datetime, timedelta\n", "import os\n", "import re\n", "from string import capwords" ] }, { "cell_type": "code", "source": [ "# Filepaths\n", "root = '/content/drive/MyDrive/TCC (Transportation Choices Coalition)/'\n", "data_dir = root + 'Data/'\n", "cad_dir = data_dir + '2022-11-28 - Snohomish County 911 (SNO911) public records - CAD logs/'" ], "metadata": { "id": "ngd3abmOYc6h" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Load files\n", "filenames = os.listdir(cad_dir)\n", "filenames.sort()\n", "data = []\n", "for fn in filenames:\n", " with open(cad_dir + fn, 'r') as file:\n", " data_this = file.read()\n", " data.append(data_this)" ], "metadata": { "id": "LAZYUdn5Yns5" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Extract data fields\n", "output = pd.DataFrame()\n", "for fn_idx, fn in enumerate(filenames):\n", " cfs = re.compile(r\"Call For Service Detail Report - CFS ([0-9]+)\\n\")\n", " cfs_matches = np.array(cfs.findall(data[fn_idx]))\n", " this_output = pd.DataFrame(index=np.arange(len(cfs_matches)),data={'CFS':cfs_matches})\n", "\n", " this_output['Filename'] = fn\n", "\n", " address = re.compile(r\"Address\\n\\n(.+)\\n\\n\")\n", " address_matches = address.findall(data[fn_idx])\n", " this_output['Original address'] = [capwords(s) for s in address_matches]\n", " this_output[['Original address','Town/city']] = \\\n", " this_output['Original address'].str.split(', ',expand=True)\n", " this_output[['Primary street','Cross street']] = \\\n", " this_output['Original address'].str.split('@ ',expand=True)\n", " this_output.loc[~this_output['Original address'].str.contains('@'),'Address'] = \\\n", " this_output.loc[~this_output['Original address'].str.contains('@'),'Primary street']\n", " this_output.loc[~this_output['Original address'].str.contains('@'),'Primary street'] = ''\n", " this_output.loc[this_output['Address'].isnull(),'Address'] = ''\n", "\n", " date = re.compile(r\"Create Date\\n\\n(.+) (.+ .+)\\n\\n\")\n", " date_matches = date.findall(data[fn_idx])\n", " date_matches_dist = []\n", " date_match_idx = 0\n", " for sub_cad_log in data[fn_idx].split('Call For Service Detail Report - CFS')[1:]:\n", " if date_match_idx >= len(date_matches):\n", " date_matches_dist.append(('',''))\n", " elif (date_matches[date_match_idx][0] in sub_cad_log) and \\\n", " (date_matches[date_match_idx][1] in sub_cad_log):\n", " date_matches_dist.append(date_matches[date_match_idx])\n", " date_match_idx += 1\n", " else:\n", " date_matches_dist.append(('',''))\n", " this_output['Date'] = pd.to_datetime(np.array(date_matches_dist)[:,0])\n", " this_output['Time'] = pd.to_datetime(np.array(date_matches_dist)[:,1])\n", " this_output['Time'] = this_output['Time'].dt.floor('Min').dt.time\n", "\n", " incnum = re.compile(r\"Incident Number\\n(.+(?:\\n.+)*)\\n\\nJurisdiction\")\n", " incnum_matches = [s.rstrip(\"\\nUnits\") for s in incnum.findall(data[fn_idx])]\n", " incnum_matches_dist = []\n", " incnum_match_idx = 0\n", " for sub_cad_log in data[fn_idx].split('Call For Service Detail Report - CFS')[1:]:\n", " if incnum_match_idx >= len(incnum_matches):\n", " incnum_matches_dist.append('')\n", " elif incnum_matches[incnum_match_idx] in sub_cad_log:\n", " incnum_matches_dist.append(incnum_matches[incnum_match_idx])\n", " incnum_match_idx += 1\n", " else:\n", " incnum_matches_dist.append('')\n", " incnum_matches_dist = ['CHECK_INC_NUM' if s == '' else s.replace('\\n',', ') \\\n", " for s in incnum_matches_dist]\n", " this_output['Incident'] = incnum_matches_dist\n", "\n", " this_output['IDs'] = this_output['CFS'].str.cat(this_output['Incident'],sep=', ')\n", "\n", " this_output = this_output[['Filename','IDs','Town/city','Address',\n", " 'Primary street','Cross street','Date','Time']]\n", " output = output.append(this_output)\n", "\n", "output = output.reset_index().drop(columns=['index'])\n", "\n", "output.to_excel(cad_dir + 'SNO911_parsed.xlsx')\n", "display(output)" ], "metadata": { "colab": { "base_uri": "https://localhost:8080/", "height": 658 }, "id": "P1NJrB6_Y1PJ", "outputId": "69971eeb-9bca-4f66-e5b3-3dac54e6a430" }, "execution_count": null, "outputs": [ { "output_type": "display_data", "data": { "text/plain": [ " Filename IDs Town/city \\\n", "0 2017_CAD_Logs_R.txt 436, CHECK_INC_NUM Everett \n", "1 2017_CAD_Logs_R.txt 452, 2017-00119029 None \n", "2 2017_CAD_Logs_R.txt 884, 2017-00130003 Everett \n", "3 2017_CAD_Logs_R.txt 627, CHECK_INC_NUM Everett \n", "4 2017_CAD_Logs_R.txt 621, 2017-00146009 Everett \n", ".. ... ... ... \n", "429 2022_CAD_LOGS_R.txt 572, 2022-00148970 Everett \n", "430 2022_CAD_LOGS_R.txt 499, 2022-00040803 Everett \n", "431 2022_CAD_LOGS_R.txt 881, 2022-00041899 Everett \n", "432 2022_CAD_LOGS_R.txt 660, 2022-00067798 Lynnwood \n", "433 2022_CAD_LOGS_R.txt 505, 2022-00017574 Lake Stevens \n", "\n", " Address Primary street Cross street Date \\\n", "0 19th Ave Se 100th St Se 2017-09-17 \n", "1 6600 Blk Evergreen None 2017-09-30 \n", "2 221 Se Everett Mall Way 103 None 2017-10-25 \n", "3 112th St Sw Evergreen Way 2017-11-01 \n", "4 Broadway 13th St 2017-12-02 \n", ".. ... ... ... ... \n", "429 12000 Hwy 99 None 2022-10-23 \n", "430 Airport Rd Hwy 99 2022-03-24 \n", "431 214 128th St Sw None 2022-03-26 \n", "432 14000 Blk Hwy 99 None 2022-05-17 \n", "433 Sr 9 Ne Market Pl 2022-09-28 \n", "\n", " Time \n", "0 20:56:00 \n", "1 19:07:00 \n", "2 11:57:00 \n", "3 01:33:00 \n", "4 10:57:00 \n", ".. ... \n", "429 12:37:00 \n", "430 06:23:00 \n", "431 07:49:00 \n", "432 20:57:00 \n", "433 20:34:00 \n", "\n", "[434 rows x 8 columns]" ], "text/html": [ "\n", "
\n", " | Filename | \n", "IDs | \n", "Town/city | \n", "Address | \n", "Primary street | \n", "Cross street | \n", "Date | \n", "Time | \n", "
---|---|---|---|---|---|---|---|---|
0 | \n", "2017_CAD_Logs_R.txt | \n", "436, CHECK_INC_NUM | \n", "Everett | \n", "\n", " | 19th Ave Se | \n", "100th St Se | \n", "2017-09-17 | \n", "20:56:00 | \n", "
1 | \n", "2017_CAD_Logs_R.txt | \n", "452, 2017-00119029 | \n", "None | \n", "6600 Blk Evergreen | \n", "\n", " | None | \n", "2017-09-30 | \n", "19:07:00 | \n", "
2 | \n", "2017_CAD_Logs_R.txt | \n", "884, 2017-00130003 | \n", "Everett | \n", "221 Se Everett Mall Way 103 | \n", "\n", " | None | \n", "2017-10-25 | \n", "11:57:00 | \n", "
3 | \n", "2017_CAD_Logs_R.txt | \n", "627, CHECK_INC_NUM | \n", "Everett | \n", "\n", " | 112th St Sw | \n", "Evergreen Way | \n", "2017-11-01 | \n", "01:33:00 | \n", "
4 | \n", "2017_CAD_Logs_R.txt | \n", "621, 2017-00146009 | \n", "Everett | \n", "\n", " | Broadway | \n", "13th St | \n", "2017-12-02 | \n", "10:57:00 | \n", "
... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
429 | \n", "2022_CAD_LOGS_R.txt | \n", "572, 2022-00148970 | \n", "Everett | \n", "12000 Hwy 99 | \n", "\n", " | None | \n", "2022-10-23 | \n", "12:37:00 | \n", "
430 | \n", "2022_CAD_LOGS_R.txt | \n", "499, 2022-00040803 | \n", "Everett | \n", "\n", " | Airport Rd | \n", "Hwy 99 | \n", "2022-03-24 | \n", "06:23:00 | \n", "
431 | \n", "2022_CAD_LOGS_R.txt | \n", "881, 2022-00041899 | \n", "Everett | \n", "214 128th St Sw | \n", "\n", " | None | \n", "2022-03-26 | \n", "07:49:00 | \n", "
432 | \n", "2022_CAD_LOGS_R.txt | \n", "660, 2022-00067798 | \n", "Lynnwood | \n", "14000 Blk Hwy 99 | \n", "\n", " | None | \n", "2022-05-17 | \n", "20:57:00 | \n", "
433 | \n", "2022_CAD_LOGS_R.txt | \n", "505, 2022-00017574 | \n", "Lake Stevens | \n", "\n", " | Sr 9 Ne | \n", "Market Pl | \n", "2022-09-28 | \n", "20:34:00 | \n", "
434 rows × 8 columns
\n", "