{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import pandas as pd\n", "import numpy as np\n", "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "from IPython.core.display import display, HTML\n", "%matplotlib inline" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import folium\n", "from folium.plugins import FastMarkerCluster" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "pd.options.display.max_columns = None\n", "# display(HTML(\"\"))" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "CSV_PATH = os.path.join('data', 'hacknight_ticket_sample_data_2015.csv')\n", "df = pd.read_csv(CSV_PATH,low_memory=False, parse_dates=['issue_date', 'ticket_queue_date'])" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "CSV_PATH = os.path.join('data', 'hacknight_sample_data_geocode.csv')\n", "addrs_df = pd.read_csv(CSV_PATH)\n", "addrs_df['address'] = addrs_df['address'] + ', chicago, il'" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ticket_numberissue_dateviolation_locationlicense_plate_numberlicense_plate_statelicense_plate_typezipcodeviolation_codeviolation_descriptionunitunit_descriptionvehicle_makefine_level1_amountfine_level2_amountcurrent_amount_duetotal_paymentsticket_queueticket_queue_datenotice_levelhearing_dispositionnotice_numberofficeraddresslatlng
091888146212015-05-07 13:52:002134 S ARCHER AV7c189a16ef79db9413c1f46b7e5d1712e5c0c1575be352...MIPAS481030964190AEXP. METER NON-CENTRAL BUSINESS DISTRICT498DOFBUIC501000.050.0Paid2015-05-13NaNNaN07982100 s archer av, chicago, il41.854262-87.631986
191884170372015-01-01 21:29:002167 S ARCHER AVac7f6f4be49dfa223571838e586653aa5cdcb6bdfc6385...ILPASNaN0964190AEXP. METER NON-CENTRAL BUSINESS DISTRICT502DOFMITS501000.050.0Paid2015-01-05NaNNaN015292100 s archer av, chicago, il41.854262-87.631986
291885987342015-03-26 13:14:002140 S ARCHER AVa493850f62c8ca5a25787271859f0409c289fb84130f33...OHPAS454090976160AREAR AND FRONT PLATE REQUIRED498DOFPORS60120146.40.0Notice2015-04-08SEIZNaN51915665307802100 s archer av, chicago, il41.854262-87.631986
391891546392015-07-24 14:18:002172 S ARCHER AV38a1ef0c0a325c79e9dba69f6e96652fcbf1e9dcb5db3f...ILPAS6061615140964190AEXP. METER NON-CENTRAL BUSINESS DISTRICT498DOFMERZ501000.00.0Dismissed2015-09-14VIOLNot Liable51764353607962100 s archer av, chicago, il41.854262-87.631986
491880416232015-01-17 14:45:002107 S ARCHER AVe3c39cce6c30735cc977656a463233ea6f516328828e8d...LAPAS700720964190AEXP. METER NON-CENTRAL BUSINESS DISTRICT502DOFHOND50100122.00.0Notice2015-02-04SEIZNaN518184584014612100 s archer av, chicago, il41.854262-87.631986
\n", "
" ], "text/plain": [ " ticket_number issue_date violation_location \\\n", "0 9188814621 2015-05-07 13:52:00 2134 S ARCHER AV \n", "1 9188417037 2015-01-01 21:29:00 2167 S ARCHER AV \n", "2 9188598734 2015-03-26 13:14:00 2140 S ARCHER AV \n", "3 9189154639 2015-07-24 14:18:00 2172 S ARCHER AV \n", "4 9188041623 2015-01-17 14:45:00 2107 S ARCHER AV \n", "\n", " license_plate_number license_plate_state \\\n", "0 7c189a16ef79db9413c1f46b7e5d1712e5c0c1575be352... MI \n", "1 ac7f6f4be49dfa223571838e586653aa5cdcb6bdfc6385... IL \n", "2 a493850f62c8ca5a25787271859f0409c289fb84130f33... OH \n", "3 38a1ef0c0a325c79e9dba69f6e96652fcbf1e9dcb5db3f... IL \n", "4 e3c39cce6c30735cc977656a463233ea6f516328828e8d... LA \n", "\n", " license_plate_type zipcode violation_code \\\n", "0 PAS 48103 0964190A \n", "1 PAS NaN 0964190A \n", "2 PAS 45409 0976160A \n", "3 PAS 606161514 0964190A \n", "4 PAS 70072 0964190A \n", "\n", " violation_description unit unit_description \\\n", "0 EXP. METER NON-CENTRAL BUSINESS DISTRICT 498 DOF \n", "1 EXP. METER NON-CENTRAL BUSINESS DISTRICT 502 DOF \n", "2 REAR AND FRONT PLATE REQUIRED 498 DOF \n", "3 EXP. METER NON-CENTRAL BUSINESS DISTRICT 498 DOF \n", "4 EXP. METER NON-CENTRAL BUSINESS DISTRICT 502 DOF \n", "\n", " vehicle_make fine_level1_amount fine_level2_amount current_amount_due \\\n", "0 BUIC 50 100 0.0 \n", "1 MITS 50 100 0.0 \n", "2 PORS 60 120 146.4 \n", "3 MERZ 50 100 0.0 \n", "4 HOND 50 100 122.0 \n", "\n", " total_payments ticket_queue ticket_queue_date notice_level \\\n", "0 50.0 Paid 2015-05-13 NaN \n", "1 50.0 Paid 2015-01-05 NaN \n", "2 0.0 Notice 2015-04-08 SEIZ \n", "3 0.0 Dismissed 2015-09-14 VIOL \n", "4 0.0 Notice 2015-02-04 SEIZ \n", "\n", " hearing_disposition notice_number officer address \\\n", "0 NaN 0 798 2100 s archer av, chicago, il \n", "1 NaN 0 1529 2100 s archer av, chicago, il \n", "2 NaN 5191566530 780 2100 s archer av, chicago, il \n", "3 Not Liable 5176435360 796 2100 s archer av, chicago, il \n", "4 NaN 5181845840 1461 2100 s archer av, chicago, il \n", "\n", " lat lng \n", "0 41.854262 -87.631986 \n", "1 41.854262 -87.631986 \n", "2 41.854262 -87.631986 \n", "3 41.854262 -87.631986 \n", "4 41.854262 -87.631986 " ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geocoded_df = pd.merge(left=df, right=addrs_df, how='inner', on='address')\n", "geocoded_df.head()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "The map below shows where the most prolific ticketer (Officer #728) wrote their tickets. As you zoom in on the map, the map will automatically show finer detail as to where the tickets were written." ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# Which CPD officers wrote at least 50 tickets in 2015\n", "prolific_CPD = geocoded_df.loc[geocoded_df['unit_description'] == 'CPD', 'officer'].value_counts()\\\n", " [geocoded_df.loc[geocoded_df['unit_description'] == 'CPD','officer'].value_counts() >= 50]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "chi_lat = 41.8\n", "chi_lon = -87.75" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Officer #728 is the officer that wrote the most tickets in 2015\n", "subset_df = geocoded_df[geocoded_df['officer'].isin(prolific_CPD.index.tolist())]\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "EXPIRED PLATES OR TEMPORARY REGISTRATION 8791\n", "STREET CLEANING 6273\n", "EXP. METER NON-CENTRAL BUSINESS DISTRICT 5773\n", "NO CITY STICKER VEHICLE UNDER/EQUAL TO 16,000 LBS. 5183\n", "RESIDENTIAL PERMIT PARKING 4013\n", "PARKING/STANDING PROHIBITED ANYTIME 3188\n", "REAR AND FRONT PLATE REQUIRED 2717\n", "EXPIRED METER CENTRAL BUSINESS DISTRICT 2474\n", "NO STANDING/PARKING TIME RESTRICTED 1793\n", "RUSH HOUR PARKING 1593\n", "WITHIN 15' OF FIRE HYDRANT 1002\n", "PARK OR STAND IN BUS/TAXI/CARRIAGE STAND 876\n", "STOP SIGN OR TRAFFIC SIGNAL 502\n", "DOUBLE PARKING OR STANDING 474\n", "TRUCK,RV,BUS, OR TAXI RESIDENTIAL STREET 425\n", "NONCOMPLIANT PLATE(S) 406\n", "PARK ALLEY 372\n", "PARK OR STAND ON CROSSWALK 327\n", "DISABLED PARKING ZONE 312\n", "BLOCK ACCESS/ALLEY/DRIVEWAY/FIRELANE 281\n", "STAND, PARK, OR OTHER USE OF BUS LANE 278\n", "ABANDONED VEH. FOR 7 DAYS OR INOPERABLE 264\n", "WRONG DIRECTION OR 12'' FROM CURB 212\n", "OBSTRUCT ROADWAY 180\n", "SPECIAL EVENTS RESTRICTION 150\n", "3-7 AM SNOW ROUTE 145\n", "TWO HEAD LAMPS REQUIRED VISIBLE 1000' 145\n", "PARK OR STAND ON SIDEWALK 125\n", "SAFETY BELTS REQUIRED 123\n", "WINDOWS MISSING OR CRACKED BEYOND 6 118\n", "Name: violation_description, dtype: int64" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# What violations occurred at least 100 times?\n", "query = geocoded_df['violation_description'].value_counts()\n", "query[(query > 100)]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Where were people ticketed for not having a parking permit?**" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 11, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Where were people ticketed for not having a parking permit?\n", "subset_df = geocoded_df[geocoded_df['violation_description'].isin(['EXPIRED PLATES OR TEMPORARY REGISTRATION'])]\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Where were people ticketed for not having a parking permit?\n", "subset_df = geocoded_df[geocoded_df['violation_description'].isin(['EXPIRED PLATES OR TEMPORARY REGISTRATION'])]\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "HUMM 45\n", "RROV 40\n", "FIAT 33\n", "TESL 29\n", "GEO 28\n", "UNKN 28\n", "HARL 22\n", "GENU 20\n", "YAMA 20\n", "KENW 18\n", "OTHR 16\n", "KAWA 15\n", "MASE 14\n", "SMRT 14\n", "HINO 12\n", "TRIU 11\n", "WORH 11\n", "FERR 8\n", "STRG 8\n", "KYMC 7\n", "BENT 7\n", "ASTO 7\n", "EGIL 6\n", "SPNT 6\n", "THMP 5\n", "PETR 5\n", "VESP 5\n", "Name: vehicle_make, dtype: int64" ] }, "execution_count": 13, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# What are the rare cares that were ticketed? (>=5 to eliminate unknown makers, <=50 to get rare cars)\n", "query = geocoded_df['vehicle_make'].value_counts()\n", "query[(query >= 5) & (query <= 50)]" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "# What are the rare cares that were ticketed? (>=5 to eliminate randos, <=50 to get rare cars)\n", "rare_cars = query[(query >= 5) & (query <= 50)].index.tolist()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Where were rare cars (as defined above) ticketed?**" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Where were rare cars (as defined above) ticketed?\n", "subset_df = geocoded_df[geocoded_df['vehicle_make'].isin(rare_cars)]\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Where were Ferraris and Maseratis ticketed?**" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Where were Ferraris and Maseratis ticketed?\n", "subset_df = geocoded_df[geocoded_df['vehicle_make'].isin(['FERR', 'MASE'])]\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "VIOL 14471\n", "SEIZ 9422\n", "DETR 6245\n", "FINL 4294\n", "DLS 2275\n", "Name: notice_level, dtype: int64" ] }, "execution_count": 17, "metadata": {}, "output_type": "execute_result" } ], "source": [ "geocoded_df['notice_level'].value_counts()" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Where were vehicles seized?**" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Where were vehicles seized?\n", "subset_df = geocoded_df[geocoded_df['notice_level'] == 'SEIZ']\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "PAS 42611\n", "TMP 2278\n", "TRK 2201\n", "TXI 560\n", "DLC 213\n", "HCP 202\n", "FFM 182\n", "MCY 158\n", "ENV 97\n", "LIV 94\n", "APP 77\n", "DOM 71\n", "COL 62\n", "PML 46\n", "CBK 44\n", "SOX 38\n", "INT 26\n", "USM 24\n", "BRS 22\n", "PFR 22\n", "Name: license_plate_type, dtype: int64" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "query = geocoded_df['license_plate_type'].value_counts()\n", "query[query >= 20]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "**Where were vehicles with temporary license plates ticketed?**" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Where were vehicles with temporary license plates ticketed?\n", "subset_df = geocoded_df[geocoded_df['license_plate_type'] == 'TMP']\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Officer #728 is the officer that wrote the most tickets in 2015\n", "subset_df = geocoded_df[geocoded_df['officer'] == '653']\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
" ], "text/plain": [ "" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "chi_map = folium.Map(location=[chi_lat, chi_lon], zoom_start=10)\n", "\n", "# Officer #728 is the officer that wrote the most tickets in 2015\n", "subset_df = geocoded_df[geocoded_df['officer'] == '790']\n", "\n", "chi_map.add_child(FastMarkerCluster(subset_df[['lat', 'lng']].values.tolist()))\n", "chi_map" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python [default]", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.6" } }, "nbformat": 4, "nbformat_minor": 2 }