{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"import calendar \n",
"import datetime\n",
"import dask.dataframe as dd\n",
"import numpy as np \n",
"import pandas as pd \n",
"import matplotlib.pyplot as plt\n",
"import matplotlib.dates as mdates\n",
"import seaborn as sns\n",
"from IPython.display import SVG, display"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": true
},
"outputs": [],
"source": [
"%matplotlib inline\n",
"%config InlineBackend.figure_format = 'svg'\n",
"\n",
"# set neat seaborn whitegrid styles for matplotlib charts;\n",
"# matplotlib 3.6 renamed its bundled 'seaborn' style sheet to 'seaborn-v0_8',\n",
"# so use whichever of the two names this matplotlib version provides\n",
"seaborn_style = 'seaborn' if 'seaborn' in plt.style.available else 'seaborn-v0_8'\n",
"plt.style.use(seaborn_style)\n",
"sns.set_style('whitegrid')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Loading crime data from: ../data/crimes-2017.snappy.parq\n",
"Crime data loaded into memory.\n",
"Wall time: 3.19 s\n"
]
}
],
"source": [
"%%time\n",
"# path of the 2017 crimes parquet data\n",
"parquet_data_folder = '../data/crimes-2017.snappy.parq'\n",
"print('Loading crime data from: {}'.format(parquet_data_folder))\n",
"\n",
"# read the parquet data into a dask dataframe indexed by Date,\n",
"# then persist it so the data is held in memory\n",
"crimes = dd.read_parquet(parquet_data_folder, index='Date').persist()\n",
"print('Crime data loaded into memory.')"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Crime data stats:\n",
"---------------------------------------\n",
"172,030 total records in 1 partitions\n",
"DataFrame size: 2,408,420\n",
"Wall time: 0 ns\n"
]
}
],
"source": [
"%%time\n",
"# report the record count, partition count and overall element count\n",
"record_count = len(crimes)\n",
"partition_count = crimes.npartitions\n",
"print('Crime data stats:')\n",
"print('---------------------------------------')\n",
"print('{:,} total records in {} partitions'.format(record_count, partition_count))\n",
"print('DataFrame size: {:,}'.format(crimes.size.compute()))"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
Dask DataFrame Structure:
\n",
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Block | \n",
" PrimaryType | \n",
" FBICode | \n",
" Description | \n",
" LocationDescription | \n",
" CommunityArea | \n",
" Beat | \n",
" District | \n",
" Ward | \n",
" Arrest | \n",
" Domestic | \n",
" Latitude | \n",
" Longitude | \n",
" Year | \n",
"
\n",
" \n",
" | npartitions=1 | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2017-01-01 00:00:00 | \n",
" object | \n",
" category[unknown] | \n",
" category[unknown] | \n",
" category[unknown] | \n",
" category[unknown] | \n",
" category[unknown] | \n",
" category[unknown] | \n",
" category[unknown] | \n",
" category[unknown] | \n",
" bool | \n",
" bool | \n",
" float64 | \n",
" float64 | \n",
" category[unknown] | \n",
"
\n",
" \n",
" | 2017-08-25 23:59:00 | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
" ... | \n",
"
\n",
" \n",
"
\n",
"
\n",
"Dask Name: read-parquet, 1 tasks
"
],
"text/plain": [
"Dask DataFrame Structure:\n",
" Block PrimaryType FBICode Description LocationDescription CommunityArea Beat District Ward Arrest Domestic Latitude Longitude Year\n",
"npartitions=1 \n",
"2017-01-01 00:00:00 object category[unknown] category[unknown] category[unknown] category[unknown] category[unknown] category[unknown] category[unknown] category[unknown] bool bool float64 float64 category[unknown]\n",
"2017-08-25 23:59:00 ... ... ... ... ... ... ... ... ... ... ... ... ... ...\n",
"Dask Name: read-parquet, 1 tasks"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# display the dask dataframe structure (columns, dtypes, partitions)\n",
"crimes"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Block | \n",
" PrimaryType | \n",
" FBICode | \n",
" Description | \n",
" LocationDescription | \n",
" CommunityArea | \n",
" Beat | \n",
" District | \n",
" Ward | \n",
" Arrest | \n",
" Domestic | \n",
" Latitude | \n",
" Longitude | \n",
" Year | \n",
"
\n",
" \n",
" | Date | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2017-01-01 | \n",
" 088XX S PRINCETON AVE | \n",
" OFFENSE INVOLVING CHILDREN | \n",
" 20 | \n",
" CRIM SEX ABUSE BY FAM MEMBER | \n",
" RESIDENCE | \n",
" 49.0 | \n",
" 634 | \n",
" 6 | \n",
" 21.0 | \n",
" False | \n",
" False | \n",
" NaN | \n",
" NaN | \n",
" 2017 | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" 013XX S KOMENSKY AVE | \n",
" OFFENSE INVOLVING CHILDREN | \n",
" 20 | \n",
" CRIM SEX ABUSE BY FAM MEMBER | \n",
" RESIDENCE | \n",
" 29.0 | \n",
" 1011 | \n",
" 10 | \n",
" 24.0 | \n",
" False | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" 2017 | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" 044XX S HERMITAGE AVE | \n",
" OFFENSE INVOLVING CHILDREN | \n",
" 02 | \n",
" SEX ASSLT OF CHILD BY FAM MBR | \n",
" RESIDENCE | \n",
" 61.0 | \n",
" 924 | \n",
" 9 | \n",
" 12.0 | \n",
" False | \n",
" True | \n",
" NaN | \n",
" NaN | \n",
" 2017 | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" 016XX W WABANSIA AVE | \n",
" DECEPTIVE PRACTICE | \n",
" 11 | \n",
" FRAUD OR CONFIDENCE GAME | \n",
" RESIDENCE | \n",
" 24.0 | \n",
" 1433 | \n",
" 14 | \n",
" 1.0 | \n",
" False | \n",
" False | \n",
" NaN | \n",
" NaN | \n",
" 2017 | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" 023XX N LEAVITT ST | \n",
" DECEPTIVE PRACTICE | \n",
" 11 | \n",
" FINANCIAL IDENTITY THEFT OVER $ 300 | \n",
" RESIDENCE | \n",
" 22.0 | \n",
" 1432 | \n",
" 14 | \n",
" 32.0 | \n",
" False | \n",
" False | \n",
" NaN | \n",
" NaN | \n",
" 2017 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Block PrimaryType FBICode \\\n",
"Date \n",
"2017-01-01 088XX S PRINCETON AVE OFFENSE INVOLVING CHILDREN 20 \n",
"2017-01-01 013XX S KOMENSKY AVE OFFENSE INVOLVING CHILDREN 20 \n",
"2017-01-01 044XX S HERMITAGE AVE OFFENSE INVOLVING CHILDREN 02 \n",
"2017-01-01 016XX W WABANSIA AVE DECEPTIVE PRACTICE 11 \n",
"2017-01-01 023XX N LEAVITT ST DECEPTIVE PRACTICE 11 \n",
"\n",
" Description LocationDescription \\\n",
"Date \n",
"2017-01-01 CRIM SEX ABUSE BY FAM MEMBER RESIDENCE \n",
"2017-01-01 CRIM SEX ABUSE BY FAM MEMBER RESIDENCE \n",
"2017-01-01 SEX ASSLT OF CHILD BY FAM MBR RESIDENCE \n",
"2017-01-01 FRAUD OR CONFIDENCE GAME RESIDENCE \n",
"2017-01-01 FINANCIAL IDENTITY THEFT OVER $ 300 RESIDENCE \n",
"\n",
" CommunityArea Beat District Ward Arrest Domestic Latitude \\\n",
"Date \n",
"2017-01-01 49.0 634 6 21.0 False False NaN \n",
"2017-01-01 29.0 1011 10 24.0 False True NaN \n",
"2017-01-01 61.0 924 9 12.0 False True NaN \n",
"2017-01-01 24.0 1433 14 1.0 False False NaN \n",
"2017-01-01 22.0 1432 14 32.0 False False NaN \n",
"\n",
" Longitude Year \n",
"Date \n",
"2017-01-01 NaN 2017 \n",
"2017-01-01 NaN 2017 \n",
"2017-01-01 NaN 2017 \n",
"2017-01-01 NaN 2017 \n",
"2017-01-01 NaN 2017 "
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# peek at the first five crime records, indexed by Date\n",
"crimes.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"PrimaryType\n",
"THEFT 40644\n",
"BATTERY 32375\n",
"CRIMINAL DAMAGE 19118\n",
"ASSAULT 12669\n",
"OTHER OFFENSE 11599\n",
"DECEPTIVE PRACTICE 11075\n",
"BURGLARY 8393\n",
"MOTOR VEHICLE THEFT 7331\n",
"ROBBERY 7324\n",
"NARCOTICS 7123\n",
"CRIMINAL TRESPASS 4544\n",
"WEAPONS VIOLATION 3077\n",
"OFFENSE INVOLVING CHILDREN 1390\n",
"PUBLIC PEACE VIOLATION 1009\n",
"CRIM SEXUAL ASSAULT 971\n",
"INTERFERENCE WITH PUBLIC OFFICER 729\n",
"SEX OFFENSE 598\n",
"PROSTITUTION 562\n",
"HOMICIDE 444\n",
"ARSON 301\n",
"LIQUOR LAW VIOLATION 144\n",
"STALKING 137\n",
"GAMBLING 134\n",
"KIDNAPPING 124\n",
"INTIMIDATION 95\n",
"CONCEALED CARRY LICENSE VIOLATION 40\n",
"OBSCENITY 40\n",
"NON-CRIMINAL 22\n",
"HUMAN TRAFFICKING 6\n",
"PUBLIC INDECENCY 6\n",
"OTHER NARCOTIC VIOLATION 4\n",
"NON-CRIMINAL (SUBJECT SPECIFIED) 2\n",
"dtype: int64\n",
"...\n",
"Total Primary Crime Types: 32\n"
]
}
],
"source": [
"# keep only the primary type column and count reports per crime type\n",
"crime_types = crimes[['PrimaryType']]\n",
"crime_type_total = (\n",
"    crime_types\n",
"    .groupby('PrimaryType')\n",
"    .size()\n",
"    .compute()\n",
")\n",
"\n",
"# list crime types from most to least reported, then the number of distinct types\n",
"sorted_crime_type_total = crime_type_total.sort_values(ascending=False)\n",
"print(sorted_crime_type_total)\n",
"print(\"...\\nTotal Primary Crime Types: {:,}\".format(crime_type_total.size))"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" Arrest Domestic Total\n",
"PrimaryType \n",
"THEFT 4048.0 1533.0 40644\n",
"BATTERY 6424.0 15728.0 32375\n",
"CRIMINAL DAMAGE 1143.0 2229.0 19118\n",
"ASSAULT 2251.0 3257.0 12669\n",
"OTHER OFFENSE 2572.0 3612.0 11599\n",
"DECEPTIVE PRACTICE 406.0 101.0 11075\n",
"BURGLARY 311.0 115.0 8393\n",
"MOTOR VEHICLE THEFT 574.0 72.0 7331\n",
"ROBBERY 445.0 132.0 7324\n",
"NARCOTICS 7043.0 5.0 7123\n",
"CRIMINAL TRESPASS 2720.0 223.0 4544\n",
"WEAPONS VIOLATION 2446.0 11.0 3077\n",
"OFFENSE INVOLVING CHILDREN 163.0 686.0 1390\n",
"PUBLIC PEACE VIOLATION 680.0 32.0 1009\n",
"CRIM SEXUAL ASSAULT 38.0 133.0 971\n",
"INTERFERENCE WITH PUBLIC OFFICER 692.0 4.0 729\n",
"SEX OFFENSE 90.0 48.0 598\n",
"PROSTITUTION 562.0 0.0 562\n",
"HOMICIDE 68.0 23.0 444\n",
"ARSON 22.0 9.0 301\n",
"LIQUOR LAW VIOLATION 144.0 1.0 144\n",
"STALKING 11.0 74.0 137\n",
"GAMBLING 134.0 0.0 134\n",
"KIDNAPPING 9.0 43.0 124\n",
"INTIMIDATION 6.0 8.0 95\n",
"CONCEALED CARRY LICENSE VIOLATION 39.0 0.0 40\n",
"OBSCENITY 32.0 9.0 40\n",
"NON-CRIMINAL 1.0 0.0 22\n",
"PUBLIC INDECENCY 6.0 0.0 6\n",
"HUMAN TRAFFICKING 0.0 2.0 6\n",
"OTHER NARCOTIC VIOLATION 3.0 0.0 4\n",
"NON-CRIMINAL (SUBJECT SPECIFIED) 1.0 2.0 2\n"
]
}
],
"source": [
"# sum the Arrest and Domestic flags for each primary crime type\n",
"crime_type_counts = (\n",
"    crimes[['PrimaryType', 'Arrest', 'Domestic']]\n",
"    .groupby('PrimaryType')\n",
"    .sum()\n",
"    .compute()\n",
")\n",
"\n",
"# append the per-type report totals as a Total column\n",
"crime_type_counts['Total'] = crime_type_total\n",
"\n",
"# show arrests, domestic and total counts, largest totals first\n",
"crime_counts_by_total = crime_type_counts.sort_values(by='Total', ascending=False)\n",
"print(crime_counts_by_total)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# horizontal bar chart of report totals per primary crime type\n",
"totals_ascending = crime_type_counts.sort_values(by='Total', ascending=True)\n",
"all_crime_types = totals_ascending.drop(['Arrest', 'Domestic'], axis=1)  # axis=1 drops columns\n",
"all_crime_types.plot(kind='barh', figsize=(8,6), color='#cc0000')\n",
"plt.ylabel('Crime Type')\n",
"plt.xlabel('Number of Crime reports')\n",
"plt.title('2017 Chicago Crimes by Type')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# horizontal bar chart of crime type counts ordered by number of arrests\n",
"counts_by_arrest = crime_type_counts.sort_values(by='Arrest', ascending=True)\n",
"counts_by_arrest.plot(kind='barh', figsize=(8,10))\n",
"plt.xlabel('Number of Crimes')\n",
"plt.ylabel('Crime Type')\n",
"plt.title('2017 Chicago Crime reports by Arrests')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# horizontal bar chart of crime type counts ordered by domestic incident reports\n",
"counts_by_domestic = crime_type_counts.sort_values(by='Domestic', ascending=True)\n",
"counts_by_domestic.plot(kind='barh', figsize=(8,10))\n",
"plt.xlabel('Number of Crimes')\n",
"plt.ylabel('Crime Type')\n",
"plt.title('2017 Chicago Crime reports by Domestic incidents')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot high-volume crime types (>= 1,000 reports)\n",
"high_crime_mask = crime_type_counts['Total'] >= 1000\n",
"high_crime_counts = crime_type_counts[high_crime_mask].sort_values(by='Total', ascending=True)\n",
"high_crime_counts.plot(kind='barh', figsize=(6,6))\n",
"plt.title('High 2017 Chicago Crimes (>= 1,000 reports)')\n",
"plt.xlabel('Number of Crimes')\n",
"plt.ylabel('Crime Type')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot crime types with fewer than 1,000 reports\n",
"low_crime_mask = crime_type_counts['Total'] < 1000\n",
"low_crime_counts = crime_type_counts[low_crime_mask].sort_values(by='Total', ascending=True)\n",
"low_crime_counts.plot(kind='barh', figsize=(6,6))\n",
"plt.title('Low 2017 Chicago Crimes (<1,000 reports)')\n",
"plt.xlabel('Number of Crimes')\n",
"plt.ylabel('Crime Type')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot crime types with fewer than 100 reports\n",
"least_crime_mask = crime_type_counts['Total'] < 100\n",
"least_crime_counts = crime_type_counts[least_crime_mask].sort_values(by='Total', ascending=True)\n",
"least_crime_counts.plot(kind='barh', figsize=(6,4))\n",
"plt.title('Least 2017 Chicago Crimes (<100 reports)')\n",
"plt.xlabel('Number of Crimes')\n",
"plt.ylabel('Crime Type')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"LocationDescription\n",
"STREET 38928\n",
"RESIDENCE 29228\n",
"APARTMENT 21467\n",
"SIDEWALK 13807\n",
"OTHER 7101\n",
"Name: Total, dtype: int64\n",
"...\n",
"Total Locations: 123\n"
]
}
],
"source": [
"# count crime reports per location description, most frequent first\n",
"location_sizes = crimes.groupby('LocationDescription').size().compute()\n",
"crime_locations = location_sizes.sort_values(ascending=False).rename('Total')\n",
"\n",
"# show the top locations and the number of distinct locations\n",
"print(crime_locations.head())\n",
"print(\"...\\nTotal Locations: {:,}\".format(crime_locations.size))"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# horizontal bar chart of the 30 most frequent crime locations\n",
"top_30_locations = crime_locations.head(30).sort_values(ascending=True)\n",
"top_30_locations.plot(kind='barh', figsize=(6,8))\n",
"plt.title('2017 Chicago Crime Top 30 Locations')\n",
"plt.xlabel('Number of Crimes')\n",
"plt.ylabel('Location')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Arrest | \n",
" Domestic | \n",
" Total | \n",
"
\n",
" \n",
" | LocationDescription | \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | RESIDENCE | \n",
" 3096.0 | \n",
" 9896.0 | \n",
" 29228 | \n",
"
\n",
" \n",
" | APARTMENT | \n",
" 3131.0 | \n",
" 9263.0 | \n",
" 21467 | \n",
"
\n",
" \n",
" | OTHER | \n",
" 676.0 | \n",
" 530.0 | \n",
" 7101 | \n",
"
\n",
" \n",
" | RESTAURANT | \n",
" 579.0 | \n",
" 115.0 | \n",
" 4214 | \n",
"
\n",
" \n",
" | COMMERCIAL / BUSINESS OFFICE | \n",
" 105.0 | \n",
" 26.0 | \n",
" 1017 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Arrest Domestic Total\n",
"LocationDescription \n",
"RESIDENCE 3096.0 9896.0 29228\n",
"APARTMENT 3131.0 9263.0 21467\n",
"OTHER 676.0 530.0 7101\n",
"RESTAURANT 579.0 115.0 4214\n",
"COMMERCIAL / BUSINESS OFFICE 105.0 26.0 1017"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# sum the Arrest and Domestic flags for each location description\n",
"crime_location_counts = (\n",
"    crimes[['LocationDescription', 'Arrest', 'Domestic']]\n",
"    .groupby('LocationDescription')\n",
"    .sum()\n",
"    .compute()\n",
")\n",
"\n",
"# attach the per-location report totals and preview the result\n",
"crime_location_counts['Total'] = crime_locations\n",
"crime_location_counts.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot locations with at least 2,500 reports, with their arrest and domestic counts\n",
"top_location_mask = crime_location_counts['Total'] >= 2500\n",
"top_location_counts = crime_location_counts[top_location_mask].sort_values(by='Total', ascending=True)\n",
"top_location_counts.plot(kind='barh', figsize=(6,6))\n",
"plt.title('2017 Chicago Crime Top Locations (>=2,500 Crime Reports)')\n",
"plt.xlabel('Number of Crimes')\n",
"plt.ylabel('Location')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot next 20 top crime locations\n",
"# NOTE: this rebinds crime_location_counts to only the locations with at most\n",
"# 3,000 reports, ordered from most to fewest reports\n",
"crime_location_counts = crime_location_counts[crime_location_counts['Total'] <= 3000]\\\n",
".sort_values(by='Total', ascending=False)\n",
"\n",
"# take the 20 largest of those and flip the order for the horizontal bar chart\n",
"crime_location_counts.head(20).sort_values(by='Total', ascending=True)\\\n",
".plot(kind='barh', figsize=(6,6))\n",
"plt.ylabel('Location')\n",
"plt.xlabel('Number of Crimes')\n",
"# the title states the same bound as the filter above (<= 3,000, inclusive)\n",
"plt.title('2017 Chicago Crime Next Top 20 Crime Locations (<=3,000 Crime Reports)')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"Dask Index Structure:\n",
"npartitions=1\n",
"2017-01-01 00:00:00 datetime64[ns]\n",
"2017-08-25 23:59:00 ...\n",
"Name: Date, dtype: datetime64[ns]\n",
"Dask Name: read-parquet, 2 tasks"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# inspect the index: it must be datetime64 to resample and plot crime over time\n",
"crimes.index"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date\n",
"2017-01-01 00:00:00 True\n",
"2017-01-01 00:00:00 True\n",
"2017-01-01 00:00:00 True\n",
"2017-01-01 00:00:00 True\n",
"2017-01-01 00:01:00 True\n",
"Name: Arrest, dtype: bool\n",
"...\n",
"Total Arrests: 33,084\n"
]
}
],
"source": [
"# keep only the Arrest flag of the reports where an arrest was made\n",
"arrest_made = crimes['Arrest'] == True\n",
"arrests = crimes[arrest_made]['Arrest']\n",
"\n",
"# preview the arrest flags and report how many arrests there are\n",
"print(arrests.head())\n",
"arrest_total = arrests.size.compute()\n",
"print(\"...\\nTotal Arrests: {:,}\".format(arrest_total))"
]
},
{
"cell_type": "code",
"execution_count": 22,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot monthly arrests\n",
"monthly_arrests = arrests.resample('M').sum().compute()\n",
"monthly_arrests.plot(kind='bar', figsize=(6,3))\n",
"\n",
"# label each bar with the month name of its own index entry, so the number of\n",
"# tick labels always equals the number of tick positions (one per bar)\n",
"month_labels = [calendar.month_name[month] for month in monthly_arrests.index.month]\n",
"plt.xticks(list(range(len(month_labels))), month_labels, rotation=0)\n",
"plt.xlabel('2017 Month')\n",
"plt.title('2017 Chicago Crime Monthly Arrests')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot weekly arrests\n",
"weekly_arrests = arrests.resample('W').sum().compute()\n",
"weekly_ax = weekly_arrests.plot(kind='bar')\n",
"\n",
"# resample('W') labels each weekly bin with the day the week ends on, so step back\n",
"# six days and show only the week start date (no time part) under each bar\n",
"week_starts = weekly_arrests.index - pd.Timedelta(days=6)\n",
"weekly_ax.set_xticklabels(week_starts.strftime('%b %d'), rotation=90)\n",
"plt.xlabel('Week Of 2017')\n",
"plt.title('2017 Chicago Crime Weekly Arrests')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 24,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# line chart of the number of arrests per day\n",
"arrests_by_day = arrests.resample('D')\n",
"daily_arrests = arrests_by_day.sum().compute()\n",
"daily_arrests.plot()\n",
"plt.title('2017 Chicago Crime Daily Arrests')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date\n",
"2017-01-01 True\n",
"2017-01-01 True\n",
"2017-01-01 True\n",
"2017-01-01 True\n",
"2017-01-01 True\n",
"Name: Domestic, dtype: bool\n",
"...\n",
"Total Domestic: 28,092\n"
]
}
],
"source": [
"# keep only the Domestic flag of the reports flagged as domestic\n",
"is_domestic = crimes['Domestic'] == True\n",
"domestic = crimes[is_domestic]['Domestic']\n",
"\n",
"# preview the domestic flags and report how many domestic reports there are\n",
"print(domestic.head())\n",
"domestic_total = domestic.size.compute()\n",
"print(\"...\\nTotal Domestic: {:,}\".format(domestic_total))"
]
},
{
"cell_type": "code",
"execution_count": 26,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# green line chart of the number of domestic reports per day\n",
"domestic_by_day = domestic.resample('D')\n",
"daily_domestic = domestic_by_day.sum().compute()\n",
"daily_domestic.plot(color='g')\n",
"plt.title('2017 Chicago Crime Daily Domestic reports')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" PrimaryType\n",
"2017-01-01 940\n",
"2017-01-02 601\n",
"2017-01-03 706\n",
"2017-01-04 610\n",
"2017-01-05 601\n",
"...\n",
"Total Days: 237\n"
]
}
],
"source": [
"# get daily total crime counts\n",
"daily_crime = crime_types.resample('D').count().compute()\n",
"\n",
"# print daily total crime stats; len() counts rows (days), whereas DataFrame.size\n",
"# counts rows x columns and only equals the day count while there is one column\n",
"print(daily_crime.head())\n",
"print(\"...\\nTotal Days: {:,}\".format(len(daily_crime)))"
]
},
{
"cell_type": "code",
"execution_count": 28,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot daily crime counts, arrests, and domestic incidents reports\n",
"fig, ax = plt.subplots()\n",
"ax.plot(daily_crime.index, daily_crime, '--', label='Total', color='r', zorder=10)\n",
"# label the arrests line so the legend lists it alongside the other two series\n",
"ax.plot(daily_arrests.index, daily_arrests, label='Arrests', color='#3399ff', zorder=10)\n",
"ax.fill_between(daily_domestic.index, daily_domestic, label='Domestic', color='c')\n",
"ax.set_ylabel('Number of Crimes')\n",
"ax.set_xlabel('Month')\n",
"ax.legend(loc='right')\n",
"plt.title('2017 Daily Chicago Crime reports, Arrests, and Domestic incidents')\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 29,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
" PrimaryType\n",
"Date \n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"2017-01-01 CRIM SEXUAL ASSAULT\n",
"2017-01-01 OFFENSE INVOLVING CHILDREN\n",
"...............................................\n",
"OFFENSE INVOLVING CHILDREN 1390\n",
"CRIM SEXUAL ASSAULT 971\n",
"HOMICIDE 444\n",
"KIDNAPPING 124\n",
"HUMAN TRAFFICKING 6\n",
"Name: PrimaryType, dtype: int64\n"
]
}
],
"source": [
"# primary types treated as human endangerment crimes\n",
"endangerment_types = ['CRIM SEXUAL ASSAULT', 'HOMICIDE', 'HUMAN TRAFFICKING',\n",
"                      'KIDNAPPING', 'OFFENSE INVOLVING CHILDREN']\n",
"\n",
"# OR together one equality test per selected primary type\n",
"is_endangerment = crime_types['PrimaryType'] == endangerment_types[0]\n",
"for endangerment_type in endangerment_types[1:]:\n",
"    is_endangerment = is_endangerment | (crime_types['PrimaryType'] == endangerment_type)\n",
"violent_crimes = crime_types[is_endangerment]\n",
"\n",
"# preview the selected records, then the report count per selected type\n",
"print(violent_crimes.head(10))\n",
"print('...............................................')\n",
"print(violent_crimes.PrimaryType.value_counts().head(5))"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Dask DataFrame Structure:\n",
" PrimaryType\n",
"npartitions=1 \n",
"2017-01-01 00:00:00 category[known]\n",
"2017-08-25 23:59:00 ...\n",
"Dask Name: _categorize_block, 14 tasks\n",
"\n",
"Selected Primary Type categories:\n",
"----------------------------------------------------------------------\n",
"Index(['OFFENSE INVOLVING CHILDREN', 'DECEPTIVE PRACTICE', 'OTHER OFFENSE',\n",
" 'CRIMINAL TRESPASS', 'CRIMINAL DAMAGE', 'SEX OFFENSE', 'THEFT',\n",
" 'CRIM SEXUAL ASSAULT', 'OBSCENITY', 'ASSAULT', 'BATTERY',\n",
" 'MOTOR VEHICLE THEFT', 'INTIMIDATION', 'ROBBERY', 'WEAPONS VIOLATION',\n",
" 'INTERFERENCE WITH PUBLIC OFFICER', 'PUBLIC PEACE VIOLATION',\n",
" 'BURGLARY', 'NARCOTICS', 'HOMICIDE', 'KIDNAPPING', 'ARSON', 'STALKING',\n",
" 'CONCEALED CARRY LICENSE VIOLATION', 'PROSTITUTION', 'NON-CRIMINAL',\n",
" 'LIQUOR LAW VIOLATION', 'PUBLIC INDECENCY', 'GAMBLING',\n",
" 'NON-CRIMINAL (SUBJECT SPECIFIED)', 'HUMAN TRAFFICKING',\n",
" 'OTHER NARCOTIC VIOLATION'],\n",
" dtype='object')\n"
]
}
],
"source": [
"# make PrimaryType a categorical column via dask categorize\n",
"violent_crimes = violent_crimes.categorize(columns='PrimaryType')\n",
"print(violent_crimes)\n",
"\n",
"# list the categories of the PrimaryType column\n",
"primary_type_categories = violent_crimes.PrimaryType.cat.categories\n",
"print('\\nSelected Primary Type categories:')\n",
"print('----------------------------------------------------------------------')\n",
"print(primary_type_categories)"
]
},
{
"cell_type": "code",
"execution_count": 31,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Date PrimaryType \n",
"2017-01-31 OFFENSE INVOLVING CHILDREN 218\n",
" CRIM SEXUAL ASSAULT 135\n",
" HOMICIDE 53\n",
" KIDNAPPING 8\n",
"2017-02-28 OFFENSE INVOLVING CHILDREN 119\n",
" CRIM SEXUAL ASSAULT 118\n",
" HOMICIDE 48\n",
" KIDNAPPING 13\n",
"2017-03-31 OFFENSE INVOLVING CHILDREN 175\n",
" CRIM SEXUAL ASSAULT 96\n",
"Name: Count, dtype: int64\n",
"...\n",
"PrimaryType OFFENSE INVOLVING CHILDREN CRIM SEXUAL ASSAULT HOMICIDE \\\n",
"Date \n",
"2017-01-31 218.0 135.0 53.0 \n",
"2017-02-28 119.0 118.0 48.0 \n",
"2017-03-31 175.0 96.0 37.0 \n",
"2017-04-30 172.0 131.0 46.0 \n",
"2017-05-31 196.0 124.0 57.0 \n",
"2017-06-30 172.0 113.0 85.0 \n",
"2017-07-31 167.0 150.0 75.0 \n",
"2017-08-31 171.0 104.0 43.0 \n",
"\n",
"PrimaryType KIDNAPPING HUMAN TRAFFICKING \n",
"Date \n",
"2017-01-31 8.0 NaN \n",
"2017-02-28 13.0 NaN \n",
"2017-03-31 18.0 4.0 \n",
"2017-04-30 15.0 NaN \n",
"2017-05-31 19.0 1.0 \n",
"2017-06-30 15.0 1.0 \n",
"2017-07-31 23.0 NaN \n",
"2017-08-31 13.0 NaN \n",
"...\n",
"Total Monthly/Type records: 40\n"
]
}
],
"source": [
"# group violent crimes by month and crime type\n",
"# (pd.Grouper(freq='M') is the supported spelling of the deprecated pd.TimeGrouper('M'))\n",
"violent_crimes_groupby = violent_crimes.groupby([pd.Grouper(freq='M'), 'PrimaryType'])\n",
"violent_crime_data = violent_crimes_groupby['PrimaryType'].count().compute().rename('Count')\n",
"print(violent_crime_data.head(10))\n",
"print('...')\n",
"\n",
"# unstack violent crime type group for series plotting:\n",
"# one row per month, one column per primary type\n",
"violent_crime_data = violent_crime_data.unstack()\n",
"print(violent_crime_data.head(12))\n",
"\n",
"print(\"...\\nTotal Monthly/Type records: {:,}\".format(violent_crime_data.size))"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot violent crime data\n",
"violent_crime_data.plot(figsize=(6,6), kind='bar')\n",
"\n",
"# derive one month-name label per bar group from the index, so every month in the\n",
"# data is labeled and the label count always equals the tick position count\n",
"month_labels = [calendar.month_name[month] for month in violent_crime_data.index.month]\n",
"plt.xticks(list(range(len(month_labels))), month_labels, rotation=0)\n",
"plt.legend(loc='upper right', frameon=True)\n",
"plt.xlabel('2017 Month')\n",
"plt.title('Human Endangerment 2017 Chicago Crimes')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" PrimaryType | \n",
"
\n",
" \n",
" | Date | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" | 2017-01-01 | \n",
" OTHER OFFENSE | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" CRIMINAL DAMAGE | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" CRIMINAL DAMAGE | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" THEFT | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" OTHER OFFENSE | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" OTHER OFFENSE | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" ASSAULT | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" THEFT | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" CRIMINAL DAMAGE | \n",
"
\n",
" \n",
" | 2017-01-01 | \n",
" CRIMINAL DAMAGE | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" PrimaryType\n",
"Date \n",
"2017-01-01 OTHER OFFENSE\n",
"2017-01-01 CRIMINAL DAMAGE\n",
"2017-01-01 CRIMINAL DAMAGE\n",
"2017-01-01 THEFT\n",
"2017-01-01 OTHER OFFENSE\n",
"2017-01-01 OTHER OFFENSE\n",
"2017-01-01 ASSAULT\n",
"2017-01-01 THEFT\n",
"2017-01-01 CRIMINAL DAMAGE\n",
"2017-01-01 CRIMINAL DAMAGE"
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# primary types of the five most reported crimes\n",
"top_5_types = ['THEFT', 'BATTERY', 'CRIMINAL DAMAGE', 'ASSAULT', 'OTHER OFFENSE']\n",
"\n",
"# OR together one equality test per selected primary type\n",
"is_top_5 = crime_types['PrimaryType'] == top_5_types[0]\n",
"for top_5_type in top_5_types[1:]:\n",
"    is_top_5 = is_top_5 | (crime_types['PrimaryType'] == top_5_type)\n",
"top_5_crimes = crime_types[is_top_5]\n",
"top_5_crimes.head(10)"
]
},
{
"cell_type": "code",
"execution_count": 34,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"THEFT 40644\n",
"BATTERY 32375\n",
"CRIMINAL DAMAGE 19118\n",
"ASSAULT 12669\n",
"OTHER OFFENSE 11599\n",
"Name: PrimaryType, dtype: int64"
]
},
"execution_count": 34,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# show the report count of each of the top 5 crime types\n",
"top_5_crimes['PrimaryType'].value_counts().head(5)"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Monthly Top 5 Crime Data:\n",
"---------------------------------------------------------------------\n",
"Date PrimaryType \n",
"2017-01-31 OTHER OFFENSE 1551\n",
" CRIMINAL DAMAGE 2407\n",
" THEFT 4927\n",
" ASSAULT 1373\n",
" BATTERY 3557\n",
"2017-02-28 OTHER OFFENSE 1373\n",
" CRIMINAL DAMAGE 2044\n",
" THEFT 4376\n",
" ASSAULT 1294\n",
" BATTERY 3407\n",
"Name: Count, dtype: int64\n",
"...\n",
"PrimaryType OTHER OFFENSE CRIMINAL DAMAGE THEFT ASSAULT BATTERY\n",
"Date \n",
"2017-01-31 1551 2407 4927 1373 3557\n",
"2017-02-28 1373 2044 4376 1294 3407\n",
"2017-03-31 1508 2232 4471 1481 3851\n",
"2017-04-30 1551 2440 4761 1635 4097\n",
"2017-05-31 1611 2508 5303 1847 4554\n",
"2017-06-30 1362 2563 5702 1850 4640\n",
"2017-07-31 1492 2728 5978 1810 4685\n",
"2017-08-31 1151 2196 5126 1379 3584\n"
]
}
],
"source": [
"print('Monthly Top 5 Crime Data:')\n",
"print('---------------------------------------------------------------------')\n",
"\n",
"# group top 5 crimes by month and crime type\n",
"# (pd.Grouper(freq='M') is the supported spelling of the deprecated pd.TimeGrouper('M'))\n",
"crimes_groupby = top_5_crimes.groupby([pd.Grouper(freq='M'), 'PrimaryType'])\n",
"top_5_crime_data = crimes_groupby['PrimaryType'].count().compute().rename('Count')\n",
"print(top_5_crime_data.head(10))\n",
"print('...')\n",
"\n",
"# unstack top 5 crimes type group for series plotting:\n",
"# one row per month, one column per primary type\n",
"top_5_crime_data = top_5_crime_data.unstack()\n",
"print(top_5_crime_data.head(10))"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# plot monthly top 5 crimes data as grouped bars (one group per month)\n",
"top_5_crime_data.plot(figsize=(6,6), kind='bar')\n",
"\n",
"# label each bar group with its month name; ticks and labels are both derived\n",
"# from the data index so their counts always match, however many months\n",
"# the data set covers (bar groups sit at positions 0..n-1)\n",
"month_labels = [calendar.month_name[date.month] for date in top_5_crime_data.index]\n",
"plt.xticks(list(range(len(month_labels))), month_labels, rotation=0)\n",
"plt.legend(loc='upper right', frameon=True)\n",
"plt.xlabel('2017 Month')\n",
"plt.title('Top 5 2017 Chicago Crimes by Month')\n",
"plt.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Weekly Top 5 Crime Data:\n",
"---------------------------------------------------------------------\n",
"Date PrimaryType \n",
"2017-01-01 OTHER OFFENSE 72\n",
" CRIMINAL DAMAGE 113\n",
" THEFT 178\n",
" ASSAULT 45\n",
" BATTERY 203\n",
"2017-01-08 OTHER OFFENSE 336\n",
" CRIMINAL DAMAGE 477\n",
" THEFT 909\n",
" ASSAULT 300\n",
" BATTERY 714\n",
"Name: Count, dtype: int64\n",
"...\n",
"PrimaryType OTHER OFFENSE CRIMINAL DAMAGE THEFT ASSAULT BATTERY\n",
"Date \n",
"2017-01-01 72 113 178 45 203\n",
"2017-01-08 336 477 909 300 714\n",
"2017-01-15 383 524 1076 292 766\n",
"2017-01-22 349 568 1234 338 814\n",
"2017-01-29 318 569 1198 312 824\n",
"2017-02-05 331 491 1144 287 828\n",
"2017-02-12 338 542 1106 333 845\n",
"2017-02-19 366 555 1182 345 888\n",
"2017-02-26 356 502 983 324 852\n",
"2017-03-05 322 442 1012 294 818\n",
"...\n"
]
}
],
"source": [
"print('Weekly Top 5 Crime Data:')\n",
"print('---------------------------------------------------------------------')\n",
"\n",
"# group top 5 crimes by week (weekly bins on the Date index) and crime type;\n",
"# pd.Grouper(freq=...) is the supported spelling of pd.TimeGrouper, which was\n",
"# deprecated in pandas 0.21 and removed in pandas 1.0\n",
"crimes_groupby = top_5_crimes.groupby([pd.Grouper(freq='W'), 'PrimaryType'])\n",
"\n",
"# count records per (week, crime type) and materialize the dask result\n",
"# as a pandas Series named 'Count'\n",
"top_5_crime_data = crimes_groupby['PrimaryType'].count().compute().rename('Count')\n",
"print(top_5_crime_data.head(10))\n",
"print('...')\n",
"\n",
"# unstack the crime type level into columns (one column per crime type)\n",
"# so each type plots as its own series\n",
"top_5_crime_data = top_5_crime_data.unstack()\n",
"print(top_5_crime_data.head(10))\n",
"print('...')"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"\r\n"
],
"text/plain": [
""
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"# NOTE: an 'Arrests' column taken from weekly_arrests could be added to\n",
"# top_5_crime_data here to compare crime counts with arrests (currently disabled)\n",
"\n",
"# line chart of the weekly top 5 crime counts, one series per crime type\n",
"weekly_ax = top_5_crime_data.plot(figsize=(6, 6))\n",
"weekly_ax.legend(loc='upper right', frameon=True)\n",
"weekly_ax.set_title('Weekly Top 5 2017 Chicago Crimes')\n",
"weekly_ax.figure.tight_layout()\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Daily Top 5 Crime Data:\n",
"---------------------------------------------------------------------\n",
"Date PrimaryType \n",
"2017-01-01 OTHER OFFENSE 72\n",
" CRIMINAL DAMAGE 113\n",
" THEFT 178\n",
" ASSAULT 45\n",
" BATTERY 203\n",
"2017-01-02 OTHER OFFENSE 44\n",
" CRIMINAL DAMAGE 73\n",
" THEFT 120\n",
" ASSAULT 38\n",
" BATTERY 105\n",
"Name: Count, dtype: int64\n",
"...\n",
"PrimaryType OTHER OFFENSE CRIMINAL DAMAGE THEFT ASSAULT BATTERY\n",
"Date \n",
"2017-01-01 72 113 178 45 203\n",
"2017-01-02 44 73 120 38 105\n",
"2017-01-03 43 79 149 55 114\n",
"2017-01-04 54 57 161 34 95\n",
"2017-01-05 56 68 132 42 84\n",
"2017-01-06 40 68 122 44 81\n",
"2017-01-07 44 62 120 42 120\n",
"2017-01-08 55 70 105 45 115\n",
"2017-01-09 50 90 159 38 89\n",
"2017-01-10 55 93 152 47 105\n",
"...\n"
]
}
],
"source": [
"print('Daily Top 5 Crime Data:')\n",
"print('---------------------------------------------------------------------')\n",
"\n",
"# group top 5 crimes by day (daily bins on the Date index) and crime type;\n",
"# pd.Grouper(freq=...) is the supported spelling of pd.TimeGrouper, which was\n",
"# deprecated in pandas 0.21 and removed in pandas 1.0\n",
"crimes_groupby = top_5_crimes.groupby([pd.Grouper(freq='D'), 'PrimaryType'])\n",
"\n",
"# count records per (day, crime type) and materialize the dask result\n",
"# as a pandas Series named 'Count'\n",
"top_5_crime_data = crimes_groupby['PrimaryType'].count().compute().rename('Count')\n",
"print(top_5_crime_data.head(10))\n",
"print('...')\n",
"\n",
"# unstack the crime type level into columns (one column per crime type)\n",
"# so each type plots as its own series\n",
"top_5_crime_data = top_5_crime_data.unstack()\n",
"print(top_5_crime_data.head(10))\n",
"print('...')"
]
},
{
"cell_type": "code",
"execution_count": 40,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"image/svg+xml": [
"\r\n",
"\r\n",
"\r\n",
"