{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Preparing the data\n",
"\n",
"We want to make a single .csv with all months of 2017; bike theft data only.\n",
"\n",
"PMR (test to see if can edit and commit)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Import libraries (pandas should always be imported as pd)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import glob, os "
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Load data - this solution is not scalable"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
"JanSt = pd.read_csv('../data/Crime_data_2017/2017-01/2017-01-cambridgeshire-street.csv')\n",
"FebSt = pd.read_csv('../data/Crime_data_2017/2017-02/2017-02-cambridgeshire-street.csv')\n",
"MarSt = pd.read_csv('../data/Crime_data_2017/2017-03/2017-03-cambridgeshire-street.csv')\n",
"AprSt = pd.read_csv('../data/Crime_data_2017/2017-04/2017-04-cambridgeshire-street.csv')\n",
"MaySt = pd.read_csv('../data/Crime_data_2017/2017-05/2017-05-cambridgeshire-street.csv')\n",
"JunSt = pd.read_csv('../data/Crime_data_2017/2017-06/2017-06-cambridgeshire-street.csv')\n",
"JulSt = pd.read_csv('../data/Crime_data_2017/2017-07/2017-07-cambridgeshire-street.csv')\n",
"AugSt = pd.read_csv('../data/Crime_data_2017/2017-08/2017-08-cambridgeshire-street.csv')\n",
"SepSt = pd.read_csv('../data/Crime_data_2017/2017-09/2017-09-cambridgeshire-street.csv')\n",
"OctSt = pd.read_csv('../data/Crime_data_2017/2017-10/2017-10-cambridgeshire-street.csv')\n",
"NovSt = pd.read_csv('../data/Crime_data_2017/2017-11/2017-11-cambridgeshire-street.csv')\n",
"DecSt = pd.read_csv('../data/Crime_data_2017/2017-12/2017-12-cambridgeshire-street.csv')"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Produce a list of all the data frames"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [],
"source": [
"frames = [JanSt,FebSt,MarSt,AprSt,MaySt,JunSt,JulSt,AugSt,SepSt,OctSt,NovSt,DecSt]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Concatenate data into single data frame"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [],
"source": [
"St = pd.concat(frames)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Extract only data on bicycle thefts"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"Bikethefts = St[St['Crime type'].str.contains(\"Bicycle theft\")]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"View the head (first 5 rows) of the bike theft data"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {
"scrolled": true
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Crime ID | \n",
" Month | \n",
" Reported by | \n",
" Falls within | \n",
" Longitude | \n",
" Latitude | \n",
" Location | \n",
" LSOA code | \n",
" LSOA name | \n",
" Crime type | \n",
" Last outcome category | \n",
" Context | \n",
"
\n",
" \n",
" \n",
" \n",
" 9 | \n",
" 7618ab88baac08d51a23432420127e44144ce4748d5112... | \n",
" 2017-01 | \n",
" Cambridgeshire Constabulary | \n",
" Cambridgeshire Constabulary | \n",
" 0.135772 | \n",
" 52.232788 | \n",
" On or near Kirkwood Road | \n",
" E01017975 | \n",
" Cambridge 001A | \n",
" Bicycle theft | \n",
" Investigation complete; no suspect identified | \n",
" NaN | \n",
"
\n",
" \n",
" 10 | \n",
" e498705835a2699a86fc3adf17cd7b00eae728ced382b4... | \n",
" 2017-01 | \n",
" Cambridgeshire Constabulary | \n",
" Cambridgeshire Constabulary | \n",
" 0.131681 | \n",
" 52.229005 | \n",
" On or near Crowland Way | \n",
" E01017975 | \n",
" Cambridge 001A | \n",
" Bicycle theft | \n",
" Investigation complete; no suspect identified | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" 529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e... | \n",
" 2017-01 | \n",
" Cambridgeshire Constabulary | \n",
" Cambridgeshire Constabulary | \n",
" 0.130128 | \n",
" 52.230525 | \n",
" On or near Crathern Way | \n",
" E01017975 | \n",
" Cambridge 001A | \n",
" Bicycle theft | \n",
" Investigation complete; no suspect identified | \n",
" NaN | \n",
"
\n",
" \n",
" 12 | \n",
" 08e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1... | \n",
" 2017-01 | \n",
" Cambridgeshire Constabulary | \n",
" Cambridgeshire Constabulary | \n",
" 0.135772 | \n",
" 52.232788 | \n",
" On or near Kirkwood Road | \n",
" E01017975 | \n",
" Cambridge 001A | \n",
" Bicycle theft | \n",
" Investigation complete; no suspect identified | \n",
" NaN | \n",
"
\n",
" \n",
" 88 | \n",
" 76d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a... | \n",
" 2017-01 | \n",
" Cambridgeshire Constabulary | \n",
" Cambridgeshire Constabulary | \n",
" 0.126413 | \n",
" 52.233362 | \n",
" On or near Minerva Way | \n",
" E01017979 | \n",
" Cambridge 001E | \n",
" Bicycle theft | \n",
" Investigation complete; no suspect identified | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Crime ID Month \\\n",
"9 7618ab88baac08d51a23432420127e44144ce4748d5112... 2017-01 \n",
"10 e498705835a2699a86fc3adf17cd7b00eae728ced382b4... 2017-01 \n",
"11 529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e... 2017-01 \n",
"12 08e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1... 2017-01 \n",
"88 76d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a... 2017-01 \n",
"\n",
" Reported by Falls within Longitude \\\n",
"9 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.135772 \n",
"10 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.131681 \n",
"11 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.130128 \n",
"12 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.135772 \n",
"88 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.126413 \n",
"\n",
" Latitude Location LSOA code LSOA name \\\n",
"9 52.232788 On or near Kirkwood Road E01017975 Cambridge 001A \n",
"10 52.229005 On or near Crowland Way E01017975 Cambridge 001A \n",
"11 52.230525 On or near Crathern Way E01017975 Cambridge 001A \n",
"12 52.232788 On or near Kirkwood Road E01017975 Cambridge 001A \n",
"88 52.233362 On or near Minerva Way E01017979 Cambridge 001E \n",
"\n",
" Crime type Last outcome category Context \n",
"9 Bicycle theft Investigation complete; no suspect identified NaN \n",
"10 Bicycle theft Investigation complete; no suspect identified NaN \n",
"11 Bicycle theft Investigation complete; no suspect identified NaN \n",
"12 Bicycle theft Investigation complete; no suspect identified NaN \n",
"88 Bicycle theft Investigation complete; no suspect identified NaN "
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame.head(Bikethefts)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Extract only the columns of interest: Crime ID, Month, Longitude, Latitude, Location and Last outcome category."
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [],
"source": [
"BikeSub = Bikethefts[['Crime ID', 'Month', 'Longitude', 'Latitude','Location','Last outcome category']]"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"View the head of the subsetted bike theft data"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Crime ID | \n",
" Month | \n",
" Longitude | \n",
" Latitude | \n",
" Location | \n",
" Last outcome category | \n",
"
\n",
" \n",
" \n",
" \n",
" 9 | \n",
" 7618ab88baac08d51a23432420127e44144ce4748d5112... | \n",
" 2017-01 | \n",
" 0.135772 | \n",
" 52.232788 | \n",
" On or near Kirkwood Road | \n",
" Investigation complete; no suspect identified | \n",
"
\n",
" \n",
" 10 | \n",
" e498705835a2699a86fc3adf17cd7b00eae728ced382b4... | \n",
" 2017-01 | \n",
" 0.131681 | \n",
" 52.229005 | \n",
" On or near Crowland Way | \n",
" Investigation complete; no suspect identified | \n",
"
\n",
" \n",
" 11 | \n",
" 529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e... | \n",
" 2017-01 | \n",
" 0.130128 | \n",
" 52.230525 | \n",
" On or near Crathern Way | \n",
" Investigation complete; no suspect identified | \n",
"
\n",
" \n",
" 12 | \n",
" 08e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1... | \n",
" 2017-01 | \n",
" 0.135772 | \n",
" 52.232788 | \n",
" On or near Kirkwood Road | \n",
" Investigation complete; no suspect identified | \n",
"
\n",
" \n",
" 88 | \n",
" 76d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a... | \n",
" 2017-01 | \n",
" 0.126413 | \n",
" 52.233362 | \n",
" On or near Minerva Way | \n",
" Investigation complete; no suspect identified | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Crime ID Month Longitude \\\n",
"9 7618ab88baac08d51a23432420127e44144ce4748d5112... 2017-01 0.135772 \n",
"10 e498705835a2699a86fc3adf17cd7b00eae728ced382b4... 2017-01 0.131681 \n",
"11 529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e... 2017-01 0.130128 \n",
"12 08e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1... 2017-01 0.135772 \n",
"88 76d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a... 2017-01 0.126413 \n",
"\n",
" Latitude Location \\\n",
"9 52.232788 On or near Kirkwood Road \n",
"10 52.229005 On or near Crowland Way \n",
"11 52.230525 On or near Crathern Way \n",
"12 52.232788 On or near Kirkwood Road \n",
"88 52.233362 On or near Minerva Way \n",
"\n",
" Last outcome category \n",
"9 Investigation complete; no suspect identified \n",
"10 Investigation complete; no suspect identified \n",
"11 Investigation complete; no suspect identified \n",
"12 Investigation complete; no suspect identified \n",
"88 Investigation complete; no suspect identified "
]
},
"execution_count": 38,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"pd.DataFrame.head(BikeSub)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"Write the processed data as a csv file."
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [],
"source": [
"BikeSub.to_csv('../data/processed/bikethefts.csv', index = False)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"anaconda-cloud": {},
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}