{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Preparing the data\n", "\n", "We want to make a single .csv with all months of 2017; bike theft data only.\n", "\n", "PMR (test to see if can edit and commit)\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Import libraries (pandas should always be imported as pd)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import glob, os " ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Load data - this solution is not scalable" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "JanSt = pd.read_csv('../data/Crime_data_2017/2017-01/2017-01-cambridgeshire-street.csv')\n", "FebSt = pd.read_csv('../data/Crime_data_2017/2017-02/2017-02-cambridgeshire-street.csv')\n", "MarSt = pd.read_csv('../data/Crime_data_2017/2017-03/2017-03-cambridgeshire-street.csv')\n", "AprSt = pd.read_csv('../data/Crime_data_2017/2017-04/2017-04-cambridgeshire-street.csv')\n", "MaySt = pd.read_csv('../data/Crime_data_2017/2017-05/2017-05-cambridgeshire-street.csv')\n", "JunSt = pd.read_csv('../data/Crime_data_2017/2017-06/2017-06-cambridgeshire-street.csv')\n", "JulSt = pd.read_csv('../data/Crime_data_2017/2017-07/2017-07-cambridgeshire-street.csv')\n", "AugSt = pd.read_csv('../data/Crime_data_2017/2017-08/2017-08-cambridgeshire-street.csv')\n", "SepSt = pd.read_csv('../data/Crime_data_2017/2017-09/2017-09-cambridgeshire-street.csv')\n", "OctSt = pd.read_csv('../data/Crime_data_2017/2017-10/2017-10-cambridgeshire-street.csv')\n", "NovSt = pd.read_csv('../data/Crime_data_2017/2017-11/2017-11-cambridgeshire-street.csv')\n", "DecSt = pd.read_csv('../data/Crime_data_2017/2017-12/2017-12-cambridgeshire-street.csv')" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Produce a list of all the data frames" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "frames = [JanSt,FebSt,MarSt,AprSt,MaySt,JunSt,JulSt,AugSt,SepSt,OctSt,NovSt,DecSt]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Concatenate data into single data frame" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [], "source": [ "St = pd.concat(frames)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extract only data on bicycle thefts" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [], "source": [ "Bikethefts = St[St['Crime type'].str.contains(\"Bicycle theft\")]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "View the head (first 5 rows) of the bike theft data" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Crime IDMonthReported byFalls withinLongitudeLatitudeLocationLSOA codeLSOA nameCrime typeLast outcome categoryContext
97618ab88baac08d51a23432420127e44144ce4748d5112...2017-01Cambridgeshire ConstabularyCambridgeshire Constabulary0.13577252.232788On or near Kirkwood RoadE01017975Cambridge 001ABicycle theftInvestigation complete; no suspect identifiedNaN
10e498705835a2699a86fc3adf17cd7b00eae728ced382b4...2017-01Cambridgeshire ConstabularyCambridgeshire Constabulary0.13168152.229005On or near Crowland WayE01017975Cambridge 001ABicycle theftInvestigation complete; no suspect identifiedNaN
11529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e...2017-01Cambridgeshire ConstabularyCambridgeshire Constabulary0.13012852.230525On or near Crathern WayE01017975Cambridge 001ABicycle theftInvestigation complete; no suspect identifiedNaN
1208e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1...2017-01Cambridgeshire ConstabularyCambridgeshire Constabulary0.13577252.232788On or near Kirkwood RoadE01017975Cambridge 001ABicycle theftInvestigation complete; no suspect identifiedNaN
8876d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a...2017-01Cambridgeshire ConstabularyCambridgeshire Constabulary0.12641352.233362On or near Minerva WayE01017979Cambridge 001EBicycle theftInvestigation complete; no suspect identifiedNaN
\n", "
" ], "text/plain": [ " Crime ID Month \\\n", "9 7618ab88baac08d51a23432420127e44144ce4748d5112... 2017-01 \n", "10 e498705835a2699a86fc3adf17cd7b00eae728ced382b4... 2017-01 \n", "11 529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e... 2017-01 \n", "12 08e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1... 2017-01 \n", "88 76d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a... 2017-01 \n", "\n", " Reported by Falls within Longitude \\\n", "9 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.135772 \n", "10 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.131681 \n", "11 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.130128 \n", "12 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.135772 \n", "88 Cambridgeshire Constabulary Cambridgeshire Constabulary 0.126413 \n", "\n", " Latitude Location LSOA code LSOA name \\\n", "9 52.232788 On or near Kirkwood Road E01017975 Cambridge 001A \n", "10 52.229005 On or near Crowland Way E01017975 Cambridge 001A \n", "11 52.230525 On or near Crathern Way E01017975 Cambridge 001A \n", "12 52.232788 On or near Kirkwood Road E01017975 Cambridge 001A \n", "88 52.233362 On or near Minerva Way E01017979 Cambridge 001E \n", "\n", " Crime type Last outcome category Context \n", "9 Bicycle theft Investigation complete; no suspect identified NaN \n", "10 Bicycle theft Investigation complete; no suspect identified NaN \n", "11 Bicycle theft Investigation complete; no suspect identified NaN \n", "12 Bicycle theft Investigation complete; no suspect identified NaN \n", "88 Bicycle theft Investigation complete; no suspect identified NaN " ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame.head(Bikethefts)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Extract only the columns of interest: Crime ID, Month, Longitude, Latitude, Location and Last outcome category." ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "BikeSub = Bikethefts[['Crime ID', 'Month', 'Longitude', 'Latitude','Location','Last outcome category']]" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "View the head of the subsetted bike theft data" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Crime IDMonthLongitudeLatitudeLocationLast outcome category
97618ab88baac08d51a23432420127e44144ce4748d5112...2017-010.13577252.232788On or near Kirkwood RoadInvestigation complete; no suspect identified
10e498705835a2699a86fc3adf17cd7b00eae728ced382b4...2017-010.13168152.229005On or near Crowland WayInvestigation complete; no suspect identified
11529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e...2017-010.13012852.230525On or near Crathern WayInvestigation complete; no suspect identified
1208e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1...2017-010.13577252.232788On or near Kirkwood RoadInvestigation complete; no suspect identified
8876d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a...2017-010.12641352.233362On or near Minerva WayInvestigation complete; no suspect identified
\n", "
" ], "text/plain": [ " Crime ID Month Longitude \\\n", "9 7618ab88baac08d51a23432420127e44144ce4748d5112... 2017-01 0.135772 \n", "10 e498705835a2699a86fc3adf17cd7b00eae728ced382b4... 2017-01 0.131681 \n", "11 529d80f26b78125e9f9386a0f0cf82582fe1059a991d0e... 2017-01 0.130128 \n", "12 08e44ae155f1be07b25214f94996eb75e6e7c93a0ca3c1... 2017-01 0.135772 \n", "88 76d55f7b71de6f014123fd9f69a9fde85c93ca3b4e228a... 2017-01 0.126413 \n", "\n", " Latitude Location \\\n", "9 52.232788 On or near Kirkwood Road \n", "10 52.229005 On or near Crowland Way \n", "11 52.230525 On or near Crathern Way \n", "12 52.232788 On or near Kirkwood Road \n", "88 52.233362 On or near Minerva Way \n", "\n", " Last outcome category \n", "9 Investigation complete; no suspect identified \n", "10 Investigation complete; no suspect identified \n", "11 Investigation complete; no suspect identified \n", "12 Investigation complete; no suspect identified \n", "88 Investigation complete; no suspect identified " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "pd.DataFrame.head(BikeSub)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Write the processed data as a csv file." ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "BikeSub.to_csv('../data/processed/bikethefts.csv', index = False)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "anaconda-cloud": {}, "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.4" } }, "nbformat": 4, "nbformat_minor": 2 }