{ "metadata": { "name": "", "signature": "sha256:29897a9f8041bb301bd1fb41356d238d8ccf7902ba369eb1d9acc34f5f990953" }, "nbformat": 3, "nbformat_minor": 0, "worksheets": [ { "cells": [ { "cell_type": "code", "collapsed": false, "input": [ "import pandas as pd\n", "\n", "#csv_path = 'https://raw.githubusercontent.com/nygeog/data/master/nyc_crashes/data/NYPD_Motor_Vehicle_Collisions.csv'\n", "inCSV = 'data/NYPD_Motor_Vehicle_Collisions.csv'\n", "ouCSV = 'data/nypd_mv_collisions.csv'\n", "\n", "df = pd.read_csv(inCSV).rename(columns=lambda x: x.lower())\n", "\n", "#drop ones w/out valid lat #super lazy, just grabbing lat's above 35\n", "df = df[(df.latitude > 35)]\n", "#print df.dtypes \n", "print len(df.index)\n", "df.head(5)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "449644\n" ] }, { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetimeboroughzip codelatitudelongitudelocationon street namecross street nameoff street name...contributing factor vehicle 2contributing factor vehicle 3contributing factor vehicle 4contributing factor vehicle 5unique keyvehicle type code 1vehicle type code 2vehicle type code 3vehicle type code 4vehicle type code 5
1 02/13/2015 21:45 MANHATTAN 10002 40.715622-73.994275 (40.7156221, -73.9942752) FORSYTH STREET CANAL STREET NaN... NaN NaN NaN NaN 3168577 PASSENGER VEHICLE UNKNOWN NaN NaN NaN
2 02/13/2015 21:45 MANHATTAN 10001 40.747535-73.988307 (40.7475349, -73.9883068) WEST 31 STREET BROADWAY NaN... Fatigued/Drowsy NaN NaN NaN 3169163 TAXI TAXI NaN NaN NaN
3 02/13/2015 21:45 BRONX 10462 40.833558-73.857732 (40.8335582, -73.8577325) WESTCHESTER AVENUE PUGSLEY AVENUE NaN... Unspecified NaN NaN NaN 3169251 PASSENGER VEHICLE SPORT UTILITY / STATION WAGON NaN NaN NaN
4 02/13/2015 21:44 MANHATTAN 10017 40.748800-73.969846 (40.7487997, -73.969846) EAST 42 STREET 1 AVENUE NaN... Other Vehicular NaN NaN NaN 3169176 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
5 02/13/2015 21:40 STATEN ISLAND 10304 40.617295-74.080479 (40.6172954, -74.0804791) PARK HILL AVENUE OSGOOD AVENUE NaN... Unspecified NaN NaN NaN 3169614 PASSENGER VEHICLE PASSENGER VEHICLE NaN NaN NaN
\n", "

5 rows \u00d7 29 columns

\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 12, "text": [ " date time borough zip code latitude longitude \\\n", "1 02/13/2015 21:45 MANHATTAN 10002 40.715622 -73.994275 \n", "2 02/13/2015 21:45 MANHATTAN 10001 40.747535 -73.988307 \n", "3 02/13/2015 21:45 BRONX 10462 40.833558 -73.857732 \n", "4 02/13/2015 21:44 MANHATTAN 10017 40.748800 -73.969846 \n", "5 02/13/2015 21:40 STATEN ISLAND 10304 40.617295 -74.080479 \n", "\n", " location on street name cross street name \\\n", "1 (40.7156221, -73.9942752) FORSYTH STREET CANAL STREET \n", "2 (40.7475349, -73.9883068) WEST 31 STREET BROADWAY \n", "3 (40.8335582, -73.8577325) WESTCHESTER AVENUE PUGSLEY AVENUE \n", "4 (40.7487997, -73.969846) EAST 42 STREET 1 AVENUE \n", "5 (40.6172954, -74.0804791) PARK HILL AVENUE OSGOOD AVENUE \n", "\n", " off street name ... contributing factor vehicle 2 \\\n", "1 NaN ... NaN \n", "2 NaN ... Fatigued/Drowsy \n", "3 NaN ... Unspecified \n", "4 NaN ... Other Vehicular \n", "5 NaN ... Unspecified \n", "\n", " contributing factor vehicle 3 contributing factor vehicle 4 \\\n", "1 NaN NaN \n", "2 NaN NaN \n", "3 NaN NaN \n", "4 NaN NaN \n", "5 NaN NaN \n", "\n", " contributing factor vehicle 5 unique key vehicle type code 1 \\\n", "1 NaN 3168577 PASSENGER VEHICLE \n", "2 NaN 3169163 TAXI \n", "3 NaN 3169251 PASSENGER VEHICLE \n", "4 NaN 3169176 PASSENGER VEHICLE \n", "5 NaN 3169614 PASSENGER VEHICLE \n", "\n", " vehicle type code 2 vehicle type code 3 vehicle type code 4 \\\n", "1 UNKNOWN NaN NaN \n", "2 TAXI NaN NaN \n", "3 SPORT UTILITY / STATION WAGON NaN NaN \n", "4 PASSENGER VEHICLE NaN NaN \n", "5 PASSENGER VEHICLE NaN NaN \n", "\n", " vehicle type code 5 \n", "1 NaN \n", "2 NaN \n", "3 NaN \n", "4 NaN \n", "5 NaN \n", "\n", "[5 rows x 29 columns]" ] } ], "prompt_number": 12 }, { "cell_type": "code", "collapsed": false, "input": [ "#create datetime http://stackoverflow.com/questions/17978092/combine-date-and-time-columns-using-python-pandas\n", "df['datetime'] = pd.to_datetime(df['date'] + ' ' + df['time'])" ], "language": "python", "metadata": {}, "outputs": [], "prompt_number": 13 }, { "cell_type": "code", "collapsed": false, "input": [ "df = df[['datetime','latitude','longitude']]\n", "df = df[(df.datetime > '2014-01-01 00:00:01')] #query out only data from 2014 onward\n", "df = df.sort('datetime')\n", "df.to_csv(ouCSV,index=False)\n", "print len(df.index)" ], "language": "python", "metadata": {}, "outputs": [ { "output_type": "stream", "stream": "stdout", "text": [ "192324\n" ] } ], "prompt_number": 14 }, { "cell_type": "code", "collapsed": false, "input": [ "df.head(5)" ], "language": "python", "metadata": {}, "outputs": [ { "html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
datetimelatitudelongitude
2288542014-01-01 00:01:00 40.725432-73.996771
2288502014-01-01 00:01:00 40.767889-73.981512
2288552014-01-01 00:01:00 40.750844-73.978608
2291442014-01-01 00:15:00 40.588646-73.992452
2288492014-01-01 00:20:00 40.689019-73.986157
\n", "
" ], "metadata": {}, "output_type": "pyout", "prompt_number": 15, "text": [ " datetime latitude longitude\n", "228854 2014-01-01 00:01:00 40.725432 -73.996771\n", "228850 2014-01-01 00:01:00 40.767889 -73.981512\n", "228855 2014-01-01 00:01:00 40.750844 -73.978608\n", "229144 2014-01-01 00:15:00 40.588646 -73.992452\n", "228849 2014-01-01 00:20:00 40.689019 -73.986157" ] } ], "prompt_number": 15 }, { "cell_type": "markdown", "metadata": {}, "source": [ "" ] } ], "metadata": {} } ] }