{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
DatesCategoryDescriptDayOfWeekPdDistrictResolutionAddressXY
02015-05-13 23:53:00WARRANTSWARRANT ARRESTWednesdayNORTHERNARREST, BOOKEDOAK ST / LAGUNA ST-122.42589237.774599
12015-05-13 23:53:00OTHER OFFENSESTRAFFIC VIOLATION ARRESTWednesdayNORTHERNARREST, BOOKEDOAK ST / LAGUNA ST-122.42589237.774599
22015-05-13 23:33:00OTHER OFFENSESTRAFFIC VIOLATION ARRESTWednesdayNORTHERNARREST, BOOKEDVANNESS AV / GREENWICH ST-122.42436337.800414
32015-05-13 23:30:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayNORTHERNNONE1500 Block of LOMBARD ST-122.42699537.800873
42015-05-13 23:30:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayPARKNONE100 Block of BRODERICK ST-122.43873837.771541
52015-05-13 23:30:00LARCENY/THEFTGRAND THEFT FROM UNLOCKED AUTOWednesdayINGLESIDENONE0 Block of TEDDY AV-122.40325237.713431
62015-05-13 23:30:00VEHICLE THEFTSTOLEN AUTOMOBILEWednesdayINGLESIDENONEAVALON AV / PERU AV-122.42332737.725138
72015-05-13 23:30:00VEHICLE THEFTSTOLEN AUTOMOBILEWednesdayBAYVIEWNONEKIRKWOOD AV / DONAHUE ST-122.37127437.727564
82015-05-13 23:00:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayRICHMONDNONE600 Block of 47TH AV-122.50819437.776601
92015-05-13 23:00:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayCENTRALNONEJEFFERSON ST / LEAVENWORTH ST-122.41908837.807802
102015-05-13 22:58:00LARCENY/THEFTPETTY THEFT FROM LOCKED AUTOWednesdayCENTRALNONEJEFFERSON ST / LEAVENWORTH ST-122.41908837.807802
112015-05-13 22:30:00OTHER OFFENSESMISCELLANEOUS INVESTIGATIONWednesdayTARAVALNONE0 Block of ESCOLTA WY-122.48798337.737667
122015-05-13 22:30:00VANDALISMMALICIOUS MISCHIEF, VANDALISM OF VEHICLESWednesdayTENDERLOINNONETURK ST / JONES ST-122.41241437.783004
132015-05-13 22:06:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayNORTHERNNONEFILLMORE ST / GEARY BL-122.43291537.784353
142015-05-13 22:00:00NON-CRIMINALFOUND PROPERTYWednesdayBAYVIEWNONE200 Block of WILLIAMS AV-122.39774437.729935
152015-05-13 22:00:00NON-CRIMINALFOUND PROPERTYWednesdayBAYVIEWNONE0 Block of MENDELL ST-122.38369237.743189
162015-05-13 22:00:00ROBBERYROBBERY, ARMED WITH A KNIFEWednesdayTENDERLOINNONEEDDY ST / JONES ST-122.41259737.783932
172015-05-13 21:55:00ASSAULTAGGRAVATED ASSAULT WITH BODILY FORCEWednesdayINGLESIDENONEGODEUS ST / MISSION ST-122.42168237.742822
182015-05-13 21:40:00OTHER OFFENSESTRAFFIC VIOLATIONWednesdayBAYVIEWARREST, BOOKEDMENDELL ST / HUDSON AV-122.38640137.738983
192015-05-13 21:30:00NON-CRIMINALFOUND PROPERTYWednesdayTENDERLOINNONE100 Block of JONES ST-122.41225037.782556
202015-05-13 21:30:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayINGLESIDENONE200 Block of EVELYN WY-122.44938937.742669
212015-05-13 21:17:00ROBBERYROBBERY, BODILY FORCEWednesdayINGLESIDENONE1600 Block of VALENCIA ST-122.42027237.747332
222015-05-13 21:11:00WARRANTSWARRANT ARRESTWednesdayTENDERLOINNONE100 Block of JONES ST-122.41225037.782556
232015-05-13 21:11:00NON-CRIMINALSTAY AWAY OR COURT ORDER, NON-DV RELATEDWednesdayTENDERLOINNONE100 Block of JONES ST-122.41225037.782556
242015-05-13 21:10:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayNORTHERNNONEFILLMORE ST / LOMBARD ST-122.43604937.799841
252015-05-13 21:00:00NON-CRIMINALLOST PROPERTYWednesdayTENDERLOINNONE300 Block of OFARRELL ST-122.41050937.786043
262015-05-13 21:00:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayNORTHERNNONE2000 Block of BUSH ST-122.43101837.787388
272015-05-13 21:00:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOWednesdayINGLESIDENONE500 Block of COLLEGE AV-122.42365637.732556
282015-05-13 21:00:00LARCENY/THEFTATTEMPTED THEFT FROM LOCKED VEHICLEWednesdayTARAVALNONE19TH AV / SANTIAGO ST-122.47577337.744919
292015-05-13 20:56:00OTHER OFFENSESMISCELLANEOUS INVESTIGATIONWednesdayTARAVALNONE2000 Block of 41ST AV-122.49978737.748518
..............................
8780192003-01-06 02:37:00OTHER OFFENSESDRIVERS LICENSE, SUSPENDED OR REVOKEDMondaySOUTHERNARREST, CITED6TH ST / MARKET ST-122.41029437.782231
8780202003-01-06 02:32:00OTHER OFFENSESTRAFFIC VIOLATIONMondayNORTHERNARREST, CITEDVAN NESS AV / TURK ST-122.42064237.781961
8780212003-01-06 02:24:00VANDALISMMALICIOUS MISCHIEFMondayNORTHERNNOT PROSECUTEDSANCHEZ ST / 14TH ST-122.43119137.767595
8780222003-01-06 02:16:00VEHICLE THEFTRECOVERED VEHICLE - STOLEN OUTSIDE SFMondayMISSIONNONE17TH ST / MISSION ST-122.41951637.763429
8780232003-01-06 02:15:00LARCENY/THEFTGRAND THEFT PICKPOCKETMondayTENDERLOINNONE600 Block of ELLIS ST-122.41689437.784286
8780242003-01-06 02:09:00OTHER OFFENSESVIOLATION OF MUNICIPAL POLICE CODEMondayPARKARREST, CITED600 Block of DIVISADERO ST-122.43778137.775483
8780252003-01-06 02:06:00OTHER OFFENSESTRAFFIC VIOLATIONMondayBAYVIEWARREST, BOOKEDNEWHALL ST / GALVEZ AV-122.38771037.740674
8780262003-01-06 02:06:00WARRANTSWARRANT ARRESTMondayBAYVIEWARREST, BOOKEDNEWHALL ST / GALVEZ AV-122.38771037.740674
8780272003-01-06 02:00:00WARRANTSENROUTE TO OUTSIDE JURISDICTIONMondaySOUTHERNARREST, BOOKED900 Block of MARKET ST-122.40970837.782828
8780282003-01-06 02:00:00ASSAULTAGGRAVATED ASSAULT WITH BODILY FORCEMondaySOUTHERNNONE6TH ST / MARKET ST-122.41029437.782231
8780292003-01-06 01:54:00OTHER OFFENSESPROBATION VIOLATIONMondayTENDERLOINARREST, BOOKED1400 Block of GOLDEN GATE AV-122.43442337.779193
8780302003-01-06 01:54:00SEX OFFENSES FORCIBLEFORCIBLE RAPE, BODILY FORCEMondayTENDERLOINARREST, BOOKED1400 Block of GOLDEN GATE AV-122.43442337.779193
8780312003-01-06 01:50:00ASSAULTBATTERYMondayBAYVIEWNONE3RD ST / NEWCOMB AV-122.39041737.735593
8780322003-01-06 01:36:00OTHER OFFENSESDRIVERS LICENSE, SUSPENDED OR REVOKEDMondayNORTHERNARREST, CITEDGEARY BL / FRANKLIN ST-122.42303137.785482
8780332003-01-06 01:30:00VANDALISMMALICIOUS MISCHIEF, VANDALISMMondayRICHMONDARREST, CITED1000 Block of 22ND AV-122.39166837.757793
8780342003-01-06 01:30:00TRESPASSTRESPASSINGMondayRICHMONDARREST, CITED1000 Block of 22ND AV-122.39166837.757793
8780352003-01-06 00:55:00ASSAULTBATTERYMondayNORTHERNNONE1300 Block of WEBSTER ST-122.43104637.783030
8780362003-01-06 00:55:00LARCENY/THEFTPETTY THEFT SHOPLIFTINGMondayNORTHERNNONE1300 Block of WEBSTER ST-122.43104637.783030
8780372003-01-06 00:55:00VANDALISMMALICIOUS MISCHIEF, VANDALISMMondayNORTHERNNONE1300 Block of WEBSTER ST-122.43104637.783030
8780382003-01-06 00:42:00WARRANTSENROUTE TO OUTSIDE JURISDICTIONMondayTENDERLOINARREST, BOOKEDTAYLOR ST / GEARY ST-122.41151937.786941
8780392003-01-06 00:40:00OTHER OFFENSESDRIVERS LICENSE, SUSPENDED OR REVOKEDMondayNORTHERNARREST, CITEDPOLK ST / CALIFORNIA ST-122.42069237.790577
8780402003-01-06 00:33:00ASSAULTINFLICT INJURY ON COHABITEEMondayMISSIONNONE2800 Block of FOLSOM ST-122.41407337.751685
8780412003-01-06 00:31:00OTHER OFFENSESDRIVERS LICENSE, SUSPENDED OR REVOKEDMondayRICHMONDARREST, CITEDCLEMENT ST / 14TH AV-122.47298537.782552
8780422003-01-06 00:20:00ASSAULTATTEMPTED HOMICIDE WITH A GUNMondayBAYVIEWARREST, BOOKED1500 Block of SHAFTER AV-122.38976937.730564
8780432003-01-06 00:20:00OTHER OFFENSESPAROLE VIOLATIONMondayBAYVIEWARREST, BOOKED1500 Block of SHAFTER AV-122.38976937.730564
8780442003-01-06 00:15:00ROBBERYROBBERY ON THE STREET WITH A GUNMondayTARAVALNONEFARALLONES ST / CAPITOL AV-122.45903337.714056
8780452003-01-06 00:01:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOMondayINGLESIDENONE600 Block of EDNA ST-122.44736437.731948
8780462003-01-06 00:01:00LARCENY/THEFTGRAND THEFT FROM LOCKED AUTOMondaySOUTHERNNONE5TH ST / FOLSOM ST-122.40339037.780266
8780472003-01-06 00:01:00VANDALISMMALICIOUS MISCHIEF, VANDALISM OF VEHICLESMondaySOUTHERNNONETOWNSEND ST / 2ND ST-122.39053137.780607
8780482003-01-06 00:01:00FORGERY/COUNTERFEITINGCHECKS, FORGERY (FELONY)MondayBAYVIEWNONE1800 Block of NEWCOMB AV-122.39492637.738212
\n", "

878049 rows × 9 columns

\n", "
" ], "text/plain": [ " Dates Category \\\n", "0 2015-05-13 23:53:00 WARRANTS \n", "1 2015-05-13 23:53:00 OTHER OFFENSES \n", "2 2015-05-13 23:33:00 OTHER OFFENSES \n", "3 2015-05-13 23:30:00 LARCENY/THEFT \n", "4 2015-05-13 23:30:00 LARCENY/THEFT \n", "5 2015-05-13 23:30:00 LARCENY/THEFT \n", "6 2015-05-13 23:30:00 VEHICLE THEFT \n", "7 2015-05-13 23:30:00 VEHICLE THEFT \n", "8 2015-05-13 23:00:00 LARCENY/THEFT \n", "9 2015-05-13 23:00:00 LARCENY/THEFT \n", "10 2015-05-13 22:58:00 LARCENY/THEFT \n", "11 2015-05-13 22:30:00 OTHER OFFENSES \n", "12 2015-05-13 22:30:00 VANDALISM \n", "13 2015-05-13 22:06:00 LARCENY/THEFT \n", "14 2015-05-13 22:00:00 NON-CRIMINAL \n", "15 2015-05-13 22:00:00 NON-CRIMINAL \n", "16 2015-05-13 22:00:00 ROBBERY \n", "17 2015-05-13 21:55:00 ASSAULT \n", "18 2015-05-13 21:40:00 OTHER OFFENSES \n", "19 2015-05-13 21:30:00 NON-CRIMINAL \n", "20 2015-05-13 21:30:00 LARCENY/THEFT \n", "21 2015-05-13 21:17:00 ROBBERY \n", "22 2015-05-13 21:11:00 WARRANTS \n", "23 2015-05-13 21:11:00 NON-CRIMINAL \n", "24 2015-05-13 21:10:00 LARCENY/THEFT \n", "25 2015-05-13 21:00:00 NON-CRIMINAL \n", "26 2015-05-13 21:00:00 LARCENY/THEFT \n", "27 2015-05-13 21:00:00 LARCENY/THEFT \n", "28 2015-05-13 21:00:00 LARCENY/THEFT \n", "29 2015-05-13 20:56:00 OTHER OFFENSES \n", "... ... ... 
\n", "878019 2003-01-06 02:37:00 OTHER OFFENSES \n", "878020 2003-01-06 02:32:00 OTHER OFFENSES \n", "878021 2003-01-06 02:24:00 VANDALISM \n", "878022 2003-01-06 02:16:00 VEHICLE THEFT \n", "878023 2003-01-06 02:15:00 LARCENY/THEFT \n", "878024 2003-01-06 02:09:00 OTHER OFFENSES \n", "878025 2003-01-06 02:06:00 OTHER OFFENSES \n", "878026 2003-01-06 02:06:00 WARRANTS \n", "878027 2003-01-06 02:00:00 WARRANTS \n", "878028 2003-01-06 02:00:00 ASSAULT \n", "878029 2003-01-06 01:54:00 OTHER OFFENSES \n", "878030 2003-01-06 01:54:00 SEX OFFENSES FORCIBLE \n", "878031 2003-01-06 01:50:00 ASSAULT \n", "878032 2003-01-06 01:36:00 OTHER OFFENSES \n", "878033 2003-01-06 01:30:00 VANDALISM \n", "878034 2003-01-06 01:30:00 TRESPASS \n", "878035 2003-01-06 00:55:00 ASSAULT \n", "878036 2003-01-06 00:55:00 LARCENY/THEFT \n", "878037 2003-01-06 00:55:00 VANDALISM \n", "878038 2003-01-06 00:42:00 WARRANTS \n", "878039 2003-01-06 00:40:00 OTHER OFFENSES \n", "878040 2003-01-06 00:33:00 ASSAULT \n", "878041 2003-01-06 00:31:00 OTHER OFFENSES \n", "878042 2003-01-06 00:20:00 ASSAULT \n", "878043 2003-01-06 00:20:00 OTHER OFFENSES \n", "878044 2003-01-06 00:15:00 ROBBERY \n", "878045 2003-01-06 00:01:00 LARCENY/THEFT \n", "878046 2003-01-06 00:01:00 LARCENY/THEFT \n", "878047 2003-01-06 00:01:00 VANDALISM \n", "878048 2003-01-06 00:01:00 FORGERY/COUNTERFEITING \n", "\n", " Descript DayOfWeek PdDistrict \\\n", "0 WARRANT ARREST Wednesday NORTHERN \n", "1 TRAFFIC VIOLATION ARREST Wednesday NORTHERN \n", "2 TRAFFIC VIOLATION ARREST Wednesday NORTHERN \n", "3 GRAND THEFT FROM LOCKED AUTO Wednesday NORTHERN \n", "4 GRAND THEFT FROM LOCKED AUTO Wednesday PARK \n", "5 GRAND THEFT FROM UNLOCKED AUTO Wednesday INGLESIDE \n", "6 STOLEN AUTOMOBILE Wednesday INGLESIDE \n", "7 STOLEN AUTOMOBILE Wednesday BAYVIEW \n", "8 GRAND THEFT FROM LOCKED AUTO Wednesday RICHMOND \n", "9 GRAND THEFT FROM LOCKED AUTO Wednesday CENTRAL \n", "10 PETTY THEFT FROM LOCKED AUTO Wednesday CENTRAL \n", "11 
MISCELLANEOUS INVESTIGATION Wednesday TARAVAL \n", "12 MALICIOUS MISCHIEF, VANDALISM OF VEHICLES Wednesday TENDERLOIN \n", "13 GRAND THEFT FROM LOCKED AUTO Wednesday NORTHERN \n", "14 FOUND PROPERTY Wednesday BAYVIEW \n", "15 FOUND PROPERTY Wednesday BAYVIEW \n", "16 ROBBERY, ARMED WITH A KNIFE Wednesday TENDERLOIN \n", "17 AGGRAVATED ASSAULT WITH BODILY FORCE Wednesday INGLESIDE \n", "18 TRAFFIC VIOLATION Wednesday BAYVIEW \n", "19 FOUND PROPERTY Wednesday TENDERLOIN \n", "20 GRAND THEFT FROM LOCKED AUTO Wednesday INGLESIDE \n", "21 ROBBERY, BODILY FORCE Wednesday INGLESIDE \n", "22 WARRANT ARREST Wednesday TENDERLOIN \n", "23 STAY AWAY OR COURT ORDER, NON-DV RELATED Wednesday TENDERLOIN \n", "24 GRAND THEFT FROM LOCKED AUTO Wednesday NORTHERN \n", "25 LOST PROPERTY Wednesday TENDERLOIN \n", "26 GRAND THEFT FROM LOCKED AUTO Wednesday NORTHERN \n", "27 GRAND THEFT FROM LOCKED AUTO Wednesday INGLESIDE \n", "28 ATTEMPTED THEFT FROM LOCKED VEHICLE Wednesday TARAVAL \n", "29 MISCELLANEOUS INVESTIGATION Wednesday TARAVAL \n", "... ... ... ... 
\n", "878019 DRIVERS LICENSE, SUSPENDED OR REVOKED Monday SOUTHERN \n", "878020 TRAFFIC VIOLATION Monday NORTHERN \n", "878021 MALICIOUS MISCHIEF Monday NORTHERN \n", "878022 RECOVERED VEHICLE - STOLEN OUTSIDE SF Monday MISSION \n", "878023 GRAND THEFT PICKPOCKET Monday TENDERLOIN \n", "878024 VIOLATION OF MUNICIPAL POLICE CODE Monday PARK \n", "878025 TRAFFIC VIOLATION Monday BAYVIEW \n", "878026 WARRANT ARREST Monday BAYVIEW \n", "878027 ENROUTE TO OUTSIDE JURISDICTION Monday SOUTHERN \n", "878028 AGGRAVATED ASSAULT WITH BODILY FORCE Monday SOUTHERN \n", "878029 PROBATION VIOLATION Monday TENDERLOIN \n", "878030 FORCIBLE RAPE, BODILY FORCE Monday TENDERLOIN \n", "878031 BATTERY Monday BAYVIEW \n", "878032 DRIVERS LICENSE, SUSPENDED OR REVOKED Monday NORTHERN \n", "878033 MALICIOUS MISCHIEF, VANDALISM Monday RICHMOND \n", "878034 TRESPASSING Monday RICHMOND \n", "878035 BATTERY Monday NORTHERN \n", "878036 PETTY THEFT SHOPLIFTING Monday NORTHERN \n", "878037 MALICIOUS MISCHIEF, VANDALISM Monday NORTHERN \n", "878038 ENROUTE TO OUTSIDE JURISDICTION Monday TENDERLOIN \n", "878039 DRIVERS LICENSE, SUSPENDED OR REVOKED Monday NORTHERN \n", "878040 INFLICT INJURY ON COHABITEE Monday MISSION \n", "878041 DRIVERS LICENSE, SUSPENDED OR REVOKED Monday RICHMOND \n", "878042 ATTEMPTED HOMICIDE WITH A GUN Monday BAYVIEW \n", "878043 PAROLE VIOLATION Monday BAYVIEW \n", "878044 ROBBERY ON THE STREET WITH A GUN Monday TARAVAL \n", "878045 GRAND THEFT FROM LOCKED AUTO Monday INGLESIDE \n", "878046 GRAND THEFT FROM LOCKED AUTO Monday SOUTHERN \n", "878047 MALICIOUS MISCHIEF, VANDALISM OF VEHICLES Monday SOUTHERN \n", "878048 CHECKS, FORGERY (FELONY) Monday BAYVIEW \n", "\n", " Resolution Address X Y \n", "0 ARREST, BOOKED OAK ST / LAGUNA ST -122.425892 37.774599 \n", "1 ARREST, BOOKED OAK ST / LAGUNA ST -122.425892 37.774599 \n", "2 ARREST, BOOKED VANNESS AV / GREENWICH ST -122.424363 37.800414 \n", "3 NONE 1500 Block of LOMBARD ST -122.426995 37.800873 \n", "4 NONE 100 Block of 
BRODERICK ST -122.438738 37.771541 \n", "5 NONE 0 Block of TEDDY AV -122.403252 37.713431 \n", "6 NONE AVALON AV / PERU AV -122.423327 37.725138 \n", "7 NONE KIRKWOOD AV / DONAHUE ST -122.371274 37.727564 \n", "8 NONE 600 Block of 47TH AV -122.508194 37.776601 \n", "9 NONE JEFFERSON ST / LEAVENWORTH ST -122.419088 37.807802 \n", "10 NONE JEFFERSON ST / LEAVENWORTH ST -122.419088 37.807802 \n", "11 NONE 0 Block of ESCOLTA WY -122.487983 37.737667 \n", "12 NONE TURK ST / JONES ST -122.412414 37.783004 \n", "13 NONE FILLMORE ST / GEARY BL -122.432915 37.784353 \n", "14 NONE 200 Block of WILLIAMS AV -122.397744 37.729935 \n", "15 NONE 0 Block of MENDELL ST -122.383692 37.743189 \n", "16 NONE EDDY ST / JONES ST -122.412597 37.783932 \n", "17 NONE GODEUS ST / MISSION ST -122.421682 37.742822 \n", "18 ARREST, BOOKED MENDELL ST / HUDSON AV -122.386401 37.738983 \n", "19 NONE 100 Block of JONES ST -122.412250 37.782556 \n", "20 NONE 200 Block of EVELYN WY -122.449389 37.742669 \n", "21 NONE 1600 Block of VALENCIA ST -122.420272 37.747332 \n", "22 NONE 100 Block of JONES ST -122.412250 37.782556 \n", "23 NONE 100 Block of JONES ST -122.412250 37.782556 \n", "24 NONE FILLMORE ST / LOMBARD ST -122.436049 37.799841 \n", "25 NONE 300 Block of OFARRELL ST -122.410509 37.786043 \n", "26 NONE 2000 Block of BUSH ST -122.431018 37.787388 \n", "27 NONE 500 Block of COLLEGE AV -122.423656 37.732556 \n", "28 NONE 19TH AV / SANTIAGO ST -122.475773 37.744919 \n", "29 NONE 2000 Block of 41ST AV -122.499787 37.748518 \n", "... ... ... ... ... 
\n", "878019 ARREST, CITED 6TH ST / MARKET ST -122.410294 37.782231 \n", "878020 ARREST, CITED VAN NESS AV / TURK ST -122.420642 37.781961 \n", "878021 NOT PROSECUTED SANCHEZ ST / 14TH ST -122.431191 37.767595 \n", "878022 NONE 17TH ST / MISSION ST -122.419516 37.763429 \n", "878023 NONE 600 Block of ELLIS ST -122.416894 37.784286 \n", "878024 ARREST, CITED 600 Block of DIVISADERO ST -122.437781 37.775483 \n", "878025 ARREST, BOOKED NEWHALL ST / GALVEZ AV -122.387710 37.740674 \n", "878026 ARREST, BOOKED NEWHALL ST / GALVEZ AV -122.387710 37.740674 \n", "878027 ARREST, BOOKED 900 Block of MARKET ST -122.409708 37.782828 \n", "878028 NONE 6TH ST / MARKET ST -122.410294 37.782231 \n", "878029 ARREST, BOOKED 1400 Block of GOLDEN GATE AV -122.434423 37.779193 \n", "878030 ARREST, BOOKED 1400 Block of GOLDEN GATE AV -122.434423 37.779193 \n", "878031 NONE 3RD ST / NEWCOMB AV -122.390417 37.735593 \n", "878032 ARREST, CITED GEARY BL / FRANKLIN ST -122.423031 37.785482 \n", "878033 ARREST, CITED 1000 Block of 22ND AV -122.391668 37.757793 \n", "878034 ARREST, CITED 1000 Block of 22ND AV -122.391668 37.757793 \n", "878035 NONE 1300 Block of WEBSTER ST -122.431046 37.783030 \n", "878036 NONE 1300 Block of WEBSTER ST -122.431046 37.783030 \n", "878037 NONE 1300 Block of WEBSTER ST -122.431046 37.783030 \n", "878038 ARREST, BOOKED TAYLOR ST / GEARY ST -122.411519 37.786941 \n", "878039 ARREST, CITED POLK ST / CALIFORNIA ST -122.420692 37.790577 \n", "878040 NONE 2800 Block of FOLSOM ST -122.414073 37.751685 \n", "878041 ARREST, CITED CLEMENT ST / 14TH AV -122.472985 37.782552 \n", "878042 ARREST, BOOKED 1500 Block of SHAFTER AV -122.389769 37.730564 \n", "878043 ARREST, BOOKED 1500 Block of SHAFTER AV -122.389769 37.730564 \n", "878044 NONE FARALLONES ST / CAPITOL AV -122.459033 37.714056 \n", "878045 NONE 600 Block of EDNA ST -122.447364 37.731948 \n", "878046 NONE 5TH ST / FOLSOM ST -122.403390 37.780266 \n", "878047 NONE TOWNSEND ST / 2ND ST -122.390531 37.780607 \n", 
"878048 NONE 1800 Block of NEWCOMB AV -122.394926 37.738212 \n", "\n", "[878049 rows x 9 columns]" ] }, "execution_count": 1, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Kaggle San Francisco crime classification problem: https://www.kaggle.com/c/sf-crime\n", "# IPython notebook written as the worked example for a blog post on naive Bayes classifiers.\n", "# Blog post: http://blog.csdn.net/han_xiaoyang/article/details/50629608 (suggestions welcome)\n", "import os\n", "\n", "import pandas as pd\n", "import numpy as np\n", "\n", "# Directory holding train.csv and test.csv. Override it with the SF_CRIME_DATA_DIR\n", "# environment variable; the default is the path this notebook originally hardcoded.\n", "DATA_DIR = os.environ.get('SF_CRIME_DATA_DIR', '/Users/Hanxiaoyang/sf_crime_data')\n", "\n", "# Load the CSV data with pandas, parsing the 'Dates' column as datetimes.\n", "train = pd.read_csv(os.path.join(DATA_DIR, 'train.csv'), parse_dates=['Dates'])\n", "test = pd.read_csv(os.path.join(DATA_DIR, 'test.csv'), parse_dates=['Dates'])\n", "# Bare last expression: show the training frame as the cell output.\n", "train" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
0123456789...CENTRALINGLESIDEMISSIONNORTHERNPARKRICHMONDSOUTHERNTARAVALTENDERLOINcrime
00000000000...00010000037
10000000000...00010000021
20000000000...00010000021
30000000000...00010000016
40000000000...00001000016
50000000000...01000000016
60000000000...01000000036
70000000000...00000000036
80000000000...00000100016
90000000000...10000000016
100000000000...10000000016
110000000000...00000001021
120000000000...00000000135
130000000000...00010000016
140000000000...00000000020
150000000000...00000000020
160000000000...00000000125
170000000000...0100000001
180000000000...00000000021
190000000000...00000000120
200000000000...01000000016
210000000000...01000000025
220000000000...00000000137
230000000000...00000000120
240000000000...00010000016
250000000000...00000000120
260000000000...00010000016
270000000000...01000000016
280000000000...00000001016
290000000000...00000001021
..................................................................
8780190010000000...00000010021
8780200010000000...00010000021
8780210010000000...00010000035
8780220010000000...00100000036
8780230010000000...00000000116
8780240010000000...00001000021
8780250010000000...00000000021
8780260010000000...00000000037
8780270010000000...00000010037
8780280010000000...0000001001
8780290100000000...00000000121
8780300100000000...00000000128
8780310100000000...0000000001
8780320100000000...00010000021
8780330100000000...00000100035
8780340100000000...00000100034
8780351000000000...0001000001
8780361000000000...00010000016
8780371000000000...00010000035
8780381000000000...00000000137
8780391000000000...00010000021
8780401000000000...0010000001
8780411000000000...00000100021
8780421000000000...0000000001
8780431000000000...00000000021
8780441000000000...00000001025
8780451000000000...01000000016
8780461000000000...00000010016
8780471000000000...00000010035
8780481000000000...00000000012
\n", "

878049 rows × 42 columns

\n", "
" ], "text/plain": [ " 0 1 2 3 4 5 6 7 8 9 ... CENTRAL INGLESIDE MISSION \\\n", "0 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "1 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "2 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "3 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "4 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "5 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 \n", "6 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 \n", "7 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "8 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "9 0 0 0 0 0 0 0 0 0 0 ... 1 0 0 \n", "10 0 0 0 0 0 0 0 0 0 0 ... 1 0 0 \n", "11 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "12 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "13 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "14 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "15 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "16 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "17 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 \n", "18 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "19 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "20 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 \n", "21 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 \n", "22 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "23 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "24 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "25 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "26 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "27 0 0 0 0 0 0 0 0 0 0 ... 0 1 0 \n", "28 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "29 0 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "... .. .. .. .. .. .. .. .. .. .. ... ... ... ... \n", "878019 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878020 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878021 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878022 0 0 1 0 0 0 0 0 0 0 ... 0 0 1 \n", "878023 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878024 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878025 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878026 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878027 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878028 0 0 1 0 0 0 0 0 0 0 ... 0 0 0 \n", "878029 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878030 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878031 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878032 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878033 0 1 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878034 0 1 0 0 0 0 0 0 0 0 ... 
0 0 0 \n", "878035 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878036 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878037 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878038 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878039 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878040 1 0 0 0 0 0 0 0 0 0 ... 0 0 1 \n", "878041 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878042 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878043 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878044 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878045 1 0 0 0 0 0 0 0 0 0 ... 0 1 0 \n", "878046 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878047 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "878048 1 0 0 0 0 0 0 0 0 0 ... 0 0 0 \n", "\n", " NORTHERN PARK RICHMOND SOUTHERN TARAVAL TENDERLOIN crime \n", "0 1 0 0 0 0 0 37 \n", "1 1 0 0 0 0 0 21 \n", "2 1 0 0 0 0 0 21 \n", "3 1 0 0 0 0 0 16 \n", "4 0 1 0 0 0 0 16 \n", "5 0 0 0 0 0 0 16 \n", "6 0 0 0 0 0 0 36 \n", "7 0 0 0 0 0 0 36 \n", "8 0 0 1 0 0 0 16 \n", "9 0 0 0 0 0 0 16 \n", "10 0 0 0 0 0 0 16 \n", "11 0 0 0 0 1 0 21 \n", "12 0 0 0 0 0 1 35 \n", "13 1 0 0 0 0 0 16 \n", "14 0 0 0 0 0 0 20 \n", "15 0 0 0 0 0 0 20 \n", "16 0 0 0 0 0 1 25 \n", "17 0 0 0 0 0 0 1 \n", "18 0 0 0 0 0 0 21 \n", "19 0 0 0 0 0 1 20 \n", "20 0 0 0 0 0 0 16 \n", "21 0 0 0 0 0 0 25 \n", "22 0 0 0 0 0 1 37 \n", "23 0 0 0 0 0 1 20 \n", "24 1 0 0 0 0 0 16 \n", "25 0 0 0 0 0 1 20 \n", "26 1 0 0 0 0 0 16 \n", "27 0 0 0 0 0 0 16 \n", "28 0 0 0 0 1 0 16 \n", "29 0 0 0 0 1 0 21 \n", "... ... ... ... ... ... ... ... 
\n", "878019 0 0 0 1 0 0 21 \n", "878020 1 0 0 0 0 0 21 \n", "878021 1 0 0 0 0 0 35 \n", "878022 0 0 0 0 0 0 36 \n", "878023 0 0 0 0 0 1 16 \n", "878024 0 1 0 0 0 0 21 \n", "878025 0 0 0 0 0 0 21 \n", "878026 0 0 0 0 0 0 37 \n", "878027 0 0 0 1 0 0 37 \n", "878028 0 0 0 1 0 0 1 \n", "878029 0 0 0 0 0 1 21 \n", "878030 0 0 0 0 0 1 28 \n", "878031 0 0 0 0 0 0 1 \n", "878032 1 0 0 0 0 0 21 \n", "878033 0 0 1 0 0 0 35 \n", "878034 0 0 1 0 0 0 34 \n", "878035 1 0 0 0 0 0 1 \n", "878036 1 0 0 0 0 0 16 \n", "878037 1 0 0 0 0 0 35 \n", "878038 0 0 0 0 0 1 37 \n", "878039 1 0 0 0 0 0 21 \n", "878040 0 0 0 0 0 0 1 \n", "878041 0 0 1 0 0 0 21 \n", "878042 0 0 0 0 0 0 1 \n", "878043 0 0 0 0 0 0 21 \n", "878044 0 0 0 0 1 0 25 \n", "878045 0 0 0 0 0 0 16 \n", "878046 0 0 0 1 0 0 16 \n", "878047 0 0 0 1 0 0 35 \n", "878048 0 0 0 0 0 0 12 \n", "\n", "[878049 rows x 42 columns]" ] }, "execution_count": 2, "metadata": {}, "output_type": "execute_result" } ], "source": [
 "import pandas as pd\n",
 "import numpy as np\n",
 "from sklearn import preprocessing\n",
 "\n",
 "# sklearn.cross_validation was removed in scikit-learn 0.20; prefer its\n",
 "# replacement and fall back to the old module on older installations.\n",
 "try:\n",
 "    from sklearn.model_selection import train_test_split\n",
 "except ImportError:\n",
 "    from sklearn.cross_validation import train_test_split\n",
 "\n",
 "\n",
 "def buildFeatures(frame):\n",
 "    \"\"\"One-hot encode hour of day, day of week and police district.\n",
 "\n",
 "    `frame` needs a datetime-like `Dates` column (its hour is read through\n",
 "    the `.dt` accessor) and `DayOfWeek` / `PdDistrict` columns. Returns a new\n",
 "    DataFrame of indicator columns ordered hour, day, district.\n",
 "    \"\"\"\n",
 "    hour = pd.get_dummies(frame.Dates.dt.hour)\n",
 "    days = pd.get_dummies(frame.DayOfWeek)\n",
 "    district = pd.get_dummies(frame.PdDistrict)\n",
 "    return pd.concat([hour, days, district], axis=1)\n",
 "\n",
 "\n",
 "# Number the crime categories with a LabelEncoder; the fitted encoder stays in\n",
 "# leCrime so the integer ids can be mapped back to category names.\n",
 "leCrime = preprocessing.LabelEncoder()\n",
 "crime = leCrime.fit_transform(train.Category)\n",
 "\n",
 "# Training matrix: indicator features plus the encoded target column.\n",
 "trainData = buildFeatures(train)\n",
 "featureColumns = trainData.columns\n",
 "trainData['crime'] = crime\n",
 "\n",
 "# Same processing for the test data. get_dummies only emits columns for values\n",
 "# that occur in the frame it is given, so force the training column layout:\n",
 "# a column absent from the test set is added as all zeros.\n",
 "testData = buildFeatures(test).reindex(columns=featureColumns, fill_value=0)\n",
 "trainData"
 ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "朴素贝叶斯建模耗时 0.655217 秒\n", "朴素贝叶斯log损失为 
2.613483\n", "逻辑回归建模耗时 45.460170 秒\n", "逻辑回归log损失为 2.620500\n" ] } ], "source": [
 "import time\n",
 "\n",
 "import numpy as np\n",
 "from sklearn import preprocessing\n",
 "from sklearn.metrics import log_loss\n",
 "from sklearn.naive_bayes import BernoulliNB\n",
 "from sklearn.linear_model import LogisticRegression\n",
 "\n",
 "# sklearn.cross_validation was removed in scikit-learn 0.20; prefer its\n",
 "# replacement and fall back to the old module on older installations.\n",
 "try:\n",
 "    from sklearn.model_selection import train_test_split\n",
 "except ImportError:\n",
 "    from sklearn.cross_validation import train_test_split\n",
 "\n",
 "# Seed for the train/validation split, shared by every experiment so that\n",
 "# different feature sets are scored on the same validation rows.\n",
 "SPLIT_SEED = 0\n",
 "\n",
 "\n",
 "def fitAndScore(model, training, validation, features):\n",
 "    \"\"\"Fit `model` on `training` and return (fitSeconds, logLoss).\n",
 "\n",
 "    Only the call to `fit` is timed. The log loss is computed from the class\n",
 "    probabilities predicted for `validation`, with its 'crime' column as the\n",
 "    true labels.\n",
 "    \"\"\"\n",
 "    start = time.time()\n",
 "    model.fit(training[features], training['crime'])\n",
 "    fitSeconds = time.time() - start\n",
 "    predicted = np.array(model.predict_proba(validation[features]))\n",
 "    return fitSeconds, log_loss(validation['crime'], predicted)\n",
 "\n",
 "\n",
 "def compareModels(data, features, seed=SPLIT_SEED):\n",
 "    \"\"\"Print fit time and validation log loss for naive Bayes and logistic regression.\n",
 "\n",
 "    `data` is split into training (3/5) and validation (2/5) rows; both models\n",
 "    are trained on the `features` columns with 'crime' as the target.\n",
 "    \"\"\"\n",
 "    # Both sizes are spelled out so the 60/40 split does not rely on how a\n",
 "    # given scikit-learn release defaults test_size.\n",
 "    training, validation = train_test_split(\n",
 "        data, train_size=.60, test_size=.40, random_state=seed)\n",
 "\n",
 "    # print(...) with a single argument prints the same text under the\n",
 "    # Python 2 kernel and under Python 3.\n",
 "    nbCostTime, nbLoss = fitAndScore(BernoulliNB(), training, validation, features)\n",
 "    print(\"朴素贝叶斯建模耗时 %f 秒\" % nbCostTime)\n",
 "    print(\"朴素贝叶斯log损失为 %f\" % nbLoss)\n",
 "\n",
 "    lrCostTime, lrLoss = fitAndScore(LogisticRegression(C=.01), training, validation, features)\n",
 "    print(\"逻辑回归建模耗时 %f 秒\" % lrCostTime)\n",
 "    print(\"逻辑回归log损失为 %f\" % lrLoss)\n",
 "\n",
 "\n",
 "# Use only day of week and police district as classifier inputs.\n",
 "features = ['Monday', 'Tuesday', 'Wednesday', 'Thursday', 'Friday', 'Saturday', 'Sunday', 'BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION',\n",
 "            'NORTHERN', 'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN']\n",
 "\n",
 "compareModels(trainData, features)"
 ] }, { "cell_type": "code", "execution_count": 4, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "朴素贝叶斯建模耗时 1.350199 秒\n", "朴素贝叶斯log损失为 2.582355\n", "逻辑回归建模耗时 60.785606 秒\n", "逻辑回归log损失为 2.591964\n" ] } ], "source": [
 "# Add the hour of the crime as a feature on top of day of week and district.\n",
 "# compareModels (defined in the previous cell) reuses the same seeded split,\n",
 "# so the numbers below are directly comparable with the previous experiment.\n",
 "features = ['Friday', 'Monday', 'Saturday', 'Sunday', 'Thursday', 'Tuesday',\n",
 "            'Wednesday', 'BAYVIEW', 'CENTRAL', 'INGLESIDE', 'MISSION',\n",
 "            'NORTHERN', 'PARK', 'RICHMOND', 'SOUTHERN', 'TARAVAL', 'TENDERLOIN']\n",
 "\n",
 "# The hour indicator columns are labelled with the integers 0..23.\n",
 "hourFea = list(range(24))\n",
 "features = features + hourFea\n",
 "\n",
 "compareModels(trainData, features)"
 ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 2", "language": "python", "name": "python2" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 2 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython2", "version": "2.7.9" } }, "nbformat": 4, "nbformat_minor": 0 }