{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import dask.distributed\n", "import dask.dataframe as dd\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "client = dask.distributed.Client('localhost:8786')" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "" ] }, "execution_count": 16, "metadata": {}, "output_type": "execute_result" } ], "source": [ "client.restart()" ] }, { "cell_type": "code", "execution_count": 17, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df = dd.read_parquet('/bigdata/citibike.parquet')\n", "# df = client.persist(df.repartition(npartitions=20))" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
trip_durationstart_timestop_timestart_station_idstart_station_namestart_station_latitudestart_station_longitudeend_station_idend_station_nameend_station_latitudeend_station_longitudebike_iduser_typebirth_yeargender
06342013-07-01 00:00:002013-07-01 00:10:34164E 47 St & 2 Ave40.753231-73.9703225041 Ave & E 15 St40.732220-73.98165916950CustomerNaN0
115472013-07-01 00:00:022013-07-01 00:25:49388W 26 St & 10 Ave40.749718-74.002953459W 20 St & 11 Ave40.746746-74.00775919816CustomerNaN0
21782013-07-01 00:01:042013-07-01 00:04:02293Lafayette St & E 8 St40.730286-73.990768237E 11 St & 2 Ave40.730473-73.98672514548Subscriber1980.02
315802013-07-01 00:01:062013-07-01 00:27:26531Forsyth St & Broome St40.718941-73.992661499Broadway & W 60 St40.769154-73.98191816063CustomerNaN0
47572013-07-01 00:01:102013-07-01 00:13:47382University Pl & E 14 St40.734928-73.992004410Suffolk St & Stanton St40.720665-73.98517619213Subscriber1986.01
\n", "
" ], "text/plain": [ " trip_duration start_time stop_time start_station_id \\\n", "0 634 2013-07-01 00:00:00 2013-07-01 00:10:34 164 \n", "1 1547 2013-07-01 00:00:02 2013-07-01 00:25:49 388 \n", "2 178 2013-07-01 00:01:04 2013-07-01 00:04:02 293 \n", "3 1580 2013-07-01 00:01:06 2013-07-01 00:27:26 531 \n", "4 757 2013-07-01 00:01:10 2013-07-01 00:13:47 382 \n", "\n", " start_station_name start_station_latitude start_station_longitude \\\n", "0 E 47 St & 2 Ave 40.753231 -73.970322 \n", "1 W 26 St & 10 Ave 40.749718 -74.002953 \n", "2 Lafayette St & E 8 St 40.730286 -73.990768 \n", "3 Forsyth St & Broome St 40.718941 -73.992661 \n", "4 University Pl & E 14 St 40.734928 -73.992004 \n", "\n", " end_station_id end_station_name end_station_latitude \\\n", "0 504 1 Ave & E 15 St 40.732220 \n", "1 459 W 20 St & 11 Ave 40.746746 \n", "2 237 E 11 St & 2 Ave 40.730473 \n", "3 499 Broadway & W 60 St 40.769154 \n", "4 410 Suffolk St & Stanton St 40.720665 \n", "\n", " end_station_longitude bike_id user_type birth_year gender \n", "0 -73.981659 16950 Customer NaN 0 \n", "1 -74.007759 19816 Customer NaN 0 \n", "2 -73.986725 14548 Subscriber 1980.0 2 \n", "3 -73.981918 16063 Customer NaN 0 \n", "4 -73.985176 19213 Subscriber 1986.0 1 " ] }, "execution_count": 18, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.head()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "all_ids_in_dataset = df.start_station_id.unique().compute().values.tolist() + df.end_station_id.unique().compute().values.tolist()\n", "all_ids_in_dataset.sort()" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# Import the submodule explicitly: a bare 'import urllib' does not guarantee\n", "# that urllib.request is loaded, and the station-feed cell calls urllib.request.urlopen.\n", "import json\n", "import urllib.request" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "scrolled": false }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
altitudeavailableBikesavailableDockscityidlandMarklastCommunicationTimelatitudelocationlongitudepostalCodestAddress1stAddress2stationNamestatusKeystatusValuetestStationtotalDocks
0137722017-04-19 12:56:59 PM40.767272-73.993929W 52 St & 11 AveW 52 St & 11 Ave1In ServiceFalse39
1276792017-04-19 12:56:18 PM40.719116-74.006667Franklin St & W BroadwayFranklin St & W Broadway1In ServiceFalse33
200822017-04-13 12:05:52 PM40.711174-74.000165St James Pl & Pearl StSt James Pl & Pearl St3Not In ServiceFalse0
33326832017-04-19 12:58:16 PM40.683826-73.976323Atlantic Ave & Fort Greene PlAtlantic Ave & Fort Greene Pl1In ServiceFalse62
412261162017-04-19 12:58:08 PM40.741776-74.001497W 17 St & 8 AveW 17 St & 8 Ave1In ServiceFalse39
\n", "
" ], "text/plain": [ " altitude availableBikes availableDocks city id landMark \\\n", "0 1 37 72 \n", "1 27 6 79 \n", "2 0 0 82 \n", "3 33 26 83 \n", "4 12 26 116 \n", "\n", " lastCommunicationTime latitude location longitude postalCode \\\n", "0 2017-04-19 12:56:59 PM 40.767272 -73.993929 \n", "1 2017-04-19 12:56:18 PM 40.719116 -74.006667 \n", "2 2017-04-13 12:05:52 PM 40.711174 -74.000165 \n", "3 2017-04-19 12:58:16 PM 40.683826 -73.976323 \n", "4 2017-04-19 12:58:08 PM 40.741776 -74.001497 \n", "\n", " stAddress1 stAddress2 stationName \\\n", "0 W 52 St & 11 Ave W 52 St & 11 Ave \n", "1 Franklin St & W Broadway Franklin St & W Broadway \n", "2 St James Pl & Pearl St St James Pl & Pearl St \n", "3 Atlantic Ave & Fort Greene Pl Atlantic Ave & Fort Greene Pl \n", "4 W 17 St & 8 Ave W 17 St & 8 Ave \n", "\n", " statusKey statusValue testStation totalDocks \n", "0 1 In Service False 39 \n", "1 1 In Service False 33 \n", "2 3 Not In Service False 0 \n", "3 1 In Service False 62 \n", "4 1 In Service False 39 " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "zz = pd.DataFrame(json.loads(\n", " (urllib.request.urlopen('https://feeds.citibikenyc.com/stations/stations.json').read()).decode('utf-8')\n", " )['stationBeanList'])\n", "zz.head()" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(36902025,)" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df.start_station_id.compute().shape" ] }, { "cell_type": "code", "execution_count": 21, "metadata": { "collapsed": true }, "outputs": [], "source": [ "df_s = df[['start_station_id', 'start_station_name', 'start_station_latitude', 'start_station_longitude']]\n", "df_e = df[['end_station_id', 'end_station_name', 'end_station_latitude', 'end_station_longitude']]" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [], "source": [ "df_s = 
df_s[(df_s.start_station_latitude > 40.) & (df_s.start_station_latitude < 41.)]\n", "df_s = df_s[(df_s.start_station_longitude + 74.0 > -0.25) & (df_s.start_station_longitude + 74.0 < 0.25)]\n", "\n", "df_e = df_e[(df_e.end_station_latitude > 40.) & (df_e.end_station_latitude < 41.)]\n", "df_e = df_e[(df_e.end_station_longitude + 74.0 > -0.25) & (df_e.end_station_longitude + 74.0 < 0.25)]\n" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
start_station_idstart_station_namestart_station_latitudestart_station_longitude
0164E 47 St & 2 Ave40.753231-73.970322
1388W 26 St & 10 Ave40.749718-74.002953
2293Lafayette St & E 8 St40.730286-73.990768
3531Forsyth St & Broome St40.718941-73.992661
4382University Pl & E 14 St40.734928-73.992004
\n", "
" ], "text/plain": [ " start_station_id start_station_name start_station_latitude \\\n", "0 164 E 47 St & 2 Ave 40.753231 \n", "1 388 W 26 St & 10 Ave 40.749718 \n", "2 293 Lafayette St & E 8 St 40.730286 \n", "3 531 Forsyth St & Broome St 40.718941 \n", "4 382 University Pl & E 14 St 40.734928 \n", "\n", " start_station_longitude \n", "0 -73.970322 \n", "1 -74.002953 \n", "2 -73.990768 \n", "3 -73.992661 \n", "4 -73.992004 " ] }, "execution_count": 23, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_s.head()" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "a1 = df_s.groupby(['start_station_id', 'start_station_name']).mean().compute()\n", "a2 = df_e.groupby(['end_station_id', 'end_station_name']).mean().compute()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "b1 = a1.copy()\n", "b2 = a2.copy()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "b1.index = b1.index.rename(['id', 'name'])\n", "b1.columns = ['lat', 'lon']\n", "b2.index = b2.index.rename(['id', 'name'])\n", "b2.columns = ['lat', 'lon']" ] }, { "cell_type": "code", "execution_count": 83, "metadata": {}, "outputs": [], "source": [ "# pd.concat replaces DataFrame.append, which was removed in pandas 2.0; the result is the same.\n", "# NOTE: drop_duplicates() runs while id/name are still the index, so it compares only lat/lon;\n", "# a station whose start-side and end-side mean coordinates differ slightly is kept twice.\n", "c = pd.concat([b1, b2]).drop_duplicates().reset_index().sort_values('id').reset_index(drop=True)" ] }, { "cell_type": "code", "execution_count": 106, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
start_station_idstart_station_nameend_station_idend_station_name
2660523239Bressler3239Bressler
2686113239Bressler3239Bressler
2688023239Bressler3239Bressler
2691953239Bressler3239Bressler
3419273239Bressler3239Bressler
3433183239Bressler3239Bressler
3478613239Bressler3239Bressler
3650813239Bressler3239Bressler
3737183239Bressler3239Bressler
3747933239Bressler3239Bressler
3754503239Bressler3239Bressler
66673239Bressler3239Bressler
200793239Bressler3239Bressler
214583239Bressler3239Bressler
230073239Bressler3239Bressler
233823239Bressler3239Bressler
241123239Bressler3239Bressler
257913239Bressler3239Bressler
\n", "
" ], "text/plain": [ " start_station_id start_station_name end_station_id end_station_name\n", "266052 3239 Bressler 3239 Bressler\n", "268611 3239 Bressler 3239 Bressler\n", "268802 3239 Bressler 3239 Bressler\n", "269195 3239 Bressler 3239 Bressler\n", "341927 3239 Bressler 3239 Bressler\n", "343318 3239 Bressler 3239 Bressler\n", "347861 3239 Bressler 3239 Bressler\n", "365081 3239 Bressler 3239 Bressler\n", "373718 3239 Bressler 3239 Bressler\n", "374793 3239 Bressler 3239 Bressler\n", "375450 3239 Bressler 3239 Bressler\n", "6667 3239 Bressler 3239 Bressler\n", "20079 3239 Bressler 3239 Bressler\n", "21458 3239 Bressler 3239 Bressler\n", "23007 3239 Bressler 3239 Bressler\n", "23382 3239 Bressler 3239 Bressler\n", "24112 3239 Bressler 3239 Bressler\n", "25791 3239 Bressler 3239 Bressler" ] }, "execution_count": 106, "metadata": {}, "output_type": "execute_result" } ], "source": [ "mm = (df[['start_station_id', 'start_station_name', 'end_station_id', 'end_station_name']])[(df.start_station_id==3239) | (df.end_station_id == 3239)].compute()\n", "mm" ] }, { "cell_type": "code", "execution_count": 85, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idnamelatlon
10743239Bressler40.646539-74.016588
10833245Kiosk in a box Motivate40.646677-74.016298
7343040SSP Tech Workshop40.646678-74.016262
7333040SSP Tech Workshop40.646679-74.016269
116255NYCBS Depot - SSP40.646765-74.016513
117255NYCBS Depot - SSP40.646766-74.016510
73130368D OPS 0140.651489-74.026636
13843423West Drive & Prospect Park West40.661064-73.979458
13853423West Drive & Prospect Park West40.661066-73.979451
14003432Bike in Movie Night | Prospect Park Bandshell40.662987-73.976919
14013432Bike in Movie Night | Prospect Park Bandshell40.662988-73.976922
1172331014 St & 7 Ave40.663779-73.983970
1173331014 St & 7 Ave40.663781-73.983969
11533300Prospect Park West & 8 St40.665146-73.976375
11523300Prospect Park West & 8 St40.665147-73.976377
1165330610 St & 7 Ave40.666206-73.982001
1164330610 St & 7 Ave40.666208-73.982001
1188331914 St & 5 Ave40.666285-73.988950
1189331914 St & 5 Ave40.666287-73.988959
117833136 Ave & 12 St40.666319-73.985456
117933136 Ave & 12 St40.666319-73.985458
116033046 Ave & 9 St40.668124-73.983782
116133046 Ave & 9 St40.668125-73.983779
125233543 St & Prospect Park West40.668132-73.973639
125333543 St & Prospect Park West40.668133-73.973642
119833243 Ave & 14 St40.668544-73.993327
119933243 Ave & 14 St40.668544-73.993333
1194332212 St & 4 Ave40.668601-73.990444
1195332212 St & 4 Ave40.668602-73.990439
1186331710 St & 5 Ave40.668628-73.987002
...............
11813314W 95 St & Broadway40.793769-73.971884
11803314W 95 St & Broadway40.793773-73.971886
11663307West End Ave & W 94 St40.794165-73.974123
11673307West End Ave & W 94 St40.794168-73.974121
12073328W 100 St & Manhattan Ave40.794996-73.964502
12063328W 100 St & Manhattan Ave40.795000-73.964500
12293341Central Park West & W 102 St40.795345-73.961860
12283341Central Park West & W 102 St40.795347-73.961864
13413400E 110 St & Madison Ave40.796155-73.947823
13403400E 110 St & Madison Ave40.796157-73.947824
11573302Columbus Ave & W 103 St40.796935-73.964336
11563302Columbus Ave & W 103 St40.796937-73.964344
12463350W 100 St & Broadway40.797368-73.970411
12473350W 100 St & Broadway40.797371-73.970416
11963323W 106 St & Central Park West40.798183-73.960594
11973323W 106 St & Central Park West40.798191-73.960598
11853316W 104 St & Amsterdam Ave40.798991-73.966219
11843316W 104 St & Amsterdam Ave40.798991-73.966217
12913374Central Park North & Adam Clayton Powell Blvd40.799484-73.955612
12923374Central Park North & Adam Clayton Powell Blvd40.799486-73.955614
12323343W 107 St & Columbus Ave40.799754-73.962110
12333343W 107 St & Columbus Ave40.799755-73.962108
12593357W 106 St & Amsterdam Ave40.800833-73.966447
12583357W 106 St & Amsterdam Ave40.800834-73.966450
12133331Riverside Dr & W 104 St40.801342-73.971141
12123331Riverside Dr & W 104 St40.801345-73.971141
12773366West End Ave & W 107 St40.802117-73.968177
12763366West End Ave & W 107 St40.802117-73.968177
13073383Cathedral Pkwy & Broadway40.804216-73.966983
13083383Cathedral Pkwy & Broadway40.804218-73.966992
\n", "

1413 rows × 4 columns

\n", "
" ], "text/plain": [ " id name lat \\\n", "1074 3239 Bressler 40.646539 \n", "1083 3245 Kiosk in a box Motivate 40.646677 \n", "734 3040 SSP Tech Workshop 40.646678 \n", "733 3040 SSP Tech Workshop 40.646679 \n", "116 255 NYCBS Depot - SSP 40.646765 \n", "117 255 NYCBS Depot - SSP 40.646766 \n", "731 3036 8D OPS 01 40.651489 \n", "1384 3423 West Drive & Prospect Park West 40.661064 \n", "1385 3423 West Drive & Prospect Park West 40.661066 \n", "1400 3432 Bike in Movie Night | Prospect Park Bandshell 40.662987 \n", "1401 3432 Bike in Movie Night | Prospect Park Bandshell 40.662988 \n", "1172 3310 14 St & 7 Ave 40.663779 \n", "1173 3310 14 St & 7 Ave 40.663781 \n", "1153 3300 Prospect Park West & 8 St 40.665146 \n", "1152 3300 Prospect Park West & 8 St 40.665147 \n", "1165 3306 10 St & 7 Ave 40.666206 \n", "1164 3306 10 St & 7 Ave 40.666208 \n", "1188 3319 14 St & 5 Ave 40.666285 \n", "1189 3319 14 St & 5 Ave 40.666287 \n", "1178 3313 6 Ave & 12 St 40.666319 \n", "1179 3313 6 Ave & 12 St 40.666319 \n", "1160 3304 6 Ave & 9 St 40.668124 \n", "1161 3304 6 Ave & 9 St 40.668125 \n", "1252 3354 3 St & Prospect Park West 40.668132 \n", "1253 3354 3 St & Prospect Park West 40.668133 \n", "1198 3324 3 Ave & 14 St 40.668544 \n", "1199 3324 3 Ave & 14 St 40.668544 \n", "1194 3322 12 St & 4 Ave 40.668601 \n", "1195 3322 12 St & 4 Ave 40.668602 \n", "1186 3317 10 St & 5 Ave 40.668628 \n", "... ... ... ... 
\n", "1181 3314 W 95 St & Broadway 40.793769 \n", "1180 3314 W 95 St & Broadway 40.793773 \n", "1166 3307 West End Ave & W 94 St 40.794165 \n", "1167 3307 West End Ave & W 94 St 40.794168 \n", "1207 3328 W 100 St & Manhattan Ave 40.794996 \n", "1206 3328 W 100 St & Manhattan Ave 40.795000 \n", "1229 3341 Central Park West & W 102 St 40.795345 \n", "1228 3341 Central Park West & W 102 St 40.795347 \n", "1341 3400 E 110 St & Madison Ave 40.796155 \n", "1340 3400 E 110 St & Madison Ave 40.796157 \n", "1157 3302 Columbus Ave & W 103 St 40.796935 \n", "1156 3302 Columbus Ave & W 103 St 40.796937 \n", "1246 3350 W 100 St & Broadway 40.797368 \n", "1247 3350 W 100 St & Broadway 40.797371 \n", "1196 3323 W 106 St & Central Park West 40.798183 \n", "1197 3323 W 106 St & Central Park West 40.798191 \n", "1185 3316 W 104 St & Amsterdam Ave 40.798991 \n", "1184 3316 W 104 St & Amsterdam Ave 40.798991 \n", "1291 3374 Central Park North & Adam Clayton Powell Blvd 40.799484 \n", "1292 3374 Central Park North & Adam Clayton Powell Blvd 40.799486 \n", "1232 3343 W 107 St & Columbus Ave 40.799754 \n", "1233 3343 W 107 St & Columbus Ave 40.799755 \n", "1259 3357 W 106 St & Amsterdam Ave 40.800833 \n", "1258 3357 W 106 St & Amsterdam Ave 40.800834 \n", "1213 3331 Riverside Dr & W 104 St 40.801342 \n", "1212 3331 Riverside Dr & W 104 St 40.801345 \n", "1277 3366 West End Ave & W 107 St 40.802117 \n", "1276 3366 West End Ave & W 107 St 40.802117 \n", "1307 3383 Cathedral Pkwy & Broadway 40.804216 \n", "1308 3383 Cathedral Pkwy & Broadway 40.804218 \n", "\n", " lon \n", "1074 -74.016588 \n", "1083 -74.016298 \n", "734 -74.016262 \n", "733 -74.016269 \n", "116 -74.016513 \n", "117 -74.016510 \n", "731 -74.026636 \n", "1384 -73.979458 \n", "1385 -73.979451 \n", "1400 -73.976919 \n", "1401 -73.976922 \n", "1172 -73.983970 \n", "1173 -73.983969 \n", "1153 -73.976375 \n", "1152 -73.976377 \n", "1165 -73.982001 \n", "1164 -73.982001 \n", "1188 -73.988950 \n", "1189 -73.988959 \n", "1178 
-73.985456 \n", "1179 -73.985458 \n", "1160 -73.983782 \n", "1161 -73.983779 \n", "1252 -73.973639 \n", "1253 -73.973642 \n", "1198 -73.993327 \n", "1199 -73.993333 \n", "1194 -73.990444 \n", "1195 -73.990439 \n", "1186 -73.987002 \n", "... ... \n", "1181 -73.971884 \n", "1180 -73.971886 \n", "1166 -73.974123 \n", "1167 -73.974121 \n", "1207 -73.964502 \n", "1206 -73.964500 \n", "1229 -73.961860 \n", "1228 -73.961864 \n", "1341 -73.947823 \n", "1340 -73.947824 \n", "1157 -73.964336 \n", "1156 -73.964344 \n", "1246 -73.970411 \n", "1247 -73.970416 \n", "1196 -73.960594 \n", "1197 -73.960598 \n", "1185 -73.966219 \n", "1184 -73.966217 \n", "1291 -73.955612 \n", "1292 -73.955614 \n", "1232 -73.962110 \n", "1233 -73.962108 \n", "1259 -73.966447 \n", "1258 -73.966450 \n", "1213 -73.971141 \n", "1212 -73.971141 \n", "1277 -73.968177 \n", "1276 -73.968177 \n", "1307 -73.966983 \n", "1308 -73.966992 \n", "\n", "[1413 rows x 4 columns]" ] }, "execution_count": 85, "metadata": {}, "output_type": "execute_result" } ], "source": [ "c.sort_values(['lat', 'lon'])" ] }, { "cell_type": "code", "execution_count": 99, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idstationNamelatitudelongitude
6393423West Drive & Prospect Park West40.661063-73.979453
530331014 St & 7 Ave40.663779-73.983968
5203300Prospect Park West & 8 St40.665147-73.976376
526330610 St & 7 Ave40.666208-73.981999
539331914 St & 5 Ave40.666287-73.988951
53333136 Ave & 12 St40.666318-73.985462
52433046 Ave & 9 St40.668127-73.983776
57333543 St & Prospect Park West40.668132-73.973638
54433243 Ave & 14 St40.668546-73.993333
542332212 St & 4 Ave40.668603-73.990439
537331710 St & 5 Ave40.668627-73.987001
51732976 St & 7 Ave40.668663-73.979881
58433653 St & 7 Ave40.670384-73.978397
62234055 St & 6 Ave40.670484-73.982090
62134047 St & 5 Ave40.670492-73.985417
62034034 Ave & 9 St40.670513-73.988766
5773358Garfield Pl & 8 Ave40.671198-73.974841
61834012 Ave & 9 St40.672481-73.993314
5503330Henry St & Bay St40.672506-74.004947
61633997 St & 3 Ave40.672603-73.989830
5713352Sigourney St & Columbia St40.672672-74.008795
58733685 Ave & 3 St40.672815-73.983524
5683349Grand Army Plaza & Plaza St West40.672968-73.970880
5563337Dwight St & Van Dyke St40.673636-74.011956
5803361Carroll St & 6 Ave40.674089-73.978728
5463326Clinton St & Centre St40.674340-74.001947
59033714 Ave & 2 St40.674613-73.985011
6153398Smith St & 9 St40.674696-73.997858
5533333Columbia St & Lorraine St40.674706-74.007557
5723353Reed St & Van Brunt St40.674784-74.016128
...............
6083391E 106 St & 1 Ave40.789253-73.939562
5643345Madison Ave & E 99 St40.789485-73.952429
5103289W 90 St & Amsterdam Ave40.790179-73.972889
6473434W 88 St & West End Ave40.790254-73.977183
5983379E 103 St & Lexington Ave40.790305-73.947558
5823363E 102 St & Park Ave40.790483-73.950331
5153295Central Park W & W 96 St40.791270-73.964839
6533445Riverside Dr & W 89 St40.791812-73.978602
5213301Columbus Ave & W 95 St40.791956-73.968087
6403424E 106 St & Lexington Ave40.791976-73.945993
5133293W 92 St & Broadway40.792100-73.973900
58633675 Ave & E 103 St40.792255-73.952499
6073390E 109 St & 3 Ave40.793297-73.943208
5403320Central Park West & W 100 St40.793393-73.963556
6043387E 106 St & Madison Ave40.793434-73.949450
5343314W 95 St & Broadway40.793770-73.971888
5273307West End Ave & W 94 St40.794165-73.974124
5483328W 100 St & Manhattan Ave40.795000-73.964500
5603341Central Park West & W 102 St40.795346-73.961860
6173400E 110 St & Madison Ave40.796154-73.947821
5223302Columbus Ave & W 103 St40.796935-73.964341
5693350W 100 St & Broadway40.797372-73.970412
5433323W 106 St & Central Park West40.798186-73.960591
5363316W 104 St & Amsterdam Ave40.798994-73.966217
5933374Central Park North & Adam Clayton Powell Blvd40.799484-73.955613
5623343W 107 St & Columbus Ave40.799757-73.962113
5763357W 106 St & Amsterdam Ave40.800836-73.966449
5513331Riverside Dr & W 104 St40.801343-73.971146
5853366West End Ave & W 107 St40.802117-73.968181
6013383Cathedral Pkwy & Broadway40.804213-73.966991
\n", "

664 rows × 4 columns

\n", "
" ], "text/plain": [ " id stationName latitude longitude\n", "639 3423 West Drive & Prospect Park West 40.661063 -73.979453\n", "530 3310 14 St & 7 Ave 40.663779 -73.983968\n", "520 3300 Prospect Park West & 8 St 40.665147 -73.976376\n", "526 3306 10 St & 7 Ave 40.666208 -73.981999\n", "539 3319 14 St & 5 Ave 40.666287 -73.988951\n", "533 3313 6 Ave & 12 St 40.666318 -73.985462\n", "524 3304 6 Ave & 9 St 40.668127 -73.983776\n", "573 3354 3 St & Prospect Park West 40.668132 -73.973638\n", "544 3324 3 Ave & 14 St 40.668546 -73.993333\n", "542 3322 12 St & 4 Ave 40.668603 -73.990439\n", "537 3317 10 St & 5 Ave 40.668627 -73.987001\n", "517 3297 6 St & 7 Ave 40.668663 -73.979881\n", "584 3365 3 St & 7 Ave 40.670384 -73.978397\n", "622 3405 5 St & 6 Ave 40.670484 -73.982090\n", "621 3404 7 St & 5 Ave 40.670492 -73.985417\n", "620 3403 4 Ave & 9 St 40.670513 -73.988766\n", "577 3358 Garfield Pl & 8 Ave 40.671198 -73.974841\n", "618 3401 2 Ave & 9 St 40.672481 -73.993314\n", "550 3330 Henry St & Bay St 40.672506 -74.004947\n", "616 3399 7 St & 3 Ave 40.672603 -73.989830\n", "571 3352 Sigourney St & Columbia St 40.672672 -74.008795\n", "587 3368 5 Ave & 3 St 40.672815 -73.983524\n", "568 3349 Grand Army Plaza & Plaza St West 40.672968 -73.970880\n", "556 3337 Dwight St & Van Dyke St 40.673636 -74.011956\n", "580 3361 Carroll St & 6 Ave 40.674089 -73.978728\n", "546 3326 Clinton St & Centre St 40.674340 -74.001947\n", "590 3371 4 Ave & 2 St 40.674613 -73.985011\n", "615 3398 Smith St & 9 St 40.674696 -73.997858\n", "553 3333 Columbia St & Lorraine St 40.674706 -74.007557\n", "572 3353 Reed St & Van Brunt St 40.674784 -74.016128\n", ".. ... ... ... 
...\n", "608 3391 E 106 St & 1 Ave 40.789253 -73.939562\n", "564 3345 Madison Ave & E 99 St 40.789485 -73.952429\n", "510 3289 W 90 St & Amsterdam Ave 40.790179 -73.972889\n", "647 3434 W 88 St & West End Ave 40.790254 -73.977183\n", "598 3379 E 103 St & Lexington Ave 40.790305 -73.947558\n", "582 3363 E 102 St & Park Ave 40.790483 -73.950331\n", "515 3295 Central Park W & W 96 St 40.791270 -73.964839\n", "653 3445 Riverside Dr & W 89 St 40.791812 -73.978602\n", "521 3301 Columbus Ave & W 95 St 40.791956 -73.968087\n", "640 3424 E 106 St & Lexington Ave 40.791976 -73.945993\n", "513 3293 W 92 St & Broadway 40.792100 -73.973900\n", "586 3367 5 Ave & E 103 St 40.792255 -73.952499\n", "607 3390 E 109 St & 3 Ave 40.793297 -73.943208\n", "540 3320 Central Park West & W 100 St 40.793393 -73.963556\n", "604 3387 E 106 St & Madison Ave 40.793434 -73.949450\n", "534 3314 W 95 St & Broadway 40.793770 -73.971888\n", "527 3307 West End Ave & W 94 St 40.794165 -73.974124\n", "548 3328 W 100 St & Manhattan Ave 40.795000 -73.964500\n", "560 3341 Central Park West & W 102 St 40.795346 -73.961860\n", "617 3400 E 110 St & Madison Ave 40.796154 -73.947821\n", "522 3302 Columbus Ave & W 103 St 40.796935 -73.964341\n", "569 3350 W 100 St & Broadway 40.797372 -73.970412\n", "543 3323 W 106 St & Central Park West 40.798186 -73.960591\n", "536 3316 W 104 St & Amsterdam Ave 40.798994 -73.966217\n", "593 3374 Central Park North & Adam Clayton Powell Blvd 40.799484 -73.955613\n", "562 3343 W 107 St & Columbus Ave 40.799757 -73.962113\n", "576 3357 W 106 St & Amsterdam Ave 40.800836 -73.966449\n", "551 3331 Riverside Dr & W 104 St 40.801343 -73.971146\n", "585 3366 West End Ave & W 107 St 40.802117 -73.968181\n", "601 3383 Cathedral Pkwy & Broadway 40.804213 -73.966991\n", "\n", "[664 rows x 4 columns]" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "zz[['id', 'stationName', 'latitude', 'longitude']].sort_values(['latitude', 'longitude'])" ] }, { 
"cell_type": "code", "execution_count": 89, "metadata": { "collapsed": true }, "outputs": [], "source": [ "import sklearn.neighbors" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "scrolled": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Help on class BallTree in module sklearn.neighbors.ball_tree:\n", "\n", "class BallTree(BinaryTree)\n", " | BallTree for fast generalized N-point problems\n", " | \n", " | BallTree(X, leaf_size=40, metric='minkowski', \\**kwargs)\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array-like, shape = [n_samples, n_features]\n", " | n_samples is the number of points in the data set, and\n", " | n_features is the dimension of the parameter space.\n", " | Note: if X is a C-contiguous array of doubles then data will\n", " | not be copied. Otherwise, an internal copy will be made.\n", " | \n", " | leaf_size : positive integer (default = 40)\n", " | Number of points at which to switch to brute-force. Changing\n", " | leaf_size will not affect the results of a query, but can\n", " | significantly impact the speed of a query and the memory required\n", " | to store the constructed tree. The amount of memory needed to\n", " | store the tree scales as approximately n_samples / leaf_size.\n", " | For a specified ``leaf_size``, a leaf node is guaranteed to\n", " | satisfy ``leaf_size <= n_points <= 2 * leaf_size``, except in\n", " | the case that ``n_samples < leaf_size``.\n", " | \n", " | metric : string or DistanceMetric object\n", " | the distance metric to use for the tree. Default='minkowski'\n", " | with p=2 (that is, a euclidean metric). 
See the documentation\n", " | of the DistanceMetric class for a list of available metrics.\n", " | ball_tree.valid_metrics gives a list of the metrics which\n", " | are valid for BallTree.\n", " | \n", " | Additional keywords are passed to the distance metric class.\n", " | \n", " | Attributes\n", " | ----------\n", " | data : np.ndarray\n", " | The training data\n", " | \n", " | Examples\n", " | --------\n", " | Query for k-nearest neighbors\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(0)\n", " | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", " | >>> tree = BallTree(X, leaf_size=2) # doctest: +SKIP\n", " | >>> dist, ind = tree.query([X[0]], k=3) # doctest: +SKIP\n", " | >>> print ind # indices of 3 closest neighbors\n", " | [0 3 1]\n", " | >>> print dist # distances to 3 closest neighbors\n", " | [ 0. 0.19662693 0.29473397]\n", " | \n", " | Pickle and Unpickle a tree. Note that the state of the tree is saved in the\n", " | pickle operation: the tree needs not be rebuilt upon unpickling.\n", " | \n", " | >>> import numpy as np\n", " | >>> import pickle\n", " | >>> np.random.seed(0)\n", " | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", " | >>> tree = BallTree(X, leaf_size=2) # doctest: +SKIP\n", " | >>> s = pickle.dumps(tree) # doctest: +SKIP\n", " | >>> tree_copy = pickle.loads(s) # doctest: +SKIP\n", " | >>> dist, ind = tree_copy.query(X[0], k=3) # doctest: +SKIP\n", " | >>> print ind # indices of 3 closest neighbors\n", " | [0 3 1]\n", " | >>> print dist # distances to 3 closest neighbors\n", " | [ 0. 
0.19662693 0.29473397]\n", " | \n", " | Query for neighbors within a given radius\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(0)\n", " | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", " | >>> tree = BallTree(X, leaf_size=2) # doctest: +SKIP\n", " | >>> print tree.query_radius(X[0], r=0.3, count_only=True)\n", " | 3\n", " | >>> ind = tree.query_radius(X[0], r=0.3) # doctest: +SKIP\n", " | >>> print ind # indices of neighbors within distance 0.3\n", " | [3 0 1]\n", " | \n", " | \n", " | Compute a gaussian kernel density estimate:\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(1)\n", " | >>> X = np.random.random((100, 3))\n", " | >>> tree = BallTree(X) # doctest: +SKIP\n", " | >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')\n", " | array([ 6.94114649, 7.83281226, 7.2071716 ])\n", " | \n", " | Compute a two-point auto-correlation function\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(0)\n", " | >>> X = np.random.random((30, 3))\n", " | >>> r = np.linspace(0, 1, 5)\n", " | >>> tree = BallTree(X) # doctest: +SKIP\n", " | >>> tree.two_point_correlation(X, r)\n", " | array([ 30, 62, 278, 580, 820])\n", " | \n", " | Method resolution order:\n", " | BallTree\n", " | BinaryTree\n", " | builtins.object\n", " | \n", " | Methods defined here:\n", " | \n", " | __new__(*args, **kwargs) from builtins.type\n", " | Create and return a new object. See help(type) for accurate signature.\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data and other attributes defined here:\n", " | \n", " | __pyx_vtable__ = \n", " | \n", " | ----------------------------------------------------------------------\n", " | Methods inherited from BinaryTree:\n", " | \n", " | __getstate__(...)\n", " | get state for pickling\n", " | \n", " | __init__(self, /, *args, **kwargs)\n", " | Initialize self. 
See help(type(self)) for accurate signature.\n", " | \n", " | __reduce__(...)\n", " | reduce method used for pickling\n", " | \n", " | __setstate__(...)\n", " | set state for pickling\n", " | \n", " | get_arrays(...)\n", " | \n", " | get_n_calls(...)\n", " | \n", " | get_tree_stats(...)\n", " | \n", " | kernel_density(...)\n", " | kernel_density(self, X, h, kernel='gaussian', atol=0, rtol=1E-8,\n", " | breadth_first=True, return_log=False)\n", " | \n", " | Compute the kernel density estimate at points X with the given kernel,\n", " | using the distance metric specified at tree creation.\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array_like\n", " | An array of points to query. Last dimension should match dimension\n", " | of training data.\n", " | h : float\n", " | the bandwidth of the kernel\n", " | kernel : string\n", " | specify the kernel to use. Options are\n", " | - 'gaussian'\n", " | - 'tophat'\n", " | - 'epanechnikov'\n", " | - 'exponential'\n", " | - 'linear'\n", " | - 'cosine'\n", " | Default is kernel = 'gaussian'\n", " | atol, rtol : float (default = 0)\n", " | Specify the desired relative and absolute tolerance of the result.\n", " | If the true result is K_true, then the returned result K_ret\n", " | satisfies ``abs(K_true - K_ret) < atol + rtol * K_ret``\n", " | The default is zero (i.e. machine precision) for both.\n", " | breadth_first : boolean (default = False)\n", " | if True, use a breadth-first search. If False (default) use a\n", " | depth-first search. Breadth-first is generally faster for\n", " | compact kernels and/or high tolerances.\n", " | return_log : boolean (default = False)\n", " | return the logarithm of the result. 
This can be more accurate\n", " | than returning the result itself for narrow kernels.\n", " | \n", " | Returns\n", " | -------\n", " | density : ndarray\n", " | The array of (log)-density evaluations, shape = X.shape[:-1]\n", " | \n", " | Examples\n", " | --------\n", " | Compute a gaussian kernel density estimate:\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(1)\n", " | >>> X = np.random.random((100, 3))\n", " | >>> tree = BinaryTree(X) # doctest: +SKIP\n", " | >>> tree.kernel_density(X[:3], h=0.1, kernel='gaussian')\n", " | array([ 6.94114649, 7.83281226, 7.2071716 ])\n", " | \n", " | query(...)\n", " | query(X, k=1, return_distance=True,\n", " | dualtree=False, breadth_first=False)\n", " | \n", " | query the tree for the k nearest neighbors\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array-like, last dimension self.dim\n", " | An array of points to query\n", " | k : integer (default = 1)\n", " | The number of nearest neighbors to return\n", " | return_distance : boolean (default = True)\n", " | if True, return a tuple (d, i) of distances and indices\n", " | if False, return array i\n", " | dualtree : boolean (default = False)\n", " | if True, use the dual tree formalism for the query: a tree is\n", " | built for the query points, and the pair of trees is used to\n", " | efficiently search this space. 
This can lead to better\n", " | performance as the number of points grows large.\n", " | breadth_first : boolean (default = False)\n", " | if True, then query the nodes in a breadth-first manner.\n", " | Otherwise, query the nodes in a depth-first manner.\n", " | sort_results : boolean (default = True)\n", " | if True, then distances and indices of each point are sorted\n", " | on return, so that the first column contains the closest points.\n", " | Otherwise, neighbors are returned in an arbitrary order.\n", " | \n", " | Returns\n", " | -------\n", " | i : if return_distance == False\n", " | (d,i) : if return_distance == True\n", " | \n", " | d : array of doubles - shape: x.shape[:-1] + (k,)\n", " | each entry gives the list of distances to the\n", " | neighbors of the corresponding point\n", " | \n", " | i : array of integers - shape: x.shape[:-1] + (k,)\n", " | each entry gives the list of indices of\n", " | neighbors of the corresponding point\n", " | \n", " | Examples\n", " | --------\n", " | Query for k-nearest neighbors\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(0)\n", " | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", " | >>> tree = BinaryTree(X, leaf_size=2) # doctest: +SKIP\n", " | >>> dist, ind = tree.query(X[0], k=3) # doctest: +SKIP\n", " | >>> print ind # indices of 3 closest neighbors\n", " | [0 3 1]\n", " | >>> print dist # distances to 3 closest neighbors\n", " | [ 0. 
0.19662693 0.29473397]\n", " | \n", " | query_radius(...)\n", " | query_radius(self, X, r, count_only = False):\n", " | \n", " | query the tree for neighbors within a radius r\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array-like, last dimension self.dim\n", " | An array of points to query\n", " | r : distance within which neighbors are returned\n", " | r can be a single value, or an array of values of shape\n", " | x.shape[:-1] if different radii are desired for each point.\n", " | return_distance : boolean (default = False)\n", " | if True, return distances to neighbors of each point\n", " | if False, return only neighbors\n", " | Note that unlike the query() method, setting return_distance=True\n", " | here adds to the computation time. Not all distances need to be\n", " | calculated explicitly for return_distance=False. Results are\n", " | not sorted by default: see ``sort_results`` keyword.\n", " | count_only : boolean (default = False)\n", " | if True, return only the count of points within distance r\n", " | if False, return the indices of all points within distance r\n", " | If return_distance==True, setting count_only=True will\n", " | result in an error.\n", " | sort_results : boolean (default = False)\n", " | if True, the distances and indices will be sorted before being\n", " | returned. If False, the results will not be sorted. 
If\n", " | return_distance == False, setting sort_results = True will\n", " | result in an error.\n", " | \n", " | Returns\n", " | -------\n", " | count : if count_only == True\n", " | ind : if count_only == False and return_distance == False\n", " | (ind, dist) : if count_only == False and return_distance == True\n", " | \n", " | count : array of integers, shape = X.shape[:-1]\n", " | each entry gives the number of neighbors within\n", " | a distance r of the corresponding point.\n", " | \n", " | ind : array of objects, shape = X.shape[:-1]\n", " | each element is a numpy integer array listing the indices of\n", " | neighbors of the corresponding point. Note that unlike\n", " | the results of a k-neighbors query, the returned neighbors\n", " | are not sorted by distance by default.\n", " | \n", " | dist : array of objects, shape = X.shape[:-1]\n", " | each element is a numpy double array\n", " | listing the distances corresponding to indices in i.\n", " | \n", " | Examples\n", " | --------\n", " | Query for neighbors in a given radius\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(0)\n", " | >>> X = np.random.random((10, 3)) # 10 points in 3 dimensions\n", " | >>> tree = BinaryTree(X, leaf_size=2) # doctest: +SKIP\n", " | >>> print tree.query_radius(X[0], r=0.3, count_only=True)\n", " | 3\n", " | >>> ind = tree.query_radius(X[0], r=0.3) # doctest: +SKIP\n", " | >>> print ind # indices of neighbors within distance 0.3\n", " | [3 0 1]\n", " | \n", " | reset_n_calls(...)\n", " | \n", " | two_point_correlation(...)\n", " | Compute the two-point correlation function\n", " | \n", " | Parameters\n", " | ----------\n", " | X : array_like\n", " | An array of points to query. Last dimension should match dimension\n", " | of training data.\n", " | r : array_like\n", " | A one-dimensional array of distances\n", " | dualtree : boolean (default = False)\n", " | If true, use a dualtree algorithm. Otherwise, use a single-tree\n", " | algorithm. 
Dual tree algorithms can have better scaling for\n", " | large N.\n", " | \n", " | Returns\n", " | -------\n", " | counts : ndarray\n", " | counts[i] contains the number of pairs of points with distance\n", " | less than or equal to r[i]\n", " | \n", " | Examples\n", " | --------\n", " | Compute the two-point autocorrelation function of X:\n", " | \n", " | >>> import numpy as np\n", " | >>> np.random.seed(0)\n", " | >>> X = np.random.random((30, 3))\n", " | >>> r = np.linspace(0, 1, 5)\n", " | >>> tree = BinaryTree(X) # doctest: +SKIP\n", " | >>> tree.two_point_correlation(X, r)\n", " | array([ 30, 62, 278, 580, 820])\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data descriptors inherited from BinaryTree:\n", " | \n", " | data\n", " | \n", " | idx_array\n", " | \n", " | node_bounds\n", " | \n", " | node_data\n", " | \n", " | ----------------------------------------------------------------------\n", " | Data and other attributes inherited from BinaryTree:\n", " | \n", " | valid_metrics = ['braycurtis', 'russellrao', 'wminkowski', 'matching',...\n", "\n" ] } ], "source": [ "help(sklearn.neighbors.BallTree)" ] }, { "cell_type": "code", "execution_count": 94, "metadata": { "collapsed": true }, "outputs": [], "source": [ "# Index the tree on the coordinate columns only, selected by name (DataFrame.ix was removed in pandas 1.0).\n", "# NOTE(review): euclidean distance on raw degrees only approximates ground distance; confirm this is acceptable.\n", "bt = sklearn.neighbors.BallTree(zz[['latitude', 'longitude']].values, metric='euclidean')" ] }, { "cell_type": "code", "execution_count": 114, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(72, array([0])),\n", " (72, array([0])),\n", " (79, array([1])),\n", " (79, array([1])),\n", " (82, array([2])),\n", " (82, array([2])),\n", " (83, array([3])),\n", " (83, array([3])),\n", " (116, array([4])),\n", " (116, array([4])),\n", " (119, array([5])),\n", " (119, array([5])),\n", " (120, array([6])),\n", " (120, array([6])),\n", " (127, array([7])),\n", " (127, array([7])),\n", " (128, array([8])),\n", " (128, array([8])),\n", " (137, array([9])),\n", " (137, 
array([9])),\n", " (143, array([10])),\n", " (143, array([10])),\n", " (144, array([11])),\n", " (144, array([11])),\n", " (146, array([12])),\n", " (146, array([12])),\n", " (147, array([13])),\n", " (147, array([13])),\n", " (150, array([14])),\n", " (150, array([14])),\n", " (151, array([15])),\n", " (151, array([15])),\n", " (152, array([16])),\n", " (152, array([16])),\n", " (153, array([17])),\n", " (153, array([17])),\n", " (157, array([18])),\n", " (157, array([18])),\n", " (160, array([], dtype=int64)),\n", " (160, array([], dtype=int64)),\n", " (161, array([19])),\n", " (161, array([19])),\n", " (164, array([20])),\n", " (164, array([20])),\n", " (167, array([21])),\n", " (167, array([21])),\n", " (168, array([22])),\n", " (168, array([22])),\n", " (173, array([23])),\n", " (173, array([23])),\n", " (174, array([24])),\n", " (174, array([24])),\n", " (195, array([25])),\n", " (195, array([25])),\n", " (212, array([26])),\n", " (212, array([26])),\n", " (216, array([27])),\n", " (216, array([27])),\n", " (217, array([28])),\n", " (217, array([28])),\n", " (218, array([], dtype=int64)),\n", " (218, array([], dtype=int64)),\n", " (223, array([29])),\n", " (223, array([29])),\n", " (224, array([30])),\n", " (224, array([30])),\n", " (225, array([31])),\n", " (225, array([31])),\n", " (228, array([32])),\n", " (228, array([32])),\n", " (229, array([33])),\n", " (229, array([33])),\n", " (232, array([34])),\n", " (232, array([34])),\n", " (233, array([651])),\n", " (233, array([], dtype=int64)),\n", " (233, array([651])),\n", " (233, array([], dtype=int64)),\n", " (236, array([35])),\n", " (236, array([35])),\n", " (237, array([36])),\n", " (237, array([36])),\n", " (238, array([37])),\n", " (238, array([37])),\n", " (239, array([38])),\n", " (239, array([38])),\n", " (241, array([39])),\n", " (241, array([39])),\n", " (242, array([40])),\n", " (242, array([40])),\n", " (242, array([40])),\n", " (242, array([40])),\n", " (243, array([41])),\n", " (243, 
array([41])),\n", " (244, array([42])),\n", " (244, array([42])),\n", " (245, array([43])),\n", " (245, array([43])),\n", " (247, array([44])),\n", " (247, array([44])),\n", " (248, array([45])),\n", " (248, array([45])),\n", " (249, array([46])),\n", " (249, array([46])),\n", " (250, array([642])),\n", " (250, array([642])),\n", " (250, array([642])),\n", " (250, array([642])),\n", " (251, array([47])),\n", " (251, array([47])),\n", " (252, array([48])),\n", " (252, array([48])),\n", " (253, array([49])),\n", " (253, array([49])),\n", " (254, array([50])),\n", " (254, array([50])),\n", " (255, array([], dtype=int64)),\n", " (255, array([], dtype=int64)),\n", " (257, array([51])),\n", " (257, array([51])),\n", " (258, array([52])),\n", " (258, array([52])),\n", " (259, array([53])),\n", " (259, array([53])),\n", " (260, array([54])),\n", " (260, array([54])),\n", " (261, array([55])),\n", " (261, array([55])),\n", " (262, array([56])),\n", " (262, array([56])),\n", " (263, array([], dtype=int64)),\n", " (263, array([], dtype=int64)),\n", " (264, array([57])),\n", " (264, array([57])),\n", " (265, array([58])),\n", " (265, array([58])),\n", " (266, array([59])),\n", " (266, array([59])),\n", " (267, array([60])),\n", " (267, array([60])),\n", " (268, array([61])),\n", " (268, array([61])),\n", " (270, array([62])),\n", " (270, array([62])),\n", " (271, array([644])),\n", " (271, array([644])),\n", " (274, array([63])),\n", " (274, array([63])),\n", " (275, array([64])),\n", " (275, array([64])),\n", " (276, array([65])),\n", " (276, array([65])),\n", " (278, array([66])),\n", " (278, array([66])),\n", " (279, array([], dtype=int64)),\n", " (279, array([67])),\n", " (279, array([67])),\n", " (279, array([67])),\n", " (279, array([67])),\n", " (279, array([], dtype=int64)),\n", " (280, array([68])),\n", " (280, array([68])),\n", " (281, array([69])),\n", " (281, array([69])),\n", " (282, array([70])),\n", " (282, array([70])),\n", " (284, array([71])),\n", " (284, 
array([71])),\n", " (285, array([72])),\n", " (285, array([72])),\n", " (289, array([73])),\n", " (289, array([73])),\n", " (290, array([], dtype=int64)),\n", " (290, array([], dtype=int64)),\n", " (291, array([74])),\n", " (291, array([74])),\n", " (293, array([75])),\n", " (293, array([75])),\n", " (294, array([], dtype=int64)),\n", " (294, array([], dtype=int64)),\n", " (295, array([76])),\n", " (295, array([76])),\n", " (296, array([77])),\n", " (296, array([77])),\n", " (297, array([78])),\n", " (297, array([78])),\n", " (298, array([], dtype=int64)),\n", " (298, array([], dtype=int64)),\n", " (300, array([], dtype=int64)),\n", " (300, array([], dtype=int64)),\n", " (300, array([], dtype=int64)),\n", " (300, array([], dtype=int64)),\n", " (301, array([79])),\n", " (301, array([79])),\n", " (302, array([80])),\n", " (302, array([80])),\n", " (303, array([81])),\n", " (303, array([81])),\n", " (304, array([82])),\n", " (304, array([82])),\n", " (305, array([83])),\n", " (305, array([83])),\n", " (306, array([84])),\n", " (306, array([84])),\n", " (307, array([85])),\n", " (307, array([85])),\n", " (308, array([86])),\n", " (308, array([86])),\n", " (309, array([87])),\n", " (309, array([87])),\n", " (310, array([88])),\n", " (310, array([88])),\n", " (311, array([89])),\n", " (311, array([89])),\n", " (312, array([90])),\n", " (312, array([90])),\n", " (312, array([90])),\n", " (312, array([90])),\n", " (313, array([91])),\n", " (313, array([91])),\n", " (314, array([92])),\n", " (314, array([92])),\n", " (314, array([], dtype=int64)),\n", " (314, array([], dtype=int64)),\n", " (315, array([93])),\n", " (315, array([93])),\n", " (316, array([94])),\n", " (316, array([94])),\n", " (317, array([95])),\n", " (317, array([95])),\n", " (318, array([], dtype=int64)),\n", " (318, array([], dtype=int64)),\n", " (319, array([96])),\n", " (319, array([], dtype=int64)),\n", " (319, array([], dtype=int64)),\n", " (319, array([96])),\n", " (320, array([97])),\n", " (320, 
array([97])),\n", " (320, array([97])),\n", " (320, array([97])),\n", " (321, array([98])),\n", " (321, array([98])),\n", " (322, array([99])),\n", " (322, array([99])),\n", " (323, array([100])),\n", " (323, array([100])),\n", " (324, array([101])),\n", " (324, array([101])),\n", " (325, array([102])),\n", " (325, array([102])),\n", " (326, array([103])),\n", " (326, array([103])),\n", " (327, array([104])),\n", " (327, array([104])),\n", " (328, array([105])),\n", " (328, array([105])),\n", " (329, array([], dtype=int64)),\n", " (329, array([], dtype=int64)),\n", " (329, array([], dtype=int64)),\n", " (329, array([], dtype=int64)),\n", " (330, array([106])),\n", " (330, array([106])),\n", " (331, array([107])),\n", " (331, array([107])),\n", " (332, array([108])),\n", " (332, array([108])),\n", " (334, array([109])),\n", " (334, array([109])),\n", " (335, array([110])),\n", " (335, array([110])),\n", " (336, array([111])),\n", " (336, array([111])),\n", " (337, array([112])),\n", " (337, array([112])),\n", " (339, array([113])),\n", " (339, array([113])),\n", " (340, array([114])),\n", " (340, array([114])),\n", " (341, array([115])),\n", " (341, array([115])),\n", " (342, array([116])),\n", " (342, array([116])),\n", " (343, array([117])),\n", " (343, array([117])),\n", " (344, array([118])),\n", " (344, array([118])),\n", " (345, array([119])),\n", " (345, array([119])),\n", " (346, array([120])),\n", " (346, array([120])),\n", " (347, array([], dtype=int64)),\n", " (347, array([121])),\n", " (347, array([121])),\n", " (347, array([], dtype=int64)),\n", " (348, array([122])),\n", " (348, array([122])),\n", " (349, array([123])),\n", " (349, array([123])),\n", " (350, array([124])),\n", " (350, array([124])),\n", " (351, array([125])),\n", " (351, array([125])),\n", " (352, array([126])),\n", " (352, array([126])),\n", " (353, array([127])),\n", " (353, array([127])),\n", " (354, array([128])),\n", " (354, array([128])),\n", " (355, array([129])),\n", " (355, 
array([129])),\n", " (356, array([130])),\n", " (356, array([130])),\n", " (357, array([131])),\n", " (357, array([131])),\n", " (358, array([132])),\n", " (358, array([132])),\n", " (359, array([133])),\n", " (359, array([133])),\n", " (360, array([134])),\n", " (360, array([134])),\n", " (361, array([135])),\n", " (361, array([135])),\n", " (362, array([136])),\n", " (362, array([136])),\n", " (363, array([137])),\n", " (363, array([137])),\n", " (364, array([138])),\n", " (364, array([138])),\n", " (365, array([139])),\n", " (365, array([139])),\n", " (366, array([140])),\n", " (366, array([140])),\n", " (367, array([], dtype=int64)),\n", " (367, array([], dtype=int64)),\n", " (368, array([141])),\n", " (368, array([141])),\n", " (369, array([142])),\n", " (369, array([142])),\n", " (372, array([143])),\n", " (372, array([143])),\n", " (373, array([144])),\n", " (373, array([144])),\n", " (375, array([487])),\n", " (375, array([487])),\n", " (375, array([487])),\n", " (375, array([487])),\n", " (376, array([145])),\n", " (376, array([145])),\n", " (377, array([146])),\n", " (377, array([146])),\n", " (379, array([147])),\n", " (379, array([147])),\n", " (380, array([148])),\n", " (380, array([148])),\n", " (382, array([149])),\n", " (382, array([149])),\n", " (383, array([150])),\n", " (383, array([150])),\n", " (384, array([151])),\n", " (384, array([151])),\n", " (384, array([], dtype=int64)),\n", " (384, array([], dtype=int64)),\n", " (385, array([152])),\n", " (385, array([152])),\n", " (386, array([153])),\n", " (386, array([153])),\n", " (387, array([154])),\n", " (387, array([154])),\n", " (388, array([155])),\n", " (388, array([155])),\n", " (389, array([156])),\n", " (389, array([156])),\n", " (390, array([157])),\n", " (390, array([157])),\n", " (391, array([158])),\n", " (391, array([158])),\n", " (392, array([159])),\n", " (392, array([159])),\n", " (393, array([160])),\n", " (393, array([160])),\n", " (394, array([161])),\n", " (394, 
array([161])),\n", " (395, array([162])),\n", " (395, array([162])),\n", " (396, array([163])),\n", " (396, array([163])),\n", " (397, array([164])),\n", " (397, array([164])),\n", " (398, array([165])),\n", " (398, array([165])),\n", " (399, array([166])),\n", " (399, array([166])),\n", " (400, array([167])),\n", " (400, array([167])),\n", " (401, array([168])),\n", " (401, array([168])),\n", " (402, array([169])),\n", " (402, array([169])),\n", " (403, array([170])),\n", " (403, array([170])),\n", " (404, array([], dtype=int64)),\n", " (404, array([], dtype=int64)),\n", " (405, array([171])),\n", " (405, array([171])),\n", " (406, array([172])),\n", " (406, array([172])),\n", " (407, array([173])),\n", " (407, array([173])),\n", " (408, array([174])),\n", " (408, array([174])),\n", " (409, array([175])),\n", " (409, array([175])),\n", " (410, array([176])),\n", " (410, array([176])),\n", " (411, array([177])),\n", " (411, array([177])),\n", " (412, array([178])),\n", " (412, array([178])),\n", " (414, array([179])),\n", " (414, array([179])),\n", " (415, array([180])),\n", " (415, array([180])),\n", " (416, array([181])),\n", " (416, array([181])),\n", " (417, array([182])),\n", " (417, array([182])),\n", " (418, array([183])),\n", " (418, array([183])),\n", " (419, array([184])),\n", " (419, array([184])),\n", " (420, array([185])),\n", " (420, array([185])),\n", " (421, array([186])),\n", " (421, array([186])),\n", " (422, array([187])),\n", " (422, array([187])),\n", " (423, array([188])),\n", " (423, array([188])),\n", " (426, array([189])),\n", " (426, array([189])),\n", " (427, array([190])),\n", " (427, array([], dtype=int64)),\n", " (427, array([190])),\n", " (427, array([], dtype=int64)),\n", " (428, array([191])),\n", " (428, array([191])),\n", " (430, array([192])),\n", " (430, array([192])),\n", " (431, array([], dtype=int64)),\n", " (431, array([], dtype=int64)),\n", " (432, array([193])),\n", " (432, array([193])),\n", " (433, array([194])),\n", " 
(433, array([194])),\n", " (434, array([195])),\n", " (434, array([195])),\n", " (435, array([196])),\n", " (435, array([196])),\n", " (436, array([197])),\n", " (436, array([197])),\n", " (437, array([198])),\n", " (437, array([198])),\n", " (438, array([199])),\n", " (438, array([199])),\n", " (439, array([200])),\n", " (439, array([200])),\n", " (440, array([201])),\n", " (440, array([201])),\n", " (441, array([202])),\n", " (441, array([202])),\n", " (442, array([203])),\n", " (442, array([203])),\n", " (443, array([204])),\n", " (443, array([204])),\n", " (443, array([204])),\n", " (443, array([204])),\n", " (444, array([205])),\n", " (444, array([205])),\n", " (445, array([206])),\n", " (445, array([206])),\n", " (446, array([207])),\n", " (446, array([207])),\n", " (447, array([208])),\n", " (447, array([208])),\n", " (448, array([209])),\n", " (448, array([209])),\n", " (449, array([210])),\n", " (449, array([210])),\n", " (450, array([211])),\n", " (450, array([211])),\n", " (453, array([212])),\n", " (453, array([212])),\n", " (454, array([213])),\n", " (454, array([213])),\n", " (455, array([214])),\n", " (455, array([214])),\n", " (456, array([215])),\n", " (456, array([215])),\n", " (457, array([216])),\n", " (457, array([216])),\n", " (458, array([217])),\n", " (458, array([217])),\n", " (459, array([218])),\n", " (459, array([218])),\n", " (460, array([219])),\n", " (460, array([219])),\n", " (461, array([220])),\n", " (461, array([220])),\n", " (462, array([221])),\n", " (462, array([221])),\n", " (463, array([], dtype=int64)),\n", " (463, array([], dtype=int64)),\n", " (464, array([], dtype=int64)),\n", " (464, array([], dtype=int64)),\n", " (465, array([222])),\n", " (465, array([222])),\n", " (466, array([223])),\n", " (466, array([223])),\n", " (467, array([224])),\n", " (467, array([224])),\n", " (468, array([225])),\n", " (468, array([225])),\n", " (469, array([226])),\n", " (469, array([226])),\n", " (470, array([227])),\n", " (470, 
array([227])),\n", " (471, array([228])),\n", " (471, array([228])),\n", " (472, array([229])),\n", " (472, array([229])),\n", " (473, array([230])),\n", " (473, array([230])),\n", " (474, array([231])),\n", " (474, array([231])),\n", " (475, array([663])),\n", " (475, array([663])),\n", " (476, array([232])),\n", " (476, array([232])),\n", " (477, array([233])),\n", " (477, array([233])),\n", " (478, array([234])),\n", " (478, array([234])),\n", " (479, array([235])),\n", " (479, array([235])),\n", " (480, array([236])),\n", " (480, array([236])),\n", " (481, array([237])),\n", " (481, array([237])),\n", " (482, array([238])),\n", " (482, array([238])),\n", " (483, array([239])),\n", " (483, array([239])),\n", " (484, array([240])),\n", " (484, array([240])),\n", " (485, array([241])),\n", " (485, array([241])),\n", " (486, array([242])),\n", " (486, array([242])),\n", " (487, array([243])),\n", " (487, array([243])),\n", " (488, array([244])),\n", " (488, array([244])),\n", " (489, array([], dtype=int64)),\n", " (489, array([], dtype=int64)),\n", " (490, array([245])),\n", " (490, array([245])),\n", " (491, array([246])),\n", " (491, array([246])),\n", " (492, array([247])),\n", " (492, array([247])),\n", " (493, array([248])),\n", " (493, array([248])),\n", " (494, array([249])),\n", " (494, array([249])),\n", " (495, array([250])),\n", " (495, array([250])),\n", " (496, array([251])),\n", " (496, array([251])),\n", " (497, array([252])),\n", " (497, array([252])),\n", " (498, array([253])),\n", " (498, array([253])),\n", " (499, array([254])),\n", " (499, array([254])),\n", " (500, array([255])),\n", " (500, array([255])),\n", " (501, array([256])),\n", " (501, array([256])),\n", " (502, array([257])),\n", " (502, array([257])),\n", " (503, array([258])),\n", " (503, array([258])),\n", " (504, array([259])),\n", " (504, array([259])),\n", " (504, array([259])),\n", " (504, array([259])),\n", " (505, array([260])),\n", " (505, array([260])),\n", " (507, 
array([261])),\n", " (507, array([261])),\n", " (508, array([262])),\n", " (508, array([262])),\n", " (509, array([263])),\n", " (509, array([263])),\n", " (510, array([], dtype=int64)),\n", " (510, array([], dtype=int64)),\n", " (511, array([264])),\n", " (511, array([264])),\n", " (512, array([], dtype=int64)),\n", " (512, array([], dtype=int64)),\n", " (513, array([265])),\n", " (513, array([265])),\n", " (514, array([266])),\n", " (514, array([266])),\n", " (515, array([267])),\n", " (515, array([267])),\n", " (516, array([268])),\n", " (516, array([268])),\n", " (517, array([271, 269])),\n", " (517, array([475])),\n", " (517, array([271, 269])),\n", " (517, array([271, 269])),\n", " (517, array([475])),\n", " (517, array([271, 269])),\n", " (518, array([270])),\n", " (518, array([270])),\n", " (519, array([271, 269])),\n", " (519, array([271, 269])),\n", " (519, array([271, 269])),\n", " (519, array([], dtype=int64)),\n", " (519, array([], dtype=int64)),\n", " (519, array([271, 269])),\n", " (520, array([272])),\n", " (520, array([272])),\n", " (521, array([483])),\n", " (521, array([483])),\n", " (521, array([483])),\n", " (521, array([483])),\n", " (522, array([273])),\n", " (522, array([273])),\n", " (523, array([274])),\n", " (523, array([274])),\n", " (524, array([275])),\n", " (524, array([275])),\n", " (525, array([276])),\n", " (525, array([276])),\n", " (526, array([277])),\n", " (526, array([277])),\n", " (527, array([], dtype=int64)),\n", " (527, array([278])),\n", " (527, array([278])),\n", " (527, array([], dtype=int64)),\n", " (528, array([279])),\n", " (528, array([279])),\n", " (529, array([280])),\n", " (529, array([280])),\n", " (530, array([281])),\n", " (530, array([281])),\n", " (531, array([282])),\n", " (531, array([282])),\n", " (532, array([283])),\n", " (532, array([283])),\n", " (533, array([284])),\n", " (533, array([284])),\n", " (534, array([285])),\n", " (534, array([285])),\n", " (536, array([286])),\n", " (536, 
array([286])),\n", " (537, array([287])),\n", " (537, array([287])),\n", " (538, array([], dtype=int64)),\n", " (538, array([], dtype=int64)),\n", " (538, array([], dtype=int64)),\n", " (538, array([], dtype=int64)),\n", " (539, array([288])),\n", " (539, array([288])),\n", " (540, array([], dtype=int64)),\n", " (540, array([289])),\n", " (540, array([], dtype=int64)),\n", " (540, array([289])),\n", " (545, array([290])),\n", " (545, array([290])),\n", " (546, array([291])),\n", " (546, array([291])),\n", " (2000, array([292])),\n", " (2000, array([292])),\n", " (2001, array([], dtype=int64)),\n", " (2001, array([293])),\n", " (2001, array([293])),\n", " (2001, array([], dtype=int64)),\n", " (2002, array([294])),\n", " (2002, array([294])),\n", " (2003, array([295])),\n", " (2003, array([295])),\n", " (2004, array([296])),\n", " (2004, array([296])),\n", " (2005, array([297])),\n", " (2005, array([297])),\n", " (2006, array([298])),\n", " (2006, array([298])),\n", " (2008, array([299])),\n", " (2008, array([299])),\n", " (2009, array([300])),\n", " (2009, array([300])),\n", " (2010, array([301])),\n", " (2010, array([301])),\n", " (2012, array([302])),\n", " (2012, array([302])),\n", " (2017, array([], dtype=int64)),\n", " (2017, array([], dtype=int64)),\n", " (2021, array([303])),\n", " (2021, array([303])),\n", " (2022, array([], dtype=int64)),\n", " (2022, array([], dtype=int64)),\n", " (2022, array([304])),\n", " (2022, array([304])),\n", " (2023, array([305])),\n", " (2023, array([305])),\n", " (3002, array([306])),\n", " (3002, array([306])),\n", " (3014, array([], dtype=int64)),\n", " (3014, array([], dtype=int64)),\n", " (3014, array([], dtype=int64)),\n", " (3014, array([], dtype=int64)),\n", " (3016, array([395])),\n", " (3016, array([307])),\n", " (3016, array([307])),\n", " (3016, array([395])),\n", " (3017, array([], dtype=int64)),\n", " (3017, array([], dtype=int64)),\n", " (3019, array([], dtype=int64)),\n", " (3036, array([], dtype=int64)),\n", " 
(3039, array([476])),\n", " (3040, array([], dtype=int64)),\n", " (3040, array([], dtype=int64)),\n", " (3041, array([308])),\n", " (3041, array([308])),\n", " (3042, array([309])),\n", " (3042, array([309])),\n", " (3043, array([310])),\n", " (3043, array([310])),\n", " (3044, array([311])),\n", " (3044, array([311])),\n", " (3046, array([312])),\n", " (3046, array([312])),\n", " (3047, array([313])),\n", " (3047, array([313])),\n", " (3048, array([314])),\n", " (3048, array([314])),\n", " (3049, array([315])),\n", " (3049, array([315])),\n", " (3050, array([316])),\n", " (3050, array([316])),\n", " (3052, array([317])),\n", " (3052, array([317])),\n", " (3053, array([318])),\n", " (3053, array([318])),\n", " (3054, array([319])),\n", " (3054, array([319])),\n", " (3055, array([320])),\n", " (3055, array([320])),\n", " (3056, array([321])),\n", " (3056, array([321])),\n", " (3057, array([322])),\n", " (3057, array([322])),\n", " (3058, array([323])),\n", " (3058, array([323])),\n", " (3059, array([324])),\n", " (3059, array([324])),\n", " (3060, array([325])),\n", " (3060, array([325])),\n", " (3061, array([326])),\n", " (3061, array([326])),\n", " (3062, array([327])),\n", " (3062, array([327])),\n", " (3063, array([328])),\n", " (3063, array([328])),\n", " (3064, array([329])),\n", " (3064, array([329])),\n", " (3065, array([330])),\n", " (3065, array([330])),\n", " (3066, array([331])),\n", " (3066, array([331])),\n", " (3067, array([332])),\n", " (3067, array([332])),\n", " (3068, array([333])),\n", " (3068, array([333])),\n", " (3069, array([334])),\n", " (3069, array([334])),\n", " (3070, array([335])),\n", " (3070, array([335])),\n", " (3071, array([336])),\n", " (3071, array([336])),\n", " (3072, array([337])),\n", " (3072, array([337])),\n", " (3073, array([338])),\n", " (3073, array([338])),\n", " (3074, array([339])),\n", " (3074, array([339])),\n", " (3075, array([340])),\n", " (3075, array([340])),\n", " (3076, array([341])),\n", " (3076, 
array([341])),\n", " (3077, array([342])),\n", " (3077, array([342])),\n", " (3078, array([343])),\n", " (3078, array([343])),\n", " (3079, array([], dtype=int64)),\n", " (3079, array([], dtype=int64)),\n", " (3080, array([344])),\n", " (3080, array([344])),\n", " (3081, array([345])),\n", " (3081, array([345])),\n", " (3082, array([346])),\n", " (3082, array([346])),\n", " (3083, array([347])),\n", " (3083, array([347])),\n", " (3084, array([], dtype=int64)),\n", " (3084, array([], dtype=int64)),\n", " (3085, array([348])),\n", " (3085, array([348])),\n", " (3086, array([349])),\n", " (3086, array([349])),\n", " (3087, array([350])),\n", " (3087, array([350])),\n", " (3088, array([351])),\n", " (3088, array([351])),\n", " (3089, array([], dtype=int64)),\n", " (3089, array([], dtype=int64)),\n", " (3090, array([352])),\n", " (3090, array([352])),\n", " (3091, array([353])),\n", " (3091, array([353])),\n", " (3092, array([354])),\n", " (3092, array([354])),\n", " (3093, array([355])),\n", " (3093, array([355])),\n", " (3094, array([356])),\n", " (3094, array([356])),\n", " (3095, array([357])),\n", " (3095, array([357])),\n", " (3096, array([358])),\n", " (3096, array([358])),\n", " (3097, array([645])),\n", " (3097, array([645])),\n", " (3098, array([656])),\n", " (3098, array([656])),\n", " (3099, array([], dtype=int64)),\n", " (3099, array([], dtype=int64)),\n", " (3100, array([359])),\n", " (3100, array([359])),\n", " (3101, array([360])),\n", " (3101, array([360])),\n", " (3102, array([361])),\n", " (3102, array([361])),\n", " (3103, array([362])),\n", " (3103, array([362])),\n", " (3104, array([307])),\n", " (3104, array([307])),\n", " (3104, array([307])),\n", " (3104, array([307])),\n", " (3105, array([363])),\n", " (3105, array([363])),\n", " (3106, array([364])),\n", " (3106, array([364])),\n", " (3107, array([365])),\n", " (3107, array([365])),\n", " (3108, array([366])),\n", " (3108, array([366])),\n", " (3109, array([367])),\n", " (3109, 
array([367])),\n", " (3110, array([368])),\n", " (3110, array([368])),\n", " (3111, array([369])),\n", " (3111, array([369])),\n", " (3112, array([370])),\n", " (3112, array([370])),\n", " (3113, array([371])),\n", " (3113, array([371])),\n", " (3114, array([372])),\n", " (3114, array([372])),\n", " (3115, array([373])),\n", " (3115, array([373])),\n", " (3116, array([374])),\n", " (3116, array([374])),\n", " (3117, array([375])),\n", " (3117, array([375])),\n", " (3118, array([376])),\n", " (3118, array([376])),\n", " (3119, array([377])),\n", " (3119, array([377])),\n", " (3120, array([378])),\n", " (3120, array([378])),\n", " (3120, array([378])),\n", " (3120, array([378])),\n", " (3121, array([379])),\n", " (3121, array([379])),\n", " (3122, array([380])),\n", " (3122, array([380])),\n", " (3123, array([381])),\n", " (3123, array([381])),\n", " (3124, array([382])),\n", " (3124, array([382])),\n", " (3125, array([383])),\n", " (3125, array([383])),\n", " (3126, array([384])),\n", " (3126, array([384])),\n", " (3127, array([385])),\n", " (3127, array([385])),\n", " (3128, array([386])),\n", " (3128, array([386])),\n", " (3129, array([387])),\n", " (3129, array([387])),\n", " (3130, array([388])),\n", " (3130, array([388])),\n", " (3131, array([389])),\n", " (3131, array([389])),\n", " (3132, array([390])),\n", " (3132, array([390])),\n", " (3133, array([], dtype=int64)),\n", " (3133, array([], dtype=int64)),\n", " (3133, array([], dtype=int64)),\n", " (3133, array([], dtype=int64)),\n", " (3134, array([391])),\n", " (3134, array([391])),\n", " (3135, array([392])),\n", " (3135, array([392])),\n", " (3136, array([393])),\n", " (3136, array([393])),\n", " (3137, array([394])),\n", " (3137, array([394])),\n", " (3138, array([], dtype=int64)),\n", " (3138, array([], dtype=int64)),\n", " (3139, array([395])),\n", " (3139, array([395])),\n", " (3140, array([396])),\n", " (3140, array([396])),\n", " (3141, array([397])),\n", " (3141, array([397])),\n", " (3142, 
# In[114]:
# Pair each `c.id` with the array of indices that `bt.query_radius` returns
# for that row's (lat, lon) coordinates.
# `.ix` was deprecated in pandas 0.20 and removed in pandas 1.0; `.loc` is the
# label-based equivalent for selecting the two columns by name.
# NOTE(review): 0.0005 is expressed in whatever units `bt` was built with
# (presumably degrees if it indexes raw lat/lon) -- confirm, and consider
# promoting it to a named constant in a config cell.
list(zip(c.id, bt.query_radius(c.loc[:, ['lat', 'lon']], 0.0005)))


# In[118]:
# Same radius query, wrapped in a Series (default positional index) so the
# match arrays can be inspected row by row; empty arrays mean no neighbour
# was found within the radius.
pd.Series(bt.query_radius(c.loc[:, ['lat', 'lon']], 0.0005))