{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": { "collapsed": false }, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "Using Theano backend.\n", "Using gpu device 1: GeForce GTX TITAN X (CNMeM is enabled with initial size: 80.0% of memory, cuDNN 5110)\n", "/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/theano/sandbox/cuda/__init__.py:600: UserWarning: Your cuDNN version is more recent than the one Theano officially supports. If you see any problems, try updating Theano or downgrading cuDNN to version 5.\n", " warnings.warn(warn)\n" ] } ], "source": [ "import ast\n", "\n", "import pandas as pd\n", "\n", "import datetime\n", "\n", "from keras.layers import Input, Dense, Embedding, merge, Flatten, Merge, BatchNormalization\n", "from keras.models import Model, load_model\n", "from keras.regularizers import l2\n", "import keras.backend as K\n", "from keras.optimizers import SGD\n", "import numpy as np\n", "\n", "from sklearn.cluster import MeanShift, estimate_bandwidth\n", "\n", "import utils\n", "\n", "import data\n", "\n", "from sklearn.model_selection import train_test_split\n", "\n", "from bcolz_array_iterator import BcolzArrayIterator\n", "\n", "import bcolz\n", "\n", "from keras_tqdm import TQDMNotebookCallback\n", "from keras.callbacks import ModelCheckpoint" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "Below path is a shared directory, swap to own" ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "collapsed": true }, "outputs": [], "source": [ "data_path = \"/data/datasets/taxi/\"" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Replication of 'csv_to_hdf5.py'" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "Original repo used some bizarre tuple method of reading in data to save in a hdf5 file using fuel. 
The following takes the same approach as that module, but uses pandas and saves the result in bcolz format (using the training data as the example)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "meta = pd.read_csv(data_path+'metaData_taxistandsID_name_GPSlocation.csv', header=0)" ] }, { "cell_type": "code", "execution_count": 66, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
IDDescricaoLatitudeLongitude
01Agra41.177146-8.609670
12Alameda41.156190-8.591064
23Aldoar41.170525-8.665876
34Alfândega41.143764-8.621803
45Amial41.183510-8.612726
\n", "
" ], "text/plain": [ " ID Descricao Latitude Longitude\n", "0 1 Agra 41.177146 -8.609670\n", "1 2 Alameda 41.156190 -8.591064\n", "2 3 Aldoar 41.170525 -8.665876\n", "3 4 Alfândega 41.143764 -8.621803\n", "4 5 Amial 41.183510 -8.612726" ] }, "execution_count": 66, "metadata": {}, "output_type": "execute_result" } ], "source": [ "meta.head()" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train = pd.read_csv(data_path+'train/train.csv', header=0)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TRIP_IDCALL_TYPEORIGIN_CALLORIGIN_STANDTAXI_IDTIMESTAMPDAY_TYPEMISSING_DATAPOLYLINE
01372636858620000589CNaNNaN200005891372636858AFalse[[-8.618643,41.141412],[-8.618499,41.141376],[...
11372637303620000596BNaN7.0200005961372637303AFalse[[-8.639847,41.159826],[-8.640351,41.159871],[...
21372636951620000320CNaNNaN200003201372636951AFalse[[-8.612964,41.140359],[-8.613378,41.14035],[-...
31372636854620000520CNaNNaN200005201372636854AFalse[[-8.574678,41.151951],[-8.574705,41.151942],[...
41372637091620000337CNaNNaN200003371372637091AFalse[[-8.645994,41.18049],[-8.645949,41.180517],[-...
\n", "
" ], "text/plain": [ " TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID \\\n", "0 1372636858620000589 C NaN NaN 20000589 \n", "1 1372637303620000596 B NaN 7.0 20000596 \n", "2 1372636951620000320 C NaN NaN 20000320 \n", "3 1372636854620000520 C NaN NaN 20000520 \n", "4 1372637091620000337 C NaN NaN 20000337 \n", "\n", " TIMESTAMP DAY_TYPE MISSING_DATA \\\n", "0 1372636858 A False \n", "1 1372637303 A False \n", "2 1372636951 A False \n", "3 1372636854 A False \n", "4 1372637091 A False \n", "\n", " POLYLINE \n", "0 [[-8.618643,41.141412],[-8.618499,41.141376],[... \n", "1 [[-8.639847,41.159826],[-8.640351,41.159871],[... \n", "2 [[-8.612964,41.140359],[-8.613378,41.14035],[-... \n", "3 [[-8.574678,41.151951],[-8.574705,41.151942],[... \n", "4 [[-8.645994,41.18049],[-8.645949,41.180517],[-... " ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.head()" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train['ORIGIN_CALL'] = pd.Series(pd.factorize(train['ORIGIN_CALL'])[0]) + 1" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train['ORIGIN_STAND']=pd.Series([0 if pd.isnull(x) or x=='' else int(x) for x in train[\"ORIGIN_STAND\"]])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train['TAXI_ID'] = pd.Series(pd.factorize(train['TAXI_ID'])[0]) + 1" ] }, { "cell_type": "code", "execution_count": 9, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train['DAY_TYPE'] = pd.Series([ord(x[0]) - ord('A') for x in train['DAY_TYPE']])" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "The array of long/lat coordinates per trip (row) is read in as a string. 
The function `ast.literal_eval(x)` evaluates the string into the expression it represents (safely). This happens below" ] }, { "cell_type": "code", "execution_count": 138, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "polyline = pd.Series([ast.literal_eval(x) for x in train['POLYLINE']])" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "Split into latitude/longitude" ] }, { "cell_type": "code", "execution_count": 148, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train['LATITUDE'] = pd.Series([np.array([point[1] for point in poly],dtype=np.float32) for poly in polyline])" ] }, { "cell_type": "code", "execution_count": 150, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train['LONGITUDE'] = pd.Series([np.array([point[0] for point in poly],dtype=np.float32) for poly in polyline])" ] }, { "cell_type": "code", "execution_count": 157, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "utils.save_array(data_path+'train/train.bc', train.as_matrix())" ] }, { "cell_type": "code", "execution_count": 158, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "utils.save_array(data_path+'train/meta_train.bc', meta.as_matrix())" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Further Feature Engineering" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "After converting 'csv_to_hdf5.py' functionality to pandas, I saved that array and then simply constructed the rest of the features as specified in the paper using pandas. I didn't bother seeing how the author did it as it was extremely obtuse and involved the fuel module." 
] }, { "cell_type": "code", "execution_count": 424, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train = pd.DataFrame(utils.load_array(data_path+'train/train.bc'), columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n", " 'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE'])" ] }, { "cell_type": "code", "execution_count": 425, "metadata": { "collapsed": true, "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TRIP_IDCALL_TYPEORIGIN_CALLORIGIN_STANDTAXI_IDTIMESTAMPDAY_TYPEMISSING_DATAPOLYLINELATITUDELONGITUDE
01372636858620000589C00113726368580False[[-8.618643,41.141412],[-8.618499,41.141376],[...[41.1414, 41.1414, 41.1425, 41.1438, 41.1444, ...[-8.61864, -8.6185, -8.62033, -8.62215, -8.623...
11372637303620000596B07213726373030False[[-8.639847,41.159826],[-8.640351,41.159871],[...[41.1598, 41.1599, 41.1601, 41.1605, 41.1609, ...[-8.63985, -8.64035, -8.6422, -8.64445, -8.646...
21372636951620000320C00313726369510False[[-8.612964,41.140359],[-8.613378,41.14035],[-...[41.1404, 41.1404, 41.1403, 41.1404, 41.1404, ...[-8.61296, -8.61338, -8.61421, -8.61477, -8.61...
31372636854620000520C00413726368540False[[-8.574678,41.151951],[-8.574705,41.151942],[...[41.152, 41.1519, 41.1519, 41.152, 41.1519, 41...[-8.57468, -8.57471, -8.5747, -8.57466, -8.574...
41372637091620000337C00513726370910False[[-8.645994,41.18049],[-8.645949,41.180517],[-...[41.1805, 41.1805, 41.18, 41.1789, 41.1785, 41...[-8.64599, -8.64595, -8.64605, -8.6468, -8.649...
\n", "
" ], "text/plain": [ " TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID TIMESTAMP \\\n", "0 1372636858620000589 C 0 0 1 1372636858 \n", "1 1372637303620000596 B 0 7 2 1372637303 \n", "2 1372636951620000320 C 0 0 3 1372636951 \n", "3 1372636854620000520 C 0 0 4 1372636854 \n", "4 1372637091620000337 C 0 0 5 1372637091 \n", "\n", " DAY_TYPE MISSING_DATA POLYLINE \\\n", "0 0 False [[-8.618643,41.141412],[-8.618499,41.141376],[... \n", "1 0 False [[-8.639847,41.159826],[-8.640351,41.159871],[... \n", "2 0 False [[-8.612964,41.140359],[-8.613378,41.14035],[-... \n", "3 0 False [[-8.574678,41.151951],[-8.574705,41.151942],[... \n", "4 0 False [[-8.645994,41.18049],[-8.645949,41.180517],[-... \n", "\n", " LATITUDE \\\n", "0 [41.1414, 41.1414, 41.1425, 41.1438, 41.1444, ... \n", "1 [41.1598, 41.1599, 41.1601, 41.1605, 41.1609, ... \n", "2 [41.1404, 41.1404, 41.1403, 41.1404, 41.1404, ... \n", "3 [41.152, 41.1519, 41.1519, 41.152, 41.1519, 41... \n", "4 [41.1805, 41.1805, 41.18, 41.1789, 41.1785, 41... \n", "\n", " LONGITUDE \n", "0 [-8.61864, -8.6185, -8.62033, -8.62215, -8.623... \n", "1 [-8.63985, -8.64035, -8.6422, -8.64445, -8.646... \n", "2 [-8.61296, -8.61338, -8.61421, -8.61477, -8.61... \n", "3 [-8.57468, -8.57471, -8.5747, -8.57466, -8.574... \n", "4 [-8.64599, -8.64595, -8.64605, -8.6468, -8.649... " ] }, "execution_count": 425, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train.head()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "The paper discusses how many categorical variables there are per category. 
The following all check out" ] }, { "cell_type": "code", "execution_count": 426, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "57105" ] }, "execution_count": 426, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['ORIGIN_CALL'].max()" ] }, { "cell_type": "code", "execution_count": 427, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "63" ] }, "execution_count": 427, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['ORIGIN_STAND'].max()" ] }, { "cell_type": "code", "execution_count": 428, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "448" ] }, "execution_count": 428, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['TAXI_ID'].max()" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "Self-explanatory" ] }, { "cell_type": "code", "execution_count": 429, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train['DAY_OF_WEEK'] = pd.Series([datetime.datetime.fromtimestamp(t).weekday() for t in train['TIMESTAMP']])" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "Quarter hour of the day, i.e. 
# Quarter hour of the day (0..95). Each timestamp is decoded once.
# NOTE(review): datetime.fromtimestamp() uses the machine's local timezone, so
# these values depend on where the notebook is run - confirm this is intended.
train['QUARTER_HOUR'] = pd.Series([(d.hour*60 + d.minute)//15
                                   for d in map(datetime.datetime.fromtimestamp, train['TIMESTAMP'])])

# ISO week number of the trip start.
train['WEEK_OF_YEAR'] = pd.Series([datetime.datetime.fromtimestamp(t).isocalendar()[1] for t in train['TIMESTAMP']])

# Target is the final [longitude, latitude] of the trip. Trips with fewer than
# two recorded points are marked NaN so that a later dropna() removes them.
# Uses `np.nan`: the notebook imports numpy only as `np`, so the previous
# `numpy.nan` raised NameError on a fresh kernel.
train['TARGET'] = pd.Series([[lon[-1], lat[-1]] if len(lon) > 1 else np.nan
                             for lon, lat in zip(train['LONGITUDE'], train['LATITUDE'])])
def start_stop_inputs(k, data=None):
    """Build the continuous model input: first k and last k coordinates of each trip.

    The last point of every trip is the prediction target, so it is excluded
    before the prefix/suffix are taken.

    Parameters
    ----------
    k : int
        Number of leading and trailing points to keep per coordinate.
    data : DataFrame, optional
        Frame with 'LONGITUDE' and 'LATITUDE' columns holding one 1-D array per
        row. Defaults to the notebook-level `train` frame, which is what the
        original one-argument form always read.

    Returns
    -------
    pandas.Series
        One entry per row of `data` (same index): a length-4*k array laid out as
        [lon first k, lon last k, lat first k, lat last k], or NaN for trips with
        fewer than two points (dropped later via dropna()).
    """
    if data is None:
        data = train

    result = []
    for lon, lat in zip(data['LONGITUDE'], data['LATITUDE']):
        if len(lon) < 2 or len(lat) < 2:
            result.append(np.nan)
            continue

        # Drop the final point: it is the target, not an input.
        lon, lat = lon[:-1], lat[:-1]

        if len(lon) < 2*k:
            # Too short for disjoint prefix/suffix: pad the tail with the last
            # known point up to 4*k entries (this was a hard-coded 20, which is
            # only correct for k == 5).
            lon = np.pad(lon, (0, 4*k - len(lon)), mode='edge')
            lat = np.pad(lat, (0, 4*k - len(lat)), mode='edge')

        result.append(np.concatenate([lon[:k], lon[-k:], lat[:k], lat[-k:]]))

    return pd.Series(result, index=data.index)
def start_stop_inputs(k, data, test):
    """Build the continuous model input: first k and last k coordinates of each trip.

    Parameters
    ----------
    k : int
        Number of leading and trailing points to keep per coordinate.
    data : DataFrame
        Frame with 'LONGITUDE' and 'LATITUDE' columns holding one 1-D array per row.
    test : bool
        False for training data: the last point is the target, so it is removed
        before the prefix/suffix are taken and at least two points are required.
        True for test data: the whole sequence is used and one point is enough.

    Returns
    -------
    pandas.Series
        One entry per row of `data` (same index): a length-4*k array laid out as
        [lon first k, lon last k, lat first k, lat last k], or NaN when the trip
        has too few points.
    """
    min_points = 1 if test else 2

    result = []
    for lon, lat in zip(data['LONGITUDE'], data['LATITUDE']):
        if len(lon) < min_points or len(lat) < min_points:
            result.append(np.nan)
            continue

        if not test:
            # Training rows: the final point is the label, keep it out of the inputs.
            lon, lat = lon[:-1], lat[:-1]

        if len(lon) < 2*k:
            # Too short for disjoint prefix/suffix: pad the tail with the last
            # known point up to 4*k entries.
            lon = np.pad(lon, (0, 4*k - len(lon)), mode='edge')
            lat = np.pad(lat, (0, 4*k - len(lat)), mode='edge')

        result.append(np.concatenate([lon[:k], lon[-k:], lat[:k], lat[-k:]]))

    return pd.Series(result, index=data.index)
def feature_ext(data, test=False):
    """Turn the raw taxi CSV frame into the feature frame used for modelling.

    Parameters
    ----------
    data : DataFrame
        Frame as read from train.csv / test.csv. It is not modified; the
        function works on a copy so it can be re-run on the same input.
    test : bool, default False
        When False a 'TARGET' column ([longitude, latitude] of the last point,
        in raw un-normalised coordinates) is added and the last point is kept
        out of 'COORD_FEATURES'.

    Returns
    -------
    DataFrame
        The input columns with encoded categoricals plus LATITUDE, LONGITUDE
        (standardised with lat_mean/lat_std/long_mean/long_std), TARGET (train
        only), COORD_FEATURES, DAY_OF_WEEK, QUARTER_HOUR, WEEK_OF_YEAR, in that
        order, with every row containing a NaN removed.

    NOTE(review): ORIGIN_CALL and TAXI_ID are factorized independently on each
    call, so codes produced for the train frame and for the test frame do not
    refer to the same caller/taxi - confirm whether a shared mapping is needed.
    NOTE(review): datetime.fromtimestamp() uses the machine's local timezone, so
    the time features depend on where the notebook is run.
    """
    # Copy first: the original version added columns to the caller's frame,
    # which made a second call fail on the already-encoded DAY_TYPE column.
    data = data.copy()
    idx = data.index  # every derived column is aligned explicitly to this index

    # factorize codes start at 0 and use -1 for missing values; +1 shifts
    # missing to 0 and known values to 1..n.
    data['ORIGIN_CALL'] = pd.factorize(data['ORIGIN_CALL'])[0] + 1

    data['ORIGIN_STAND'] = pd.Series([0 if pd.isnull(x) or x == '' else int(x) for x in data['ORIGIN_STAND']],
                                     index=idx)

    data['TAXI_ID'] = pd.factorize(data['TAXI_ID'])[0] + 1

    # 'A'/'B'/'C' -> 0/1/2
    data['DAY_TYPE'] = pd.Series([ord(x[0]) - ord('A') for x in data['DAY_TYPE']], index=idx)

    # POLYLINE is a string such as "[[lon,lat],[lon,lat],...]"; parse it safely.
    polyline = [ast.literal_eval(x) for x in data['POLYLINE']]
    raw_lat = [np.array([point[1] for point in poly], dtype=np.float32) for poly in polyline]
    raw_lon = [np.array([point[0] for point in poly], dtype=np.float32) for poly in polyline]

    # Standardised coordinate sequences (these feed COORD_FEATURES below).
    data['LATITUDE'] = pd.Series([(t - lat_mean)/lat_std for t in raw_lat], index=idx)
    data['LONGITUDE'] = pd.Series([(t - long_mean)/long_std for t in raw_lon], index=idx)

    if not test:
        # Target stays in raw coordinates; trips with < 2 points become NaN.
        data['TARGET'] = pd.Series([[lon[-1], lat[-1]] if len(lon) > 1 else np.nan
                                    for lon, lat in zip(raw_lon, raw_lat)], index=idx)

    # .values assigns positionally, whatever index the helper's result carries.
    data['COORD_FEATURES'] = start_stop_inputs(5, data, test).values

    # Decode each timestamp once instead of once per derived feature.
    trip_times = [datetime.datetime.fromtimestamp(t) for t in data['TIMESTAMP']]

    data['DAY_OF_WEEK'] = pd.Series([d.weekday() for d in trip_times], index=idx)

    data['QUARTER_HOUR'] = pd.Series([(d.hour*60 + d.minute)//15 for d in trip_times], index=idx)

    data['WEEK_OF_YEAR'] = pd.Series([d.isocalendar()[1] for d in trip_times], index=idx)

    return data.dropna()
"metadata": { "hidden": true }, "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train = feature_ext(train)" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [], "source": [ "test = feature_ext(test, test=True)" ] }, { "cell_type": "code", "execution_count": 161, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TRIP_IDCALL_TYPEORIGIN_CALLORIGIN_STANDTAXI_IDTIMESTAMPDAY_TYPEMISSING_DATAPOLYLINELATITUDELONGITUDECOORD_FEATURESDAY_OF_WEEKQUARTER_HOURWEEK_OF_YEAR
0T1B015114080390370False[[-8.585676,41.148522],[-8.585712,41.148639],[...[-0.118578, -0.116982, -0.1141, -0.113122, -0....[0.532604, 0.531971, 0.532454, 0.531671, 0.527...[0.532604, 0.531971, 0.532454, 0.531671, 0.527...34333
1T2B057214080386110False[[-8.610876,41.14557],[-8.610858,41.145579],[-...[-0.158413, -0.158258, -0.155736, -0.150024, -...[0.0920491, 0.0923659, 0.0915823, 0.0996017, 0...[0.0920491, 0.0923659, 0.0915823, 0.0996017, 0...34333
2T3B015314080385680False[[-8.585739,41.148558],[-8.58573,41.148828],[-...[-0.118063, -0.11446, -0.112505, -0.111887, -0...[0.531504, 0.531671, 0.531821, 0.5219, 0.52490...[0.531504, 0.531671, 0.531821, 0.5219, 0.52490...34333
3T4B053414080390900False[[-8.613963,41.141169],[-8.614125,41.141124],[...[-0.217753, -0.21837, -0.221047, -0.222488, -0...[0.0380801, 0.0352457, 0.0184065, 0.0151053, 0...[0.0380801, 0.0352457, 0.0184065, 0.0151053, 0...34333
4T5B018514080391770False[[-8.619903,41.148036],[-8.619894,41.148036]][-0.125114, -0.125114][-0.0657565, -0.0656064][-0.0657565, -0.0656064, -0.0656064, -0.065606...34333
\n", "
" ], "text/plain": [ " TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID TIMESTAMP DAY_TYPE \\\n", "0 T1 B 0 15 1 1408039037 0 \n", "1 T2 B 0 57 2 1408038611 0 \n", "2 T3 B 0 15 3 1408038568 0 \n", "3 T4 B 0 53 4 1408039090 0 \n", "4 T5 B 0 18 5 1408039177 0 \n", "\n", " MISSING_DATA POLYLINE \\\n", "0 False [[-8.585676,41.148522],[-8.585712,41.148639],[... \n", "1 False [[-8.610876,41.14557],[-8.610858,41.145579],[-... \n", "2 False [[-8.585739,41.148558],[-8.58573,41.148828],[-... \n", "3 False [[-8.613963,41.141169],[-8.614125,41.141124],[... \n", "4 False [[-8.619903,41.148036],[-8.619894,41.148036]] \n", "\n", " LATITUDE \\\n", "0 [-0.118578, -0.116982, -0.1141, -0.113122, -0.... \n", "1 [-0.158413, -0.158258, -0.155736, -0.150024, -... \n", "2 [-0.118063, -0.11446, -0.112505, -0.111887, -0... \n", "3 [-0.217753, -0.21837, -0.221047, -0.222488, -0... \n", "4 [-0.125114, -0.125114] \n", "\n", " LONGITUDE \\\n", "0 [0.532604, 0.531971, 0.532454, 0.531671, 0.527... \n", "1 [0.0920491, 0.0923659, 0.0915823, 0.0996017, 0... \n", "2 [0.531504, 0.531671, 0.531821, 0.5219, 0.52490... \n", "3 [0.0380801, 0.0352457, 0.0184065, 0.0151053, 0... \n", "4 [-0.0657565, -0.0656064] \n", "\n", " COORD_FEATURES DAY_OF_WEEK \\\n", "0 [0.532604, 0.531971, 0.532454, 0.531671, 0.527... 3 \n", "1 [0.0920491, 0.0923659, 0.0915823, 0.0996017, 0... 3 \n", "2 [0.531504, 0.531671, 0.531821, 0.5219, 0.52490... 3 \n", "3 [0.0380801, 0.0352457, 0.0184065, 0.0151053, 0... 3 \n", "4 [-0.0657565, -0.0656064, -0.0656064, -0.065606... 
3 \n", "\n", " QUARTER_HOUR WEEK_OF_YEAR \n", "0 43 33 \n", "1 43 33 \n", "2 43 33 \n", "3 43 33 \n", "4 43 33 " ] }, "execution_count": 161, "metadata": {}, "output_type": "execute_result" } ], "source": [ "test.head()" ] }, { "cell_type": "code", "execution_count": 162, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "utils.save_array(data_path+'train/train_features.bc', train.as_matrix())" ] }, { "cell_type": "code", "execution_count": 163, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "utils.save_array(data_path+'test/test_features.bc', test.as_matrix())" ] }, { "cell_type": "code", "execution_count": 164, "metadata": { "collapsed": true, "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TRIP_IDCALL_TYPEORIGIN_CALLORIGIN_STANDTAXI_IDTIMESTAMPDAY_TYPEMISSING_DATAPOLYLINELATITUDELONGITUDETARGETCOORD_FEATURESDAY_OF_WEEKQUARTER_HOURWEEK_OF_YEAR
01372636858620000589C00113726368580False[[-8.618643,41.141412],[-8.618499,41.141376],[...[-0.21451, -0.214974, -0.199688, -0.182087, -0...[-0.0437321, -0.0412145, -0.0731591, -0.105104...[-8.63084, 41.1545][-0.0437321, -0.0412145, -0.0731591, -0.105104...66826
11372637303620000596B07213726373030False[[-8.639847,41.159826],[-8.640351,41.159871],[...[0.0339161, 0.0345337, 0.0378275, 0.0429227, 0...[-0.414429, -0.423249, -0.455494, -0.494991, -...[-8.66574, 41.1707][-0.414429, -0.423249, -0.455494, -0.494991, -...66826
21372636951620000320C00313726369510False[[-8.612964,41.140359],[-8.613378,41.14035],[-...[-0.228715, -0.228818, -0.229796, -0.228561, -...[0.0555529, 0.048317, 0.0336785, 0.0239251, 0....[-8.61597, 41.1405][0.0555529, 0.048317, 0.0336785, 0.0239251, 0....66826
31372636854620000520C00413726368540False[[-8.574678,41.151951],[-8.574705,41.151942],[...[-0.0723098, -0.0724127, -0.0725671, -0.072206...[0.724872, 0.724405, 0.724572, 0.725189, 0.724...[-8.608, 41.1429][0.724872, 0.724405, 0.724572, 0.725189, 0.724...66826
41372637091620000337C00513726370910False[[-8.645994,41.18049],[-8.645949,41.180517],[-...[0.312708, 0.313068, 0.306789, 0.291092, 0.285...[-0.5219, -0.521117, -0.522834, -0.536055, -0....[-8.68727, 41.1781][-0.5219, -0.521117, -0.522834, -0.536055, -0....66826
\n", "
" ], "text/plain": [ " TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID \\\n", "0 1372636858620000589 C 0 0 1 \n", "1 1372637303620000596 B 0 7 2 \n", "2 1372636951620000320 C 0 0 3 \n", "3 1372636854620000520 C 0 0 4 \n", "4 1372637091620000337 C 0 0 5 \n", "\n", " TIMESTAMP DAY_TYPE MISSING_DATA \\\n", "0 1372636858 0 False \n", "1 1372637303 0 False \n", "2 1372636951 0 False \n", "3 1372636854 0 False \n", "4 1372637091 0 False \n", "\n", " POLYLINE \\\n", "0 [[-8.618643,41.141412],[-8.618499,41.141376],[... \n", "1 [[-8.639847,41.159826],[-8.640351,41.159871],[... \n", "2 [[-8.612964,41.140359],[-8.613378,41.14035],[-... \n", "3 [[-8.574678,41.151951],[-8.574705,41.151942],[... \n", "4 [[-8.645994,41.18049],[-8.645949,41.180517],[-... \n", "\n", " LATITUDE \\\n", "0 [-0.21451, -0.214974, -0.199688, -0.182087, -0... \n", "1 [0.0339161, 0.0345337, 0.0378275, 0.0429227, 0... \n", "2 [-0.228715, -0.228818, -0.229796, -0.228561, -... \n", "3 [-0.0723098, -0.0724127, -0.0725671, -0.072206... \n", "4 [0.312708, 0.313068, 0.306789, 0.291092, 0.285... \n", "\n", " LONGITUDE TARGET \\\n", "0 [-0.0437321, -0.0412145, -0.0731591, -0.105104... [-8.63084, 41.1545] \n", "1 [-0.414429, -0.423249, -0.455494, -0.494991, -... [-8.66574, 41.1707] \n", "2 [0.0555529, 0.048317, 0.0336785, 0.0239251, 0.... [-8.61597, 41.1405] \n", "3 [0.724872, 0.724405, 0.724572, 0.725189, 0.724... [-8.608, 41.1429] \n", "4 [-0.5219, -0.521117, -0.522834, -0.536055, -0.... [-8.68727, 41.1781] \n", "\n", " COORD_FEATURES DAY_OF_WEEK \\\n", "0 [-0.0437321, -0.0412145, -0.0731591, -0.105104... 6 \n", "1 [-0.414429, -0.423249, -0.455494, -0.494991, -... 6 \n", "2 [0.0555529, 0.048317, 0.0336785, 0.0239251, 0.... 6 \n", "3 [0.724872, 0.724405, 0.724572, 0.725189, 0.724... 6 \n", "4 [-0.5219, -0.521117, -0.522834, -0.536055, -0.... 
# Column names must follow the order in which train_features.bc was last
# written, i.e. the frame returned by feature_ext(): TARGET and COORD_FEATURES
# come before the three time features. With the previous list the "TARGET"
# label ended up on the QUARTER_HOUR column of the saved array.
train = pd.DataFrame(utils.load_array(data_path+'train/train_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
                            'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
                            'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])

# Stack the per-row [longitude, latitude] targets into an (n_rows, 2) array.
# `.values` replaces `.as_matrix()`, which no longer exists in pandas >= 1.0.
y_targ = np.vstack(train["TARGET"].values)
In order to get results similar to the paper's,\n", "they manually chose the uncommented bandwidth" ] }, { "cell_type": "code", "execution_count": 533, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "#bw = estimate_bandwidth(y_targ, quantile=.1, n_samples=1000)\n", "bw = 0.001" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "This takes some time" ] }, { "cell_type": "code", "execution_count": 545, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "MeanShift(bandwidth=0.001, bin_seeding=True, cluster_all=True, min_bin_freq=5,\n", " n_jobs=1, seeds=None)" ] }, "execution_count": 545, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ms = MeanShift(bandwidth=bw, bin_seeding=True, min_bin_freq=5)\n", "ms.fit(y_targ)" ] }, { "cell_type": "code", "execution_count": 546, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "cluster_centers = ms.cluster_centers_" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "This is very close to the number of clusters mentioned in the paper" ] }, { "cell_type": "code", "execution_count": 547, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "(3421, 2)" ] }, "execution_count": 547, "metadata": {}, "output_type": "execute_result" } ], "source": [ "cluster_centers.shape" ] }, { "cell_type": "code", "execution_count": 548, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "utils.save_array(data_path+\"cluster_centers_bw_001.bc\", cluster_centers)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Formatting Features for Bcolz iterator / garbage" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [ "train = pd.DataFrame(utils.load_array(data_path+'train/train_features.bc'),columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 
def get_features(data, center_longs=None, center_lats=None):
    """Build the list of model input arrays from a feature DataFrame.

    Args:
        data: DataFrame with the columns 'COORD_FEATURES', 'ORIGIN_CALL',
            'TAXI_ID', 'ORIGIN_STAND', 'QUARTER_HOUR', 'DAY_OF_WEEK' and
            'WEEK_OF_YEAR'.
        center_longs: optional 1-D sequence of cluster-center longitudes.
            Defaults to the notebook-level global `long`.
        center_lats: optional 1-D sequence of cluster-center latitudes.
            Defaults to the notebook-level global `lat`.

    Returns:
        A list of nine arrays, one row per row of `data`: the seven columns
        above (each stacked with `np.vstack`), followed by the cluster
        longitudes and the cluster latitudes repeated once per row.
    """
    # Fall back to the globals the original implementation relied on.
    if center_longs is None:
        center_longs = long
    if center_lats is None:
        center_lats = lat

    n_rows = data.shape[0]
    columns = ['COORD_FEATURES', 'ORIGIN_CALL', 'TAXI_ID', 'ORIGIN_STAND',
               'QUARTER_HOUR', 'DAY_OF_WEEK', 'WEEK_OF_YEAR']
    # `.values` instead of `.as_matrix()`: same array, but available in both
    # old and current pandas releases.
    features = [np.vstack(data[col].values) for col in columns]
    # np.tile repeats the vectors in C instead of building a Python list of
    # n_rows references and converting it.
    features.append(np.tile(np.asarray(center_longs), (n_rows, 1)))
    features.append(np.tile(np.asarray(center_lats), (n_rows, 1)))
    return features


def get_target(data):
    """Return the 'TARGET' column of `data` stacked into one 2-D array (one row per trip)."""
    return np.vstack(data["TARGET"].values)
# ## MODEL
# Load training data.
train = pd.DataFrame(
    utils.load_array(data_path+'train/train_features.bc'),
    columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
             'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
             'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])

# Validation cuts (Unix timestamps; the annotated wall-clock times are UTC).
cuts = [
    1376503200, # 2013-08-14 18:00
    1380616200, # 2013-10-01 08:30
    1381167900, # 2013-10-07 17:45
    1383364800, # 2013-11-02 04:00
    1387722600  # 2013-12-22 14:30
]

# Print in UTC so the output matches the annotations above regardless of the
# time zone of the machine running the notebook.
print(datetime.datetime.utcfromtimestamp(cuts[0]))

train.shape

# A trip belongs to the validation set when one of the cut times falls between
# its first and last GPS fix; the code spaces fixes 15 time units apart.
# Positions (not index labels) are collected because they are consumed by
# `iloc` below.
val_indices = []
for position, (_, row) in enumerate(train.iterrows()):
    trip_start = row['TIMESTAMP']
    trip_end = trip_start + 15 * (len(row['LATITUDE']) - 1)
    if any(trip_start <= ts <= trip_end for ts in cuts):
        val_indices.append(position)

X_valid = train.iloc[val_indices]
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TRIP_IDCALL_TYPEORIGIN_CALLORIGIN_STANDTAXI_IDTIMESTAMPDAY_TYPEMISSING_DATAPOLYLINELATITUDELONGITUDETARGETCOORD_FEATURESDAY_OF_WEEKQUARTER_HOURWEEK_OF_YEAR
2001531376502576620000126B03624713765025760False[[-8.649504,41.15421],[-8.649684,41.154201],[-...[-0.0418419, -0.0419448, -0.0449813, -0.046422...[-0.583255, -0.586407, -0.59711, -0.589074, -0...[-8.61122, 41.1463][-0.583255, -0.586407, -0.59711, -0.589074, -0...24333
2001861376503146620000161B0351913765031460False[[-8.649621,41.167323],[-8.64963,41.167251],[-...[0.135098, 0.134121, 0.126709, 0.125371, 0.124...[-0.585306, -0.585456, -0.589241, -0.588774, -...[-8.64504, 41.1586][-0.585306, -0.585456, -0.589241, -0.588774, -...24333
2002001376502942620000500B01542813765029420False[[-8.585694,41.148522],[-8.585712,41.148801],[...[-0.118578, -0.114821, -0.112402, -0.116982, -...[0.532287, 0.531971, 0.523018, 0.524735, 0.524...[-8.61524, 41.1418][0.532287, 0.531971, 0.523018, 0.524735, 0.524...24333
2002021376502604620000105C008713765026040False[[-8.61093,41.145498],[-8.610939,41.145516],[-...[-0.15939, -0.159133, -0.153883, -0.145392, -0...[0.0910987, 0.0909487, 0.093783, 0.108572, 0.1...[-8.64832, 41.1648][0.0910987, 0.0909487, 0.093783, 0.108572, 0.1...24333
2002271376502611620000022C0030413765026110False[[-8.591301,41.162715],[-8.591004,41.162562],[...[0.0729274, 0.0708687, 0.0587228, 0.0539879, 0...[0.43427, 0.439455, 0.42735, 0.423566, 0.41539...[-8.60977, 41.1512][0.43427, 0.439455, 0.42735, 0.423566, 0.41539...24333
\n", "
" ], "text/plain": [ " TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID \\\n", "200153 1376502576620000126 B 0 36 247 \n", "200186 1376503146620000161 B 0 35 19 \n", "200200 1376502942620000500 B 0 15 428 \n", "200202 1376502604620000105 C 0 0 87 \n", "200227 1376502611620000022 C 0 0 304 \n", "\n", " TIMESTAMP DAY_TYPE MISSING_DATA \\\n", "200153 1376502576 0 False \n", "200186 1376503146 0 False \n", "200200 1376502942 0 False \n", "200202 1376502604 0 False \n", "200227 1376502611 0 False \n", "\n", " POLYLINE \\\n", "200153 [[-8.649504,41.15421],[-8.649684,41.154201],[-... \n", "200186 [[-8.649621,41.167323],[-8.64963,41.167251],[-... \n", "200200 [[-8.585694,41.148522],[-8.585712,41.148801],[... \n", "200202 [[-8.61093,41.145498],[-8.610939,41.145516],[-... \n", "200227 [[-8.591301,41.162715],[-8.591004,41.162562],[... \n", "\n", " LATITUDE \\\n", "200153 [-0.0418419, -0.0419448, -0.0449813, -0.046422... \n", "200186 [0.135098, 0.134121, 0.126709, 0.125371, 0.124... \n", "200200 [-0.118578, -0.114821, -0.112402, -0.116982, -... \n", "200202 [-0.15939, -0.159133, -0.153883, -0.145392, -0... \n", "200227 [0.0729274, 0.0708687, 0.0587228, 0.0539879, 0... \n", "\n", " LONGITUDE \\\n", "200153 [-0.583255, -0.586407, -0.59711, -0.589074, -0... \n", "200186 [-0.585306, -0.585456, -0.589241, -0.588774, -... \n", "200200 [0.532287, 0.531971, 0.523018, 0.524735, 0.524... \n", "200202 [0.0910987, 0.0909487, 0.093783, 0.108572, 0.1... \n", "200227 [0.43427, 0.439455, 0.42735, 0.423566, 0.41539... \n", "\n", " TARGET \\\n", "200153 [-8.61122, 41.1463] \n", "200186 [-8.64504, 41.1586] \n", "200200 [-8.61524, 41.1418] \n", "200202 [-8.64832, 41.1648] \n", "200227 [-8.60977, 41.1512] \n", "\n", " COORD_FEATURES DAY_OF_WEEK \\\n", "200153 [-0.583255, -0.586407, -0.59711, -0.589074, -0... 2 \n", "200186 [-0.585306, -0.585456, -0.589241, -0.588774, -... 2 \n", "200200 [0.532287, 0.531971, 0.523018, 0.524735, 0.524... 
2 \n", "200202 [0.0910987, 0.0909487, 0.093783, 0.108572, 0.1... 2 \n", "200227 [0.43427, 0.439455, 0.42735, 0.423566, 0.41539... 2 \n", "\n", " QUARTER_HOUR WEEK_OF_YEAR \n", "200153 43 33 \n", "200186 43 33 \n", "200200 43 33 \n", "200202 43 33 \n", "200227 43 33 " ] }, "execution_count": 53, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valid.head()" ] }, { "cell_type": "code", "execution_count": 35, "metadata": { "collapsed": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2013-08-14 10:49:36\n", "2013-08-14 10:59:06\n", "2013-08-14 10:55:42\n", "2013-08-14 10:50:04\n", "2013-08-14 10:50:11\n", "2013-08-14 10:56:57\n", "2013-08-14 10:36:51\n", "2013-08-14 10:44:15\n", "2013-08-14 10:55:50\n", "2013-08-14 10:50:35\n", "2013-08-14 10:50:27\n", "2013-08-14 10:43:57\n", "2013-08-14 10:16:48\n", "2013-08-14 10:40:47\n", "2013-08-14 10:45:55\n", "2013-08-14 10:43:00\n", "2013-08-14 10:53:22\n", "2013-08-14 10:50:03\n", "2013-08-14 10:26:22\n", "2013-08-14 10:59:15\n", "2013-08-14 10:50:17\n", "2013-08-14 10:56:34\n", "2013-08-14 10:53:42\n", "2013-08-14 10:47:46\n", "2013-08-14 10:58:46\n", "2013-08-14 10:24:23\n", "2013-08-14 10:55:19\n", "2013-08-14 10:57:03\n", "2013-08-14 10:56:11\n", "2013-08-14 10:56:52\n", "2013-08-14 10:57:57\n", "2013-08-14 10:08:15\n", "2013-08-14 10:51:14\n", "2013-08-14 10:58:31\n", "2013-08-14 10:47:31\n", "2013-08-14 10:30:36\n", "2013-08-14 10:17:59\n", "2013-08-14 10:48:03\n", "2013-08-14 10:55:52\n", "2013-08-14 10:49:06\n", "2013-08-14 10:58:55\n", "2013-08-14 10:51:24\n", "2013-08-14 10:54:12\n", "2013-08-14 10:54:26\n", "2013-08-14 10:51:18\n", "2013-08-14 10:59:56\n", "2013-08-14 10:48:31\n", "2013-08-14 10:51:56\n", "2013-08-14 10:39:22\n", "2013-08-14 10:57:25\n", "2013-08-14 10:57:28\n", "2013-08-14 10:57:40\n", "2013-08-14 10:39:01\n", "2013-08-14 10:50:39\n", "2013-08-14 09:48:19\n", "2013-10-01 01:16:12\n", "2013-10-01 01:28:04\n", "2013-10-01 01:18:37\n", "2013-10-01 
01:24:48\n", "2013-10-01 01:23:39\n", "2013-10-01 01:28:37\n", "2013-10-01 01:20:16\n", "2013-10-01 01:23:49\n", "2013-10-01 01:27:11\n", "2013-10-01 01:06:20\n", "2013-10-01 01:28:08\n", "2013-10-01 01:29:02\n", "2013-10-01 01:24:44\n", "2013-10-01 01:24:44\n", "2013-10-01 01:19:06\n", "2013-10-01 00:28:33\n", "2013-10-01 01:29:28\n", "2013-10-01 01:27:31\n", "2013-10-01 01:22:13\n", "2013-10-01 01:26:03\n", "2013-10-01 01:28:55\n", "2013-10-01 01:18:10\n", "2013-10-01 01:22:13\n", "2013-10-01 01:14:30\n", "2013-10-01 01:24:41\n", "2013-10-01 01:22:16\n", "2013-10-01 01:25:35\n", "2013-10-01 01:21:27\n", "2013-10-01 01:11:33\n", "2013-10-01 01:10:18\n", "2013-10-01 01:09:33\n", "2013-10-01 01:01:15\n", "2013-10-01 01:17:58\n", "2013-10-01 01:18:00\n", "2013-10-01 01:13:26\n", "2013-10-01 01:18:01\n", "2013-10-01 01:25:54\n", "2013-10-01 01:21:20\n", "2013-10-01 01:25:31\n", "2013-10-01 01:25:54\n", "2013-10-01 01:23:40\n", "2013-10-01 01:26:46\n", "2013-10-01 01:23:31\n", "2013-10-01 01:17:09\n", "2013-10-01 01:21:57\n", "2013-10-01 00:29:09\n", "2013-10-01 01:14:47\n", "2013-10-01 01:04:25\n", "2013-10-01 01:14:09\n", "2013-10-01 01:16:59\n", "2013-10-01 01:27:16\n", "2013-10-01 01:16:26\n", "2013-10-01 01:23:18\n", "2013-10-01 01:16:05\n", "2013-10-01 01:27:43\n", "2013-10-01 01:08:13\n", "2013-10-01 01:19:21\n", "2013-10-01 01:21:19\n", "2013-10-01 01:24:20\n", "2013-10-01 01:26:45\n", "2013-10-01 01:18:28\n", "2013-10-01 01:19:45\n", "2013-10-01 01:28:10\n", "2013-10-01 01:22:20\n", "2013-10-01 01:18:42\n", "2013-10-01 01:19:52\n", "2013-10-01 01:18:44\n", "2013-10-01 01:15:11\n", "2013-10-01 01:19:24\n", "2013-10-01 01:23:58\n", "2013-10-01 01:28:50\n", "2013-10-01 01:13:24\n", "2013-10-01 01:28:38\n", "2013-10-01 01:24:50\n", "2013-10-01 01:14:19\n", "2013-10-01 01:10:05\n", "2013-10-01 01:26:31\n", "2013-10-01 01:28:01\n", "2013-09-30 23:44:16\n", "2013-10-01 01:21:43\n", "2013-10-01 01:26:57\n", "2013-10-01 01:25:25\n", "2013-10-01 01:25:36\n", "2013-10-01 
01:16:34\n", "2013-10-01 01:26:40\n", "2013-10-01 01:14:56\n", "2013-10-01 01:13:10\n", "2013-10-01 01:28:34\n", "2013-10-01 01:19:08\n", "2013-10-01 01:24:57\n", "2013-10-01 00:52:43\n", "2013-10-01 01:25:28\n", "2013-10-01 01:22:54\n", "2013-10-01 01:28:49\n", "2013-10-01 00:13:25\n", "2013-10-07 10:34:47\n", "2013-10-07 10:38:08\n", "2013-10-07 10:31:10\n", "2013-10-07 10:35:12\n", "2013-10-07 10:41:50\n", "2013-10-07 10:34:31\n", "2013-10-07 10:42:02\n", "2013-10-07 10:39:05\n", "2013-10-07 10:31:43\n", "2013-10-07 10:34:27\n", "2013-10-07 10:31:48\n", "2013-10-07 10:42:24\n", "2013-10-07 10:38:37\n", "2013-10-07 10:29:02\n", "2013-10-07 10:33:55\n", "2013-10-07 10:17:07\n", "2013-10-07 10:44:31\n", "2013-10-07 10:42:52\n", "2013-10-07 10:26:05\n", "2013-10-07 10:34:07\n", "2013-10-07 10:40:59\n", "2013-10-07 10:41:36\n", "2013-10-07 10:33:47\n", "2013-10-07 10:30:59\n", "2013-10-07 10:38:59\n", "2013-10-07 10:28:56\n", "2013-10-07 10:41:24\n", "2013-10-07 10:41:49\n", "2013-10-07 10:42:47\n", "2013-10-07 10:34:09\n", "2013-10-07 10:40:31\n", "2013-10-07 10:21:34\n", "2013-10-07 10:43:52\n", "2013-10-07 10:18:11\n", "2013-10-07 10:41:47\n", "2013-10-07 10:33:04\n", "2013-10-07 10:40:53\n", "2013-10-07 10:36:38\n", "2013-10-07 10:41:46\n", "2013-10-07 10:03:36\n", "2013-10-07 10:44:45\n", "2013-10-07 10:21:42\n", "2013-10-07 10:24:07\n", "2013-10-07 10:40:35\n", "2013-10-07 10:41:00\n", "2013-10-07 10:43:10\n", "2013-10-07 10:23:55\n", "2013-10-07 10:43:30\n", "2013-10-07 10:25:24\n", "2013-10-07 10:35:07\n", "2013-10-07 10:43:33\n", "2013-10-07 10:39:30\n", "2013-10-07 10:31:42\n", "2013-10-07 10:39:17\n", "2013-10-07 10:42:47\n", "2013-10-07 10:39:20\n", "2013-10-07 10:44:41\n", "2013-10-07 10:24:22\n", "2013-10-07 10:12:39\n", "2013-10-07 10:37:25\n", "2013-10-07 10:42:55\n", "2013-10-07 10:14:35\n", "2013-10-07 10:37:12\n", "2013-10-07 10:32:29\n", "2013-10-07 10:42:37\n", "2013-10-07 10:26:52\n", "2013-10-07 10:31:19\n", "2013-10-07 10:44:58\n", "2013-11-01 
20:47:37\n", "2013-11-01 20:54:00\n", "2013-11-01 20:58:53\n", "2013-11-01 20:56:37\n", "2013-11-01 20:56:09\n", "2013-11-01 20:51:05\n", "2013-11-01 20:50:58\n", "2013-11-01 20:55:26\n", "2013-11-01 20:53:43\n", "2013-11-01 20:53:46\n", "2013-11-01 20:54:55\n", "2013-11-01 20:59:28\n", "2013-11-01 20:56:54\n", "2013-11-01 20:50:37\n", "2013-11-01 20:48:40\n", "2013-11-01 20:55:46\n", "2013-11-01 20:45:20\n", "2013-11-01 20:46:22\n", "2013-11-01 20:48:25\n", "2013-11-01 20:47:19\n", "2013-11-01 20:57:31\n", "2013-11-01 20:58:14\n", "2013-11-01 20:49:30\n", "2013-11-01 20:43:31\n", "2013-11-01 20:59:00\n", "2013-11-01 20:54:23\n", "2013-11-01 20:51:01\n", "2013-11-01 20:38:12\n", "2013-11-01 20:59:31\n", "2013-11-01 20:56:46\n", "2013-11-01 20:53:51\n", "2013-11-01 20:48:00\n", "2013-11-01 20:58:04\n", "2013-11-01 20:52:50\n", "2013-11-01 20:58:12\n", "2013-11-01 20:57:37\n", "2013-11-01 20:53:33\n", "2013-11-01 20:54:11\n", "2013-11-01 20:48:49\n", "2013-11-01 20:42:56\n", "2013-11-01 20:55:36\n", "2013-11-01 20:51:36\n", "2013-11-01 20:48:45\n", "2013-11-01 20:49:17\n", "2013-11-01 20:53:50\n", "2013-11-01 20:45:28\n", "2013-11-01 20:45:04\n", "2013-11-01 20:52:17\n", "2013-11-01 20:52:10\n", "2013-11-01 20:59:16\n", "2013-11-01 20:51:37\n", "2013-11-01 20:50:10\n", "2013-12-22 06:24:50\n", "2013-12-22 06:04:12\n", "2013-12-22 06:16:27\n", "2013-12-22 06:23:06\n", "2013-12-22 06:24:04\n", "2013-12-22 06:17:33\n", "2013-12-22 06:22:55\n", "2013-12-22 06:24:35\n", "2013-12-22 06:21:56\n", "2013-12-22 06:22:49\n", "2013-12-22 06:25:31\n", "2013-12-22 06:21:31\n", "2013-12-22 06:27:31\n", "2013-12-22 06:29:45\n", "2013-12-22 06:26:09\n", "2013-12-22 06:17:08\n", "2013-12-22 06:26:00\n", "2013-12-22 06:20:56\n", "2013-12-22 06:23:09\n", "2013-12-22 06:22:31\n", "2013-12-22 06:29:59\n", "2013-12-22 06:27:43\n", "2013-12-22 06:23:04\n", "2013-12-22 06:25:30\n", "2013-12-22 06:19:16\n", "2013-12-22 06:23:06\n", "2013-12-22 06:26:01\n", "2013-12-22 06:19:45\n", "2013-12-22 
# Sanity check: start times of the validation trips (machine-local time).
# The frame is called `X_valid`; the previous `valid` name was never defined.
for d in X_valid['TIMESTAMP']:
    print(datetime.datetime.fromtimestamp(d))

# Everything that is not a validation trip is training data. `val_indices`
# holds row positions, so translate them to index labels with a flat
# (not nested) list before dropping.
X_train = train.drop(train.index[val_indices])

# Load the cluster centers from the location the MEANSHIFT section saves them to.
cluster_centers = utils.load_array(data_path+"cluster_centers_bw_001.bc")

# Column 0 is longitude, column 1 is latitude.
long = np.asarray(cluster_centers)[:, 0]
lat = np.asarray(cluster_centers)[:, 1]

# Persist the split so later sessions can skip the slow row-by-row scan.
utils.save_array(data_path+'train/X_train.bc', X_train.values)
utils.save_array(data_path+'valid/X_val.bc', X_valid.values)

X_train = pd.DataFrame(
    utils.load_array(data_path+'train/X_train.bc'),
    columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
             'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
             'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])

X_val = pd.DataFrame(
    utils.load_array(data_path+'valid/X_val.bc'),
    columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
             'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE', 'TARGET',
             'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])
def equirectangular_loss(y_true, y_pred):
    """Equirectangular distance between true and predicted coordinates.

    Column 0 of both tensors is read as longitude and column 1 as latitude,
    both in degrees. The returned per-row value is scaled by 6371.
    """
    rad_per_deg = 3.141592653589793 / 180
    true_long = y_true[:, 0] * rad_per_deg
    pred_long = y_pred[:, 0] * rad_per_deg
    true_lat = y_true[:, 1] * rad_per_deg
    pred_lat = y_pred[:, 1] * rad_per_deg

    # The longitude difference is shrunk by the cosine of the mean latitude.
    east_west = (true_long - pred_long) * K.cos((true_lat + pred_lat) / 2.)
    north_south = true_lat - pred_lat
    return 6371 * K.sqrt(K.square(east_west) + K.square(north_south))


def embedding_input(name, n_in, n_out, reg):
    """Create a single-integer input and the embedding applied to it.

    Args:
        name: name given to the Input layer.
        n_in: first argument passed to Embedding (input dimension).
        n_out: second argument passed to Embedding (output dimension).
        reg: factor passed to the l2 regularizer on the embedding weights.

    Returns:
        Tuple `(input_tensor, embedded_tensor)`.
    """
    inp = Input(shape=(1,), dtype='int64', name=name)
    embedded = Embedding(n_in, n_out, input_length=1, W_regularizer=l2(reg))(inp)
    return inp, embedded
def taxi_mlp(k, cluster_centers):
    """Build the fully-connected destination model.

    Args:
        k: the continuous input has 4*k values per row.
        cluster_centers: array whose first dimension is the number of clusters.

    Returns:
        A Keras Model. Inputs, in order: the 4*k continuous features, the six
        categorical ids (client, taxi, stand, quarter hour, day of week, week
        of year), then the cluster longitudes and cluster latitudes. The output
        has two columns: the softmax-weighted sum of the cluster longitudes and
        of the cluster latitudes.
    """
    n_clusters = cluster_centers.shape[0]
    coord_input = Input(shape=(4*k,))

    # The cluster coordinates are fed in as (repeated) inputs.
    center_longs = Input(shape=(n_clusters,))
    center_lats = Input(shape=(n_clusters,))

    # (input name, largest id); every embedding gets one extra row, is
    # 10-dimensional and has a regularization factor of 0.
    categorical_specs = [
        ('client_ID', 57106),
        ('taxi_ID', 448),
        ("stand_ID", 64),
        ("quarter_hour", 96),
        ("day_of_week", 7),
        ("week_of_year", 52),
    ]
    embedding_dim = 10
    embedding_reg = 0

    embedded = [
        embedding_input(input_name, max_id + 1, embedding_dim, embedding_reg)
        for input_name, max_id in categorical_specs
    ]
    categorical_inputs = [pair[0] for pair in embedded]
    flat_embeddings = [Flatten()(pair[1]) for pair in embedded]

    hidden = merge([coord_input] + flat_embeddings, mode='concat')
    hidden = Dense(500, activation='relu')(hidden)

    # One probability per cluster center.
    cluster_probs = Dense(n_clusters, activation='softmax')(hidden)

    # Probability-weighted sum of the centers, longitude first, then latitude.
    pred_long = merge([cluster_probs, center_longs], mode='dot')
    pred_lat = merge([cluster_probs, center_lats], mode='dot')
    prediction = merge([pred_long, pred_lat], mode='concat')

    return Model(input = [coord_input] + categorical_inputs + [center_longs, center_lats], output = prediction)
def data_iter(data, batch_size, cluster_centers):
    """Endlessly yield `(features, target)` batches from an in-memory DataFrame.

    Args:
        data: DataFrame with the columns 'COORD_FEATURES', 'ORIGIN_CALL',
            'TAXI_ID', 'ORIGIN_STAND', 'QUARTER_HOUR', 'DAY_OF_WEEK',
            'WEEK_OF_YEAR' and 'TARGET'.
        batch_size: maximum number of rows per batch (must be >= 1).
        cluster_centers: 2-D array-like; column 0 is longitude, column 1 latitude.

    Yields:
        `(features, target)` where `features` is a list of nine arrays (the
        seven feature columns followed by the cluster longitudes and latitudes
        repeated once per row of the batch) and `target` is the stacked
        'TARGET' column of the batch.

    Raises:
        ValueError: if `data` has no rows or `batch_size` is smaller than 1.
    """
    n_rows = data.shape[0]
    if n_rows == 0:
        raise ValueError("data_iter needs at least one row of data")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    centers = np.asarray(cluster_centers)
    center_longs = centers[:, 0]
    center_lats = centers[:, 1]
    feature_cols = ['COORD_FEATURES', 'ORIGIN_CALL', 'TAXI_ID', 'ORIGIN_STAND',
                    'QUARTER_HOUR', 'DAY_OF_WEEK', 'WEEK_OF_YEAR']

    start = 0
    while True:
        # iloc slices by position whatever the frame's index labels are.
        batch = data.iloc[start:start + batch_size]
        # The final batch of a pass can be shorter than batch_size; size the
        # repeated cluster inputs from the rows actually present.
        n_batch = batch.shape[0]

        features = [np.vstack(batch[col].values) for col in feature_cols]
        features.append(np.tile(center_longs, (n_batch, 1)))
        features.append(np.tile(center_lats, (n_batch, 1)))
        yield features, np.vstack(batch["TARGET"].values)

        # Wrap around so the generator never runs dry.
        start += batch_size
        if start >= n_rows:
            start = 0
# k in the model needs to match k from feature construction; 5 is used here.
model = taxi_mlp(5, cluster_centers)

# The paper used the SGD optimizer with the following parameters.
model.compile(optimizer=SGD(0.01, momentum=0.9), loss=equirectangular_loss, metrics=['mse'])

X_train_feat = get_features(X_train)
X_train_target = get_target(X_train)

# NOTE: validation features come from the in-memory `X_valid` frame.
X_val_feat = get_features(X_valid)
X_val_target = get_target(X_valid)

tqdm = TQDMNotebookCallback()
checkpoint = ModelCheckpoint(filepath=data_path+'models/tmp/weights.{epoch:03d}.{val_loss:.8f}.hdf5', save_best_only=True)
batch_size=256


def fit_epochs(model, nb_epoch):
    """Fit `model` on the training arrays for `nb_epoch` epochs.

    Uses the notebook-level `X_train_feat`/`X_train_target` for training,
    `X_val_feat`/`X_val_target` for validation, and the `tqdm` and
    `checkpoint` callbacks. Returns whatever `model.fit` returns.
    """
    return model.fit(X_train_feat, X_train_target, nb_epoch=nb_epoch, batch_size=batch_size,
                     validation_data=(X_val_feat, X_val_target),
                     callbacks=[tqdm, checkpoint], verbose=0)


# ### original
fit_epochs(model, 1)
fit_epochs(model, 30)

# Resume from a previously saved checkpoint.
model = load_model(data_path+'models/weights.0.0799.hdf5', custom_objects={'equirectangular_loss':equirectangular_loss})
fit_epochs(model, 100)
model.save(data_path+'models/current_model.hdf5')

# ### new valid
fit_epochs(model, 1)
fit_epochs(model, 400)

# data_path already ends with '/', so no leading slash before 'models'.
model.save(data_path+'models/current_model.hdf5')

len(X_val_feat[0])

# It works, but it seems to converge unrealistically quickly and the loss values
# are not the same as the paper's. The paper does not say what it uses as
# "error" in its results - presumably the same equirectangular distance.

# ## Kaggle Entry
best_model = load_model(data_path+'models/weights.308.0.03373993.hdf5', custom_objects={'equirectangular_loss':equirectangular_loss})

best_model.evaluate(X_val_feat, X_val_target)

# The test features carry no TARGET column.
test = pd.DataFrame(
    utils.load_array(data_path+'test/test_features.bc'),
    columns=['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',
             'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE', 'LONGITUDE',
             'COORD_FEATURES', "DAY_OF_WEEK", "QUARTER_HOUR", "WEEK_OF_YEAR"])

# Overwrite two id columns with the values stored in these CSV files.
test['ORIGIN_CALL'] = pd.read_csv(data_path+'real_origin_call.csv', header=None)
test['TAXI_ID'] = pd.read_csv(data_path+'real_taxi_id.csv',header=None)

X_test = get_features(test)

test_preds = np.round(best_model.predict(X_test), decimals=6)

# Model output is (longitude, latitude); the submission lists latitude first.
kaggle_out = pd.DataFrame(
    {'TRIP_ID': test['TRIP_ID'], 'LATITUDE': test_preds[:,1], 'LONGITUDE': test_preds[:,0]},
    columns=['TRIP_ID', 'LATITUDE', 'LONGITUDE'])

kaggle_out.to_csv(data_path+'submission.csv', index=False)
def hdist(a, b):
    """Row-wise haversine distance between two coordinate arrays.

    Both `a` and `b` are indexed as `[:, 0]` for longitude and `[:, 1]` for
    latitude, in degrees. The result is scaled by 6371 (so it is in the units
    of that radius), one value per row.
    """
    to_rad = 3.141592653589793 / 180

    lat_a = a[:, 1] * to_rad
    lon_a = a[:, 0] * to_rad
    lat_b = b[:, 1] * to_rad
    lon_b = b[:, 0] * to_rad

    lat_gap = abs(lat_a - lat_b)
    lon_gap = abs(lon_a - lon_b)

    # Haversine term, then half of the central angle via arctan2.
    hav = np.sin(lat_gap/2)**2 + np.cos(lat_a) * np.cos(lat_b) * (np.sin(lon_gap/2)**2)
    half_angle = np.arctan2(np.sqrt(hav), np.sqrt(1-hav))

    return 2 * 6371 * half_angle
"\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/engine/training.py\u001b[0m in \u001b[0;36m_predict_loop\u001b[0;34m(self, f, ins, batch_size, verbose)\u001b[0m\n\u001b[1;32m 943\u001b[0m \u001b[0mins_batch\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mslice_X\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mbatch_ids\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 944\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 945\u001b[0;31m \u001b[0mbatch_outs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mf\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mins_batch\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 946\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0;32mnot\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mbatch_outs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlist\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 947\u001b[0m \u001b[0mbatch_outs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;34m[\u001b[0m\u001b[0mbatch_outs\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/keras/backend/theano_backend.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, inputs)\u001b[0m\n\u001b[1;32m 957\u001b[0m \u001b[0;32mdef\u001b[0m \u001b[0m__call__\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 958\u001b[0m \u001b[0;32massert\u001b[0m \u001b[0misinstance\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m(\u001b[0m\u001b[0mlist\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mtuple\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 959\u001b[0;31m \u001b[0;32mreturn\u001b[0m 
\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfunction\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0minputs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 960\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 961\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/theano/compile/function_module.py\u001b[0m in \u001b[0;36m__call__\u001b[0;34m(self, *args, **kwargs)\u001b[0m\n\u001b[1;32m 857\u001b[0m \u001b[0mt0_fn\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mtime\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mtime\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 858\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 859\u001b[0;31m \u001b[0moutputs\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 860\u001b[0m \u001b[0;32mexcept\u001b[0m \u001b[0mException\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 861\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mhasattr\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mfn\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'position_of_error'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mKeyboardInterrupt\u001b[0m: " ] } ], "source": [ "trn_preds = model.predict(X_train_feat)" ] }, { "cell_type": "code", "execution_count": 119, "metadata": { "collapsed": false }, "outputs": [], "source": [ "er = hdist(val_preds, X_val_target)" ] }, { "cell_type": "code", "execution_count": 120, "metadata": { "collapsed": false }, "outputs": [ { "data": { "text/plain": [ "0.033741556" ] }, "execution_count": 120, "metadata": {}, "output_type": "execute_result" } ], "source": [ 
"er.mean()" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true }, "outputs": [], "source": [] }, { "cell_type": "markdown", "metadata": {}, "source": [ "To-do: simple to extend to validation data" ] }, { "cell_type": "markdown", "metadata": { "heading_collapsed": true }, "source": [ "## Uh oh... training data not representative of test" ] }, { "cell_type": "code", "execution_count": 67, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "# Cut times as Unix timestamps; the trailing comments give the same instants in UTC.\n", "cuts = [\n", "    1376503200,  # 2013-08-14 18:00 UTC\n", "    1380616200,  # 2013-10-01 08:30 UTC\n", "    1381167900,  # 2013-10-07 17:45 UTC\n", "    1383364800,  # 2013-11-02 04:00 UTC\n", "    1387722600,  # 2013-12-22 14:30 UTC\n", "]" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 86, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Does any training trip start exactly at one of the cut times?\n", "train['TIMESTAMP'].isin(cuts).any()" ] }, { "cell_type": "code", "execution_count": 87, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "0 1372636858\n", "1 1372637303\n", "2 1372636951\n", "3 1372636854\n", "4 1372637091\n", "5 1372636965\n", "6 1372637210\n", "7 1372637299\n", "8 1372637274\n", "9 1372637905\n", "10 1372636875\n", "11 1372637984\n", "12 1372637343\n", "13 1372638595\n", "14 1372638151\n", "15 1372637610\n", "16 1372638481\n", "17 1372639135\n", "18 1372637482\n", "19 1372639181\n", "20 1372638161\n", "21 1372637254\n", "22 1372638502\n", "23 1372639960\n", "24 1372637658\n", "25 1372639092\n", "26 1372639535\n", "27 1372640499\n", "28 1372639635\n", "29 1372640555\n", " ... 
\n", "1710640 1404151621\n", "1710641 1404152121\n", "1710642 1404170192\n", "1710643 1386603894\n", "1710644 1401596832\n", "1710645 1404151410\n", "1710646 1404172198\n", "1710647 1404155241\n", "1710648 1404171548\n", "1710649 1404151498\n", "1710650 1404168899\n", "1710651 1404153627\n", "1710652 1401475142\n", "1710653 1403935197\n", "1710654 1404166892\n", "1710655 1404143157\n", "1710656 1404014448\n", "1710657 1380123541\n", "1710658 1373986578\n", "1710659 1403941536\n", "1710660 1384165182\n", "1710661 1404164723\n", "1710662 1404155105\n", "1710663 1388660427\n", "1710664 1390403767\n", "1710665 1404171463\n", "1710666 1404171367\n", "1710667 1388745716\n", "1710668 1404141826\n", "1710669 1404157147\n", "Name: TIMESTAMP, dtype: int64" ] }, "execution_count": 87, "metadata": {}, "output_type": "execute_result" } ], "source": [ "train['TIMESTAMP']" ] }, { "cell_type": "code", "execution_count": 90, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 90, "metadata": {}, "output_type": "execute_result" } ], "source": [ "np.any(train['TIMESTAMP']==1381167900)" ] }, { "cell_type": "code", "execution_count": 91, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "# `.values` returns the same ndarray as the deprecated `as_matrix()`.\n", "times = train['TIMESTAMP'].values" ] }, { "cell_type": "code", "execution_count": 98, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "Index(['TRIP_ID', 'CALL_TYPE', 'ORIGIN_CALL', 'ORIGIN_STAND', 'TAXI_ID',\n", " 'TIMESTAMP', 'DAY_TYPE', 'MISSING_DATA', 'POLYLINE', 'LATITUDE',\n", " 'LONGITUDE', 'TARGET', 'COORD_FEATURES', 'DAY_OF_WEEK', 'QUARTER_HOUR',\n", " 'WEEK_OF_YEAR'],\n", " dtype='object')" ] }, "execution_count": 98, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_train.columns" ] }, { "cell_type": "code", "execution_count": 92, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ 
"array([1372636858, 1372637303, 1372636951, ..., 1388745716, 1404141826, 1404157147])" ] }, "execution_count": 92, "metadata": {}, "output_type": "execute_result" } ], "source": [ "times" ] }, { "cell_type": "code", "execution_count": 102, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "# Count (trip, cut) pairs in which the cut time falls inside the trip's time span.\n", "# The span runs from TIMESTAMP to 15 * (number of LATITUDE points - 1) seconds later.\n", "count = 0\n", "for _, row in X_val.iterrows():\n", "    start = row['TIMESTAMP']\n", "    end = start + 15 * (len(row['LATITUDE']) - 1)\n", "    for ts in cuts:\n", "        if start <= ts <= end:\n", "            count += 1" ] }, { "cell_type": "code", "execution_count": 101, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "one = count" ] }, { "cell_type": "code", "execution_count": 104, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "304" ] }, "execution_count": 104, "metadata": {}, "output_type": "execute_result" } ], "source": [ "count + one" ] }, { "cell_type": "code", "execution_count": 6, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "import h5py" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "h = h5py.File(data_path+'original/data.hdf5', 'r')" ] }, { "cell_type": "code", "execution_count": 15, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "ename": "KeyError", "evalue": "'Unable to open object (Component not found)'", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m 
\u001b[0mevrData\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mh\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0;34m'/Configure:0000/Run:0000/CalibCycle:0000/EvrData::DataV3/NoDetector.0:Evr.0/data'\u001b[0m\u001b[0;34m]\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2856)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2814)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/h5py/_hl/group.py\u001b[0m in \u001b[0;36m__getitem__\u001b[0;34m(self, name)\u001b[0m\n\u001b[1;32m 164\u001b[0m \u001b[0;32mraise\u001b[0m \u001b[0mValueError\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\"Invalid HDF5 object reference\"\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 165\u001b[0m \u001b[0;32melse\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 166\u001b[0;31m \u001b[0moid\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh5o\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mopen\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_e\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mname\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0mlapl\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0mself\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0m_lapl\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 167\u001b[0m \u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 168\u001b[0m \u001b[0motype\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mh5i\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mget_type\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0moid\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", 
"\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2856)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mh5py/_objects.pyx\u001b[0m in \u001b[0;36mh5py._objects.with_phil.wrapper (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/_objects.c:2814)\u001b[0;34m()\u001b[0m\n", "\u001b[0;32mh5py/h5o.pyx\u001b[0m in \u001b[0;36mh5py.h5o.open (/home/ilan/minonda/conda-bld/h5py_1482475225177/work/h5py/h5o.c:3742)\u001b[0;34m()\u001b[0m\n", "\u001b[0;31mKeyError\u001b[0m: 'Unable to open object (Component not found)'" ] } ], "source": [ "evrData=h['/Configure:0000/Run:0000/CalibCycle:0000/EvrData::DataV3/NoDetector.0:Evr.0/data']" ] }, { "cell_type": "code", "execution_count": 13, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "ename": "OSError", "evalue": "Failed to interpret file '/data/bckenstler/data/taxi/original/arrival-clusters.pkl' as a pickle", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mUnicodeDecodeError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/lib/npyio.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(file, mmap_mode, allow_pickle, fix_imports, encoding)\u001b[0m\n\u001b[1;32m 412\u001b[0m \u001b[0;32mtry\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m--> 413\u001b[0;31m \u001b[0;32mreturn\u001b[0m \u001b[0mpickle\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mfid\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mpickle_kwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 414\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mUnicodeDecodeError\u001b[0m: 'ascii' codec can't decode byte 
0xf7 in position 0: ordinal not in range(128)", "\nDuring handling of the above exception, another exception occurred:\n", "\u001b[0;31mOSError\u001b[0m Traceback (most recent call last)", "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mc\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mnp\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mload\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mdata_path\u001b[0m\u001b[0;34m+\u001b[0m\u001b[0;34m'original/arrival-clusters.pkl'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", "\u001b[0;32m/home/bckenstler/anaconda3/envs/py36/lib/python3.6/site-packages/numpy/lib/npyio.py\u001b[0m in \u001b[0;36mload\u001b[0;34m(file, mmap_mode, allow_pickle, fix_imports, encoding)\u001b[0m\n\u001b[1;32m 414\u001b[0m \u001b[0;32mexcept\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 415\u001b[0m raise IOError(\n\u001b[0;32m--> 416\u001b[0;31m \"Failed to interpret file %s as a pickle\" % repr(file))\n\u001b[0m\u001b[1;32m 417\u001b[0m \u001b[0;32mfinally\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[1;32m 418\u001b[0m \u001b[0;32mif\u001b[0m \u001b[0mown_fid\u001b[0m\u001b[0;34m:\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mOSError\u001b[0m: Failed to interpret file '/data/bckenstler/data/taxi/original/arrival-clusters.pkl' as a pickle" ] } ], "source": [ "c = np.load(data_path+'original/arrival-clusters.pkl')" ] }, { "cell_type": "markdown", "metadata": { "hidden": true }, "source": [ "### hd5f files" ] }, { "cell_type": "code", "execution_count": 10, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "from fuel.utils import find_in_data_path\n", "from fuel.datasets import H5PYDataset\n" ] }, { "cell_type": "code", "execution_count": 7, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "original_path = '/data/bckenstler/data/taxi/original/'" ] }, { "cell_type": "code", 
"execution_count": 33, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "train_set = H5PYDataset(original_path+'data.hdf5', which_sets=('train',),load_in_memory=True)" ] }, { "cell_type": "code", "execution_count": 48, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "valid_set = H5PYDataset(original_path+'valid.hdf5', which_sets=('cuts/test_times_0',),load_in_memory=True)" ] }, { "cell_type": "code", "execution_count": 34, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "1710670\n" ] } ], "source": [ "print(train_set.num_examples)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "304\n" ] } ], "source": [ "print(valid_set.num_examples)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "data = train_set.data_sources" ] }, { "cell_type": "code", "execution_count": 44, "metadata": { "collapsed": false, "hidden": true, "scrolled": true }, "outputs": [ { "data": { "text/plain": [ "array([2, 1, 2, ..., 2, 1, 1], dtype=int8)" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data[0]" ] }, { "cell_type": "code", "execution_count": 49, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "valid_data = valid_set.data_sources" ] }, { "cell_type": "code", "execution_count": 89, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "array([ 41.1542, 41.1542, 41.154 , 41.1539, 41.1542, 41.1544, 41.1542, 41.1538, 41.1533,\n", " 41.1528, 41.1525, 41.1525, 41.1527, 41.1527, 41.1527, 41.1526, 41.1524, 41.1526,\n", " 41.1526, 41.1522, 41.1508, 41.1507, 41.1497, 41.1489, 41.1489, 41.1486, 41.1479,\n", " 41.1475, 41.1468, 41.1461, 41.1463, 41.1464, 
41.146 , 41.1449, 41.1451, 41.1454,\n", " 41.1458, 41.1459, 41.1458, 41.1459, 41.146 , 41.146 ], dtype=float32)" ] }, "execution_count": 89, "metadata": {}, "output_type": "execute_result" } ], "source": [ "valid_data[4][0]" ] }, { "cell_type": "code", "execution_count": 77, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "stamps = valid_data[-3]" ] }, { "cell_type": "code", "execution_count": 99, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "1376502576" ] }, "execution_count": 99, "metadata": {}, "output_type": "execute_result" } ], "source": [ "stamps[0]" ] }, { "cell_type": "code", "execution_count": 115, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", 
"False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", 
"False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n", "False\n" ] } ], "source": [ "# For each timestamp in `stamps`, report whether any X_val trip starts at exactly that time.\n", "for stamp in stamps:\n", "    # int() converts the numpy scalar so the comparison uses a plain Python int.\n", "    print((X_val['TIMESTAMP'] == int(stamp)).any())" ] }, { "cell_type": "code", "execution_count": 101, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "int" ] }, "execution_count": 101, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(X_train['TIMESTAMP'][0])" ] }, { "cell_type": "code", "execution_count": 83, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "numpy.int32" ] }, "execution_count": 83, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(stamps[0])" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": 78, "metadata": { "collapsed": true, "hidden": true }, "outputs": [], "source": [ "check = [s in stamps for s in X_val['TIMESTAMP']]" ] }, { "cell_type": "code", "execution_count": 86, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2013-08-14 10:07:32\n", "2013-08-14 10:14:21\n", "2013-08-14 10:28:47\n", "2013-08-14 10:36:23\n", "2013-08-14 10:25:13\n", "2013-08-14 10:31:23\n", "2013-08-14 10:14:21\n", "2013-08-14 10:14:13\n", "2013-08-14 10:03:40\n", "2013-08-14 11:06:08\n", "2013-08-14 11:00:40\n", "2013-08-14 11:18:32\n", "2013-08-14 10:51:01\n", "2013-08-14 10:15:37\n", "2013-08-14 10:42:00\n", "2013-08-14 09:15:51\n", "2013-08-14 10:35:23\n", "2013-08-14 11:05:51\n", "2013-08-14 11:16:11\n", "2013-08-14 11:47:27\n", "2013-08-14 11:35:11\n", "2013-08-14 11:43:53\n", "2013-08-14 12:01:14\n", "2013-08-14 11:09:23\n", "2013-08-14 10:26:21\n", "2013-08-14 11:22:43\n", "2013-08-14 12:07:18\n", 
"2013-08-14 10:29:38\n", "2013-08-14 11:57:18\n", "2013-08-14 11:23:06\n", "2013-08-14 12:15:02\n", "2013-08-14 11:06:17\n", "2013-08-14 12:33:55\n", "2013-08-13 22:42:40\n", "2013-08-14 12:07:26\n", "2013-08-14 09:02:36\n", "2013-08-14 13:08:03\n", "2013-08-14 07:25:36\n", "2013-08-14 13:37:10\n", "2013-08-14 13:52:50\n", "2013-08-14 14:24:04\n", "2013-08-14 15:15:05\n", "2013-08-14 15:41:34\n", "2013-08-14 19:15:39\n", "2013-08-14 20:28:13\n", "2013-08-14 19:58:07\n", "2013-08-14 21:43:57\n", "2013-08-14 21:41:07\n", "2013-08-14 22:46:27\n", "2013-08-14 23:11:28\n", "2013-08-15 00:01:02\n", "2013-08-15 01:40:11\n", "2013-08-15 01:31:05\n", "2013-08-15 04:04:21\n", "2013-08-29 01:54:35\n", "2013-09-30 07:58:58\n", "2013-10-01 00:57:30\n", "2013-10-01 01:14:21\n", "2013-10-01 01:07:59\n", "2013-10-01 01:12:46\n", "2013-10-01 01:15:23\n", "2013-10-01 00:56:55\n", "2013-10-01 01:34:44\n", "2013-09-30 10:05:15\n", "2013-10-01 01:13:14\n", "2013-10-01 00:50:05\n", "2013-10-01 01:12:57\n", "2013-10-01 01:34:34\n", "2013-09-30 08:35:34\n", "2013-10-01 01:39:09\n", "2013-10-01 00:58:29\n", "2013-10-01 00:53:42\n", "2013-09-30 08:50:42\n", "2013-10-01 00:59:11\n", "2013-10-01 01:46:21\n", "2013-10-01 00:57:02\n", "2013-10-01 01:30:39\n", "2013-10-01 00:40:31\n", "2013-10-01 01:49:07\n", "2013-10-01 01:52:21\n", "2013-10-01 00:43:41\n", "2013-10-01 02:06:20\n", "2013-10-01 01:54:00\n", "2013-10-01 01:13:36\n", "2013-10-01 00:55:21\n", "2013-10-01 02:00:42\n", "2013-09-30 16:02:13\n", "2013-10-01 01:55:31\n", "2013-10-01 01:20:36\n", "2013-09-30 14:18:09\n", "2013-10-01 02:12:26\n", "2013-10-01 01:46:34\n", "2013-10-01 01:58:42\n", "2013-10-01 01:59:55\n", "2013-10-01 01:48:49\n", "2013-10-01 01:50:59\n", "2013-10-01 00:33:27\n", "2013-09-30 13:02:58\n", "2013-10-01 01:20:56\n", "2013-10-01 02:05:00\n", "2013-10-01 01:42:57\n", "2013-10-01 01:37:42\n", "2013-10-01 01:51:28\n", "2013-10-01 01:40:01\n", "2013-10-01 01:53:52\n", "2013-10-01 02:18:16\n", "2013-10-01 02:20:50\n", 
"2013-10-01 02:22:04\n", "2013-10-01 01:38:33\n", "2013-10-01 01:53:27\n", "2013-10-01 01:59:50\n", "2013-10-01 00:59:27\n", "2013-10-01 01:53:45\n", "2013-10-01 02:11:18\n", "2013-10-01 01:51:55\n", "2013-10-01 01:46:14\n", "2013-10-01 01:49:47\n", "2013-10-01 02:17:16\n", "2013-10-01 01:57:39\n", "2013-10-01 02:09:57\n", "2013-10-01 02:36:04\n", "2013-10-01 01:51:49\n", "2013-10-01 02:10:14\n", "2013-10-01 02:15:34\n", "2013-10-01 02:03:47\n", "2013-10-01 02:01:06\n", "2013-10-01 02:02:54\n", "2013-10-01 02:39:46\n", "2013-09-30 14:47:45\n", "2013-10-01 02:34:19\n", "2013-10-01 01:55:35\n", "2013-10-01 02:04:15\n", "2013-10-01 02:25:37\n", "2013-10-01 02:53:51\n", "2013-10-01 02:21:52\n", "2013-10-01 02:17:23\n", "2013-10-01 02:52:09\n", "2013-10-01 03:10:34\n", "2013-10-01 02:50:11\n", "2013-10-01 02:17:02\n", "2013-10-01 02:51:34\n", "2013-10-01 02:47:29\n", "2013-10-01 02:47:58\n", "2013-10-01 02:48:11\n", "2013-10-01 02:44:48\n", "2013-10-01 02:55:34\n", "2013-10-01 03:06:12\n", "2013-10-01 04:22:22\n", "2013-10-01 03:55:25\n", "2013-10-01 09:55:50\n", "2013-10-07 09:39:25\n", "2013-10-07 10:22:21\n", "2013-10-07 04:17:58\n", "2013-10-07 10:25:18\n", "2013-10-07 07:28:48\n", "2013-10-07 09:53:31\n", "2013-10-07 10:28:40\n", "2013-10-07 09:43:36\n", "2013-10-07 11:33:33\n", "2013-10-07 09:47:13\n", "2013-10-07 10:45:36\n", "2013-10-07 11:36:41\n", "2013-10-07 12:02:04\n", "2013-10-07 11:37:48\n", "2013-10-07 11:52:38\n", "2013-10-07 12:06:22\n", "2013-10-07 11:34:34\n", "2013-10-07 10:18:22\n", "2013-10-07 11:31:49\n", "2013-10-07 11:54:39\n", "2013-10-07 11:15:50\n", "2013-10-07 11:25:14\n", "2013-10-07 12:22:42\n", "2013-10-07 11:58:31\n", "2013-10-07 11:56:48\n", "2013-10-07 11:58:08\n", "2013-10-07 11:59:03\n", "2013-10-07 06:53:29\n", "2013-10-07 08:41:29\n", "2013-10-07 12:23:19\n", "2013-10-07 12:13:27\n", "2013-10-07 12:52:41\n", "2013-10-07 10:52:23\n", "2013-10-07 11:12:36\n", "2013-10-07 12:53:53\n", "2013-10-07 12:45:15\n", "2013-10-07 12:54:38\n", 
"2013-10-07 10:46:32\n", "2013-10-07 11:54:15\n", "2013-10-07 11:52:09\n", "2013-10-07 12:01:28\n", "2013-10-07 11:35:00\n", "2013-10-07 12:24:21\n", "2013-10-07 13:07:04\n", "2013-10-07 13:40:22\n", "2013-10-07 13:47:05\n", "2013-10-07 10:10:45\n", "2013-10-07 13:28:27\n", "2013-10-07 12:35:05\n", "2013-10-07 13:09:15\n", "2013-10-07 11:44:18\n", "2013-10-07 14:42:34\n", "2013-10-07 13:24:59\n", "2013-10-07 13:11:00\n", "2013-10-07 14:10:43\n", "2013-10-07 15:09:55\n", "2013-10-07 22:16:07\n", "2013-10-07 21:46:40\n", "2013-10-07 23:43:29\n", "2013-10-07 09:15:06\n", "2013-10-07 19:40:37\n", "2013-10-08 00:10:51\n", "2013-10-07 12:39:02\n", "2013-10-07 13:55:44\n", "2013-10-08 00:31:15\n", "2013-10-07 23:57:18\n", "2013-10-08 01:08:20\n", "2013-10-08 04:09:15\n", "2013-11-01 21:32:47\n", "2013-11-01 21:14:53\n", "2013-11-01 21:39:50\n", "2013-11-01 21:30:52\n", "2013-11-01 21:20:27\n", "2013-11-01 21:09:21\n", "2013-11-01 21:22:48\n", "2013-11-01 21:38:38\n", "2013-11-01 21:05:58\n", "2013-11-01 21:38:29\n", "2013-11-01 20:24:41\n", "2013-11-01 21:45:04\n", "2013-11-01 21:32:47\n", "2013-11-01 21:06:05\n", "2013-11-01 21:32:46\n", "2013-11-01 21:40:51\n", "2013-11-01 21:37:10\n", "2013-11-01 20:36:02\n", "2013-11-01 21:45:05\n", "2013-11-01 21:33:28\n", "2013-11-01 21:49:08\n", "2013-11-01 21:37:25\n", "2013-11-01 21:51:12\n", "2013-11-01 21:13:05\n", "2013-11-01 21:33:50\n", "2013-11-01 21:35:31\n", "2013-11-01 21:46:46\n", "2013-11-01 21:37:35\n", "2013-11-01 21:42:36\n", "2013-11-01 21:53:26\n", "2013-11-01 22:01:40\n", "2013-11-01 21:38:20\n", "2013-11-01 21:36:27\n", "2013-11-01 22:05:16\n", "2013-11-01 21:59:10\n", "2013-11-01 18:00:02\n", "2013-11-01 22:09:29\n", "2013-11-01 21:58:45\n", "2013-11-01 22:16:30\n", "2013-11-01 21:06:47\n", "2013-11-01 22:21:46\n", "2013-11-01 22:12:47\n", "2013-11-01 22:10:46\n", "2013-11-01 22:20:50\n", "2013-11-01 21:52:14\n", "2013-11-01 22:12:02\n", "2013-11-01 22:12:30\n", "2013-11-01 22:59:32\n", "2013-11-01 22:11:17\n", 
"2013-11-01 23:35:01\n", "2013-11-01 23:27:56\n", "2013-11-02 09:37:04\n", "2013-12-22 06:39:00\n", "2013-12-22 06:39:18\n", "2013-12-22 06:56:09\n", "2013-12-22 07:57:34\n", "2013-12-22 07:19:53\n", "2013-12-22 07:33:46\n", "2013-12-22 08:01:08\n", "2013-12-22 08:01:17\n", "2013-12-22 08:29:30\n", "2013-12-22 08:01:29\n", "2013-12-22 07:45:23\n", "2013-12-22 08:08:20\n", "2013-12-22 08:30:08\n", "2013-12-21 13:07:37\n", "2013-12-22 07:51:17\n", "2013-12-22 07:11:40\n", "2013-12-22 08:57:33\n", "2013-12-22 08:49:51\n", "2013-12-22 06:49:38\n", "2013-12-22 09:00:47\n", "2013-12-22 09:36:42\n", "2013-12-22 09:02:56\n", "2013-12-22 08:21:05\n", "2013-12-22 10:05:26\n", "2013-12-22 04:01:53\n", "2013-12-22 10:02:21\n", "2013-12-22 08:54:18\n", "2013-12-22 10:31:35\n", "2013-12-22 10:37:30\n", "2013-12-22 11:28:57\n", "2013-12-22 11:56:01\n", "2013-12-22 15:40:59\n", "2013-12-22 10:02:07\n", "2013-12-23 00:48:48\n" ] } ], "source": [ "for s in X_val['TIMESTAMP']:\n", " print(datetime.datetime.fromtimestamp(s))" ] }, { "cell_type": "code", "execution_count": 85, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "2013-08-14 10:49:36\n", "2013-08-14 10:59:06\n", "2013-08-14 10:55:42\n", "2013-08-14 10:50:04\n", "2013-08-14 10:50:11\n", "2013-08-14 10:56:57\n", "2013-08-14 10:36:51\n", "2013-08-14 10:44:15\n", "2013-08-14 10:55:50\n", "2013-08-14 10:50:35\n", "2013-08-14 10:50:27\n", "2013-08-14 10:43:57\n", "2013-08-14 10:16:48\n", "2013-08-14 10:40:47\n", "2013-08-14 10:45:55\n", "2013-08-14 10:43:00\n", "2013-08-14 10:53:22\n", "2013-08-14 10:50:03\n", "2013-08-14 10:26:22\n", "2013-08-14 10:59:15\n", "2013-08-14 10:50:17\n", "2013-08-14 10:56:34\n", "2013-08-14 10:53:42\n", "2013-08-14 10:47:46\n", "2013-08-14 10:58:46\n", "2013-08-14 10:24:23\n", "2013-08-14 10:55:19\n", "2013-08-14 10:57:03\n", "2013-08-14 10:56:11\n", "2013-08-14 10:56:52\n", "2013-08-14 10:57:57\n", "2013-08-14 10:08:15\n", 
"2013-08-14 10:51:14\n", "2013-08-14 10:58:31\n", "2013-08-14 10:47:31\n", "2013-08-14 10:30:36\n", "2013-08-14 10:17:59\n", "2013-08-14 10:48:03\n", "2013-08-14 10:55:52\n", "2013-08-14 10:49:06\n", "2013-08-14 10:58:55\n", "2013-08-14 10:51:24\n", "2013-08-14 10:54:12\n", "2013-08-14 10:54:26\n", "2013-08-14 10:51:18\n", "2013-08-14 10:59:56\n", "2013-08-14 10:48:31\n", "2013-08-14 10:51:56\n", "2013-08-14 10:39:22\n", "2013-08-14 10:57:25\n", "2013-08-14 10:57:28\n", "2013-08-14 10:57:40\n", "2013-08-14 10:39:01\n", "2013-08-14 10:50:39\n", "2013-08-14 09:48:19\n", "2013-10-01 01:16:12\n", "2013-10-01 01:28:04\n", "2013-10-01 01:18:37\n", "2013-10-01 01:24:48\n", "2013-10-01 01:23:39\n", "2013-10-01 01:28:37\n", "2013-10-01 01:20:16\n", "2013-10-01 01:23:49\n", "2013-10-01 01:27:11\n", "2013-10-01 01:06:20\n", "2013-10-01 01:28:08\n", "2013-10-01 01:29:02\n", "2013-10-01 01:24:44\n", "2013-10-01 01:24:44\n", "2013-10-01 01:19:06\n", "2013-10-01 00:28:33\n", "2013-10-01 01:29:28\n", "2013-10-01 01:27:31\n", "2013-10-01 01:22:13\n", "2013-10-01 01:26:03\n", "2013-10-01 01:28:55\n", "2013-10-01 01:18:10\n", "2013-10-01 01:22:13\n", "2013-10-01 01:14:30\n", "2013-10-01 01:24:41\n", "2013-10-01 01:22:16\n", "2013-10-01 01:25:35\n", "2013-10-01 01:21:27\n", "2013-10-01 01:11:33\n", "2013-10-01 01:10:18\n", "2013-10-01 01:09:33\n", "2013-10-01 01:01:15\n", "2013-10-01 01:17:58\n", "2013-10-01 01:18:00\n", "2013-10-01 01:13:26\n", "2013-10-01 01:18:01\n", "2013-10-01 01:25:54\n", "2013-10-01 01:21:20\n", "2013-10-01 01:25:31\n", "2013-10-01 01:25:54\n", "2013-10-01 01:23:40\n", "2013-10-01 01:26:46\n", "2013-10-01 01:23:31\n", "2013-10-01 01:17:09\n", "2013-10-01 01:21:57\n", "2013-10-01 00:29:09\n", "2013-10-01 01:14:47\n", "2013-10-01 01:04:25\n", "2013-10-01 01:14:09\n", "2013-10-01 01:16:59\n", "2013-10-01 01:27:16\n", "2013-10-01 01:16:26\n", "2013-10-01 01:23:18\n", "2013-10-01 01:16:05\n", "2013-10-01 01:27:43\n", "2013-10-01 01:08:13\n", "2013-10-01 01:19:21\n", 
"2013-10-01 01:21:19\n", "2013-10-01 01:24:20\n", "2013-10-01 01:26:45\n", "2013-10-01 01:18:28\n", "2013-10-01 01:19:45\n", "2013-10-01 01:28:10\n", "2013-10-01 01:22:20\n", "2013-10-01 01:18:42\n", "2013-10-01 01:19:52\n", "2013-10-01 01:18:44\n", "2013-10-01 01:15:11\n", "2013-10-01 01:19:24\n", "2013-10-01 01:23:58\n", "2013-10-01 01:28:50\n", "2013-10-01 01:13:24\n", "2013-10-01 01:28:38\n", "2013-10-01 01:24:50\n", "2013-10-01 01:14:19\n", "2013-10-01 01:10:05\n", "2013-10-01 01:26:31\n", "2013-10-01 01:28:01\n", "2013-09-30 23:44:16\n", "2013-10-01 01:21:43\n", "2013-10-01 01:26:57\n", "2013-10-01 01:25:25\n", "2013-10-01 01:25:36\n", "2013-10-01 01:16:34\n", "2013-10-01 01:26:40\n", "2013-10-01 01:14:56\n", "2013-10-01 01:13:10\n", "2013-10-01 01:28:34\n", "2013-10-01 01:19:08\n", "2013-10-01 01:24:57\n", "2013-10-01 00:52:43\n", "2013-10-01 01:25:28\n", "2013-10-01 01:22:54\n", "2013-10-01 01:28:49\n", "2013-10-01 00:13:25\n", "2013-10-07 10:34:47\n", "2013-10-07 10:38:08\n", "2013-10-07 10:31:10\n", "2013-10-07 10:35:12\n", "2013-10-07 10:41:50\n", "2013-10-07 10:34:31\n", "2013-10-07 10:42:02\n", "2013-10-07 10:39:05\n", "2013-10-07 10:31:43\n", "2013-10-07 10:34:27\n", "2013-10-07 10:31:48\n", "2013-10-07 10:42:24\n", "2013-10-07 10:38:37\n", "2013-10-07 10:29:02\n", "2013-10-07 10:33:55\n", "2013-10-07 10:17:07\n", "2013-10-07 10:44:31\n", "2013-10-07 10:42:52\n", "2013-10-07 10:26:05\n", "2013-10-07 10:34:07\n", "2013-10-07 10:40:59\n", "2013-10-07 10:41:36\n", "2013-10-07 10:33:47\n", "2013-10-07 10:30:59\n", "2013-10-07 10:38:59\n", "2013-10-07 10:28:56\n", "2013-10-07 10:41:24\n", "2013-10-07 10:41:49\n", "2013-10-07 10:42:47\n", "2013-10-07 10:34:09\n", "2013-10-07 10:40:31\n", "2013-10-07 10:21:34\n", "2013-10-07 10:43:52\n", "2013-10-07 10:18:11\n", "2013-10-07 10:41:47\n", "2013-10-07 10:33:04\n", "2013-10-07 10:40:53\n", "2013-10-07 10:36:38\n", "2013-10-07 10:41:46\n", "2013-10-07 10:03:36\n", "2013-10-07 10:44:45\n", "2013-10-07 10:21:42\n", 
"2013-10-07 10:24:07\n", "2013-10-07 10:40:35\n", "2013-10-07 10:41:00\n", "2013-10-07 10:43:10\n", "2013-10-07 10:23:55\n", "2013-10-07 10:43:30\n", "2013-10-07 10:25:24\n", "2013-10-07 10:35:07\n", "2013-10-07 10:43:33\n", "2013-10-07 10:39:30\n", "2013-10-07 10:31:42\n", "2013-10-07 10:39:17\n", "2013-10-07 10:42:47\n", "2013-10-07 10:39:20\n", "2013-10-07 10:44:41\n", "2013-10-07 10:24:22\n", "2013-10-07 10:12:39\n", "2013-10-07 10:37:25\n", "2013-10-07 10:42:55\n", "2013-10-07 10:14:35\n", "2013-10-07 10:37:12\n", "2013-10-07 10:32:29\n", "2013-10-07 10:42:37\n", "2013-10-07 10:26:52\n", "2013-10-07 10:31:19\n", "2013-10-07 10:44:58\n", "2013-11-01 20:47:37\n", "2013-11-01 20:54:00\n", "2013-11-01 20:58:53\n", "2013-11-01 20:56:37\n", "2013-11-01 20:56:09\n", "2013-11-01 20:51:05\n", "2013-11-01 20:50:58\n", "2013-11-01 20:55:26\n", "2013-11-01 20:53:43\n", "2013-11-01 20:53:46\n", "2013-11-01 20:54:55\n", "2013-11-01 20:59:28\n", "2013-11-01 20:56:54\n", "2013-11-01 20:50:37\n", "2013-11-01 20:48:40\n", "2013-11-01 20:55:46\n", "2013-11-01 20:45:20\n", "2013-11-01 20:46:22\n", "2013-11-01 20:48:25\n", "2013-11-01 20:47:19\n", "2013-11-01 20:57:31\n", "2013-11-01 20:58:14\n", "2013-11-01 20:49:30\n", "2013-11-01 20:43:31\n", "2013-11-01 20:59:00\n", "2013-11-01 20:54:23\n", "2013-11-01 20:51:01\n", "2013-11-01 20:38:12\n", "2013-11-01 20:59:31\n", "2013-11-01 20:56:46\n", "2013-11-01 20:53:51\n", "2013-11-01 20:48:00\n", "2013-11-01 20:58:04\n", "2013-11-01 20:52:50\n", "2013-11-01 20:58:12\n", "2013-11-01 20:57:37\n", "2013-11-01 20:53:33\n", "2013-11-01 20:54:11\n", "2013-11-01 20:48:49\n", "2013-11-01 20:42:56\n", "2013-11-01 20:55:36\n", "2013-11-01 20:51:36\n", "2013-11-01 20:48:45\n", "2013-11-01 20:49:17\n", "2013-11-01 20:53:50\n", "2013-11-01 20:45:28\n", "2013-11-01 20:45:04\n", "2013-11-01 20:52:17\n", "2013-11-01 20:52:10\n", "2013-11-01 20:59:16\n", "2013-11-01 20:51:37\n", "2013-11-01 20:50:10\n", "2013-12-22 06:24:50\n", "2013-12-22 06:04:12\n", 
"2013-12-22 06:16:27\n", "2013-12-22 06:23:06\n", "2013-12-22 06:24:04\n", "2013-12-22 06:17:33\n", "2013-12-22 06:22:55\n", "2013-12-22 06:24:35\n", "2013-12-22 06:21:56\n", "2013-12-22 06:22:49\n", "2013-12-22 06:25:31\n", "2013-12-22 06:21:31\n", "2013-12-22 06:27:31\n", "2013-12-22 06:29:45\n", "2013-12-22 06:26:09\n", "2013-12-22 06:17:08\n", "2013-12-22 06:26:00\n", "2013-12-22 06:20:56\n", "2013-12-22 06:23:09\n", "2013-12-22 06:22:31\n", "2013-12-22 06:29:59\n", "2013-12-22 06:27:43\n", "2013-12-22 06:23:04\n", "2013-12-22 06:25:30\n", "2013-12-22 06:19:16\n", "2013-12-22 06:23:06\n", "2013-12-22 06:26:01\n", "2013-12-22 06:19:45\n", "2013-12-22 02:34:23\n", "2013-12-22 06:29:54\n", "2013-12-22 06:28:39\n", "2013-12-22 06:27:43\n", "2013-12-22 06:16:23\n", "2013-12-22 06:17:26\n" ] } ], "source": [ "for s in stamps:\n", " print(datetime.datetime.fromtimestamp(s))" ] }, { "cell_type": "code", "execution_count": 71, "metadata": { "collapsed": false, "hidden": true }, "outputs": [], "source": [ "ids = valid_data[-1]" ] }, { "cell_type": "code", "execution_count": 74, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "numpy.bytes_" ] }, "execution_count": 74, "metadata": {}, "output_type": "execute_result" } ], "source": [ "type(ids[0])" ] }, { "cell_type": "code", "execution_count": 70, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/plain": [ "[\"b'1376502576620000126'\",\n", " \"b'1376503146620000161'\",\n", " \"b'1376502942620000500'\",\n", " \"b'1376502604620000105'\",\n", " \"b'1376502611620000022'\",\n", " \"b'1376503017620000272'\",\n", " \"b'1376501811620000617'\",\n", " \"b'1376502255620000663'\",\n", " \"b'1376502950620000005'\",\n", " \"b'1376502635620000276'\",\n", " \"b'1376502627620000596'\",\n", " \"b'1376502237620000675'\",\n", " \"b'1376500608620000409'\",\n", " \"b'1376502047620000574'\",\n", " \"b'1376502355620000338'\",\n", " \"b'1376502180620000080'\",\n", " 
\"b'1376502802620000680'\",\n", " \"b'1376502603620000142'\",\n", " \"b'1376501182620000651'\",\n", " \"b'1376503155620000026'\",\n", " \"b'1376502617620000657'\",\n", " \"b'1376502994620000604'\",\n", " \"b'1376502822620000093'\",\n", " \"b'1376502466620000561'\",\n", " \"b'1376503126620000410'\",\n", " \"b'1376501063620000343'\",\n", " \"b'1376502919620000166'\",\n", " \"b'1376503023620000010'\",\n", " \"b'1376502971620000517'\",\n", " \"b'1376503012620000273'\",\n", " \"b'1376503077620000470'\",\n", " \"b'1376500095620000569'\",\n", " \"b'1376502674620000426'\",\n", " \"b'1376503111620000674'\",\n", " \"b'1376502451620000310'\",\n", " \"b'1376501436620000344'\",\n", " \"b'1376500679620000108'\",\n", " \"b'1376502483620000356'\",\n", " \"b'1376502952620000687'\",\n", " \"b'1376502546620000254'\",\n", " \"b'1376503135620000053'\",\n", " \"b'1376502684620000503'\",\n", " \"b'1376502852620000321'\",\n", " \"b'1376502866620000421'\",\n", " \"b'1376502678620000460'\",\n", " \"b'1376503196620000386'\",\n", " \"b'1376502511620000480'\",\n", " \"b'1376502716620000224'\",\n", " \"b'1376501962620000507'\",\n", " \"b'1376503045620000633'\",\n", " \"b'1376503048620000349'\",\n", " \"b'1376503060620000049'\",\n", " \"b'1376501941620000667'\",\n", " \"b'1376502639620000281'\",\n", " \"b'1376498899620000172'\",\n", " \"b'1380615372620000303'\",\n", " \"b'1380616084620000260'\",\n", " \"b'1380615517620000372'\",\n", " \"b'1380615888620000588'\",\n", " \"b'1380615819620000042'\",\n", " \"b'1380616117620000325'\",\n", " \"b'1380615616620000040'\",\n", " \"b'1380615829620000682'\",\n", " \"b'1380616031620000001'\",\n", " \"b'1380614780620000352'\",\n", " \"b'1380616088620000513'\",\n", " \"b'1380616142620000289'\",\n", " \"b'1380615884620000166'\",\n", " \"b'1380615884620000671'\",\n", " \"b'1380615546620000187'\",\n", " \"b'1380612513620000172'\",\n", " \"b'1380616168620000472'\",\n", " \"b'1380616051620000597'\",\n", " \"b'1380615733620000105'\",\n", " 
\"b'1380615963620000137'\",\n", " \"b'1380616135620000672'\",\n", " \"b'1380615490620000574'\",\n", " \"b'1380615733620000051'\",\n", " \"b'1380615270620000612'\",\n", " \"b'1380615881620000031'\",\n", " \"b'1380615736620000246'\",\n", " \"b'1380615935620000367'\",\n", " \"b'1380615687620000577'\",\n", " \"b'1380615093620000272'\",\n", " \"b'1380615018620000632'\",\n", " \"b'1380614973620000258'\",\n", " \"b'1380614475620000032'\",\n", " \"b'1380615478620000138'\",\n", " \"b'1380615480620000381'\",\n", " \"b'1380615206620000397'\",\n", " \"b'1380615481620000077'\",\n", " \"b'1380615954620000546'\",\n", " \"b'1380615680620000192'\",\n", " \"b'1380615931620000068'\",\n", " \"b'1380615954620000395'\",\n", " \"b'1380615820620000482'\",\n", " \"b'1380616006620000080'\",\n", " \"b'1380615811620000431'\",\n", " \"b'1380615429620000602'\",\n", " \"b'1380615717620000497'\",\n", " \"b'1380612549620000161'\",\n", " \"b'1380615287620000675'\",\n", " \"b'1380614665620000458'\",\n", " \"b'1380615249620000222'\",\n", " \"b'1380615419620000487'\",\n", " \"b'1380616036620000669'\",\n", " \"b'1380615386620000476'\",\n", " \"b'1380615798620000523'\",\n", " \"b'1380615365620000215'\",\n", " \"b'1380616063620000065'\",\n", " \"b'1380614893620000011'\",\n", " \"b'1380615561620000391'\",\n", " \"b'1380615679620000004'\",\n", " \"b'1380615860620000429'\",\n", " \"b'1380616005620000695'\",\n", " \"b'1380615508620000361'\",\n", " \"b'1380615585620000665'\",\n", " \"b'1380616090620000562'\",\n", " \"b'1380615740620000398'\",\n", " \"b'1380615522620000156'\",\n", " \"b'1380615592620000674'\",\n", " \"b'1380615524620000279'\",\n", " \"b'1380615311620000540'\",\n", " \"b'1380615564620000216'\",\n", " \"b'1380615838620000324'\",\n", " \"b'1380616130620000356'\",\n", " \"b'1380615204620000387'\",\n", " \"b'1380616118620000649'\",\n", " \"b'1380615890620000159'\",\n", " \"b'1380615259620000393'\",\n", " \"b'1380615005620000249'\",\n", " \"b'1380615991620000589'\",\n", " 
\"b'1380616081620000633'\",\n", " \"b'1380609856620000609'\",\n", " \"b'1380615703620000410'\",\n", " \"b'1380616017620000470'\",\n", " \"b'1380615925620000177'\",\n", " \"b'1380615936620000547'\",\n", " \"b'1380615394620000400'\",\n", " \"b'1380616000620000140'\",\n", " \"b'1380615296620000020'\",\n", " \"b'1380615190620000477'\",\n", " \"b'1380616114620000151'\",\n", " \"b'1380615548620000247'\",\n", " \"b'1380615897620000616'\",\n", " \"b'1380613963620000005'\",\n", " \"b'1380615928620000449'\",\n", " \"b'1380615774620000158'\",\n", " \"b'1380616129620000281'\",\n", " \"b'1380611605620000351'\",\n", " \"b'1381167287620000123'\",\n", " \"b'1381167488620000626'\",\n", " \"b'1381167070620000142'\",\n", " \"b'1381167312620000337'\",\n", " \"b'1381167710620000684'\",\n", " \"b'1381167271620000159'\",\n", " \"b'1381167722620000624'\",\n", " \"b'1381167545620000419'\",\n", " \"b'1381167103620000114'\",\n", " \"b'1381167267620000668'\",\n", " \"b'1381167108620000307'\",\n", " \"b'1381167744620000051'\",\n", " \"b'1381167517620000356'\",\n", " \"b'1381166942620000518'\",\n", " \"b'1381167235620000529'\",\n", " \"b'1381166227620000901'\",\n", " \"b'1381167871620000463'\",\n", " \"b'1381167772620000495'\",\n", " \"b'1381166765620000008'\",\n", " \"b'1381167247620000345'\",\n", " \"b'1381167659620000235'\",\n", " \"b'1381167696620000085'\",\n", " \"b'1381167227620000156'\",\n", " \"b'1381167059620000004'\",\n", " \"b'1381167539620000256'\",\n", " \"b'1381166936620000426'\",\n", " \"b'1381167684620000621'\",\n", " \"b'1381167709620000249'\",\n", " \"b'1381167767620000094'\",\n", " \"b'1381167249620000675'\",\n", " \"b'1381167631620000116'\",\n", " \"b'1381166494620000480'\",\n", " \"b'1381167832620000074'\",\n", " \"b'1381166291620000326'\",\n", " \"b'1381167707620000653'\",\n", " \"b'1381167184620000560'\",\n", " \"b'1381167653620000295'\",\n", " \"b'1381167398620000686'\",\n", " \"b'1381167706620000321'\",\n", " \"b'1381165416620000697'\",\n", " 
\"b'1381167885620000280'\",\n", " \"b'1381166502620000297'\",\n", " \"b'1381166647620000657'\",\n", " \"b'1381167635620000662'\",\n", " \"b'1381167660620000594'\",\n", " \"b'1381167790620000093'\",\n", " \"b'1381166635620000195'\",\n", " \"b'1381167810620000431'\",\n", " \"b'1381166724620000311'\",\n", " \"b'1381167307620000591'\",\n", " \"b'1381167813620000267'\",\n", " \"b'1381167570620000648'\",\n", " \"b'1381167102620000525'\",\n", " \"b'1381167557620000424'\",\n", " \"b'1381167767620000160'\",\n", " \"b'1381167560620000633'\",\n", " \"b'1381167881620000391'\",\n", " \"b'1381166662620000189'\",\n", " \"b'1381165959620000138'\",\n", " \"b'1381167445620000344'\",\n", " \"b'1381167775620000049'\",\n", " \"b'1381166075620000068'\",\n", " \"b'1381167432620000001'\",\n", " \"b'1381167149620000257'\",\n", " \"b'1381167757620000324'\",\n", " \"b'1381166812620000595'\",\n", " \"b'1381167079620000535'\",\n", " \"b'1381167898620000667'\",\n", " \"b'1383364057620000066'\",\n", " \"b'1383364440620000010'\",\n", " \"b'1383364733620000009'\",\n", " \"b'1383364597620000601'\",\n", " \"b'1383364569620000356'\",\n", " \"b'1383364265620000007'\",\n", " \"b'1383364258620000574'\",\n", " \"b'1383364526620000108'\",\n", " \"b'1383364423620000015'\",\n", " \"b'1383364426620000632'\",\n", " \"b'1383364495620000611'\",\n", " \"b'1383364768620000388'\",\n", " \"b'1383364614620000372'\",\n", " \"b'1383364237620000455'\",\n", " \"b'1383364120620000403'\",\n", " \"b'1383364546620000041'\",\n", " \"b'1383363920620000020'\",\n", " \"b'1383363982620000591'\",\n", " \"b'1383364105620000665'\",\n", " \"b'1383364039620000618'\",\n", " \"b'1383364651620000513'\",\n", " \"b'1383364694620000364'\",\n", " \"b'1383364170620000239'\",\n", " \"b'1383363811620000031'\",\n", " \"b'1383364740620000252'\",\n", " \"b'1383364463620000345'\",\n", " \"b'1383364261620000436'\",\n", " \"b'1383363492620000672'\",\n", " \"b'1383364771620000320'\",\n", " \"b'1383364606620000508'\",\n", " 
\"b'1383364431620000233'\",\n", " \"b'1383364080620000527'\",\n", " \"b'1383364684620000005'\",\n", " \"b'1383364370620000140'\",\n", " \"b'1383364692620000118'\",\n", " \"b'1383364657620000570'\",\n", " \"b'1383364413620000492'\",\n", " \"b'1383364451620000309'\",\n", " \"b'1383364129620000013'\",\n", " \"b'1383363776620000434'\",\n", " \"b'1383364536620000217'\",\n", " \"b'1383364296620000112'\",\n", " \"b'1383364125620000625'\",\n", " \"b'1383364157620000648'\",\n", " \"b'1383364430620000542'\",\n", " \"b'1383363928620000616'\",\n", " \"b'1383363904620000105'\",\n", " \"b'1383364337620000612'\",\n", " \"b'1383364330620000333'\",\n", " \"b'1383364756620000540'\",\n", " \"b'1383364297620000596'\",\n", " \"b'1383364210620000153'\",\n", " \"b'1387722290620000362'\",\n", " \"b'1387721052620000311'\",\n", " \"b'1387721787620000046'\",\n", " \"b'1387722186620000565'\",\n", " \"b'1387722244620000068'\",\n", " \"b'1387721853620000403'\",\n", " \"b'1387722175620000633'\",\n", " \"b'1387722275620000172'\",\n", " \"b'1387722116620000187'\",\n", " \"b'1387722169620000060'\",\n", " \"b'1387722331620000058'\",\n", " \"b'1387722091620000607'\",\n", " \"b'1387722451620000540'\",\n", " \"b'1387722585620000430'\",\n", " \"b'1387722369620000120'\",\n", " \"b'1387721828620000123'\",\n", " \"b'1387722360620000391'\",\n", " \"b'1387722056620000089'\",\n", " \"b'1387722189620000480'\",\n", " \"b'1387722151620000184'\",\n", " \"b'1387722599620000137'\",\n", " \"b'1387722463620000314'\",\n", " \"b'1387722184620000057'\",\n", " \"b'1387722330620000171'\",\n", " \"b'1387721956620000373'\",\n", " \"b'1387722186620000197'\",\n", " \"b'1387722361620000697'\",\n", " \"b'1387721985620000173'\",\n", " \"b'1387708463620000329'\",\n", " \"b'1387722594620000900'\",\n", " \"b'1387722519620000482'\",\n", " \"b'1387722463620000481'\",\n", " \"b'1387721783620000030'\",\n", " \"b'1387721846620000247'\"]" ] }, "execution_count": 70, "metadata": {}, "output_type": "execute_result" } ], "source": [ "ids" ] 
}, { "cell_type": "code", "execution_count": 64, "metadata": { "collapsed": false, "hidden": true }, "outputs": [ { "data": { "text/html": [ "
\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", 
" \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
TRIP_IDCALL_TYPEORIGIN_CALLORIGIN_STANDTAXI_IDTIMESTAMPDAY_TYPEMISSING_DATAPOLYLINELATITUDELONGITUDETARGETCOORD_FEATURESDAY_OF_WEEKQUARTER_HOURWEEK_OF_YEAR
01376500052620000184C0011513765000520False[[-8.649891,41.154399],[-8.649981,41.154417],[...[-0.0392686, -0.0390627, -0.0440035, -0.049458...[-0.590024, -0.591592, -0.596627, -0.596793, -...[-8.61043, 41.1411][-0.590024, -0.591592, -0.596627, -0.596793, -...24033
11376500461620000525C0021413765004610False[[-8.610876,41.145759],[-8.610849,41.145759],[...[-0.155839, -0.155839, -0.151619, -0.14673, -0...[0.0920491, 0.0925159, 0.0985014, 0.105587, 0....[-8.63072, 41.1547][0.0920491, 0.0925159, 0.0985014, 0.105587, 0....24033
21376501327620000095B01136713765013270False[[-8.613243,41.166873],[-8.613252,41.166747],[...[0.129025, 0.127327, 0.125474, 0.118835, 0.104...[0.0506678, 0.0505178, 0.0497175, 0.0700247, 0...[-8.61534, 41.1407][0.0506678, 0.0505178, 0.0497175, 0.0700247, 0...24133
31376501783620000173B0103913765017830False[[-8.606988,41.15025],[-8.607213,41.150007],[-...[-0.0952637, -0.0985575, -0.112865, -0.113843,...[0.160023, 0.156088, 0.148386, 0.145868, 0.144...[-8.55426, 41.1628][0.160023, 0.156088, 0.148386, 0.145868, 0.144...24233
41376501113620000252B01336413765011130False[[-8.628273,41.157405],[-8.628255,41.157423],[...[0.00128665, 0.00149252, 0.00236744, 0.0135356...[-0.212091, -0.211775, -0.209724, -0.20894, -0...[-8.61928, 41.1786][-0.212091, -0.211775, -0.209724, -0.20894, -0...24133
51376501483620000424B0192513765014830False[[-8.605818,41.153391],[-8.607339,41.153427],[...[-0.0528556, -0.0523924, -0.0513116, -0.050694...[0.18048, 0.153888, 0.112506, 0.0797781, 0.071...[-8.64643, 41.1616][0.18048, 0.153888, 0.112506, 0.0797781, 0.071...24233
61376500461620000326B01424013765004610False[[-8.611137,41.149332],[-8.611263,41.149161],[...[-0.107667, -0.109931, -0.110086, -0.110086, -...[0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0...[-8.61446, 41.1422][0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0...24033
71376500453620000263C0040713765004530False[[-8.586396,41.149224],[-8.586378,41.149026],[...[-0.109108, -0.111784, -0.11199, -0.107873, -0...[0.520016, 0.520333, 0.513247, 0.49249, 0.4643...[-8.58591, 41.1486][0.520016, 0.520333, 0.513247, 0.49249, 0.4643...24033
81376499820620000467C0027013764998200False[[-8.625177,41.157333],[-8.625609,41.157405],[...[0.000308796, 0.00128665, 0.00494074, 0.006021...[-0.157972, -0.165525, -0.194935, -0.202171, -...[-8.64726, 41.1732][-0.157972, -0.165525, -0.194935, -0.202171, -...24033
91376503568620000213B02843113765035680False[[-8.584335,41.163111],[-8.585127,41.162922],[...[0.0782799, 0.0757066, 0.0835809, 0.0913522, 0...[0.556046, 0.542208, 0.51058, 0.479736, 0.4769...[-8.58525, 41.1689][0.556046, 0.542208, 0.51058, 0.479736, 0.4769...24433
101376503240620000002B06342113765032400False[[-8.609688,41.160348],[-8.609967,41.159277],[...[0.040967, 0.0265565, 0.00370556, 0.000669059,...[0.112823, 0.107938, 0.107938, 0.107471, 0.106...[-8.61071, 41.1456][0.112823, 0.107938, 0.107938, 0.107471, 0.106...24433
111376504312620000617C0019913765043120False[[-8.624502,41.179554],[-8.624511,41.179527],[...[0.300099, 0.299738, 0.299738, 0.299841, 0.299...[-0.146168, -0.146318, -0.146485, -0.146318, -...[-8.62455, 41.1796][-0.146168, -0.146318, -0.146485, -0.146318, -...24533
121376502661620000400B02911713765026610False[[-8.638443,41.170797],[-8.6382,41.170716],[-8...[0.181932, 0.180852, 0.184866, 0.192174, 0.200...[-0.389887, -0.385636, -0.36046, -0.330883, -0...[-8.6206, 41.1739][-0.389887, -0.385636, -0.36046, -0.330883, -0...24333
131376500537620000246B01331813765005370False[[-8.628147,41.157198],[-8.628156,41.157198],[...[-0.00149252, -0.00149252, -0.00128665, -0.001...[-0.209891, -0.210041, -0.20879, -0.208473, -0...[-8.61782, 41.1525][-0.209891, -0.210041, -0.20879, -0.208473, -0...24133
141376502120620000557B03224513765021200False[[-8.627643,41.157765],[-8.627958,41.1579],[-8...[0.00612446, 0.00797724, 0.0135356, 0.0206894,...[-0.201071, -0.206589, -0.20879, -0.228147, -0...[-8.61148, 41.1461][-0.201071, -0.206589, -0.20879, -0.228147, -0...24233
151376496951620000012A707913764969510False[[-8.604045,41.182569],[-8.604135,41.182353],[...[0.340757, 0.337875, 0.316876, 0.295724, 0.278...[0.211474, 0.209907, 0.197003, 0.183148, 0.161...[-8.62064, 41.1643][0.211474, 0.209907, 0.197003, 0.183148, 0.161...23733
161376501723620000554B05318313765017230False[[-8.613945,41.141277],[-8.613972,41.141286],[...[-0.216312, -0.216209, -0.221047, -0.222642, -...[0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0...[-8.63607, 41.1592][0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0...24233
171376503551620000376B03424613765035510False[[-8.615556,41.14071],[-8.615565,41.140692],[-...[-0.22398, -0.224186, -0.22434, -0.22362, -0.2...[0.0102369, 0.0100702, 0.0100702, 0.010387, 0....[-8.64072, 41.1612][0.0102369, 0.0100702, 0.0100702, 0.010387, 0....24433
181376504171620000146B01033813765041710False[[-8.606979,41.150268],[-8.607285,41.150124],[...[-0.0950063, -0.096962, -0.0962415, -0.0962415...[0.160173, 0.154838, 0.148852, 0.148536, 0.128...[-8.61805, 41.1525][0.160173, 0.154838, 0.148852, 0.148536, 0.128...24533
191376506047620000026B05716713765060470False[[-8.610804,41.145741],[-8.610822,41.145768],[...[-0.156097, -0.155736, -0.155839, -0.151722, -...[0.0933162, 0.0929994, 0.0917323, 0.0961339, 0...[-8.60417, 41.1489][0.0933162, 0.0929994, 0.0917323, 0.0961339, 0...24733
201376505311620000392A7034913765053110False[[-8.583165,41.164713],[-8.583012,41.164407],[...[0.0998956, 0.0957268, 0.0964474, 0.105557, 0....[0.576503, 0.579187, 0.580438, 0.580121, 0.597...[-8.6118, 41.1429][0.576503, 0.579187, 0.580438, 0.580121, 0.597...24633
211376505833620000120B01314413765058330False[[-8.628345,41.15763],[-8.628345,41.157576],[-...[0.00432315, 0.00360262, 0.00504367, 0.0026247...[-0.213342, -0.213342, -0.206906, -0.178896, -...[-8.61802, 41.1501][-0.213342, -0.213342, -0.206906, -0.178896, -...24633
221376506874620000255B03319413765068740False[[-8.600184,41.182686],[-8.600031,41.182758],[...[0.342352, 0.34333, 0.33736, 0.335559, 0.33314...[0.278965, 0.281649, 0.310276, 0.319096, 0.341...[-8.56627, 41.1814][0.278965, 0.281649, 0.310276, 0.319096, 0.341...24833
231376503763620000015B0604813765037630False[[-8.609706,41.151276],[-8.609679,41.151294],[...[-0.0814193, -0.081162, -0.0792063, -0.0644355...[0.112506, 0.112973, 0.115491, 0.107788, 0.108...[-8.61818, 41.1696][0.112506, 0.112973, 0.115491, 0.107788, 0.108...24433
241376501181620000360B003713765011810False[[-8.598996,41.149026],[-8.598843,41.148873],[...[-0.111784, -0.113843, -0.115284, -0.119195, -...[0.299739, 0.302423, 0.303357, 0.305408, 0.304...[-8.60023, 41.1493][0.299739, 0.302423, 0.303357, 0.305408, 0.304...24133
251376504563620000017A954033513765045630False[[-8.618022,41.151519],[-8.618337,41.151447],[...[-0.0781255, -0.0791033, -0.0844558, -0.087132...[-0.0328782, -0.0383802, -0.0624553, -0.079444...[-8.59822, 41.1484][-0.0328782, -0.0383802, -0.0624553, -0.079444...24533
261376507238620000114C0016513765072380False[[-8.63028,41.157432],[-8.630505,41.157153],[-...[0.00164691, -0.00211011, -0.00452901, 0.01085...[-0.24717, -0.251105, -0.271246, -0.289819, -0...[-8.65056, 41.1615][-0.24717, -0.251105, -0.271246, -0.289819, -0...24833
271376501378620000195B0606713765013780False[[-8.609499,41.151294],[-8.609535,41.151312],[...[-0.081162, -0.0809046, -0.0778681, -0.0758095...[0.116124, 0.115491, 0.117375, 0.111556, 0.100...[-8.61674, 41.137][0.116124, 0.115491, 0.117375, 0.111556, 0.100...24133
281376506638620000038B01714013765066380False[[-8.632323,41.164326],[-8.632917,41.164065],[...[0.0946461, 0.0911464, 0.0867718, 0.093205, 0....[-0.2829, -0.293287, -0.317345, -0.346305, -0....[-8.65428, 41.181][-0.2829, -0.293287, -0.317345, -0.346305, -0....24733
291376504586620000608B01831013765045860False[[-8.619921,41.148018],[-8.620218,41.147712],[...[-0.125371, -0.129489, -0.1176, -0.104013, -0....[-0.0660733, -0.0712751, -0.0792946, -0.084179...[-8.61061, 41.1515][-0.0660733, -0.0712751, -0.0792946, -0.084179...24533
...................................................
2741387725593620000440B04223313877255930False[[-8.612145,41.172777],[-8.612568,41.172768],[...[0.208643, 0.20854, 0.216569, 0.226142, 0.2362...[0.0698579, 0.062472, 0.0542858, 0.0513014, 0....[-8.58568, 41.1489][0.0698579, 0.062472, 0.0542858, 0.0513014, 0....62951
2751387726426620000621A160203413877264260False[[-8.648964,41.179752],[-8.648982,41.179752],[...[0.302775, 0.302775, 0.314663, 0.316104, 0.316...[-0.573819, -0.574136, -0.57807, -0.576336, -0...[-8.63323, 41.1756][-0.573819, -0.574136, -0.57807, -0.576336, -0...63051
2761387728068620000012A352107913877280680False[[-8.658126,41.154876],[-8.657829,41.154579],[...[-0.0328353, -0.0368497, -0.0219245, -0.001286...[-0.733992, -0.728807, -0.743912, -0.769238, -...[-8.65425, 41.1809][-0.733992, -0.728807, -0.743912, -0.769238, -...63251
2771387728077620000502B05411613877280770False[[-8.630316,41.15754],[-8.629668,41.157],[-8.6...[0.00308796, -0.00416875, -0.00844043, -0.0272...[-0.247804, -0.236483, -0.233182, -0.24497, -0...[-8.6304, 41.1554][-0.247804, -0.236483, -0.233182, -0.24497, -0...63251
2781387729770620000384A3184022513877297700False[[-8.6121,41.158674],[-8.6121,41.158674],[-8.6...[0.0183734, 0.0183734, 0.00452901, -0.0170353,...[0.0706582, 0.0706582, 0.064356, 0.0511347, 0....[-8.62106, 41.151][0.0706582, 0.0706582, 0.064356, 0.0511347, 0....63351
2791387728089620000640B02621813877280890False[[-8.580204,41.15934],[-8.580627,41.159241],[-...[0.0273799, 0.0260418, 0.015131, -0.005301, -0...[0.628271, 0.620869, 0.633156, 0.637241, 0.637...[-8.58601, 41.1486][0.628271, 0.620869, 0.633156, 0.637241, 0.637...63251
2801387727123620000055B0735213877271230False[[-8.63991,41.15979],[-8.640693,41.159664],[-8...[0.0334529, 0.0317546, 0.00586713, -0.0161089,...[-0.41553, -0.429218, -0.438821, -0.447324, -0...[-8.6178, 41.1471][-0.41553, -0.429218, -0.438821, -0.447324, -0...63151
2811387728500620000271B05723413877285000False[[-8.610885,41.14566],[-8.610885,41.145669],[-...[-0.157177, -0.157074, -0.157435, -0.156714, -...[0.091899, 0.091899, 0.0917323, 0.0923659, 0.0...[-8.66138, 41.1481][0.091899, 0.091899, 0.0917323, 0.0923659, 0.0...63251
2821387729808620000151B02814613877298080False[[-8.584335,41.163156],[-8.584425,41.163102],[...[0.078846, 0.0781255, 0.0776623, 0.0841985, 0....[0.556046, 0.554479, 0.533088, 0.506012, 0.478...[-8.6117, 41.16][0.556046, 0.554479, 0.533088, 0.506012, 0.478...63451
2831387660057620000026B05716713876600570False[[-8.610768,41.145642],[-8.610759,41.145642],[...[-0.157435, -0.157435, -0.157074, -0.156354, -...[0.0939331, 0.0940998, 0.0942499, 0.0939331, 0...[-8.63085, 41.1466][0.0939331, 0.0940998, 0.0942499, 0.0939331, 0...55251
2841387727477620000513B05336613877274770False[[-8.613972,41.141349],[-8.613963,41.141349],[...[-0.215334, -0.215334, -0.216929, -0.205607, -...[0.03793, 0.0380801, 0.029277, 0.0308442, 0.03...[-8.61403, 41.1499][0.03793, 0.0380801, 0.029277, 0.0308442, 0.03...63151
2851387725100620000157A254039013877251000False[[-8.676234,41.15484],[-8.676198,41.154822],[-...[-0.03335, -0.0335559, -0.0335559, -0.0334529,...[-1.05057, -1.04994, -1.04994, -1.04962, -1.04...[-8.6488, 41.1486][-1.05057, -1.04994, -1.04994, -1.04962, -1.04...62851
2861387731453620000032A9559037113877314530False[[-8.657946,41.148234],[-8.657937,41.148207],[...[-0.122438, -0.122798, -0.122798, -0.122695, -...[-0.730841, -0.730691, -0.730691, -0.730541, -...[-8.65648, 41.1532][-0.730841, -0.730691, -0.730691, -0.730541, -...63551
2871387730991620000217A20908032113877309910False[[-8.569818,41.170158],[-8.569278,41.169996],[...[0.173338, 0.171125, 0.158052, 0.160934, 0.166...[0.809852, 0.819288, 0.845881, 0.854534, 0.858...[-8.572, 41.1629][0.809852, 0.819288, 0.845881, 0.854534, 0.858...63551
2881387723778620000364B02141913877237780False[[-8.628867,41.160996],[-8.628849,41.160951],[...[0.0497162, 0.0490986, 0.0543481, 0.0718466, 0...[-0.222478, -0.222162, -0.209724, -0.202021, -...[-8.71435, 41.2082][-0.222478, -0.222162, -0.209724, -0.202021, -...62751
2891387731647620000129B05726513877316470False[[-8.610759,41.145651],[-8.610768,41.145678],[...[-0.15728, -0.15692, -0.155839, -0.154244, -0....[0.0940998, 0.0939331, 0.0936163, 0.0936163, 0...[-8.63835, 41.1592][0.0940998, 0.0939331, 0.0936163, 0.0936163, 0...63651
2901387733802620000364B02141913877338020False[[-8.628786,41.161041],[-8.628579,41.160897],[...[0.0503338, 0.0483781, 0.0476576, 0.0474002, 0...[-0.221061, -0.217443, -0.21776, -0.21791, -0....[-8.596, 41.1696][-0.221061, -0.217443, -0.21776, -0.21791, -0....63851
2911387731776620000207B03621113877317760False[[-8.649423,41.154345],[-8.6499,41.154273],[-8...[-0.0399891, -0.040967, -0.0452387, -0.0437976...[-0.581838, -0.590191, -0.59616, -0.579487, -0...[-8.57125, 41.1646][-0.581838, -0.590191, -0.59616, -0.579487, -0...63651
2921387729265620000068B0018513877292650False[[-8.608779,41.147793],[-8.608734,41.147802],[...[-0.128408, -0.128305, -0.128305, -0.128408, -...[0.128712, 0.129496, 0.129812, 0.133114, 0.133...[-8.62051, 41.1651][0.128712, 0.129496, 0.129812, 0.133114, 0.133...63351
2931387735526620000023C0040413877355260False[[-8.597673,41.142681],[-8.597682,41.142681]][-0.197372, -0.197372][0.322864, 0.322714][-8.59768, 41.1427][0.322864, 0.322864, 0.322864, 0.322864, 0.322...64051
2941387713713620000255A34988019413877137130False[[-8.594352,41.169375],[-8.594352,41.169375],[...[0.162787, 0.162787, 0.16289, 0.162993, 0.1631...[0.380934, 0.380934, 0.381084, 0.381084, 0.381...[-8.58298, 41.1704][0.380934, 0.380934, 0.381084, 0.381084, 0.381...61651
2951387735341620000216B01233113877353410False[[-8.630766,41.154948],[-8.631414,41.15439],[-...[-0.0318575, -0.039423, -0.054554, -0.0752434,...[-0.255673, -0.267011, -0.283683, -0.29422, -0...[-8.63564, 41.1406][-0.255673, -0.267011, -0.283683, -0.29422, -0...64051
2961387731258620000486C007513877312580False[[-8.59698,41.171328],[-8.595054,41.172327],[-...[0.189138, 0.20257, 0.253367, 0.308848, 0.3575...[0.334985, 0.368663, 0.395873, 0.406426, 0.397...[-8.33168, 41.2035][0.334985, 0.368663, 0.395873, 0.406426, 0.397...63551
2971387737095620000217A495032113877370950False[[-8.591688,41.159556],[-8.591625,41.159421],[...[0.0303135, 0.0284607, 0.0216672, 0.0165721, 0...[0.427501, 0.428601, 0.428134, 0.413496, 0.402...[-8.60578, 41.1498][0.427501, 0.428601, 0.428134, 0.413496, 0.402...64251
2981387737450620000384B05222513877374500False[[-8.61327,41.154453],[-8.613297,41.154147],[-...[-0.0385481, -0.0426654, -0.0465768, -0.047657...[0.050201, 0.0497175, 0.0495675, 0.0564866, 0....[-8.58762, 41.1885][0.050201, 0.0497175, 0.0495675, 0.0564866, 0....64251
2991387740537620000657B0471713877405370False[[-8.654796,41.173551],[-8.654526,41.173668],[...[0.219091, 0.220686, 0.236486, 0.239369, 0.233...[-0.675771, -0.671053, -0.652646, -0.632039, -...[-8.63023, 41.1584][-0.675771, -0.671053, -0.652646, -0.632039, -...64551
3001387742161620000503C003313877421610False[[-8.639487,41.167422],[-8.639424,41.16753],[-...[0.136436, 0.137878, 0.135819, 0.12393, 0.1178...[-0.408144, -0.407043, -0.402008, -0.397757, -...[-8.66577, 41.2102][-0.408144, -0.407043, -0.402008, -0.397757, -...64751
3011387755659620000372A48102713877556590False[[-8.679753,41.156559],[-8.679717,41.156568],[...[-0.0101388, -0.0100359, -0.00308796, -0.00710...[-1.11209, -1.11146, -1.0954, -1.07763, -1.058...[-8.61165, 41.1461][-1.11209, -1.11146, -1.0954, -1.07763, -1.058...66251
3021387735327620000068B02718513877353270False[[-8.608707,41.147811],[-8.608689,41.147829],[...[-0.12815, -0.127945, -0.128665, -0.13304, -0....[0.129962, 0.130279, 0.13328, 0.129812, 0.1073...[-8.62782, 41.1698][0.129962, 0.130279, 0.13328, 0.129812, 0.1073...64051
3031387788528620000010A831202613877885280False[[-8.609247,41.155182],[-8.60922,41.155254],[-...[-0.0287181, -0.0277402, -0.0210496, -0.021409...[0.120526, 0.121009, 0.117541, 0.108105, 0.106...[-8.61635, 41.163][0.120526, 0.121009, 0.117541, 0.108105, 0.106...0352
\n", "

304 rows × 16 columns

\n", "
" ], "text/plain": [ " TRIP_ID CALL_TYPE ORIGIN_CALL ORIGIN_STAND TAXI_ID \\\n", "0 1376500052620000184 C 0 0 115 \n", "1 1376500461620000525 C 0 0 214 \n", "2 1376501327620000095 B 0 11 367 \n", "3 1376501783620000173 B 0 10 39 \n", "4 1376501113620000252 B 0 13 364 \n", "5 1376501483620000424 B 0 19 25 \n", "6 1376500461620000326 B 0 14 240 \n", "7 1376500453620000263 C 0 0 407 \n", "8 1376499820620000467 C 0 0 270 \n", "9 1376503568620000213 B 0 28 431 \n", "10 1376503240620000002 B 0 63 421 \n", "11 1376504312620000617 C 0 0 199 \n", "12 1376502661620000400 B 0 29 117 \n", "13 1376500537620000246 B 0 13 318 \n", "14 1376502120620000557 B 0 32 245 \n", "15 1376496951620000012 A 7 0 79 \n", "16 1376501723620000554 B 0 53 183 \n", "17 1376503551620000376 B 0 34 246 \n", "18 1376504171620000146 B 0 10 338 \n", "19 1376506047620000026 B 0 57 167 \n", "20 1376505311620000392 A 7 0 349 \n", "21 1376505833620000120 B 0 13 144 \n", "22 1376506874620000255 B 0 33 194 \n", "23 1376503763620000015 B 0 60 48 \n", "24 1376501181620000360 B 0 0 37 \n", "25 1376504563620000017 A 954 0 335 \n", "26 1376507238620000114 C 0 0 165 \n", "27 1376501378620000195 B 0 60 67 \n", "28 1376506638620000038 B 0 17 140 \n", "29 1376504586620000608 B 0 18 310 \n", ".. ... ... ... ... ... 
\n", "274 1387725593620000440 B 0 42 233 \n", "275 1387726426620000621 A 1602 0 34 \n", "276 1387728068620000012 A 3521 0 79 \n", "277 1387728077620000502 B 0 54 116 \n", "278 1387729770620000384 A 3184 0 225 \n", "279 1387728089620000640 B 0 26 218 \n", "280 1387727123620000055 B 0 7 352 \n", "281 1387728500620000271 B 0 57 234 \n", "282 1387729808620000151 B 0 28 146 \n", "283 1387660057620000026 B 0 57 167 \n", "284 1387727477620000513 B 0 53 366 \n", "285 1387725100620000157 A 254 0 390 \n", "286 1387731453620000032 A 9559 0 371 \n", "287 1387730991620000217 A 20908 0 321 \n", "288 1387723778620000364 B 0 21 419 \n", "289 1387731647620000129 B 0 57 265 \n", "290 1387733802620000364 B 0 21 419 \n", "291 1387731776620000207 B 0 36 211 \n", "292 1387729265620000068 B 0 0 185 \n", "293 1387735526620000023 C 0 0 404 \n", "294 1387713713620000255 A 34988 0 194 \n", "295 1387735341620000216 B 0 12 331 \n", "296 1387731258620000486 C 0 0 75 \n", "297 1387737095620000217 A 495 0 321 \n", "298 1387737450620000384 B 0 52 225 \n", "299 1387740537620000657 B 0 47 17 \n", "300 1387742161620000503 C 0 0 33 \n", "301 1387755659620000372 A 481 0 27 \n", "302 1387735327620000068 B 0 27 185 \n", "303 1387788528620000010 A 8312 0 26 \n", "\n", " TIMESTAMP DAY_TYPE MISSING_DATA \\\n", "0 1376500052 0 False \n", "1 1376500461 0 False \n", "2 1376501327 0 False \n", "3 1376501783 0 False \n", "4 1376501113 0 False \n", "5 1376501483 0 False \n", "6 1376500461 0 False \n", "7 1376500453 0 False \n", "8 1376499820 0 False \n", "9 1376503568 0 False \n", "10 1376503240 0 False \n", "11 1376504312 0 False \n", "12 1376502661 0 False \n", "13 1376500537 0 False \n", "14 1376502120 0 False \n", "15 1376496951 0 False \n", "16 1376501723 0 False \n", "17 1376503551 0 False \n", "18 1376504171 0 False \n", "19 1376506047 0 False \n", "20 1376505311 0 False \n", "21 1376505833 0 False \n", "22 1376506874 0 False \n", "23 1376503763 0 False \n", "24 1376501181 0 False \n", "25 1376504563 0 
False \n", "26 1376507238 0 False \n", "27 1376501378 0 False \n", "28 1376506638 0 False \n", "29 1376504586 0 False \n", ".. ... ... ... \n", "274 1387725593 0 False \n", "275 1387726426 0 False \n", "276 1387728068 0 False \n", "277 1387728077 0 False \n", "278 1387729770 0 False \n", "279 1387728089 0 False \n", "280 1387727123 0 False \n", "281 1387728500 0 False \n", "282 1387729808 0 False \n", "283 1387660057 0 False \n", "284 1387727477 0 False \n", "285 1387725100 0 False \n", "286 1387731453 0 False \n", "287 1387730991 0 False \n", "288 1387723778 0 False \n", "289 1387731647 0 False \n", "290 1387733802 0 False \n", "291 1387731776 0 False \n", "292 1387729265 0 False \n", "293 1387735526 0 False \n", "294 1387713713 0 False \n", "295 1387735341 0 False \n", "296 1387731258 0 False \n", "297 1387737095 0 False \n", "298 1387737450 0 False \n", "299 1387740537 0 False \n", "300 1387742161 0 False \n", "301 1387755659 0 False \n", "302 1387735327 0 False \n", "303 1387788528 0 False \n", "\n", " POLYLINE \\\n", "0 [[-8.649891,41.154399],[-8.649981,41.154417],[... \n", "1 [[-8.610876,41.145759],[-8.610849,41.145759],[... \n", "2 [[-8.613243,41.166873],[-8.613252,41.166747],[... \n", "3 [[-8.606988,41.15025],[-8.607213,41.150007],[-... \n", "4 [[-8.628273,41.157405],[-8.628255,41.157423],[... \n", "5 [[-8.605818,41.153391],[-8.607339,41.153427],[... \n", "6 [[-8.611137,41.149332],[-8.611263,41.149161],[... \n", "7 [[-8.586396,41.149224],[-8.586378,41.149026],[... \n", "8 [[-8.625177,41.157333],[-8.625609,41.157405],[... \n", "9 [[-8.584335,41.163111],[-8.585127,41.162922],[... \n", "10 [[-8.609688,41.160348],[-8.609967,41.159277],[... \n", "11 [[-8.624502,41.179554],[-8.624511,41.179527],[... \n", "12 [[-8.638443,41.170797],[-8.6382,41.170716],[-8... \n", "13 [[-8.628147,41.157198],[-8.628156,41.157198],[... \n", "14 [[-8.627643,41.157765],[-8.627958,41.1579],[-8... \n", "15 [[-8.604045,41.182569],[-8.604135,41.182353],[... 
\n", "16 [[-8.613945,41.141277],[-8.613972,41.141286],[... \n", "17 [[-8.615556,41.14071],[-8.615565,41.140692],[-... \n", "18 [[-8.606979,41.150268],[-8.607285,41.150124],[... \n", "19 [[-8.610804,41.145741],[-8.610822,41.145768],[... \n", "20 [[-8.583165,41.164713],[-8.583012,41.164407],[... \n", "21 [[-8.628345,41.15763],[-8.628345,41.157576],[-... \n", "22 [[-8.600184,41.182686],[-8.600031,41.182758],[... \n", "23 [[-8.609706,41.151276],[-8.609679,41.151294],[... \n", "24 [[-8.598996,41.149026],[-8.598843,41.148873],[... \n", "25 [[-8.618022,41.151519],[-8.618337,41.151447],[... \n", "26 [[-8.63028,41.157432],[-8.630505,41.157153],[-... \n", "27 [[-8.609499,41.151294],[-8.609535,41.151312],[... \n", "28 [[-8.632323,41.164326],[-8.632917,41.164065],[... \n", "29 [[-8.619921,41.148018],[-8.620218,41.147712],[... \n", ".. ... \n", "274 [[-8.612145,41.172777],[-8.612568,41.172768],[... \n", "275 [[-8.648964,41.179752],[-8.648982,41.179752],[... \n", "276 [[-8.658126,41.154876],[-8.657829,41.154579],[... \n", "277 [[-8.630316,41.15754],[-8.629668,41.157],[-8.6... \n", "278 [[-8.6121,41.158674],[-8.6121,41.158674],[-8.6... \n", "279 [[-8.580204,41.15934],[-8.580627,41.159241],[-... \n", "280 [[-8.63991,41.15979],[-8.640693,41.159664],[-8... \n", "281 [[-8.610885,41.14566],[-8.610885,41.145669],[-... \n", "282 [[-8.584335,41.163156],[-8.584425,41.163102],[... \n", "283 [[-8.610768,41.145642],[-8.610759,41.145642],[... \n", "284 [[-8.613972,41.141349],[-8.613963,41.141349],[... \n", "285 [[-8.676234,41.15484],[-8.676198,41.154822],[-... \n", "286 [[-8.657946,41.148234],[-8.657937,41.148207],[... \n", "287 [[-8.569818,41.170158],[-8.569278,41.169996],[... \n", "288 [[-8.628867,41.160996],[-8.628849,41.160951],[... \n", "289 [[-8.610759,41.145651],[-8.610768,41.145678],[... \n", "290 [[-8.628786,41.161041],[-8.628579,41.160897],[... \n", "291 [[-8.649423,41.154345],[-8.6499,41.154273],[-8... \n", "292 [[-8.608779,41.147793],[-8.608734,41.147802],[... 
\n", "293 [[-8.597673,41.142681],[-8.597682,41.142681]] \n", "294 [[-8.594352,41.169375],[-8.594352,41.169375],[... \n", "295 [[-8.630766,41.154948],[-8.631414,41.15439],[-... \n", "296 [[-8.59698,41.171328],[-8.595054,41.172327],[-... \n", "297 [[-8.591688,41.159556],[-8.591625,41.159421],[... \n", "298 [[-8.61327,41.154453],[-8.613297,41.154147],[-... \n", "299 [[-8.654796,41.173551],[-8.654526,41.173668],[... \n", "300 [[-8.639487,41.167422],[-8.639424,41.16753],[-... \n", "301 [[-8.679753,41.156559],[-8.679717,41.156568],[... \n", "302 [[-8.608707,41.147811],[-8.608689,41.147829],[... \n", "303 [[-8.609247,41.155182],[-8.60922,41.155254],[-... \n", "\n", " LATITUDE \\\n", "0 [-0.0392686, -0.0390627, -0.0440035, -0.049458... \n", "1 [-0.155839, -0.155839, -0.151619, -0.14673, -0... \n", "2 [0.129025, 0.127327, 0.125474, 0.118835, 0.104... \n", "3 [-0.0952637, -0.0985575, -0.112865, -0.113843,... \n", "4 [0.00128665, 0.00149252, 0.00236744, 0.0135356... \n", "5 [-0.0528556, -0.0523924, -0.0513116, -0.050694... \n", "6 [-0.107667, -0.109931, -0.110086, -0.110086, -... \n", "7 [-0.109108, -0.111784, -0.11199, -0.107873, -0... \n", "8 [0.000308796, 0.00128665, 0.00494074, 0.006021... \n", "9 [0.0782799, 0.0757066, 0.0835809, 0.0913522, 0... \n", "10 [0.040967, 0.0265565, 0.00370556, 0.000669059,... \n", "11 [0.300099, 0.299738, 0.299738, 0.299841, 0.299... \n", "12 [0.181932, 0.180852, 0.184866, 0.192174, 0.200... \n", "13 [-0.00149252, -0.00149252, -0.00128665, -0.001... \n", "14 [0.00612446, 0.00797724, 0.0135356, 0.0206894,... \n", "15 [0.340757, 0.337875, 0.316876, 0.295724, 0.278... \n", "16 [-0.216312, -0.216209, -0.221047, -0.222642, -... \n", "17 [-0.22398, -0.224186, -0.22434, -0.22362, -0.2... \n", "18 [-0.0950063, -0.096962, -0.0962415, -0.0962415... \n", "19 [-0.156097, -0.155736, -0.155839, -0.151722, -... \n", "20 [0.0998956, 0.0957268, 0.0964474, 0.105557, 0.... \n", "21 [0.00432315, 0.00360262, 0.00504367, 0.0026247... 
\n", "22 [0.342352, 0.34333, 0.33736, 0.335559, 0.33314... \n", "23 [-0.0814193, -0.081162, -0.0792063, -0.0644355... \n", "24 [-0.111784, -0.113843, -0.115284, -0.119195, -... \n", "25 [-0.0781255, -0.0791033, -0.0844558, -0.087132... \n", "26 [0.00164691, -0.00211011, -0.00452901, 0.01085... \n", "27 [-0.081162, -0.0809046, -0.0778681, -0.0758095... \n", "28 [0.0946461, 0.0911464, 0.0867718, 0.093205, 0.... \n", "29 [-0.125371, -0.129489, -0.1176, -0.104013, -0.... \n", ".. ... \n", "274 [0.208643, 0.20854, 0.216569, 0.226142, 0.2362... \n", "275 [0.302775, 0.302775, 0.314663, 0.316104, 0.316... \n", "276 [-0.0328353, -0.0368497, -0.0219245, -0.001286... \n", "277 [0.00308796, -0.00416875, -0.00844043, -0.0272... \n", "278 [0.0183734, 0.0183734, 0.00452901, -0.0170353,... \n", "279 [0.0273799, 0.0260418, 0.015131, -0.005301, -0... \n", "280 [0.0334529, 0.0317546, 0.00586713, -0.0161089,... \n", "281 [-0.157177, -0.157074, -0.157435, -0.156714, -... \n", "282 [0.078846, 0.0781255, 0.0776623, 0.0841985, 0.... \n", "283 [-0.157435, -0.157435, -0.157074, -0.156354, -... \n", "284 [-0.215334, -0.215334, -0.216929, -0.205607, -... \n", "285 [-0.03335, -0.0335559, -0.0335559, -0.0334529,... \n", "286 [-0.122438, -0.122798, -0.122798, -0.122695, -... \n", "287 [0.173338, 0.171125, 0.158052, 0.160934, 0.166... \n", "288 [0.0497162, 0.0490986, 0.0543481, 0.0718466, 0... \n", "289 [-0.15728, -0.15692, -0.155839, -0.154244, -0.... \n", "290 [0.0503338, 0.0483781, 0.0476576, 0.0474002, 0... \n", "291 [-0.0399891, -0.040967, -0.0452387, -0.0437976... \n", "292 [-0.128408, -0.128305, -0.128305, -0.128408, -... \n", "293 [-0.197372, -0.197372] \n", "294 [0.162787, 0.162787, 0.16289, 0.162993, 0.1631... \n", "295 [-0.0318575, -0.039423, -0.054554, -0.0752434,... \n", "296 [0.189138, 0.20257, 0.253367, 0.308848, 0.3575... \n", "297 [0.0303135, 0.0284607, 0.0216672, 0.0165721, 0... \n", "298 [-0.0385481, -0.0426654, -0.0465768, -0.047657... 
\n", "299 [0.219091, 0.220686, 0.236486, 0.239369, 0.233... \n", "300 [0.136436, 0.137878, 0.135819, 0.12393, 0.1178... \n", "301 [-0.0101388, -0.0100359, -0.00308796, -0.00710... \n", "302 [-0.12815, -0.127945, -0.128665, -0.13304, -0.... \n", "303 [-0.0287181, -0.0277402, -0.0210496, -0.021409... \n", "\n", " LONGITUDE TARGET \\\n", "0 [-0.590024, -0.591592, -0.596627, -0.596793, -... [-8.61043, 41.1411] \n", "1 [0.0920491, 0.0925159, 0.0985014, 0.105587, 0.... [-8.63072, 41.1547] \n", "2 [0.0506678, 0.0505178, 0.0497175, 0.0700247, 0... [-8.61534, 41.1407] \n", "3 [0.160023, 0.156088, 0.148386, 0.145868, 0.144... [-8.55426, 41.1628] \n", "4 [-0.212091, -0.211775, -0.209724, -0.20894, -0... [-8.61928, 41.1786] \n", "5 [0.18048, 0.153888, 0.112506, 0.0797781, 0.071... [-8.64643, 41.1616] \n", "6 [0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0... [-8.61446, 41.1422] \n", "7 [0.520016, 0.520333, 0.513247, 0.49249, 0.4643... [-8.58591, 41.1486] \n", "8 [-0.157972, -0.165525, -0.194935, -0.202171, -... [-8.64726, 41.1732] \n", "9 [0.556046, 0.542208, 0.51058, 0.479736, 0.4769... [-8.58525, 41.1689] \n", "10 [0.112823, 0.107938, 0.107938, 0.107471, 0.106... [-8.61071, 41.1456] \n", "11 [-0.146168, -0.146318, -0.146485, -0.146318, -... [-8.62455, 41.1796] \n", "12 [-0.389887, -0.385636, -0.36046, -0.330883, -0... [-8.6206, 41.1739] \n", "13 [-0.209891, -0.210041, -0.20879, -0.208473, -0... [-8.61782, 41.1525] \n", "14 [-0.201071, -0.206589, -0.20879, -0.228147, -0... [-8.61148, 41.1461] \n", "15 [0.211474, 0.209907, 0.197003, 0.183148, 0.161... [-8.62064, 41.1643] \n", "16 [0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0... [-8.63607, 41.1592] \n", "17 [0.0102369, 0.0100702, 0.0100702, 0.010387, 0.... [-8.64072, 41.1612] \n", "18 [0.160173, 0.154838, 0.148852, 0.148536, 0.128... [-8.61805, 41.1525] \n", "19 [0.0933162, 0.0929994, 0.0917323, 0.0961339, 0... [-8.60417, 41.1489] \n", "20 [0.576503, 0.579187, 0.580438, 0.580121, 0.597... 
[-8.6118, 41.1429] \n", "21 [-0.213342, -0.213342, -0.206906, -0.178896, -... [-8.61802, 41.1501] \n", "22 [0.278965, 0.281649, 0.310276, 0.319096, 0.341... [-8.56627, 41.1814] \n", "23 [0.112506, 0.112973, 0.115491, 0.107788, 0.108... [-8.61818, 41.1696] \n", "24 [0.299739, 0.302423, 0.303357, 0.305408, 0.304... [-8.60023, 41.1493] \n", "25 [-0.0328782, -0.0383802, -0.0624553, -0.079444... [-8.59822, 41.1484] \n", "26 [-0.24717, -0.251105, -0.271246, -0.289819, -0... [-8.65056, 41.1615] \n", "27 [0.116124, 0.115491, 0.117375, 0.111556, 0.100... [-8.61674, 41.137] \n", "28 [-0.2829, -0.293287, -0.317345, -0.346305, -0.... [-8.65428, 41.181] \n", "29 [-0.0660733, -0.0712751, -0.0792946, -0.084179... [-8.61061, 41.1515] \n", ".. ... ... \n", "274 [0.0698579, 0.062472, 0.0542858, 0.0513014, 0.... [-8.58568, 41.1489] \n", "275 [-0.573819, -0.574136, -0.57807, -0.576336, -0... [-8.63323, 41.1756] \n", "276 [-0.733992, -0.728807, -0.743912, -0.769238, -... [-8.65425, 41.1809] \n", "277 [-0.247804, -0.236483, -0.233182, -0.24497, -0... [-8.6304, 41.1554] \n", "278 [0.0706582, 0.0706582, 0.064356, 0.0511347, 0.... [-8.62106, 41.151] \n", "279 [0.628271, 0.620869, 0.633156, 0.637241, 0.637... [-8.58601, 41.1486] \n", "280 [-0.41553, -0.429218, -0.438821, -0.447324, -0... [-8.6178, 41.1471] \n", "281 [0.091899, 0.091899, 0.0917323, 0.0923659, 0.0... [-8.66138, 41.1481] \n", "282 [0.556046, 0.554479, 0.533088, 0.506012, 0.478... [-8.6117, 41.16] \n", "283 [0.0939331, 0.0940998, 0.0942499, 0.0939331, 0... [-8.63085, 41.1466] \n", "284 [0.03793, 0.0380801, 0.029277, 0.0308442, 0.03... [-8.61403, 41.1499] \n", "285 [-1.05057, -1.04994, -1.04994, -1.04962, -1.04... [-8.6488, 41.1486] \n", "286 [-0.730841, -0.730691, -0.730691, -0.730541, -... [-8.65648, 41.1532] \n", "287 [0.809852, 0.819288, 0.845881, 0.854534, 0.858... [-8.572, 41.1629] \n", "288 [-0.222478, -0.222162, -0.209724, -0.202021, -... [-8.71435, 41.2082] \n", "289 [0.0940998, 0.0939331, 0.0936163, 0.0936163, 0... 
[-8.63835, 41.1592] \n", "290 [-0.221061, -0.217443, -0.21776, -0.21791, -0.... [-8.596, 41.1696] \n", "291 [-0.581838, -0.590191, -0.59616, -0.579487, -0... [-8.57125, 41.1646] \n", "292 [0.128712, 0.129496, 0.129812, 0.133114, 0.133... [-8.62051, 41.1651] \n", "293 [0.322864, 0.322714] [-8.59768, 41.1427] \n", "294 [0.380934, 0.380934, 0.381084, 0.381084, 0.381... [-8.58298, 41.1704] \n", "295 [-0.255673, -0.267011, -0.283683, -0.29422, -0... [-8.63564, 41.1406] \n", "296 [0.334985, 0.368663, 0.395873, 0.406426, 0.397... [-8.33168, 41.2035] \n", "297 [0.427501, 0.428601, 0.428134, 0.413496, 0.402... [-8.60578, 41.1498] \n", "298 [0.050201, 0.0497175, 0.0495675, 0.0564866, 0.... [-8.58762, 41.1885] \n", "299 [-0.675771, -0.671053, -0.652646, -0.632039, -... [-8.63023, 41.1584] \n", "300 [-0.408144, -0.407043, -0.402008, -0.397757, -... [-8.66577, 41.2102] \n", "301 [-1.11209, -1.11146, -1.0954, -1.07763, -1.058... [-8.61165, 41.1461] \n", "302 [0.129962, 0.130279, 0.13328, 0.129812, 0.1073... [-8.62782, 41.1698] \n", "303 [0.120526, 0.121009, 0.117541, 0.108105, 0.106... [-8.61635, 41.163] \n", "\n", " COORD_FEATURES DAY_OF_WEEK \\\n", "0 [-0.590024, -0.591592, -0.596627, -0.596793, -... 2 \n", "1 [0.0920491, 0.0925159, 0.0985014, 0.105587, 0.... 2 \n", "2 [0.0506678, 0.0505178, 0.0497175, 0.0700247, 0... 2 \n", "3 [0.160023, 0.156088, 0.148386, 0.145868, 0.144... 2 \n", "4 [-0.212091, -0.211775, -0.209724, -0.20894, -0... 2 \n", "5 [0.18048, 0.153888, 0.112506, 0.0797781, 0.071... 2 \n", "6 [0.0874808, 0.08528, 0.0849633, 0.0848132, 0.0... 2 \n", "7 [0.520016, 0.520333, 0.513247, 0.49249, 0.4643... 2 \n", "8 [-0.157972, -0.165525, -0.194935, -0.202171, -... 2 \n", "9 [0.556046, 0.542208, 0.51058, 0.479736, 0.4769... 2 \n", "10 [0.112823, 0.107938, 0.107938, 0.107471, 0.106... 2 \n", "11 [-0.146168, -0.146318, -0.146485, -0.146318, -... 2 \n", "12 [-0.389887, -0.385636, -0.36046, -0.330883, -0... 2 \n", "13 [-0.209891, -0.210041, -0.20879, -0.208473, -0... 
2 \n", "14 [-0.201071, -0.206589, -0.20879, -0.228147, -0... 2 \n", "15 [0.211474, 0.209907, 0.197003, 0.183148, 0.161... 2 \n", "16 [0.0383969, 0.03793, 0.0220411, 0.0168393, 0.0... 2 \n", "17 [0.0102369, 0.0100702, 0.0100702, 0.010387, 0.... 2 \n", "18 [0.160173, 0.154838, 0.148852, 0.148536, 0.128... 2 \n", "19 [0.0933162, 0.0929994, 0.0917323, 0.0961339, 0... 2 \n", "20 [0.576503, 0.579187, 0.580438, 0.580121, 0.597... 2 \n", "21 [-0.213342, -0.213342, -0.206906, -0.178896, -... 2 \n", "22 [0.278965, 0.281649, 0.310276, 0.319096, 0.341... 2 \n", "23 [0.112506, 0.112973, 0.115491, 0.107788, 0.108... 2 \n", "24 [0.299739, 0.302423, 0.303357, 0.305408, 0.304... 2 \n", "25 [-0.0328782, -0.0383802, -0.0624553, -0.079444... 2 \n", "26 [-0.24717, -0.251105, -0.271246, -0.289819, -0... 2 \n", "27 [0.116124, 0.115491, 0.117375, 0.111556, 0.100... 2 \n", "28 [-0.2829, -0.293287, -0.317345, -0.346305, -0.... 2 \n", "29 [-0.0660733, -0.0712751, -0.0792946, -0.084179... 2 \n", ".. ... ... \n", "274 [0.0698579, 0.062472, 0.0542858, 0.0513014, 0.... 6 \n", "275 [-0.573819, -0.574136, -0.57807, -0.576336, -0... 6 \n", "276 [-0.733992, -0.728807, -0.743912, -0.769238, -... 6 \n", "277 [-0.247804, -0.236483, -0.233182, -0.24497, -0... 6 \n", "278 [0.0706582, 0.0706582, 0.064356, 0.0511347, 0.... 6 \n", "279 [0.628271, 0.620869, 0.633156, 0.637241, 0.637... 6 \n", "280 [-0.41553, -0.429218, -0.438821, -0.447324, -0... 6 \n", "281 [0.091899, 0.091899, 0.0917323, 0.0923659, 0.0... 6 \n", "282 [0.556046, 0.554479, 0.533088, 0.506012, 0.478... 6 \n", "283 [0.0939331, 0.0940998, 0.0942499, 0.0939331, 0... 5 \n", "284 [0.03793, 0.0380801, 0.029277, 0.0308442, 0.03... 6 \n", "285 [-1.05057, -1.04994, -1.04994, -1.04962, -1.04... 6 \n", "286 [-0.730841, -0.730691, -0.730691, -0.730541, -... 6 \n", "287 [0.809852, 0.819288, 0.845881, 0.854534, 0.858... 6 \n", "288 [-0.222478, -0.222162, -0.209724, -0.202021, -... 6 \n", "289 [0.0940998, 0.0939331, 0.0936163, 0.0936163, 0... 
6 \n", "290 [-0.221061, -0.217443, -0.21776, -0.21791, -0.... 6 \n", "291 [-0.581838, -0.590191, -0.59616, -0.579487, -0... 6 \n", "292 [0.128712, 0.129496, 0.129812, 0.133114, 0.133... 6 \n", "293 [0.322864, 0.322864, 0.322864, 0.322864, 0.322... 6 \n", "294 [0.380934, 0.380934, 0.381084, 0.381084, 0.381... 6 \n", "295 [-0.255673, -0.267011, -0.283683, -0.29422, -0... 6 \n", "296 [0.334985, 0.368663, 0.395873, 0.406426, 0.397... 6 \n", "297 [0.427501, 0.428601, 0.428134, 0.413496, 0.402... 6 \n", "298 [0.050201, 0.0497175, 0.0495675, 0.0564866, 0.... 6 \n", "299 [-0.675771, -0.671053, -0.652646, -0.632039, -... 6 \n", "300 [-0.408144, -0.407043, -0.402008, -0.397757, -... 6 \n", "301 [-1.11209, -1.11146, -1.0954, -1.07763, -1.058... 6 \n", "302 [0.129962, 0.130279, 0.13328, 0.129812, 0.1073... 6 \n", "303 [0.120526, 0.121009, 0.117541, 0.108105, 0.106... 0 \n", "\n", " QUARTER_HOUR WEEK_OF_YEAR \n", "0 40 33 \n", "1 40 33 \n", "2 41 33 \n", "3 42 33 \n", "4 41 33 \n", "5 42 33 \n", "6 40 33 \n", "7 40 33 \n", "8 40 33 \n", "9 44 33 \n", "10 44 33 \n", "11 45 33 \n", "12 43 33 \n", "13 41 33 \n", "14 42 33 \n", "15 37 33 \n", "16 42 33 \n", "17 44 33 \n", "18 45 33 \n", "19 47 33 \n", "20 46 33 \n", "21 46 33 \n", "22 48 33 \n", "23 44 33 \n", "24 41 33 \n", "25 45 33 \n", "26 48 33 \n", "27 41 33 \n", "28 47 33 \n", "29 45 33 \n", ".. ... ... 
\n", "274 29 51 \n", "275 30 51 \n", "276 32 51 \n", "277 32 51 \n", "278 33 51 \n", "279 32 51 \n", "280 31 51 \n", "281 32 51 \n", "282 34 51 \n", "283 52 51 \n", "284 31 51 \n", "285 28 51 \n", "286 35 51 \n", "287 35 51 \n", "288 27 51 \n", "289 36 51 \n", "290 38 51 \n", "291 36 51 \n", "292 33 51 \n", "293 40 51 \n", "294 16 51 \n", "295 40 51 \n", "296 35 51 \n", "297 42 51 \n", "298 42 51 \n", "299 45 51 \n", "300 47 51 \n", "301 62 51 \n", "302 40 51 \n", "303 3 52 \n", "\n", "[304 rows x 16 columns]" ] }, "execution_count": 64, "metadata": {}, "output_type": "execute_result" } ], "source": [ "X_val" ] } ], "metadata": { "kernelspec": { "display_name": "Python 3", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.6.0" } }, "nbformat": 4, "nbformat_minor": 2 }