{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import statistics\n", "import polars as pl\n", "import pandas as pd\n", "import seaborn as sns\n", "import polars.selectors as cs\n", "import matplotlib.pyplot as plt\n", "from datetime import datetime, timedelta\n", "\n", "import warnings\n", "\n", "warnings.filterwarnings(\"ignore\")" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# KPI\n", "\n", "Vamos a utilizar el **MAE**. Para ello necesitamos obtener las variables:\n", "- `y_pred`: la predicción de la hora que llega el autobús que ofrece la API cuando se realiza la llamada\n", "- `y_true`: para cada autobús,linea, parada y destino concreto, la predicción que ofrece la API más fiable (entendiendose por fiable aquella que es inferior a 60 segundos).\n", "\n", "Para obtener `y_true`, cogemos el valor de `y_pred` para los valores en los que `estimateArrive` sea inferior a 60 segundos de cada autobús, linea, parada y destino concretos\n" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "sample_data_02 = pl.scan_csv('/home/mlia/proyectos/data-generation/data/train/emt/2024/03/03/emt_20240303.csv')\n", "sample_data_aux_02 = pl.scan_csv('/home/mlia/proyectos/data-generation/data/train/emt/2024/03/03/emt_20240303_aux.csv')" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 15)
PKdatedatetimebuslinestoppositionBusLonpositionBusLatDistanceBusdestinationMinimunFrequencyisHeaddayTypeestimateArrive
i64strstrstri64stri64f64f64i64strf64i64stri64
0"2024-03-03 22:…"2024-03-03""2024-03-03 22:…2331"127"1617-3.7189840.4650983645"CUATRO CAMINOS…null0"FE"818
1"2024-03-03 10:…"2024-03-03""2024-03-03 10:…2133"107"497-3.68869340.4673282778"HORTALEZA"15.00"FE"555
2"2024-03-03 20:…"2024-03-03""2024-03-03 20:…541"70"232-3.67548540.4674726703"ALSACIA"12.00"FE"1369
3"2024-03-03 13:…"2024-03-03""2024-03-03 13:…2268"175"4508-3.69662740.48698713206"PLAZA CASTILLA…20.01"FE"2318
4"2024-03-03 15:…"2024-03-03""2024-03-03 15:…3273"11"220-3.66939840.4588098153"BARRIO BLANCO"null0"FE"2399
" ], "text/plain": [ "shape: (5, 15)\n", "┌─────┬──────────────┬────────────┬─────────────┬───┬─────────────┬────────┬─────────┬─────────────┐\n", "│ ┆ PK ┆ date ┆ datetime ┆ … ┆ MinimunFreq ┆ isHead ┆ dayType ┆ estimateArr │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ uency ┆ --- ┆ --- ┆ ive │\n", "│ i64 ┆ str ┆ str ┆ str ┆ ┆ --- ┆ i64 ┆ str ┆ --- │\n", "│ ┆ ┆ ┆ ┆ ┆ f64 ┆ ┆ ┆ i64 │\n", "╞═════╪══════════════╪════════════╪═════════════╪═══╪═════════════╪════════╪═════════╪═════════════╡\n", "│ 0 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 818 │\n", "│ ┆ 22:40:05.502 ┆ ┆ 22:40:05.50 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 889_B2331… ┆ ┆ 2889 ┆ ┆ ┆ ┆ ┆ │\n", "│ 1 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ 15.0 ┆ 0 ┆ FE ┆ 555 │\n", "│ ┆ 10:56:04.279 ┆ ┆ 10:56:04.27 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 680_B2133… ┆ ┆ 9680 ┆ ┆ ┆ ┆ ┆ │\n", "│ 2 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ 12.0 ┆ 0 ┆ FE ┆ 1369 │\n", "│ ┆ 20:48:03.810 ┆ ┆ 20:48:03.81 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 984_B541_… ┆ ┆ 0984 ┆ ┆ ┆ ┆ ┆ │\n", "│ 3 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ 20.0 ┆ 1 ┆ FE ┆ 2318 │\n", "│ ┆ 13:49:21.341 ┆ ┆ 13:49:21.34 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 701_B2268… ┆ ┆ 1701 ┆ ┆ ┆ ┆ ┆ │\n", "│ 4 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 2399 │\n", "│ ┆ 15:32:19.394 ┆ ┆ 15:32:19.39 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 222_B3273… ┆ ┆ 4222 ┆ ┆ ┆ ┆ ┆ │\n", "└─────┴──────────────┴────────────┴─────────────┴───┴─────────────┴────────┴─────────┴─────────────┘" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data_02.head().collect()" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (5, 6)
PKreliable_arrival_datepredict_arrival_dateinterval_timeestimateArrive
i64strstrstrstri64
0"2024-03-03 13:…"2024-03-03 13:…"2024-03-03 14:…"[12 14]"2318
1"2024-03-03 15:…"2024-03-03 15:…"2024-03-03 16:…"[14 16]"2399
2"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 13:…"[11 13]"2053
3"2024-03-03 13:…"2024-03-03 13:…"2024-03-03 13:…"[12 14]"56
4"2024-03-03 19:…"2024-03-03 19:…"2024-03-03 19:…"[18 20]"201
" ], "text/plain": [ "shape: (5, 6)\n", "┌─────┬───────────────────┬───────────────────┬───────────────────┬───────────────┬────────────────┐\n", "│ ┆ PK ┆ reliable_arrival_ ┆ predict_arrival_d ┆ interval_time ┆ estimateArrive │\n", "│ --- ┆ --- ┆ date ┆ ate ┆ --- ┆ --- │\n", "│ i64 ┆ str ┆ --- ┆ --- ┆ str ┆ i64 │\n", "│ ┆ ┆ str ┆ str ┆ ┆ │\n", "╞═════╪═══════════════════╪═══════════════════╪═══════════════════╪═══════════════╪════════════════╡\n", "│ 0 ┆ 2024-03-03 13:49: ┆ 2024-03-03 ┆ 2024-03-03 ┆ [12 14] ┆ 2318 │\n", "│ ┆ 21.341701_B2268… ┆ 13:34:04.533532 ┆ 14:27:59.341701 ┆ ┆ │\n", "│ 1 ┆ 2024-03-03 15:32: ┆ 2024-03-03 ┆ 2024-03-03 ┆ [14 16] ┆ 2399 │\n", "│ ┆ 19.394222_B3273… ┆ 15:02:52.864953 ┆ 16:12:18.394222 ┆ ┆ │\n", "│ 2 ┆ 2024-03-03 12:48: ┆ 2024-03-03 ┆ 2024-03-03 ┆ [11 13] ┆ 2053 │\n", "│ ┆ 05.546055_B8830… ┆ 12:34:04.041122 ┆ 13:22:18.546055 ┆ ┆ │\n", "│ 3 ┆ 2024-03-03 13:08: ┆ 2024-03-03 ┆ 2024-03-03 ┆ [12 14] ┆ 56 │\n", "│ ┆ 03.566043_B2134… ┆ 13:08:45.337469 ┆ 13:08:59.566043 ┆ ┆ │\n", "│ 4 ┆ 2024-03-03 19:07: ┆ 2024-03-03 ┆ 2024-03-03 ┆ [18 20] ┆ 201 │\n", "│ ┆ 19.641947_B8804… ┆ 19:10:04.418367 ┆ 19:10:40.641947 ┆ ┆ │\n", "└─────┴───────────────────┴───────────────────┴───────────────────┴───────────────┴────────────────┘" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data_aux_02.head().collect()" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [], "source": [ "def KPI_fun(date_true,date_pred):\n", " date_true = datetime.strptime(date_true, '%Y-%m-%d %H:%M:%S.%f')\n", " date_pred = datetime.strptime(date_pred, '%Y-%m-%d %H:%M:%S.%f')\n", " \n", " dif = max(date_true,date_pred) - min(date_true,date_pred)\n", " return dif.total_seconds()" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "KPI_data = sample_data_aux_02.with_columns(pl.struct(pl.col('reliable_arrival_date'),pl.col('predict_arrival_date')).alias('struct').map_elements(lambda x: KPI_fun(x['reliable_arrival_date'], x['predict_arrival_date'])).alias('KPI_value')).collect()" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [], "source": [ "dict_data = KPI_data.sort('estimateArrive').select('estimateArrive','KPI_value').to_dict()" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "dict_data2 = KPI_data.filter(pl.col('KPI_value')<500).sort('estimateArrive').select('estimateArrive','KPI_value').to_dict()" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [], "source": [ "def plot_KPI(dict):\n", " new_dict = {}\n", " MAE = []\n", " for eta, kpi in zip(dict['estimateArrive'], dict['KPI_value']):\n", " # Verificar si el entero ya está en el diccionario\n", " if eta in new_dict:\n", " # Si el entero ya está en el diccionario, agregar el valor numérico a la lista existente\n", " new_dict[eta].append(kpi)\n", " else:\n", " # Si el entero no está en el diccionario, crear una nueva lista con el valor numérico\n", " new_dict[eta] = [kpi]\n", " \n", " for key,val in zip(new_dict.keys(),new_dict.values()):\n", " MAE.append((key/60,statistics.mean(val)/60))\n", " \n", "\n", " plt.plot([data[0] for data in MAE], [data[1] for data in MAE])\n", " plt.gca().invert_xaxis()\n", " plt.grid(True, linestyle='--', linewidth=0.5)\n", " plt.title('CRTM API estimation error')\n", " plt.xlabel('Remaining time (minutes)')\n", " plt.ylabel('MAE (minutes)')\n", " plt.show()" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_KPI(dict_data)" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "#TODO: una posibilidad es filtrar por los que el KPI sea <500 segundos (aprox 8 minutos) para asi quitar los valores erroneos que hayan salido con la creacion del intervalo" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_KPI(dict_data2)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Filtramos\n", "* Bus 51\n", "* Line BR1\n", "* Stop 4366\n", "* Destination VALDEBEBAS\n" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "sample_data_02 = sample_data_02.filter(pl.col('bus')==51, pl.col('line')==\"BR1\",pl.col('stop')==4366, pl.col('destination')==\"VALDEBEBAS\")" ] }, { "cell_type": "code", "execution_count": 17, "metadata": {}, "outputs": [], "source": [ "data = sample_data_aux_02.join(sample_data_02,on='PK',how='inner')" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (116, 20)
PKreliable_arrival_datepredict_arrival_dateinterval_timeestimateArrive_rightdatedatetimebuslinestoppositionBusLonpositionBusLatDistanceBusdestinationMinimunFrequencyisHeaddayTypeestimateArrive_right
i64strstrstrstri64i64strstri64stri64f64f64i64strf64i64stri64
13333"2024-03-03 11:…"2024-03-03 11:…"2024-03-03 11:…"[10 12]"86417814"2024-03-03""2024-03-03 11:…51"BR1"4366-3.69841340.4905198045"VALDEBEBAS"null0"FE"864
20709"2024-03-03 11:…"2024-03-03 11:…"2024-03-03 11:…"[10 12]"54027536"2024-03-03""2024-03-03 11:…51"BR1"4366-3.66199840.4989733287"VALDEBEBAS"null0"FE"540
32386"2024-03-03 11:…"2024-03-03 11:…"2024-03-03 11:…"[10 12]"125043012"2024-03-03""2024-03-03 11:…51"BR1"4366-3.69243240.48662910340"VALDEBEBAS"null0"FE"1250
35941"2024-03-03 08:…"2024-03-03 08:…"2024-03-03 08:…"[7 9]"204447768"2024-03-03""2024-03-03 08:…51"BR1"4366-3.64167640.48464118705"VALDEBEBAS"null0"FE"2044
40658"2024-03-03 10:…"2024-03-03 10:…"2024-03-03 11:…"[ 9 11]"141054070"2024-03-03""2024-03-03 10:…51"BR1"4366-3.6756140.48407912528"VALDEBEBAS"null0"FE"1410
780200"2024-03-03 08:…"2024-03-03 08:…"2024-03-03 08:…"[7 9]"6281037349"2024-03-03""2024-03-03 08:…51"BR1"4366-3.67643240.4838766105"VALDEBEBAS"null0"FE"628
786139"2024-03-03 10:…"2024-03-03 10:…"2024-03-03 10:…"[ 9 11]"1681045276"2024-03-03""2024-03-03 10:…51"BR1"4366-3.65350840.487681868"VALDEBEBAS"null0"FE"168
797676"2024-03-03 08:…"2024-03-03 08:…"2024-03-03 08:…"[7 9]"01060661"2024-03-03""2024-03-03 08:…51"BR1"4366-3.64233340.4842410"VALDEBEBAS"null0"FE"0
807589"2024-03-03 10:…"2024-03-03 10:…"2024-03-03 11:…"[ 9 11]"12391073832"2024-03-03""2024-03-03 10:…51"BR1"4366-3.69633540.48738910513"VALDEBEBAS"null0"FE"1239
831602"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…"[11 13]"3431106062"2024-03-03""2024-03-03 12:…51"BR1"4366-3.65823840.4935022131"VALDEBEBAS"null0"FE"343
" ], "text/plain": [ "shape: (116, 20)\n", "┌────────┬─────────────┬─────────────┬────────────┬───┬────────────┬────────┬─────────┬────────────┐\n", "│ ┆ PK ┆ reliable_ar ┆ predict_ar ┆ … ┆ MinimunFre ┆ isHead ┆ dayType ┆ estimateAr │\n", "│ --- ┆ --- ┆ rival_date ┆ rival_date ┆ ┆ quency ┆ --- ┆ --- ┆ rive_right │\n", "│ i64 ┆ str ┆ --- ┆ --- ┆ ┆ --- ┆ i64 ┆ str ┆ --- │\n", "│ ┆ ┆ str ┆ str ┆ ┆ f64 ┆ ┆ ┆ i64 │\n", "╞════════╪═════════════╪═════════════╪════════════╪═══╪════════════╪════════╪═════════╪════════════╡\n", "│ 13333 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 864 │\n", "│ ┆ 11:02:06.65 ┆ 11:17:08.10 ┆ 11:16:30.6 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 1109_B51_L… ┆ 9869 ┆ 51109 ┆ ┆ ┆ ┆ ┆ │\n", "│ 20709 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 540 │\n", "│ ┆ 11:08:05.04 ┆ 11:17:08.10 ┆ 11:17:05.0 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 0397_B51_L… ┆ 9869 ┆ 40397 ┆ ┆ ┆ ┆ ┆ │\n", "│ 32386 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 1250 │\n", "│ ┆ 11:00:06.06 ┆ 11:17:08.10 ┆ 11:20:56.0 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 6596_B51_L… ┆ 9869 ┆ 66596 ┆ ┆ ┆ ┆ ┆ │\n", "│ 35941 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 2044 │\n", "│ ┆ 08:11:05.46 ┆ 08:45:38.99 ┆ 08:45:09.4 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 0142_B51_L… ┆ 5529 ┆ 60142 ┆ ┆ ┆ ┆ ┆ │\n", "│ 40658 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 1410 │\n", "│ ┆ 10:54:04.24 ┆ 10:04:27.20 ┆ 11:17:34.2 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 3333_B51_L… ┆ 4599 ┆ 43333 ┆ ┆ ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 780200 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 628 │\n", "│ ┆ 08:31:08.15 ┆ 08:45:38.99 ┆ 08:41:36.1 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 0538_B51_L… ┆ 5529 ┆ 50538 ┆ ┆ ┆ ┆ ┆ │\n", "│ 786139 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 168 │\n", "│ ┆ 10:02:04.50 ┆ 10:04:27.20 ┆ 10:04:52.5 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 0658_B51_L… ┆ 4599 ┆ 00658 ┆ ┆ ┆ ┆ ┆ │\n", "│ 797676 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 0 │\n", "│ ┆ 08:46:05.34 ┆ 08:45:38.99 ┆ 08:46:05.3 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 6794_B51_L… ┆ 5529 ┆ 46794 ┆ ┆ ┆ ┆ ┆ │\n", "│ 807589 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 1239 │\n", "│ ┆ 10:56:05.03 ┆ 10:04:27.20 ┆ 11:16:44.0 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 9063_B51_L… ┆ 4599 ┆ 39063 ┆ ┆ ┆ ┆ ┆ │\n", "│ 831602 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ … ┆ null ┆ 0 ┆ FE ┆ 343 │\n", "│ ┆ 12:28:06.54 ┆ 12:34:01.47 ┆ 12:33:49.5 ┆ ┆ ┆ ┆ ┆ │\n", "│ ┆ 9569_B51_L… ┆ 1140 ┆ 49569 ┆ ┆ ┆ ┆ ┆ │\n", "└────────┴─────────────┴─────────────┴────────────┴───┴────────────┴────────┴─────────┴────────────┘" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.collect()" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "sample_data_aux_02_filter = data.filter(pl.col('interval_time')==\"[11 13]\").select(pl.col('datetime'),pl.col('reliable_arrival_date'),pl.col('predict_arrival_date'),pl.col('estimateArrive')).sort('datetime','estimateArrive',descending=True).collect()" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "KPI_data = sample_data_aux_02_filter.with_columns(pl.struct(reliable_arrival_date = pl.col('reliable_arrival_date'), predict_arrival_date = pl.col('predict_arrival_date').alias('struct')).map_elements(lambda x: KPI_fun(x['reliable_arrival_date'], x['predict_arrival_date'])).alias('KPI_value'))" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (30, 5)
datetimereliable_arrival_datepredict_arrival_dateestimateArriveKPI_value
strstrstri64f64
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…1958341.567779
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…1716341.148263
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…1837341.135457
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…1898340.938425
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…2061340.932139
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…1750.396977
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…11800.389483
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…11960.331104
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…10750.265574
"2024-03-03 12:…"2024-03-03 12:…"2024-03-03 12:…560.0
" ], "text/plain": [ "shape: (30, 5)\n", "┌─────────────────┬─────────────────────────┬────────────────────────┬────────────────┬────────────┐\n", "│ datetime ┆ reliable_arrival_date ┆ predict_arrival_date ┆ estimateArrive ┆ KPI_value │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ str ┆ i64 ┆ f64 │\n", "╞═════════════════╪═════════════════════════╪════════════════════════╪════════════════╪════════════╡\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 1958 ┆ 341.567779 │\n", "│ 12:07:05.038919 ┆ 12:34:01.471140 ┆ 12:39:43.038919 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 1716 ┆ 341.148263 │\n", "│ 12:11:06.619403 ┆ 12:34:01.471140 ┆ 12:39:42.619403 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 1837 ┆ 341.135457 │\n", "│ 12:09:05.606597 ┆ 12:34:01.471140 ┆ 12:39:42.606597 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 1898 ┆ 340.938425 │\n", "│ 12:08:04.409565 ┆ 12:34:01.471140 ┆ 12:39:42.409565 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 2061 ┆ 340.932139 │\n", "│ 12:05:21.403279 ┆ 12:34:01.471140 ┆ 12:39:42.403279 ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 175 ┆ 0.396977 │\n", "│ 12:31:06.074163 ┆ 12:34:01.471140 ┆ 12:34:01.074163 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 1180 ┆ 0.389483 │\n", "│ 12:14:21.081657 ┆ 12:34:01.471140 ┆ 12:34:01.081657 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 1196 ┆ 0.331104 │\n", "│ 12:14:05.140036 ┆ 12:34:01.471140 ┆ 12:34:01.140036 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 1075 ┆ 0.265574 │\n", "│ 12:16:06.205566 ┆ 12:34:01.471140 ┆ 12:34:01.205566 ┆ ┆ │\n", "│ 2024-03-03 ┆ 2024-03-03 ┆ 2024-03-03 ┆ 56 ┆ 0.0 │\n", "│ 12:33:05.471140 ┆ 12:34:01.471140 ┆ 12:34:01.471140 ┆ ┆ │\n", "└─────────────────┴─────────────────────────┴────────────────────────┴────────────────┴────────────┘" ] }, "execution_count": 29, "metadata": {}, "output_type": "execute_result" } ], "source": [ "KPI_data.sort('KPI_value',descending=True) #TODO: tabla para la ppt " ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "dict = KPI_data.filter(pl.col('KPI_value')<500).sort('estimateArrive').select('estimateArrive','KPI_value')" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_KPI(dict) #TODO: gráfica para la ppt" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# PRUEBA DEFINITIVA" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## BUS,LINEA,PARADA DESTINO FIJOS" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "import os\n", "import statistics\n", "import random\n", "import polars as pl\n", "import pandas as pd\n", "import seaborn as sns\n", "import polars.selectors as cs\n", "import matplotlib.pyplot as plt\n", "from datetime import datetime, timedelta\n", "\n", "import warnings\n" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))\n", "DATA_PATH = os.path.join(ROOT_PATH, \"data\", \"processed\")\n", "EMT_DATA_PATH = os.path.join(DATA_PATH, \"emt\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "data = pl.scan_csv(os.path.join(EMT_DATA_PATH, \"2024\", \"03\", f\"emt_202403.csv\"))\n", "list_day = ['02','03','04','05','06','07','08','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31']" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "26\n" ] } ], "source": [ "random.seed(1234) \n", "day = random.randint(2, 31)\n", "\n", "print(day)" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "sample_data = pl.scan_csv(os.path.join(EMT_DATA_PATH, \"2024\", \"03\",str(day), f\"emt_202403{str(day)}.csv\"))" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "def create_final_dataset(sample_data):\n", " sample_data = sample_data.with_columns((pl.col('datetime').cast(pl.String)+\"_B\"+pl.col('bus').cast(pl.String)+\"_L\"+ pl.col('line').cast(pl.String)+\"_S\"+pl.col('stop').cast(pl.String)).alias('PK'))\n", " \n", " # ETA <2400\n", " sample_data = sample_data.filter(pl.col('estimateArrive')<888888)\n", " sample_data = sample_data.group_by('PK').min()\n", " \n", " sample_data = sample_data.with_columns(pl.col(\"date\").cast(pl.Date),pl.col('bus').cast(pl.String),pl.col('line').cast(pl.String),pl.col('isHead').cast(pl.UInt8))\n", " \n", " sample_data = sample_data.with_columns(pl.col('datetime').map_elements(lambda x: datetime.strptime(x, \"%Y-%m-%d %H:%M:%S.%f\")))\n", " \n", " # Rellenamos valores nulos de dayType\n", " sample_data = sample_data.with_columns(pl.when(pl.col('dayType').is_null()).then(pl.col('date').apply(get_type_day)).otherwise(pl.col('dayType')).alias('dayType'))\n", " \n", " # Eliminamos variables\n", " sample_data = sample_data.drop('positionTypeBus','deviation','MaximumFrequency','StartTime','StopTime','strike')\n", " \n", " return sample_data.collect()\n", " " ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "def get_type_day(date):\n", " \n", " day = date.strftime(\"%A\")\n", " \n", " if day in ['Monday','Tuesday','Wednesday','Thursday','Friday']:\n", " \n", " type = 'LA'\n", " elif day == 'Saturday':\n", " type = 'SA'\n", " else:\n", " type = 'FE'\n", " \n", " return type" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "sample_data = create_final_dataset(sample_data)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1_136_086, 14)
PKdatedatetimebuslinestoppositionBusLonpositionBusLatDistanceBusdestinationMinimunFrequencyisHeaddayTypeestimateArrive
strdatedatetime[μs]strstri64f64f64i64stri64u8stri64
"2024-03-26 21:…2024-03-262024-03-26 21:40:09.739783"2536""170"5399-3.68467340.5110295665"SANCHINARRO"null0"LA"967
"2024-03-26 08:…2024-03-262024-03-26 08:10:09.881904"9109""177"5803-3.68847940.4670951383"MARQUES DE VIA…90"LA"300
"2024-03-26 22:…2024-03-262024-03-26 22:23:08.488793"2477""173"5518-3.66428740.4936741672"PLAZA CASTILLA…71"LA"590
"2024-03-26 16:…2024-03-262024-03-26 16:50:07.442667"2497""170"2970-3.70160240.5041689687"SANCHINARRO"null0"LA"2178
"2024-03-26 20:…2024-03-262024-03-26 20:30:06.307634"3279""11"222-3.69366140.4592193559"BARRIO BLANCO"null0"LA"1062
"2024-03-26 21:…2024-03-262024-03-26 21:42:07.452901"4708""82"1629-3.72731140.4450776819"PITIS"null0"LA"1068
"2024-03-26 22:…2024-03-262024-03-26 22:13:08.297954"2477""173"3603-3.68912840.4682154298"SANCHINARRO"70"LA"499
"2024-03-26 22:…2024-03-262024-03-26 22:46:11.429187"51""BR1"5899-3.6315440.48568505"VALDEBEBAS"null0"LA"94
"2024-03-26 16:…2024-03-262024-03-26 16:42:05.914865"2468""134"1022-3.71412740.486424337"MONTECARMELO"90"LA"893
"2024-03-26 15:…2024-03-262024-03-26 15:44:08.527764"2514""175"4508-3.665540.5041710"PLAZA CASTILLA…91"LA"231
" ], "text/plain": [ "shape: (1_136_086, 14)\n", "┌─────────────┬────────────┬─────────────┬──────┬───┬─────────────┬────────┬─────────┬─────────────┐\n", "│ PK ┆ date ┆ datetime ┆ bus ┆ … ┆ MinimunFreq ┆ isHead ┆ dayType ┆ estimateArr │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ uency ┆ --- ┆ --- ┆ ive │\n", "│ str ┆ date ┆ datetime[μs ┆ str ┆ ┆ --- ┆ u8 ┆ str ┆ --- │\n", "│ ┆ ┆ ] ┆ ┆ ┆ i64 ┆ ┆ ┆ i64 │\n", "╞═════════════╪════════════╪═════════════╪══════╪═══╪═════════════╪════════╪═════════╪═════════════╡\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2536 ┆ … ┆ null ┆ 0 ┆ LA ┆ 967 │\n", "│ 21:40:09.73 ┆ ┆ 21:40:09.73 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 9783_B2536… ┆ ┆ 9783 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 9109 ┆ … ┆ 9 ┆ 0 ┆ LA ┆ 300 │\n", "│ 08:10:09.88 ┆ ┆ 08:10:09.88 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 1904_B9109… ┆ ┆ 1904 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2477 ┆ … ┆ 7 ┆ 1 ┆ LA ┆ 590 │\n", "│ 22:23:08.48 ┆ ┆ 22:23:08.48 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 8793_B2477… ┆ ┆ 8793 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2497 ┆ … ┆ null ┆ 0 ┆ LA ┆ 2178 │\n", "│ 16:50:07.44 ┆ ┆ 16:50:07.44 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2667_B2497… ┆ ┆ 2667 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 3279 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1062 │\n", "│ 20:30:06.30 ┆ ┆ 20:30:06.30 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 7634_B3279… ┆ ┆ 7634 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 4708 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1068 │\n", "│ 21:42:07.45 ┆ ┆ 21:42:07.45 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2901_B4708… ┆ ┆ 2901 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2477 ┆ … ┆ 7 ┆ 0 ┆ LA ┆ 499 │\n", "│ 22:13:08.29 ┆ ┆ 22:13:08.29 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 7954_B2477… ┆ ┆ 7954 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 94 │\n", "│ 22:46:11.42 ┆ ┆ 22:46:11.42 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 9187_B51_L… ┆ ┆ 9187 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2468 ┆ … ┆ 9 ┆ 0 ┆ LA ┆ 893 │\n", "│ 16:42:05.91 ┆ ┆ 16:42:05.91 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 4865_B2468… ┆ ┆ 4865 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2514 ┆ … ┆ 9 ┆ 1 ┆ LA ┆ 231 │\n", "│ 15:44:08.52 ┆ ┆ 15:44:08.52 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 7764_B2514… ┆ ┆ 7764 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "└─────────────┴────────────┴─────────────┴──────┴───┴─────────────┴────────┴─────────┴─────────────┘" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data" ] }, { "cell_type": "code", "execution_count": 11, "metadata": {}, "outputs": [], "source": [ "sample_data = sample_data.filter(pl.col('bus')=='51', pl.col('line')==\"BR1\",pl.col('stop')==4366, pl.col('destination')==\"VALDEBEBAS\").sort('datetime')" ] }, { "cell_type": "code", "execution_count": 12, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (222, 14)
PKdatedatetimebuslinestoppositionBusLonpositionBusLatDistanceBusdestinationMinimunFrequencyisHeaddayTypeestimateArrive
strdatedatetime[μs]strstri64f64f64i64stri64u8stri64
"2024-03-26 09:…2024-03-262024-03-26 09:07:08.486404"51""BR1"4366-3.69530940.4867479195"VALDEBEBAS"null0"LA"1536
"2024-03-26 09:…2024-03-262024-03-26 09:08:08.881899"51""BR1"4366-3.69530940.4867478836"VALDEBEBAS"null0"LA"1476
"2024-03-26 09:…2024-03-262024-03-26 09:09:08.755131"51""BR1"4366-3.69530940.4867479226"VALDEBEBAS"null0"LA"1381
"2024-03-26 09:…2024-03-262024-03-26 09:10:08.875354"51""BR1"4366-3.69530940.4867478825"VALDEBEBAS"null0"LA"1321
"2024-03-26 09:…2024-03-262024-03-26 09:11:10.677695"51""BR1"4366-3.69661740.488219028"VALDEBEBAS"null0"LA"1239
"2024-03-26 22:…2024-03-262024-03-26 22:39:07.409588"51""BR1"4366-3.65805740.4936681920"VALDEBEBAS"null0"LA"339
"2024-03-26 22:…2024-03-262024-03-26 22:40:08.462574"51""BR1"4366-3.65928740.4905221455"VALDEBEBAS"null0"LA"263
"2024-03-26 22:…2024-03-262024-03-26 22:41:08.942691"51""BR1"4366-3.65466540.4880511123"VALDEBEBAS"null0"LA"203
"2024-03-26 22:…2024-03-262024-03-26 22:42:10.443984"51""BR1"4366-3.65057340.486745760"VALDEBEBAS"null0"LA"198
"2024-03-26 22:…2024-03-262024-03-26 22:43:10.564491"51""BR1"4366-3.64552140.485224236"VALDEBEBAS"null0"LA"39
" ], "text/plain": [ "shape: (222, 14)\n", "┌──────────────┬────────────┬─────────────┬─────┬───┬─────────────┬────────┬─────────┬─────────────┐\n", "│ PK ┆ date ┆ datetime ┆ bus ┆ … ┆ MinimunFreq ┆ isHead ┆ dayType ┆ estimateArr │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ uency ┆ --- ┆ --- ┆ ive │\n", "│ str ┆ date ┆ datetime[μs ┆ str ┆ ┆ --- ┆ u8 ┆ str ┆ --- │\n", "│ ┆ ┆ ] ┆ ┆ ┆ i64 ┆ ┆ ┆ i64 │\n", "╞══════════════╪════════════╪═════════════╪═════╪═══╪═════════════╪════════╪═════════╪═════════════╡\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1536 │\n", "│ 09:07:08.486 ┆ ┆ 09:07:08.48 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 404_B51_L… ┆ ┆ 6404 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1476 │\n", "│ 09:08:08.881 ┆ ┆ 09:08:08.88 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 899_B51_L… ┆ ┆ 1899 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1381 │\n", "│ 09:09:08.755 ┆ ┆ 09:09:08.75 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 131_B51_L… ┆ ┆ 5131 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1321 │\n", "│ 09:10:08.875 ┆ ┆ 09:10:08.87 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 354_B51_L… ┆ ┆ 5354 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1239 │\n", "│ 09:11:10.677 ┆ ┆ 09:11:10.67 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 695_B51_L… ┆ ┆ 7695 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 339 │\n", "│ 22:39:07.409 ┆ ┆ 22:39:07.40 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 588_B51_L… ┆ ┆ 9588 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 263 │\n", "│ 22:40:08.462 ┆ ┆ 22:40:08.46 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 574_B51_L… ┆ ┆ 2574 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 203 │\n", "│ 22:41:08.942 ┆ ┆ 22:41:08.94 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 691_B51_L… ┆ ┆ 2691 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 198 │\n", "│ 22:42:10.443 ┆ ┆ 22:42:10.44 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 984_B51_L… ┆ ┆ 3984 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ null ┆ 0 ┆ LA ┆ 39 │\n", "│ 22:43:10.564 ┆ ┆ 22:43:10.56 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 491_B51_L… ┆ ┆ 4491 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "└──────────────┴────────────┴─────────────┴─────┴───┴─────────────┴────────┴─────────┴─────────────┘" ] }, "execution_count": 12, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data" ] }, { "cell_type": "code", "execution_count": 13, "metadata": {}, "outputs": [], "source": [ "def calculate_predict_arrival_date(date_datetime,second):\n", " new_date_datetime = date_datetime + timedelta(seconds=second)\n", " \n", " return new_date_datetime" ] }, { "cell_type": "code", "execution_count": 14, "metadata": {}, "outputs": [], "source": [ "sample_data = sample_data.with_columns(pl.struct(datetime = pl.col('datetime'), estimateArrive = pl.col('estimateArrive').alias('struct')).map_elements(lambda x: calculate_predict_arrival_date(x['datetime'], x['estimateArrive'])).alias('predict_arrival_date'))" ] }, { "cell_type": "code", "execution_count": 15, "metadata": {}, "outputs": [], "source": [ "sample_data_pd = sample_data.to_pandas()\n", "sample_data_pd['bloque_id'] = None\n", "\n", "bloque_actual = 1\n", "for i in range(0, len(sample_data_pd)-1):\n", " if (sample_data_pd['datetime'][i + 1] - sample_data_pd['datetime'][i]) > timedelta(minutes=5) and (sample_data_pd['estimateArrive'][i] < sample_data_pd['estimateArrive'][i + 1]):\n", " sample_data_pd['bloque_id'][i] = bloque_actual\n", " bloque_actual += 1\n", " \n", " else:\n", " sample_data_pd['bloque_id'][i] = bloque_actual" ] }, { "cell_type": "code", "execution_count": 16, "metadata": {}, "outputs": [], "source": [ "sample_data_pd.at[sample_data_pd.index[-1], 'bloque_id'] = bloque_actual" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sample_data_pd.head(20)" ] }, { "cell_type": "code", "execution_count": 18, "metadata": {}, "outputs": [], "source": [ "sample_data_pl = pl.from_pandas(sample_data_pd)" ] }, { "cell_type": "code", "execution_count": 19, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (222, 16)
PKdatedatetimebuslinestoppositionBusLonpositionBusLatDistanceBusdestinationMinimunFrequencyisHeaddayTypeestimateArrivepredict_arrival_datebloque_id
strdatetime[ms]datetime[μs]strstri64f64f64i64strf64u8stri64datetime[μs]i64
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:05:08.146916"51""BR1"4366-3.62517440.48262319754"VALDEBEBAS"null0"LA"25152024-03-26 22:47:03.1469169
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:06:08.223862"51""BR1"4366-3.63524840.48630319275"VALDEBEBAS"null0"LA"24542024-03-26 22:47:02.2238629
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:07:07.950631"51""BR1"4366-3.64305340.48447318804"VALDEBEBAS"null0"LA"23942024-03-26 22:47:01.9506319
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:08:08.530312"51""BR1"4366-3.64305340.48447318286"VALDEBEBAS"null0"LA"23282024-03-26 22:46:56.5303129
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:09:08.167593"51""BR1"4366-3.64305340.48447317862"VALDEBEBAS"null0"LA"22742024-03-26 22:47:02.1675939
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:23:08.256559"51""BR1"4366-3.65966240.49221746"VALDEBEBAS"null0"LA"3212024-03-26 09:28:29.2565591
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:24:08.643877"51""BR1"4366-3.65842840.4899811276"VALDEBEBAS"null0"LA"2322024-03-26 09:28:00.6438771
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:25:09.036527"51""BR1"4366-3.65534940.487956847"VALDEBEBAS"null0"LA"1592024-03-26 09:27:48.0365271
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:26:08.257810"51""BR1"4366-3.65048340.486716533"VALDEBEBAS"null0"LA"1002024-03-26 09:27:48.2578101
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:27:08.674862"51""BR1"4366-3.64367840.484670"VALDEBEBAS"null0"LA"02024-03-26 09:27:08.6748621
" ], "text/plain": [ "shape: (222, 16)\n", "┌─────────────┬─────────────┬────────────┬─────┬───┬─────────┬────────────┬────────────┬───────────┐\n", "│ PK ┆ date ┆ datetime ┆ bus ┆ … ┆ dayType ┆ estimateAr ┆ predict_ar ┆ bloque_id │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ rive ┆ rival_date ┆ --- │\n", "│ str ┆ datetime[ms ┆ datetime[μ ┆ str ┆ ┆ str ┆ --- ┆ --- ┆ i64 │\n", "│ ┆ ] ┆ s] ┆ ┆ ┆ ┆ i64 ┆ datetime[μ ┆ │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ s] ┆ │\n", "╞═════════════╪═════════════╪════════════╪═════╪═══╪═════════╪════════════╪════════════╪═══════════╡\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 2515 ┆ 2024-03-26 ┆ 9 │\n", "│ 22:05:08.14 ┆ 00:00:00 ┆ 22:05:08.1 ┆ ┆ ┆ ┆ ┆ 22:47:03.1 ┆ │\n", "│ 6916_B51_L… ┆ ┆ 46916 ┆ ┆ ┆ ┆ ┆ 46916 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 2454 ┆ 2024-03-26 ┆ 9 │\n", "│ 22:06:08.22 ┆ 00:00:00 ┆ 22:06:08.2 ┆ ┆ ┆ ┆ ┆ 22:47:02.2 ┆ │\n", "│ 3862_B51_L… ┆ ┆ 23862 ┆ ┆ ┆ ┆ ┆ 23862 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 2394 ┆ 2024-03-26 ┆ 9 │\n", "│ 22:07:07.95 ┆ 00:00:00 ┆ 22:07:07.9 ┆ ┆ ┆ ┆ ┆ 22:47:01.9 ┆ │\n", "│ 0631_B51_L… ┆ ┆ 50631 ┆ ┆ ┆ ┆ ┆ 50631 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 2328 ┆ 2024-03-26 ┆ 9 │\n", "│ 22:08:08.53 ┆ 00:00:00 ┆ 22:08:08.5 ┆ ┆ ┆ ┆ ┆ 22:46:56.5 ┆ │\n", "│ 0312_B51_L… ┆ ┆ 30312 ┆ ┆ ┆ ┆ ┆ 30312 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 2274 ┆ 2024-03-26 ┆ 9 │\n", "│ 22:09:08.16 ┆ 00:00:00 ┆ 22:09:08.1 ┆ ┆ ┆ ┆ ┆ 22:47:02.1 ┆ │\n", "│ 7593_B51_L… ┆ ┆ 67593 ┆ ┆ ┆ ┆ ┆ 67593 ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 321 ┆ 2024-03-26 ┆ 1 │\n", "│ 09:23:08.25 ┆ 00:00:00 ┆ 09:23:08.2 ┆ ┆ ┆ ┆ ┆ 09:28:29.2 ┆ │\n", "│ 6559_B51_L… ┆ ┆ 56559 ┆ ┆ ┆ ┆ ┆ 56559 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 232 ┆ 2024-03-26 ┆ 1 │\n", "│ 09:24:08.64 ┆ 00:00:00 ┆ 09:24:08.6 ┆ ┆ ┆ ┆ ┆ 09:28:00.6 ┆ │\n", "│ 3877_B51_L… ┆ ┆ 43877 ┆ ┆ ┆ ┆ ┆ 43877 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 159 ┆ 2024-03-26 ┆ 1 │\n", "│ 09:25:09.03 ┆ 00:00:00 ┆ 09:25:09.0 ┆ ┆ ┆ ┆ ┆ 09:27:48.0 ┆ │\n", "│ 6527_B51_L… ┆ ┆ 36527 ┆ ┆ ┆ ┆ ┆ 36527 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 100 ┆ 2024-03-26 ┆ 1 │\n", "│ 09:26:08.25 ┆ 00:00:00 ┆ 09:26:08.2 ┆ ┆ ┆ ┆ ┆ 09:27:48.2 ┆ │\n", "│ 7810_B51_L… ┆ ┆ 57810 ┆ ┆ ┆ ┆ ┆ 57810 ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 ┆ 1 │\n", "│ 09:27:08.67 ┆ 00:00:00 ┆ 09:27:08.6 ┆ ┆ ┆ ┆ ┆ 09:27:08.6 ┆ │\n", "│ 4862_B51_L… ┆ ┆ 74862 ┆ ┆ ┆ ┆ ┆ 74862 ┆ │\n", "└─────────────┴─────────────┴────────────┴─────┴───┴─────────┴────────────┴────────────┴───────────┘" ] }, "execution_count": 19, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data_pl.sort('bloque_id','estimateArrive',descending=True)" ] }, { "cell_type": "code", "execution_count": 20, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (8, 17)
buslinestopdestinationdatebloque_idPKdatetimepositionBusLonpositionBusLatDistanceBusMinimunFrequencyisHeaddayTypeestimateArrivepredict_arrival_datereliable_arrival_date
strstri64strdatetime[ms]i64strdatetime[μs]f64f64i64f64u8stri64datetime[μs]datetime[μs]
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:004"2024-03-26 13:…2024-03-26 13:42:09.058943-3.64984140.486513202null0"LA"282024-03-26 13:42:37.0589432024-03-26 13:42:37.058943
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:003"2024-03-26 12:…2024-03-26 12:15:09.310898-3.64647140.4853190null0"LA"02024-03-26 12:15:09.3108982024-03-26 12:15:09.310898
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:007"2024-03-26 20:…2024-03-26 20:00:08.897971-3.64703740.485411214null0"LA"332024-03-26 20:00:41.8979712024-03-26 20:00:41.897971
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:008"2024-03-26 21:…2024-03-26 21:22:09.062779-3.64707140.4844710null0"LA"02024-03-26 21:22:55.0627792024-03-26 21:22:55.062779
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:006"2024-03-26 18:…2024-03-26 18:31:10.487315-3.64347140.4846030null0"LA"02024-03-26 18:31:10.4873152024-03-26 18:31:10.487315
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:001"2024-03-26 09:…2024-03-26 09:27:08.674862-3.64367840.484670null0"LA"02024-03-26 09:27:08.6748622024-03-26 09:27:08.674862
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:002"2024-03-26 10:…2024-03-26 10:49:07.411681-3.64410640.4847260null0"LA"02024-03-26 10:49:23.4116812024-03-26 10:49:23.411681
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:009"2024-03-26 22:…2024-03-26 22:43:10.564491-3.64552140.485224236null0"LA"392024-03-26 22:43:49.5644912024-03-26 22:43:49.564491
" ], "text/plain": [ "shape: (8, 17)\n", "┌─────┬──────┬──────┬─────────────┬───┬─────────┬────────────────┬────────────────┬────────────────┐\n", "│ bus ┆ line ┆ stop ┆ destination ┆ … ┆ dayType ┆ estimateArrive ┆ predict_arriva ┆ reliable_arriv │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ l_date ┆ al_date │\n", "│ str ┆ str ┆ i64 ┆ str ┆ ┆ str ┆ i64 ┆ --- ┆ --- │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ datetime[μs] │\n", "╞═════╪══════╪══════╪═════════════╪═══╪═════════╪════════════════╪════════════════╪════════════════╡\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 28 ┆ 2024-03-26 13: ┆ 2024-03-26 13: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 42:37.058943 ┆ 42:37.058943 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 12: ┆ 2024-03-26 12: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 15:09.310898 ┆ 15:09.310898 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 33 ┆ 2024-03-26 20: ┆ 2024-03-26 20: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 00:41.897971 ┆ 00:41.897971 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 21: ┆ 2024-03-26 21: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 22:55.062779 ┆ 22:55.062779 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 18: ┆ 2024-03-26 18: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 31:10.487315 ┆ 31:10.487315 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 09: ┆ 2024-03-26 09: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 27:08.674862 ┆ 27:08.674862 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 10: ┆ 2024-03-26 10: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 49:23.411681 ┆ 49:23.411681 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 39 ┆ 2024-03-26 22: ┆ 2024-03-26 22: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 43:49.564491 ┆ 43:49.564491 │\n", "└─────┴──────┴──────┴─────────────┴───┴─────────┴────────────────┴────────────────┴────────────────┘" ] }, "execution_count": 20, "metadata": {}, "output_type": "execute_result" } ], "source": [ "small_sample_data = sample_data_pl.filter(pl.col('estimateArrive')<=60).group_by(pl.col('bus'),pl.col('line'),pl.col('stop'),pl.col('destination'),pl.col('date'),pl.col('bloque_id')).min().with_columns(pl.col('predict_arrival_date').alias('reliable_arrival_date'))\n", "small_sample_data" ] }, { "cell_type": "code", "execution_count": 21, "metadata": {}, "outputs": [], "source": [ "small_sample_data = small_sample_data.filter(pl.col('bloque_id').is_not_null())" ] }, { "cell_type": "code", "execution_count": 22, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (8, 17)
buslinestopdestinationdatebloque_idPKdatetimepositionBusLonpositionBusLatDistanceBusMinimunFrequencyisHeaddayTypeestimateArrivepredict_arrival_datereliable_arrival_date
strstri64strdatetime[ms]i64strdatetime[μs]f64f64i64f64u8stri64datetime[μs]datetime[μs]
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:004"2024-03-26 13:…2024-03-26 13:42:09.058943-3.64984140.486513202null0"LA"282024-03-26 13:42:37.0589432024-03-26 13:42:37.058943
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:003"2024-03-26 12:…2024-03-26 12:15:09.310898-3.64647140.4853190null0"LA"02024-03-26 12:15:09.3108982024-03-26 12:15:09.310898
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:007"2024-03-26 20:…2024-03-26 20:00:08.897971-3.64703740.485411214null0"LA"332024-03-26 20:00:41.8979712024-03-26 20:00:41.897971
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:008"2024-03-26 21:…2024-03-26 21:22:09.062779-3.64707140.4844710null0"LA"02024-03-26 21:22:55.0627792024-03-26 21:22:55.062779
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:006"2024-03-26 18:…2024-03-26 18:31:10.487315-3.64347140.4846030null0"LA"02024-03-26 18:31:10.4873152024-03-26 18:31:10.487315
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:001"2024-03-26 09:…2024-03-26 09:27:08.674862-3.64367840.484670null0"LA"02024-03-26 09:27:08.6748622024-03-26 09:27:08.674862
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:002"2024-03-26 10:…2024-03-26 10:49:07.411681-3.64410640.4847260null0"LA"02024-03-26 10:49:23.4116812024-03-26 10:49:23.411681
"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:009"2024-03-26 22:…2024-03-26 22:43:10.564491-3.64552140.485224236null0"LA"392024-03-26 22:43:49.5644912024-03-26 22:43:49.564491
" ], "text/plain": [ "shape: (8, 17)\n", "┌─────┬──────┬──────┬─────────────┬───┬─────────┬────────────────┬────────────────┬────────────────┐\n", "│ bus ┆ line ┆ stop ┆ destination ┆ … ┆ dayType ┆ estimateArrive ┆ predict_arriva ┆ reliable_arriv │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ --- ┆ --- ┆ l_date ┆ al_date │\n", "│ str ┆ str ┆ i64 ┆ str ┆ ┆ str ┆ i64 ┆ --- ┆ --- │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[μs] ┆ datetime[μs] │\n", "╞═════╪══════╪══════╪═════════════╪═══╪═════════╪════════════════╪════════════════╪════════════════╡\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 28 ┆ 2024-03-26 13: ┆ 2024-03-26 13: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 42:37.058943 ┆ 42:37.058943 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 12: ┆ 2024-03-26 12: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 15:09.310898 ┆ 15:09.310898 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 33 ┆ 2024-03-26 20: ┆ 2024-03-26 20: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 00:41.897971 ┆ 00:41.897971 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 21: ┆ 2024-03-26 21: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 22:55.062779 ┆ 22:55.062779 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 18: ┆ 2024-03-26 18: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 31:10.487315 ┆ 31:10.487315 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 09: ┆ 2024-03-26 09: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 27:08.674862 ┆ 27:08.674862 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 0 ┆ 2024-03-26 10: ┆ 2024-03-26 10: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 49:23.411681 ┆ 49:23.411681 │\n", "│ 51 ┆ BR1 ┆ 4366 ┆ VALDEBEBAS ┆ … ┆ LA ┆ 39 ┆ 2024-03-26 22: ┆ 2024-03-26 22: │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ 43:49.564491 ┆ 43:49.564491 │\n", "└─────┴──────┴──────┴─────────────┴───┴─────────┴────────────────┴────────────────┴────────────────┘" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [ "small_sample_data" ] }, { "cell_type": "code", "execution_count": 23, "metadata": {}, "outputs": [], "source": [ "final_sample_data = sample_data_pl.join(small_sample_data,on=pl.col('bloque_id'),how = 'left')" ] }, { "cell_type": "code", "execution_count": 24, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (222, 32)
PKdatedatetimebuslinestoppositionBusLonpositionBusLatDistanceBusdestinationMinimunFrequencyisHeaddayTypeestimateArrivepredict_arrival_datebloque_idbus_rightline_rightstop_rightdestination_rightdate_rightPK_rightdatetime_rightpositionBusLon_rightpositionBusLat_rightDistanceBus_rightMinimunFrequency_rightisHead_rightdayType_rightestimateArrive_rightpredict_arrival_date_rightreliable_arrival_date
strdatetime[ms]datetime[μs]strstri64f64f64i64strf64u8stri64datetime[μs]i64strstri64strdatetime[ms]strdatetime[μs]f64f64i64f64u8stri64datetime[μs]datetime[μs]
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:07:08.486404"51""BR1"4366-3.69530940.4867479195"VALDEBEBAS"null0"LA"15362024-03-26 09:32:44.4864041"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 09:…2024-03-26 09:27:08.674862-3.64367840.484670null0"LA"02024-03-26 09:27:08.6748622024-03-26 09:27:08.674862
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:08:08.881899"51""BR1"4366-3.69530940.4867478836"VALDEBEBAS"null0"LA"14762024-03-26 09:32:44.8818991"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 09:…2024-03-26 09:27:08.674862-3.64367840.484670null0"LA"02024-03-26 09:27:08.6748622024-03-26 09:27:08.674862
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:09:08.755131"51""BR1"4366-3.69530940.4867479226"VALDEBEBAS"null0"LA"13812024-03-26 09:32:09.7551311"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 09:…2024-03-26 09:27:08.674862-3.64367840.484670null0"LA"02024-03-26 09:27:08.6748622024-03-26 09:27:08.674862
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:10:08.875354"51""BR1"4366-3.69530940.4867478825"VALDEBEBAS"null0"LA"13212024-03-26 09:32:09.8753541"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 09:…2024-03-26 09:27:08.674862-3.64367840.484670null0"LA"02024-03-26 09:27:08.6748622024-03-26 09:27:08.674862
"2024-03-26 09:…2024-03-26 00:00:002024-03-26 09:11:10.677695"51""BR1"4366-3.69661740.488219028"VALDEBEBAS"null0"LA"12392024-03-26 09:31:49.6776951"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 09:…2024-03-26 09:27:08.674862-3.64367840.484670null0"LA"02024-03-26 09:27:08.6748622024-03-26 09:27:08.674862
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:39:07.409588"51""BR1"4366-3.65805740.4936681920"VALDEBEBAS"null0"LA"3392024-03-26 22:44:46.4095889"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 22:…2024-03-26 22:43:10.564491-3.64552140.485224236null0"LA"392024-03-26 22:43:49.5644912024-03-26 22:43:49.564491
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:40:08.462574"51""BR1"4366-3.65928740.4905221455"VALDEBEBAS"null0"LA"2632024-03-26 22:44:31.4625749"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 22:…2024-03-26 22:43:10.564491-3.64552140.485224236null0"LA"392024-03-26 22:43:49.5644912024-03-26 22:43:49.564491
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:41:08.942691"51""BR1"4366-3.65466540.4880511123"VALDEBEBAS"null0"LA"2032024-03-26 22:44:31.9426919"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 22:…2024-03-26 22:43:10.564491-3.64552140.485224236null0"LA"392024-03-26 22:43:49.5644912024-03-26 22:43:49.564491
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:42:10.443984"51""BR1"4366-3.65057340.486745760"VALDEBEBAS"null0"LA"1982024-03-26 22:45:28.4439849"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 22:…2024-03-26 22:43:10.564491-3.64552140.485224236null0"LA"392024-03-26 22:43:49.5644912024-03-26 22:43:49.564491
"2024-03-26 22:…2024-03-26 00:00:002024-03-26 22:43:10.564491"51""BR1"4366-3.64552140.485224236"VALDEBEBAS"null0"LA"392024-03-26 22:43:49.5644919"51""BR1"4366"VALDEBEBAS"2024-03-26 00:00:00"2024-03-26 22:…2024-03-26 22:43:10.564491-3.64552140.485224236null0"LA"392024-03-26 22:43:49.5644912024-03-26 22:43:49.564491
" ], "text/plain": [ "shape: (222, 32)\n", "┌────────────┬────────────┬────────────┬─────┬───┬────────────┬────────────┬───────────┬───────────┐\n", "│ PK ┆ date ┆ datetime ┆ bus ┆ … ┆ dayType_ri ┆ estimateAr ┆ predict_a ┆ reliable_ │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ ght ┆ rive_right ┆ rrival_da ┆ arrival_d │\n", "│ str ┆ datetime[m ┆ datetime[μ ┆ str ┆ ┆ --- ┆ --- ┆ te_right ┆ ate │\n", "│ ┆ s] ┆ s] ┆ ┆ ┆ str ┆ i64 ┆ --- ┆ --- │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ datetime[ ┆ datetime[ │\n", "│ ┆ ┆ ┆ ┆ ┆ ┆ ┆ μs] ┆ μs] │\n", "╞════════════╪════════════╪════════════╪═════╪═══╪════════════╪════════════╪═══════════╪═══════════╡\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 0 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 09:07:08.4 ┆ 00:00:00 ┆ 09:07:08.4 ┆ ┆ ┆ ┆ ┆ 6 09:27:0 ┆ 6 09:27:0 │\n", "│ 86404_B51_ ┆ ┆ 86404 ┆ ┆ ┆ ┆ ┆ 8.674862 ┆ 8.674862 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 0 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 09:08:08.8 ┆ 00:00:00 ┆ 09:08:08.8 ┆ ┆ ┆ ┆ ┆ 6 09:27:0 ┆ 6 09:27:0 │\n", "│ 81899_B51_ ┆ ┆ 81899 ┆ ┆ ┆ ┆ ┆ 8.674862 ┆ 8.674862 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 0 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 09:09:08.7 ┆ 00:00:00 ┆ 09:09:08.7 ┆ ┆ ┆ ┆ ┆ 6 09:27:0 ┆ 6 09:27:0 │\n", "│ 55131_B51_ ┆ ┆ 55131 ┆ ┆ ┆ ┆ ┆ 8.674862 ┆ 8.674862 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 0 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 09:10:08.8 ┆ 00:00:00 ┆ 09:10:08.8 ┆ ┆ ┆ ┆ ┆ 6 09:27:0 ┆ 6 09:27:0 │\n", "│ 75354_B51_ ┆ ┆ 75354 ┆ ┆ ┆ ┆ ┆ 8.674862 ┆ 8.674862 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 0 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 09:11:10.6 ┆ 00:00:00 ┆ 09:11:10.6 ┆ ┆ ┆ ┆ ┆ 6 09:27:0 ┆ 6 09:27:0 │\n", "│ 77695_B51_ ┆ ┆ 77695 ┆ ┆ ┆ ┆ ┆ 8.674862 ┆ 8.674862 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 39 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 22:39:07.4 ┆ 00:00:00 ┆ 22:39:07.4 ┆ ┆ ┆ ┆ ┆ 6 22:43:4 ┆ 6 22:43:4 │\n", "│ 09588_B51_ ┆ ┆ 09588 ┆ ┆ ┆ ┆ ┆ 9.564491 ┆ 9.564491 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 39 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 22:40:08.4 ┆ 00:00:00 ┆ 22:40:08.4 ┆ ┆ ┆ ┆ ┆ 6 22:43:4 ┆ 6 22:43:4 │\n", "│ 62574_B51_ ┆ ┆ 62574 ┆ ┆ ┆ ┆ ┆ 9.564491 ┆ 9.564491 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 39 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 22:41:08.9 ┆ 00:00:00 ┆ 22:41:08.9 ┆ ┆ ┆ ┆ ┆ 6 22:43:4 ┆ 6 22:43:4 │\n", "│ 42691_B51_ ┆ ┆ 42691 ┆ ┆ ┆ ┆ ┆ 9.564491 ┆ 9.564491 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 39 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 22:42:10.4 ┆ 00:00:00 ┆ 22:42:10.4 ┆ ┆ ┆ ┆ ┆ 6 22:43:4 ┆ 6 22:43:4 │\n", "│ 43984_B51_ ┆ ┆ 43984 ┆ ┆ ┆ ┆ ┆ 9.564491 ┆ 9.564491 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 51 ┆ … ┆ LA ┆ 39 ┆ 2024-03-2 ┆ 2024-03-2 │\n", "│ 22:43:10.5 ┆ 00:00:00 ┆ 22:43:10.5 ┆ ┆ ┆ ┆ ┆ 6 22:43:4 ┆ 6 22:43:4 │\n", "│ 64491_B51_ ┆ ┆ 64491 ┆ ┆ ┆ ┆ ┆ 9.564491 ┆ 9.564491 │\n", "│ L… ┆ ┆ ┆ ┆ ┆ ┆ ┆ ┆ │\n", "└────────────┴────────────┴────────────┴─────┴───┴────────────┴────────────┴───────────┴───────────┘" ] }, "execution_count": 24, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_sample_data" ] }, { "cell_type": "code", "execution_count": 25, "metadata": {}, "outputs": [], "source": [ "final_sample_data = final_sample_data.select(pl.col('PK'),pl.col('datetime'),pl.col('reliable_arrival_date'),pl.col('predict_arrival_date'),pl.col('bloque_id'),pl.col('estimateArrive'))" ] }, { "cell_type": "code", "execution_count": 26, "metadata": {}, "outputs": [], "source": [ "final_sample_data = final_sample_data.filter(pl.col('reliable_arrival_date').is_not_null())" ] }, { "cell_type": "code", "execution_count": 27, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
PKdatetimereliable_arrival_datepredict_arrival_datebloque_idestimateArrive
02024-03-26 09:07:08.486404_B51_LBR1_S43662024-03-26 09:07:08.4864042024-03-26 09:27:08.6748622024-03-26 09:32:44.48640411536
12024-03-26 09:08:08.881899_B51_LBR1_S43662024-03-26 09:08:08.8818992024-03-26 09:27:08.6748622024-03-26 09:32:44.88189911476
22024-03-26 09:09:08.755131_B51_LBR1_S43662024-03-26 09:09:08.7551312024-03-26 09:27:08.6748622024-03-26 09:32:09.75513111381
32024-03-26 09:10:08.875354_B51_LBR1_S43662024-03-26 09:10:08.8753542024-03-26 09:27:08.6748622024-03-26 09:32:09.87535411321
42024-03-26 09:11:10.677695_B51_LBR1_S43662024-03-26 09:11:10.6776952024-03-26 09:27:08.6748622024-03-26 09:31:49.67769511239
52024-03-26 09:12:08.822076_B51_LBR1_S43662024-03-26 09:12:08.8220762024-03-26 09:27:08.6748622024-03-26 09:31:53.82207611185
62024-03-26 09:13:09.160954_B51_LBR1_S43662024-03-26 09:13:09.1609542024-03-26 09:27:08.6748622024-03-26 09:30:28.16095411039
72024-03-26 09:14:08.414499_B51_LBR1_S43662024-03-26 09:14:08.4144992024-03-26 09:27:08.6748622024-03-26 09:30:00.4144991952
82024-03-26 09:15:09.979541_B51_LBR1_S43662024-03-26 09:15:09.9795412024-03-26 09:27:08.6748622024-03-26 09:29:15.9795411846
92024-03-26 09:17:07.807332_B51_LBR1_S43662024-03-26 09:17:07.8073322024-03-26 09:27:08.6748622024-03-26 09:28:29.8073321682
102024-03-26 09:18:09.292909_B51_LBR1_S43662024-03-26 09:18:09.2929092024-03-26 09:27:08.6748622024-03-26 09:28:29.2929091620
112024-03-26 09:19:09.264869_B51_LBR1_S43662024-03-26 09:19:09.2648692024-03-26 09:27:08.6748622024-03-26 09:28:29.2648691560
122024-03-26 09:20:08.308902_B51_LBR1_S43662024-03-26 09:20:08.3089022024-03-26 09:27:08.6748622024-03-26 09:28:28.3089021500
132024-03-26 09:22:08.748819_B51_LBR1_S43662024-03-26 09:22:08.7488192024-03-26 09:27:08.6748622024-03-26 09:28:29.7488191381
142024-03-26 09:23:08.256559_B51_LBR1_S43662024-03-26 09:23:08.2565592024-03-26 09:27:08.6748622024-03-26 09:28:29.2565591321
152024-03-26 09:24:08.643877_B51_LBR1_S43662024-03-26 09:24:08.6438772024-03-26 09:27:08.6748622024-03-26 09:28:00.6438771232
162024-03-26 09:25:09.036527_B51_LBR1_S43662024-03-26 09:25:09.0365272024-03-26 09:27:08.6748622024-03-26 09:27:48.0365271159
172024-03-26 09:26:08.257810_B51_LBR1_S43662024-03-26 09:26:08.2578102024-03-26 09:27:08.6748622024-03-26 09:27:48.2578101100
182024-03-26 09:27:08.674862_B51_LBR1_S43662024-03-26 09:27:08.6748622024-03-26 09:27:08.6748622024-03-26 09:27:08.67486210
192024-03-26 10:28:06.759858_B51_LBR1_S43662024-03-26 10:28:06.7598582024-03-26 10:49:23.4116812024-03-26 11:06:15.75985822289
202024-03-26 10:29:08.164543_B51_LBR1_S43662024-03-26 10:29:08.1645432024-03-26 10:49:23.4116812024-03-26 11:07:21.16454322293
212024-03-26 10:30:08.351999_B51_LBR1_S43662024-03-26 10:30:08.3519992024-03-26 10:49:23.4116812024-03-26 11:08:30.35199922302
222024-03-26 10:31:08.228017_B51_LBR1_S43662024-03-26 10:31:08.2280172024-03-26 10:49:23.4116812024-03-26 11:08:25.22801722237
232024-03-26 10:32:08.593895_B51_LBR1_S43662024-03-26 10:32:08.5938952024-03-26 10:49:23.4116812024-03-26 11:09:56.59389522268
242024-03-26 10:34:08.147077_B51_LBR1_S43662024-03-26 10:34:08.1470772024-03-26 10:49:23.4116812024-03-26 11:03:07.14707721739
252024-03-26 10:35:06.721532_B51_LBR1_S43662024-03-26 10:35:06.7215322024-03-26 10:49:23.4116812024-03-26 11:03:06.72153221680
262024-03-26 10:36:08.939174_B51_LBR1_S43662024-03-26 10:36:08.9391742024-03-26 10:49:23.4116812024-03-26 10:55:26.93917421158
272024-03-26 10:38:07.059568_B51_LBR1_S43662024-03-26 10:38:07.0595682024-03-26 10:49:23.4116812024-03-26 10:54:28.0595682981
282024-03-26 10:39:07.192176_B51_LBR1_S43662024-03-26 10:39:07.1921762024-03-26 10:49:23.4116812024-03-26 10:54:09.1921762902
292024-03-26 10:40:08.062428_B51_LBR1_S43662024-03-26 10:40:08.0624282024-03-26 10:49:23.4116812024-03-26 10:52:33.0624282745
302024-03-26 10:41:08.947750_B51_LBR1_S43662024-03-26 10:41:08.9477502024-03-26 10:49:23.4116812024-03-26 10:52:36.9477502688
312024-03-26 10:43:08.450481_B51_LBR1_S43662024-03-26 10:43:08.4504812024-03-26 10:49:23.4116812024-03-26 10:51:55.4504812527
322024-03-26 10:44:08.198504_B51_LBR1_S43662024-03-26 10:44:08.1985042024-03-26 10:49:23.4116812024-03-26 10:51:56.1985042468
332024-03-26 10:45:08.323245_B51_LBR1_S43662024-03-26 10:45:08.3232452024-03-26 10:49:23.4116812024-03-26 10:51:55.3232452407
342024-03-26 10:46:08.687412_B51_LBR1_S43662024-03-26 10:46:08.6874122024-03-26 10:49:23.4116812024-03-26 10:51:23.6874122315
352024-03-26 10:47:09.993700_B51_LBR1_S43662024-03-26 10:47:09.9937002024-03-26 10:49:23.4116812024-03-26 10:50:42.9937002213
362024-03-26 10:48:08.170464_B51_LBR1_S43662024-03-26 10:48:08.1704642024-03-26 10:49:23.4116812024-03-26 10:50:27.1704642139
372024-03-26 10:49:07.411681_B51_LBR1_S43662024-03-26 10:49:07.4116812024-03-26 10:49:23.4116812024-03-26 10:49:23.411681216
382024-03-26 10:50:08.062525_B51_LBR1_S43662024-03-26 10:50:08.0625252024-03-26 10:49:23.4116812024-03-26 10:50:08.06252520
392024-03-26 11:51:09.085827_B51_LBR1_S43662024-03-26 11:51:09.0858272024-03-26 12:15:09.3108982024-03-26 12:18:49.08582731660
402024-03-26 11:52:09.190452_B51_LBR1_S43662024-03-26 11:52:09.1904522024-03-26 12:15:09.3108982024-03-26 12:19:54.19045231665
412024-03-26 11:53:08.698945_B51_LBR1_S43662024-03-26 11:53:08.6989452024-03-26 12:15:09.3108982024-03-26 12:23:53.69894531845
422024-03-26 11:54:08.804854_B51_LBR1_S43662024-03-26 11:54:08.8048542024-03-26 12:15:09.3108982024-03-26 12:26:07.80485431919
432024-03-26 11:55:08.948127_B51_LBR1_S43662024-03-26 11:55:08.9481272024-03-26 12:15:09.3108982024-03-26 12:26:06.94812731858
442024-03-26 11:56:10.271055_B51_LBR1_S43662024-03-26 11:56:10.2710552024-03-26 12:15:09.3108982024-03-26 12:27:19.27105531869
452024-03-26 11:57:08.365116_B51_LBR1_S43662024-03-26 11:57:08.3651162024-03-26 12:15:09.3108982024-03-26 12:28:25.36511631877
462024-03-26 11:58:08.667291_B51_LBR1_S43662024-03-26 11:58:08.6672912024-03-26 12:15:09.3108982024-03-26 12:29:35.66729131887
472024-03-26 12:00:08.855249_B51_LBR1_S43662024-03-26 12:00:08.8552492024-03-26 12:15:09.3108982024-03-26 12:21:28.85524931280
482024-03-26 12:01:08.169960_B51_LBR1_S43662024-03-26 12:01:08.1699602024-03-26 12:15:09.3108982024-03-26 12:21:28.16996031220
492024-03-26 12:02:08.420660_B51_LBR1_S43662024-03-26 12:02:08.4206602024-03-26 12:15:09.3108982024-03-26 12:20:54.42066031126
\n", "
" ], "text/plain": [ " PK datetime \\\n", "0 2024-03-26 09:07:08.486404_B51_LBR1_S4366 2024-03-26 09:07:08.486404 \n", "1 2024-03-26 09:08:08.881899_B51_LBR1_S4366 2024-03-26 09:08:08.881899 \n", "2 2024-03-26 09:09:08.755131_B51_LBR1_S4366 2024-03-26 09:09:08.755131 \n", "3 2024-03-26 09:10:08.875354_B51_LBR1_S4366 2024-03-26 09:10:08.875354 \n", "4 2024-03-26 09:11:10.677695_B51_LBR1_S4366 2024-03-26 09:11:10.677695 \n", "5 2024-03-26 09:12:08.822076_B51_LBR1_S4366 2024-03-26 09:12:08.822076 \n", "6 2024-03-26 09:13:09.160954_B51_LBR1_S4366 2024-03-26 09:13:09.160954 \n", "7 2024-03-26 09:14:08.414499_B51_LBR1_S4366 2024-03-26 09:14:08.414499 \n", "8 2024-03-26 09:15:09.979541_B51_LBR1_S4366 2024-03-26 09:15:09.979541 \n", "9 2024-03-26 09:17:07.807332_B51_LBR1_S4366 2024-03-26 09:17:07.807332 \n", "10 2024-03-26 09:18:09.292909_B51_LBR1_S4366 2024-03-26 09:18:09.292909 \n", "11 2024-03-26 09:19:09.264869_B51_LBR1_S4366 2024-03-26 09:19:09.264869 \n", "12 2024-03-26 09:20:08.308902_B51_LBR1_S4366 2024-03-26 09:20:08.308902 \n", "13 2024-03-26 09:22:08.748819_B51_LBR1_S4366 2024-03-26 09:22:08.748819 \n", "14 2024-03-26 09:23:08.256559_B51_LBR1_S4366 2024-03-26 09:23:08.256559 \n", "15 2024-03-26 09:24:08.643877_B51_LBR1_S4366 2024-03-26 09:24:08.643877 \n", "16 2024-03-26 09:25:09.036527_B51_LBR1_S4366 2024-03-26 09:25:09.036527 \n", "17 2024-03-26 09:26:08.257810_B51_LBR1_S4366 2024-03-26 09:26:08.257810 \n", "18 2024-03-26 09:27:08.674862_B51_LBR1_S4366 2024-03-26 09:27:08.674862 \n", "19 2024-03-26 10:28:06.759858_B51_LBR1_S4366 2024-03-26 10:28:06.759858 \n", "20 2024-03-26 10:29:08.164543_B51_LBR1_S4366 2024-03-26 10:29:08.164543 \n", "21 2024-03-26 10:30:08.351999_B51_LBR1_S4366 2024-03-26 10:30:08.351999 \n", "22 2024-03-26 10:31:08.228017_B51_LBR1_S4366 2024-03-26 10:31:08.228017 \n", "23 2024-03-26 10:32:08.593895_B51_LBR1_S4366 2024-03-26 10:32:08.593895 \n", "24 2024-03-26 10:34:08.147077_B51_LBR1_S4366 2024-03-26 10:34:08.147077 \n", "25 2024-03-26 10:35:06.721532_B51_LBR1_S4366 2024-03-26 10:35:06.721532 \n", "26 2024-03-26 10:36:08.939174_B51_LBR1_S4366 2024-03-26 10:36:08.939174 \n", "27 2024-03-26 10:38:07.059568_B51_LBR1_S4366 2024-03-26 10:38:07.059568 \n", "28 2024-03-26 10:39:07.192176_B51_LBR1_S4366 2024-03-26 10:39:07.192176 \n", "29 2024-03-26 10:40:08.062428_B51_LBR1_S4366 2024-03-26 10:40:08.062428 \n", "30 2024-03-26 10:41:08.947750_B51_LBR1_S4366 2024-03-26 10:41:08.947750 \n", "31 2024-03-26 10:43:08.450481_B51_LBR1_S4366 2024-03-26 10:43:08.450481 \n", "32 2024-03-26 10:44:08.198504_B51_LBR1_S4366 2024-03-26 10:44:08.198504 \n", "33 2024-03-26 10:45:08.323245_B51_LBR1_S4366 2024-03-26 10:45:08.323245 \n", "34 2024-03-26 10:46:08.687412_B51_LBR1_S4366 2024-03-26 10:46:08.687412 \n", "35 2024-03-26 10:47:09.993700_B51_LBR1_S4366 2024-03-26 10:47:09.993700 \n", "36 2024-03-26 10:48:08.170464_B51_LBR1_S4366 2024-03-26 10:48:08.170464 \n", "37 2024-03-26 10:49:07.411681_B51_LBR1_S4366 2024-03-26 10:49:07.411681 \n", "38 2024-03-26 10:50:08.062525_B51_LBR1_S4366 2024-03-26 10:50:08.062525 \n", "39 2024-03-26 11:51:09.085827_B51_LBR1_S4366 2024-03-26 11:51:09.085827 \n", "40 2024-03-26 11:52:09.190452_B51_LBR1_S4366 2024-03-26 11:52:09.190452 \n", "41 2024-03-26 11:53:08.698945_B51_LBR1_S4366 2024-03-26 11:53:08.698945 \n", "42 2024-03-26 11:54:08.804854_B51_LBR1_S4366 2024-03-26 11:54:08.804854 \n", "43 2024-03-26 11:55:08.948127_B51_LBR1_S4366 2024-03-26 11:55:08.948127 \n", "44 2024-03-26 11:56:10.271055_B51_LBR1_S4366 2024-03-26 11:56:10.271055 \n", "45 2024-03-26 11:57:08.365116_B51_LBR1_S4366 2024-03-26 11:57:08.365116 \n", "46 2024-03-26 11:58:08.667291_B51_LBR1_S4366 2024-03-26 11:58:08.667291 \n", "47 2024-03-26 12:00:08.855249_B51_LBR1_S4366 2024-03-26 12:00:08.855249 \n", "48 2024-03-26 12:01:08.169960_B51_LBR1_S4366 2024-03-26 12:01:08.169960 \n", "49 2024-03-26 12:02:08.420660_B51_LBR1_S4366 2024-03-26 12:02:08.420660 \n", "\n", " reliable_arrival_date predict_arrival_date bloque_id \\\n", "0 2024-03-26 09:27:08.674862 2024-03-26 09:32:44.486404 1 \n", "1 2024-03-26 09:27:08.674862 2024-03-26 09:32:44.881899 1 \n", "2 2024-03-26 09:27:08.674862 2024-03-26 09:32:09.755131 1 \n", "3 2024-03-26 09:27:08.674862 2024-03-26 09:32:09.875354 1 \n", "4 2024-03-26 09:27:08.674862 2024-03-26 09:31:49.677695 1 \n", "5 2024-03-26 09:27:08.674862 2024-03-26 09:31:53.822076 1 \n", "6 2024-03-26 09:27:08.674862 2024-03-26 09:30:28.160954 1 \n", "7 2024-03-26 09:27:08.674862 2024-03-26 09:30:00.414499 1 \n", "8 2024-03-26 09:27:08.674862 2024-03-26 09:29:15.979541 1 \n", "9 2024-03-26 09:27:08.674862 2024-03-26 09:28:29.807332 1 \n", "10 2024-03-26 09:27:08.674862 2024-03-26 09:28:29.292909 1 \n", "11 2024-03-26 09:27:08.674862 2024-03-26 09:28:29.264869 1 \n", "12 2024-03-26 09:27:08.674862 2024-03-26 09:28:28.308902 1 \n", "13 2024-03-26 09:27:08.674862 2024-03-26 09:28:29.748819 1 \n", "14 2024-03-26 09:27:08.674862 2024-03-26 09:28:29.256559 1 \n", "15 2024-03-26 09:27:08.674862 2024-03-26 09:28:00.643877 1 \n", "16 2024-03-26 09:27:08.674862 2024-03-26 09:27:48.036527 1 \n", "17 2024-03-26 09:27:08.674862 2024-03-26 09:27:48.257810 1 \n", "18 2024-03-26 09:27:08.674862 2024-03-26 09:27:08.674862 1 \n", "19 2024-03-26 10:49:23.411681 2024-03-26 11:06:15.759858 2 \n", "20 2024-03-26 10:49:23.411681 2024-03-26 11:07:21.164543 2 \n", "21 2024-03-26 10:49:23.411681 2024-03-26 11:08:30.351999 2 \n", "22 2024-03-26 10:49:23.411681 2024-03-26 11:08:25.228017 2 \n", "23 2024-03-26 10:49:23.411681 2024-03-26 11:09:56.593895 2 \n", "24 2024-03-26 10:49:23.411681 2024-03-26 11:03:07.147077 2 \n", "25 2024-03-26 10:49:23.411681 2024-03-26 11:03:06.721532 2 \n", "26 2024-03-26 10:49:23.411681 2024-03-26 10:55:26.939174 2 \n", "27 2024-03-26 10:49:23.411681 2024-03-26 10:54:28.059568 2 \n", "28 2024-03-26 10:49:23.411681 2024-03-26 10:54:09.192176 2 \n", "29 2024-03-26 10:49:23.411681 2024-03-26 10:52:33.062428 2 \n", "30 2024-03-26 10:49:23.411681 2024-03-26 10:52:36.947750 2 \n", "31 2024-03-26 10:49:23.411681 2024-03-26 10:51:55.450481 2 \n", "32 2024-03-26 10:49:23.411681 2024-03-26 10:51:56.198504 2 \n", "33 2024-03-26 10:49:23.411681 2024-03-26 10:51:55.323245 2 \n", "34 2024-03-26 10:49:23.411681 2024-03-26 10:51:23.687412 2 \n", "35 2024-03-26 10:49:23.411681 2024-03-26 10:50:42.993700 2 \n", "36 2024-03-26 10:49:23.411681 2024-03-26 10:50:27.170464 2 \n", "37 2024-03-26 10:49:23.411681 2024-03-26 10:49:23.411681 2 \n", "38 2024-03-26 10:49:23.411681 2024-03-26 10:50:08.062525 2 \n", "39 2024-03-26 12:15:09.310898 2024-03-26 12:18:49.085827 3 \n", "40 2024-03-26 12:15:09.310898 2024-03-26 12:19:54.190452 3 \n", "41 2024-03-26 12:15:09.310898 2024-03-26 12:23:53.698945 3 \n", "42 2024-03-26 12:15:09.310898 2024-03-26 12:26:07.804854 3 \n", "43 2024-03-26 12:15:09.310898 2024-03-26 12:26:06.948127 3 \n", "44 2024-03-26 12:15:09.310898 2024-03-26 12:27:19.271055 3 \n", "45 2024-03-26 12:15:09.310898 2024-03-26 12:28:25.365116 3 \n", "46 2024-03-26 12:15:09.310898 2024-03-26 12:29:35.667291 3 \n", "47 2024-03-26 12:15:09.310898 2024-03-26 12:21:28.855249 3 \n", "48 2024-03-26 12:15:09.310898 2024-03-26 12:21:28.169960 3 \n", "49 2024-03-26 12:15:09.310898 2024-03-26 12:20:54.420660 3 \n", "\n", " estimateArrive \n", "0 1536 \n", "1 1476 \n", "2 1381 \n", "3 1321 \n", "4 1239 \n", "5 1185 \n", "6 1039 \n", "7 952 \n", "8 846 \n", "9 682 \n", "10 620 \n", "11 560 \n", "12 500 \n", "13 381 \n", "14 321 \n", "15 232 \n", "16 159 \n", "17 100 \n", "18 0 \n", "19 2289 \n", "20 2293 \n", "21 2302 \n", "22 2237 \n", "23 2268 \n", "24 1739 \n", "25 1680 \n", "26 1158 \n", "27 981 \n", "28 902 \n", "29 745 \n", "30 688 \n", "31 527 \n", "32 468 \n", "33 407 \n", "34 315 \n", "35 213 \n", "36 139 \n", "37 16 \n", "38 0 \n", "39 1660 \n", "40 1665 \n", "41 1845 \n", "42 1919 \n", "43 1858 \n", "44 1869 \n", "45 1877 \n", "46 1887 \n", "47 1280 \n", "48 1220 \n", "49 1126 " ] }, "execution_count": 27, "metadata": {}, "output_type": "execute_result" } ], "source": [ "final_sample_data.to_pandas().head(20)" ] }, { "cell_type": "code", "execution_count": 28, "metadata": {}, "outputs": [], "source": [ "def KPI_fun(date_true,date_pred):\n", " #date_true = datetime.strptime(date_true, '%Y-%m-%d %H:%M:%S.%f')\n", " #date_pred = datetime.strptime(date_pred, '%Y-%m-%d %H:%M:%S.%f')\n", " \n", " dif = max(date_true,date_pred) - min(date_true,date_pred)\n", " return dif.total_seconds()" ] }, { "cell_type": "code", "execution_count": 29, "metadata": {}, "outputs": [], "source": [ "KPI_data = final_sample_data.with_columns(pl.struct(pl.col('reliable_arrival_date'),pl.col('predict_arrival_date')).alias('struct').map_elements(lambda x: KPI_fun(x['reliable_arrival_date'], x['predict_arrival_date'])).alias('KPI_value'))" ] }, { "cell_type": "code", "execution_count": 30, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (35, 7)
PKdatetimereliable_arrival_datepredict_arrival_datebloque_idestimateArriveKPI_value
strdatetime[μs]datetime[μs]datetime[μs]i64i64f64
"2024-03-26 09:…2024-03-26 09:07:08.4864042024-03-26 09:27:08.6748622024-03-26 09:32:44.48640411536335.811542
"2024-03-26 09:…2024-03-26 09:08:08.8818992024-03-26 09:27:08.6748622024-03-26 09:32:44.88189911476336.207037
"2024-03-26 09:…2024-03-26 09:09:08.7551312024-03-26 09:27:08.6748622024-03-26 09:32:09.75513111381301.080269
"2024-03-26 09:…2024-03-26 09:10:08.8753542024-03-26 09:27:08.6748622024-03-26 09:32:09.87535411321301.200492
"2024-03-26 09:…2024-03-26 09:11:10.6776952024-03-26 09:27:08.6748622024-03-26 09:31:49.67769511239281.002833
"2024-03-26 10:…2024-03-26 10:41:08.9477502024-03-26 10:49:23.4116812024-03-26 10:52:36.9477502688193.536069
"2024-03-26 10:…2024-03-26 10:43:08.4504812024-03-26 10:49:23.4116812024-03-26 10:51:55.4504812527152.0388
"2024-03-26 10:…2024-03-26 10:44:08.1985042024-03-26 10:49:23.4116812024-03-26 10:51:56.1985042468152.786823
"2024-03-26 10:…2024-03-26 10:45:08.3232452024-03-26 10:49:23.4116812024-03-26 10:51:55.3232452407151.911564
"2024-03-26 10:…2024-03-26 10:46:08.6874122024-03-26 10:49:23.4116812024-03-26 10:51:23.6874122315120.275731
" ], "text/plain": [ "shape: (35, 7)\n", "┌──────────────┬──────────────┬──────────────┬──────────────┬───────────┬─────────────┬────────────┐\n", "│ PK ┆ datetime ┆ reliable_arr ┆ predict_arri ┆ bloque_id ┆ estimateArr ┆ KPI_value │\n", "│ --- ┆ --- ┆ ival_date ┆ val_date ┆ --- ┆ ive ┆ --- │\n", "│ str ┆ datetime[μs] ┆ --- ┆ --- ┆ i64 ┆ --- ┆ f64 │\n", "│ ┆ ┆ datetime[μs] ┆ datetime[μs] ┆ ┆ i64 ┆ │\n", "╞══════════════╪══════════════╪══════════════╪══════════════╪═══════════╪═════════════╪════════════╡\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 1 ┆ 1536 ┆ 335.811542 │\n", "│ 09:07:08.486 ┆ 09:07:08.486 ┆ 09:27:08.674 ┆ 09:32:44.486 ┆ ┆ ┆ │\n", "│ 404_B51_L… ┆ 404 ┆ 862 ┆ 404 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 1 ┆ 1476 ┆ 336.207037 │\n", "│ 09:08:08.881 ┆ 09:08:08.881 ┆ 09:27:08.674 ┆ 09:32:44.881 ┆ ┆ ┆ │\n", "│ 899_B51_L… ┆ 899 ┆ 862 ┆ 899 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 1 ┆ 1381 ┆ 301.080269 │\n", "│ 09:09:08.755 ┆ 09:09:08.755 ┆ 09:27:08.674 ┆ 09:32:09.755 ┆ ┆ ┆ │\n", "│ 131_B51_L… ┆ 131 ┆ 862 ┆ 131 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 1 ┆ 1321 ┆ 301.200492 │\n", "│ 09:10:08.875 ┆ 09:10:08.875 ┆ 09:27:08.674 ┆ 09:32:09.875 ┆ ┆ ┆ │\n", "│ 354_B51_L… ┆ 354 ┆ 862 ┆ 354 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 1 ┆ 1239 ┆ 281.002833 │\n", "│ 09:11:10.677 ┆ 09:11:10.677 ┆ 09:27:08.674 ┆ 09:31:49.677 ┆ ┆ ┆ │\n", "│ 695_B51_L… ┆ 695 ┆ 862 ┆ 695 ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2 ┆ 688 ┆ 193.536069 │\n", "│ 10:41:08.947 ┆ 10:41:08.947 ┆ 10:49:23.411 ┆ 10:52:36.947 ┆ ┆ ┆ │\n", "│ 750_B51_L… ┆ 750 ┆ 681 ┆ 750 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2 ┆ 527 ┆ 152.0388 │\n", "│ 10:43:08.450 ┆ 10:43:08.450 ┆ 10:49:23.411 ┆ 10:51:55.450 ┆ ┆ ┆ │\n", "│ 481_B51_L… ┆ 481 ┆ 681 ┆ 481 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2 ┆ 468 ┆ 152.786823 │\n", "│ 10:44:08.198 ┆ 10:44:08.198 ┆ 10:49:23.411 ┆ 10:51:56.198 ┆ ┆ ┆ │\n", "│ 504_B51_L… ┆ 504 ┆ 681 ┆ 504 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2 ┆ 407 ┆ 151.911564 │\n", "│ 10:45:08.323 ┆ 10:45:08.323 ┆ 10:49:23.411 ┆ 10:51:55.323 ┆ ┆ ┆ │\n", "│ 245_B51_L… ┆ 245 ┆ 681 ┆ 245 ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2 ┆ 315 ┆ 120.275731 │\n", "│ 10:46:08.687 ┆ 10:46:08.687 ┆ 10:49:23.411 ┆ 10:51:23.687 ┆ ┆ ┆ │\n", "│ 412_B51_L… ┆ 412 ┆ 681 ┆ 412 ┆ ┆ ┆ │\n", "└──────────────┴──────────────┴──────────────┴──────────────┴───────────┴─────────────┴────────────┘" ] }, "execution_count": 30, "metadata": {}, "output_type": "execute_result" } ], "source": [ "KPI_data.head(35)" ] }, { "cell_type": "code", "execution_count": 31, "metadata": {}, "outputs": [], "source": [ "a = KPI_data.to_pandas()[['estimateArrive','KPI_value','reliable_arrival_date','predict_arrival_date','datetime']]" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "a = KPI_data.to_pandas().iloc[0:19][['estimateArrive','KPI_value','reliable_arrival_date','predict_arrival_date','datetime']]" ] }, { "cell_type": "code", "execution_count": 34, "metadata": {}, "outputs": [], "source": [ "a['reliable_estimateArrive'] = a['reliable_arrival_date'] - a['datetime']\n", "a['reliable_estimateArrive'] = abs(pd.to_timedelta(a['reliable_estimateArrive']).dt.total_seconds())" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
estimateArriveKPI_valuereliable_arrival_datepredict_arrival_datedatetimereliable_estimateArrive
01536335.8115422024-03-26 09:27:08.6748622024-03-26 09:32:44.4864042024-03-26 09:07:08.4864041200.188458
11476336.2070372024-03-26 09:27:08.6748622024-03-26 09:32:44.8818992024-03-26 09:08:08.8818991139.792963
21381301.0802692024-03-26 09:27:08.6748622024-03-26 09:32:09.7551312024-03-26 09:09:08.7551311079.919731
31321301.2004922024-03-26 09:27:08.6748622024-03-26 09:32:09.8753542024-03-26 09:10:08.8753541019.799508
41239281.0028332024-03-26 09:27:08.6748622024-03-26 09:31:49.6776952024-03-26 09:11:10.677695957.997167
51185285.1472142024-03-26 09:27:08.6748622024-03-26 09:31:53.8220762024-03-26 09:12:08.822076899.852786
61039199.4860922024-03-26 09:27:08.6748622024-03-26 09:30:28.1609542024-03-26 09:13:09.160954839.513908
7952171.7396372024-03-26 09:27:08.6748622024-03-26 09:30:00.4144992024-03-26 09:14:08.414499780.260363
8846127.3046792024-03-26 09:27:08.6748622024-03-26 09:29:15.9795412024-03-26 09:15:09.979541718.695321
968281.1324702024-03-26 09:27:08.6748622024-03-26 09:28:29.8073322024-03-26 09:17:07.807332600.867530
1062080.6180472024-03-26 09:27:08.6748622024-03-26 09:28:29.2929092024-03-26 09:18:09.292909539.381953
1156080.5900072024-03-26 09:27:08.6748622024-03-26 09:28:29.2648692024-03-26 09:19:09.264869479.409993
1250079.6340402024-03-26 09:27:08.6748622024-03-26 09:28:28.3089022024-03-26 09:20:08.308902420.365960
1338181.0739572024-03-26 09:27:08.6748622024-03-26 09:28:29.7488192024-03-26 09:22:08.748819299.926043
1432180.5816972024-03-26 09:27:08.6748622024-03-26 09:28:29.2565592024-03-26 09:23:08.256559240.418303
1523251.9690152024-03-26 09:27:08.6748622024-03-26 09:28:00.6438772024-03-26 09:24:08.643877180.030985
1615939.3616652024-03-26 09:27:08.6748622024-03-26 09:27:48.0365272024-03-26 09:25:09.036527119.638335
1710039.5829482024-03-26 09:27:08.6748622024-03-26 09:27:48.2578102024-03-26 09:26:08.25781060.417052
1800.0000002024-03-26 09:27:08.6748622024-03-26 09:27:08.6748622024-03-26 09:27:08.6748620.000000
\n", "
" ], "text/plain": [ " estimateArrive KPI_value reliable_arrival_date \\\n", "0 1536 335.811542 2024-03-26 09:27:08.674862 \n", "1 1476 336.207037 2024-03-26 09:27:08.674862 \n", "2 1381 301.080269 2024-03-26 09:27:08.674862 \n", "3 1321 301.200492 2024-03-26 09:27:08.674862 \n", "4 1239 281.002833 2024-03-26 09:27:08.674862 \n", "5 1185 285.147214 2024-03-26 09:27:08.674862 \n", "6 1039 199.486092 2024-03-26 09:27:08.674862 \n", "7 952 171.739637 2024-03-26 09:27:08.674862 \n", "8 846 127.304679 2024-03-26 09:27:08.674862 \n", "9 682 81.132470 2024-03-26 09:27:08.674862 \n", "10 620 80.618047 2024-03-26 09:27:08.674862 \n", "11 560 80.590007 2024-03-26 09:27:08.674862 \n", "12 500 79.634040 2024-03-26 09:27:08.674862 \n", "13 381 81.073957 2024-03-26 09:27:08.674862 \n", "14 321 80.581697 2024-03-26 09:27:08.674862 \n", "15 232 51.969015 2024-03-26 09:27:08.674862 \n", "16 159 39.361665 2024-03-26 09:27:08.674862 \n", "17 100 39.582948 2024-03-26 09:27:08.674862 \n", "18 0 0.000000 2024-03-26 09:27:08.674862 \n", "\n", " predict_arrival_date datetime \\\n", "0 2024-03-26 09:32:44.486404 2024-03-26 09:07:08.486404 \n", "1 2024-03-26 09:32:44.881899 2024-03-26 09:08:08.881899 \n", "2 2024-03-26 09:32:09.755131 2024-03-26 09:09:08.755131 \n", "3 2024-03-26 09:32:09.875354 2024-03-26 09:10:08.875354 \n", "4 2024-03-26 09:31:49.677695 2024-03-26 09:11:10.677695 \n", "5 2024-03-26 09:31:53.822076 2024-03-26 09:12:08.822076 \n", "6 2024-03-26 09:30:28.160954 2024-03-26 09:13:09.160954 \n", "7 2024-03-26 09:30:00.414499 2024-03-26 09:14:08.414499 \n", "8 2024-03-26 09:29:15.979541 2024-03-26 09:15:09.979541 \n", "9 2024-03-26 09:28:29.807332 2024-03-26 09:17:07.807332 \n", "10 2024-03-26 09:28:29.292909 2024-03-26 09:18:09.292909 \n", "11 2024-03-26 09:28:29.264869 2024-03-26 09:19:09.264869 \n", "12 2024-03-26 09:28:28.308902 2024-03-26 09:20:08.308902 \n", "13 2024-03-26 09:28:29.748819 2024-03-26 09:22:08.748819 \n", "14 2024-03-26 09:28:29.256559 2024-03-26 09:23:08.256559 \n", "15 2024-03-26 09:28:00.643877 2024-03-26 09:24:08.643877 \n", "16 2024-03-26 09:27:48.036527 2024-03-26 09:25:09.036527 \n", "17 2024-03-26 09:27:48.257810 2024-03-26 09:26:08.257810 \n", "18 2024-03-26 09:27:08.674862 2024-03-26 09:27:08.674862 \n", "\n", " reliable_estimateArrive \n", "0 1200.188458 \n", "1 1139.792963 \n", "2 1079.919731 \n", "3 1019.799508 \n", "4 957.997167 \n", "5 899.852786 \n", "6 839.513908 \n", "7 780.260363 \n", "8 718.695321 \n", "9 600.867530 \n", "10 539.381953 \n", "11 479.409993 \n", "12 420.365960 \n", "13 299.926043 \n", "14 240.418303 \n", "15 180.030985 \n", "16 119.638335 \n", "17 60.417052 \n", "18 0.000000 " ] }, "execution_count": 35, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
estimateArriveKPI_valuereliable_arrival_datepredict_arrival_datedatetimereliable_estimateArriveMAPE
01536335.8115422024-03-26 09:27:08.6748622024-03-26 09:32:44.4864042024-03-26 09:07:08.4864041200.18845827.979901
11476336.2070372024-03-26 09:27:08.6748622024-03-26 09:32:44.8818992024-03-26 09:08:08.8818991139.79296329.497202
21381301.0802692024-03-26 09:27:08.6748622024-03-26 09:32:09.7551312024-03-26 09:09:08.7551311079.91973127.879875
31321301.2004922024-03-26 09:27:08.6748622024-03-26 09:32:09.8753542024-03-26 09:10:08.8753541019.79950829.535265
41239281.0028332024-03-26 09:27:08.6748622024-03-26 09:31:49.6776952024-03-26 09:11:10.677695957.99716729.332324
51185285.1472142024-03-26 09:27:08.6748622024-03-26 09:31:53.8220762024-03-26 09:12:08.822076899.85278631.688207
61039199.4860922024-03-26 09:27:08.6748622024-03-26 09:30:28.1609542024-03-26 09:13:09.160954839.51390823.762095
7952171.7396372024-03-26 09:27:08.6748622024-03-26 09:30:00.4144992024-03-26 09:14:08.414499780.26036322.010555
8846127.3046792024-03-26 09:27:08.6748622024-03-26 09:29:15.9795412024-03-26 09:15:09.979541718.69532117.713303
968281.1324702024-03-26 09:27:08.6748622024-03-26 09:28:29.8073322024-03-26 09:17:07.807332600.86753013.502555
1062080.6180472024-03-26 09:27:08.6748622024-03-26 09:28:29.2929092024-03-26 09:18:09.292909539.38195314.946375
1156080.5900072024-03-26 09:27:08.6748622024-03-26 09:28:29.2648692024-03-26 09:19:09.264869479.40999316.810248
1250079.6340402024-03-26 09:27:08.6748622024-03-26 09:28:28.3089022024-03-26 09:20:08.308902420.36596018.943979
1338181.0739572024-03-26 09:27:08.6748622024-03-26 09:28:29.7488192024-03-26 09:22:08.748819299.92604327.031316
1432180.5816972024-03-26 09:27:08.6748622024-03-26 09:28:29.2565592024-03-26 09:23:08.256559240.41830333.517289
1523251.9690152024-03-26 09:27:08.6748622024-03-26 09:28:00.6438772024-03-26 09:24:08.643877180.03098528.866706
1615939.3616652024-03-26 09:27:08.6748622024-03-26 09:27:48.0365272024-03-26 09:25:09.036527119.63833532.900546
1710039.5829482024-03-26 09:27:08.6748622024-03-26 09:27:48.2578102024-03-26 09:26:08.25781060.41705265.516186
1800.0000002024-03-26 09:27:08.6748622024-03-26 09:27:08.6748622024-03-26 09:27:08.6748620.000000NaN
\n", "
" ], "text/plain": [ " estimateArrive KPI_value reliable_arrival_date \\\n", "0 1536 335.811542 2024-03-26 09:27:08.674862 \n", "1 1476 336.207037 2024-03-26 09:27:08.674862 \n", "2 1381 301.080269 2024-03-26 09:27:08.674862 \n", "3 1321 301.200492 2024-03-26 09:27:08.674862 \n", "4 1239 281.002833 2024-03-26 09:27:08.674862 \n", "5 1185 285.147214 2024-03-26 09:27:08.674862 \n", "6 1039 199.486092 2024-03-26 09:27:08.674862 \n", "7 952 171.739637 2024-03-26 09:27:08.674862 \n", "8 846 127.304679 2024-03-26 09:27:08.674862 \n", "9 682 81.132470 2024-03-26 09:27:08.674862 \n", "10 620 80.618047 2024-03-26 09:27:08.674862 \n", "11 560 80.590007 2024-03-26 09:27:08.674862 \n", "12 500 79.634040 2024-03-26 09:27:08.674862 \n", "13 381 81.073957 2024-03-26 09:27:08.674862 \n", "14 321 80.581697 2024-03-26 09:27:08.674862 \n", "15 232 51.969015 2024-03-26 09:27:08.674862 \n", "16 159 39.361665 2024-03-26 09:27:08.674862 \n", "17 100 39.582948 2024-03-26 09:27:08.674862 \n", "18 0 0.000000 2024-03-26 09:27:08.674862 \n", "\n", " predict_arrival_date datetime \\\n", "0 2024-03-26 09:32:44.486404 2024-03-26 09:07:08.486404 \n", "1 2024-03-26 09:32:44.881899 2024-03-26 09:08:08.881899 \n", "2 2024-03-26 09:32:09.755131 2024-03-26 09:09:08.755131 \n", "3 2024-03-26 09:32:09.875354 2024-03-26 09:10:08.875354 \n", "4 2024-03-26 09:31:49.677695 2024-03-26 09:11:10.677695 \n", "5 2024-03-26 09:31:53.822076 2024-03-26 09:12:08.822076 \n", "6 2024-03-26 09:30:28.160954 2024-03-26 09:13:09.160954 \n", "7 2024-03-26 09:30:00.414499 2024-03-26 09:14:08.414499 \n", "8 2024-03-26 09:29:15.979541 2024-03-26 09:15:09.979541 \n", "9 2024-03-26 09:28:29.807332 2024-03-26 09:17:07.807332 \n", "10 2024-03-26 09:28:29.292909 2024-03-26 09:18:09.292909 \n", "11 2024-03-26 09:28:29.264869 2024-03-26 09:19:09.264869 \n", "12 2024-03-26 09:28:28.308902 2024-03-26 09:20:08.308902 \n", "13 2024-03-26 09:28:29.748819 2024-03-26 09:22:08.748819 \n", "14 2024-03-26 09:28:29.256559 2024-03-26 09:23:08.256559 \n", "15 2024-03-26 09:28:00.643877 2024-03-26 09:24:08.643877 \n", "16 2024-03-26 09:27:48.036527 2024-03-26 09:25:09.036527 \n", "17 2024-03-26 09:27:48.257810 2024-03-26 09:26:08.257810 \n", "18 2024-03-26 09:27:08.674862 2024-03-26 09:27:08.674862 \n", "\n", " reliable_estimateArrive MAPE \n", "0 1200.188458 27.979901 \n", "1 1139.792963 29.497202 \n", "2 1079.919731 27.879875 \n", "3 1019.799508 29.535265 \n", "4 957.997167 29.332324 \n", "5 899.852786 31.688207 \n", "6 839.513908 23.762095 \n", "7 780.260363 22.010555 \n", "8 718.695321 17.713303 \n", "9 600.867530 13.502555 \n", "10 539.381953 14.946375 \n", "11 479.409993 16.810248 \n", "12 420.365960 18.943979 \n", "13 299.926043 27.031316 \n", "14 240.418303 33.517289 \n", "15 180.030985 28.866706 \n", "16 119.638335 32.900546 \n", "17 60.417052 65.516186 \n", "18 0.000000 NaN " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a['MAPE'] = 100*abs((a['estimateArrive'] - a['reliable_estimateArrive'])/a['reliable_estimateArrive'])\n", "a" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "a['MAPE2'] = 100*abs((a['estimateArrive'] - a['reliable_estimateArrive'])/a[['reliable_estimateArrive','estimateArrive']].max(axis=1))" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
estimateArriveKPI_valuereliable_arrival_datepredict_arrival_datedatetimereliable_estimateArriveMAPEMAPE2
01536335.8115422024-03-26 09:27:08.6748622024-03-26 09:32:44.4864042024-03-26 09:07:08.4864041200.18845827.97990121.862731
11476336.2070372024-03-26 09:27:08.6748622024-03-26 09:32:44.8818992024-03-26 09:08:08.8818991139.79296329.49720222.778255
21381301.0802692024-03-26 09:27:08.6748622024-03-26 09:32:09.7551312024-03-26 09:09:08.7551311079.91973127.87987521.801613
31321301.2004922024-03-26 09:27:08.6748622024-03-26 09:32:09.8753542024-03-26 09:10:08.8753541019.79950829.53526522.800946
41239281.0028332024-03-26 09:27:08.6748622024-03-26 09:31:49.6776952024-03-26 09:11:10.677695957.99716729.33232422.679809
51185285.1472142024-03-26 09:27:08.6748622024-03-26 09:31:53.8220762024-03-26 09:12:08.822076899.85278631.68820724.063056
61039199.4860922024-03-26 09:27:08.6748622024-03-26 09:30:28.1609542024-03-26 09:13:09.160954839.51390823.76209519.199816
7952171.7396372024-03-26 09:27:08.6748622024-03-26 09:30:00.4144992024-03-26 09:14:08.414499780.26036322.01055518.039878
8846127.3046792024-03-26 09:27:08.6748622024-03-26 09:29:15.9795412024-03-26 09:15:09.979541718.69532117.71330315.047834
968281.1324702024-03-26 09:27:08.6748622024-03-26 09:28:29.8073322024-03-26 09:17:07.807332600.86753013.50255511.896257
1062080.6180472024-03-26 09:27:08.6748622024-03-26 09:28:29.2929092024-03-26 09:18:09.292909539.38195314.94637513.002911
1156080.5900072024-03-26 09:27:08.6748622024-03-26 09:28:29.2648692024-03-26 09:19:09.264869479.40999316.81024814.391073
1250079.6340402024-03-26 09:27:08.6748622024-03-26 09:28:28.3089022024-03-26 09:20:08.308902420.36596018.94397915.926808
1338181.0739572024-03-26 09:27:08.6748622024-03-26 09:28:29.7488192024-03-26 09:22:08.748819299.92604327.03131621.279254
1432180.5816972024-03-26 09:27:08.6748622024-03-26 09:28:29.2565592024-03-26 09:23:08.256559240.41830333.51728925.103332
1523251.9690152024-03-26 09:27:08.6748622024-03-26 09:28:00.6438772024-03-26 09:24:08.643877180.03098528.86670622.400438
1615939.3616652024-03-26 09:27:08.6748622024-03-26 09:27:48.0365272024-03-26 09:25:09.036527119.63833532.90054624.755764
1710039.5829482024-03-26 09:27:08.6748622024-03-26 09:27:48.2578102024-03-26 09:26:08.25781060.41705265.51618639.582948
1800.0000002024-03-26 09:27:08.6748622024-03-26 09:27:08.6748622024-03-26 09:27:08.6748620.000000NaNNaN
\n", "
" ], "text/plain": [ " estimateArrive KPI_value reliable_arrival_date \\\n", "0 1536 335.811542 2024-03-26 09:27:08.674862 \n", "1 1476 336.207037 2024-03-26 09:27:08.674862 \n", "2 1381 301.080269 2024-03-26 09:27:08.674862 \n", "3 1321 301.200492 2024-03-26 09:27:08.674862 \n", "4 1239 281.002833 2024-03-26 09:27:08.674862 \n", "5 1185 285.147214 2024-03-26 09:27:08.674862 \n", "6 1039 199.486092 2024-03-26 09:27:08.674862 \n", "7 952 171.739637 2024-03-26 09:27:08.674862 \n", "8 846 127.304679 2024-03-26 09:27:08.674862 \n", "9 682 81.132470 2024-03-26 09:27:08.674862 \n", "10 620 80.618047 2024-03-26 09:27:08.674862 \n", "11 560 80.590007 2024-03-26 09:27:08.674862 \n", "12 500 79.634040 2024-03-26 09:27:08.674862 \n", "13 381 81.073957 2024-03-26 09:27:08.674862 \n", "14 321 80.581697 2024-03-26 09:27:08.674862 \n", "15 232 51.969015 2024-03-26 09:27:08.674862 \n", "16 159 39.361665 2024-03-26 09:27:08.674862 \n", "17 100 39.582948 2024-03-26 09:27:08.674862 \n", "18 0 0.000000 2024-03-26 09:27:08.674862 \n", "\n", " predict_arrival_date datetime \\\n", "0 2024-03-26 09:32:44.486404 2024-03-26 09:07:08.486404 \n", "1 2024-03-26 09:32:44.881899 2024-03-26 09:08:08.881899 \n", "2 2024-03-26 09:32:09.755131 2024-03-26 09:09:08.755131 \n", "3 2024-03-26 09:32:09.875354 2024-03-26 09:10:08.875354 \n", "4 2024-03-26 09:31:49.677695 2024-03-26 09:11:10.677695 \n", "5 2024-03-26 09:31:53.822076 2024-03-26 09:12:08.822076 \n", "6 2024-03-26 09:30:28.160954 2024-03-26 09:13:09.160954 \n", "7 2024-03-26 09:30:00.414499 2024-03-26 09:14:08.414499 \n", "8 2024-03-26 09:29:15.979541 2024-03-26 09:15:09.979541 \n", "9 2024-03-26 09:28:29.807332 2024-03-26 09:17:07.807332 \n", "10 2024-03-26 09:28:29.292909 2024-03-26 09:18:09.292909 \n", "11 2024-03-26 09:28:29.264869 2024-03-26 09:19:09.264869 \n", "12 2024-03-26 09:28:28.308902 2024-03-26 09:20:08.308902 \n", "13 2024-03-26 09:28:29.748819 2024-03-26 09:22:08.748819 \n", "14 2024-03-26 09:28:29.256559 2024-03-26 09:23:08.256559 \n", "15 2024-03-26 09:28:00.643877 2024-03-26 09:24:08.643877 \n", "16 2024-03-26 09:27:48.036527 2024-03-26 09:25:09.036527 \n", "17 2024-03-26 09:27:48.257810 2024-03-26 09:26:08.257810 \n", "18 2024-03-26 09:27:08.674862 2024-03-26 09:27:08.674862 \n", "\n", " reliable_estimateArrive MAPE MAPE2 \n", "0 1200.188458 27.979901 21.862731 \n", "1 1139.792963 29.497202 22.778255 \n", "2 1079.919731 27.879875 21.801613 \n", "3 1019.799508 29.535265 22.800946 \n", "4 957.997167 29.332324 22.679809 \n", "5 899.852786 31.688207 24.063056 \n", "6 839.513908 23.762095 19.199816 \n", "7 780.260363 22.010555 18.039878 \n", "8 718.695321 17.713303 15.047834 \n", "9 600.867530 13.502555 11.896257 \n", "10 539.381953 14.946375 13.002911 \n", "11 479.409993 16.810248 14.391073 \n", "12 420.365960 18.943979 15.926808 \n", "13 299.926043 27.031316 21.279254 \n", "14 240.418303 33.517289 25.103332 \n", "15 180.030985 28.866706 22.400438 \n", "16 119.638335 32.900546 24.755764 \n", "17 60.417052 65.516186 39.582948 \n", "18 0.000000 NaN NaN " ] }, "execution_count": 38, "metadata": {}, "output_type": "execute_result" } ], "source": [ "a.head(50)" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [], "source": [ "resultados_MAE = a.groupby('estimateArrive')['KPI_value'].mean()" ] }, { "cell_type": "code", "execution_count": 40, "metadata": {}, "outputs": [], "source": [ "resultados_MAPE = a.groupby('estimateArrive')['MAPE2'].mean()" ] }, { "cell_type": "code", "execution_count": 41, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: nan,\n", " 100: 39.582948,\n", " 159: 24.755764150943396,\n", " 232: 22.400437500000006,\n", " 321: 25.103332398753892,\n", " 381: 21.27925380577428,\n", " 500: 15.926808000000007,\n", " 560: 14.391072678571431,\n", " 620: 13.00291080645162,\n", " 682: 11.89625659824047,\n", " 846: 15.047834397163115,\n", " 952: 18.039877836134455,\n", " 1039: 19.199816361886427,\n", " 1185: 24.06305603375527,\n", " 1239: 22.679808958837775,\n", " 1321: 22.800945647236944,\n", " 1381: 21.80161252715424,\n", " 1476: 22.778254539295386,\n", " 1536: 21.862730598958326}" ] }, "execution_count": 41, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_dict_MAPE = resultados_MAPE.to_dict()\n", "new_dict_MAPE" ] }, { "cell_type": "code", "execution_count": 42, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{0: 0.0,\n", " 100: 39.582948,\n", " 159: 39.361665,\n", " 232: 51.969015,\n", " 321: 80.581697,\n", " 381: 81.073957,\n", " 500: 79.63404,\n", " 560: 80.590007,\n", " 620: 80.618047,\n", " 682: 81.13247,\n", " 846: 127.304679,\n", " 952: 171.739637,\n", " 1039: 199.486092,\n", " 1185: 285.147214,\n", " 1239: 281.002833,\n", " 1321: 301.200492,\n", " 1381: 301.080269,\n", " 1476: 336.207037,\n", " 1536: 335.811542}" ] }, "execution_count": 42, "metadata": {}, "output_type": "execute_result" } ], "source": [ "new_dict_MAE = resultados_MAE.to_dict()\n", "new_dict_MAE" ] }, { "cell_type": "code", "execution_count": 43, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0.0, 0.0),\n", " (1.6666666666666667, 0.6597158000000001),\n", " (2.65, 0.65602775),\n", " (3.8666666666666667, 0.86615025),\n", " (5.35, 1.3430282833333334),\n", " (6.35, 1.3512326166666666),\n", " (8.333333333333334, 1.327234),\n", " (9.333333333333334, 1.3431667833333334),\n", " (10.333333333333334, 1.3436341166666668),\n", " (11.366666666666667, 1.3522078333333334),\n", " (14.1, 2.1217446499999997),\n", " (15.866666666666667, 2.8623272833333333),\n", " (17.316666666666666, 3.3247682000000003),\n", " (19.75, 4.752453566666667),\n", " (20.65, 4.68338055),\n", " (22.016666666666666, 5.0200082),\n", " (23.016666666666666, 5.018004483333333),\n", " (24.6, 5.603450616666667),\n", " (25.6, 5.596859033333333)]" ] }, "execution_count": 43, "metadata": {}, "output_type": "execute_result" } ], "source": [ "MAE = []\n", "for key in new_dict_MAE.keys():\n", " MAE.append((key/60,new_dict_MAE[key]/60))\n", "\n", "MAE" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "[(0.0, nan),\n", " (1.6666666666666667, 39.582948),\n", " (2.65, 24.755764150943396),\n", " (3.8666666666666667, 22.400437500000006),\n", " (5.35, 25.103332398753892),\n", " (6.35, 21.27925380577428),\n", " (8.333333333333334, 15.926808000000007),\n", " (9.333333333333334, 14.391072678571431),\n", " (10.333333333333334, 13.00291080645162),\n", " (11.366666666666667, 11.89625659824047),\n", " (14.1, 15.047834397163115),\n", " (15.866666666666667, 18.039877836134455),\n", " (17.316666666666666, 19.199816361886427),\n", " (19.75, 24.06305603375527),\n", " (20.65, 22.679808958837775),\n", " (22.016666666666666, 22.800945647236944),\n", " (23.016666666666666, 21.80161252715424),\n", " (24.6, 22.778254539295386),\n", " (25.6, 21.862730598958326)]" ] }, "execution_count": 44, "metadata": {}, "output_type": "execute_result" } ], "source": [ "MAPE = []\n", "for key in new_dict_MAPE.keys():\n", " MAPE.append((key/60,new_dict_MAPE[key]))\n", "\n", "MAPE" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "fig, ax1 = plt.subplots()\n", "\n", "# Dibujar el primer gráfico con eje y a la izquierda\n", "ax1.plot([data[0] for data in MAE], [data[1] for data in MAE])\n", "ax1.set_xlabel('Remaining time (minutes)')\n", "ax1.set_ylabel('MAE (minutes)')\n", "\n", "ax2 = ax1.twinx()\n", "ax2.plot([data[0] for data in MAPE], [data[1] for data in MAPE], 'orange')\n", "ax2.set_ylabel('MAPE (%)')\n", "\n", "plt.title('CRTM API estimation error')\n", "plt.gca().invert_xaxis()\n" ] }, { "cell_type": "code", "execution_count": 48, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.plot([data[0] for data in MAPE], [data[1] for data in MAPE])\n", "plt.ylim(0,40)\n", "plt.xlim(-1,27)\n", "plt.gca().invert_xaxis()\n", "plt.grid(True, linestyle='--', linewidth=0.5)\n", "plt.title('CRTM API estimation error')\n", "plt.xlabel('Remaining time (minutes)')\n", "plt.ylabel('MAPE (%)')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 162, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plt.plot([data[0] for data in MAE], [data[1] for data in MAE])\n", "plt.gca().invert_xaxis()\n", "plt.grid(True, linestyle='--', linewidth=0.5)\n", "plt.title('CRTM API estimation error')\n", "plt.xlabel('Remaining time (minutes)')\n", "plt.ylabel('MAE (minutes)')\n", "plt.show()" ] }, { "cell_type": "code", "execution_count": 44, "metadata": {}, "outputs": [], "source": [ "dict = KPI_data.sort('estimateArrive').select('estimateArrive','KPI_value').to_dict()" ] }, { "cell_type": "code", "execution_count": 45, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "plot_KPI(dict)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Para un día generalizando lo anterior" ] }, { "cell_type": "code", "execution_count": 239, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (1_136_086, 14)
PKdatedatetimebuslinestoppositionBusLonpositionBusLatDistanceBusdestinationMinimunFrequencyisHeaddayTypeestimateArrive
strdatedatetime[μs]strstri64f64f64i64stri64u8stri64
"2024-03-26 19:…2024-03-262024-03-26 19:04:07.830005"2592""178"1487-3.69715440.4875282377"MONTECARMELO"60"LA"729
"2024-03-26 11:…2024-03-262024-03-26 11:13:06.952057"8828""9"218-3.64936140.4723211724"SOL SEVILLA"null0"LA"496
"2024-03-26 13:…2024-03-262024-03-26 13:39:08.711052"2515""175"3769-3.69638440.4870486472"LAS TABLAS NOR…90"LA"918
"2024-03-26 13:…2024-03-262024-03-26 13:41:09.490335"8831""171"5911-3.63198240.4689284366"VALDEBEBAS"null0"LA"1159
"2024-03-26 16:…2024-03-262024-03-26 16:30:07.126711"8857""176"3261-3.68764740.4702783590"LAS TABLAS SUR…60"LA"375
"2024-03-26 19:…2024-03-262024-03-26 19:44:09.474926"4737""49"5329-3.70489340.47915281"PITIS"40"LA"137
"2024-03-26 14:…2024-03-262024-03-26 14:26:08.037914"2078""174"3256-3.68516640.4668314128"VALDEBEBAS"70"LA"922
"2024-03-26 09:…2024-03-262024-03-26 09:05:08.090547"8856""176"2653-3.68878940.4674261094"LAS TABLAS SUR…60"LA"553
"2024-03-26 13:…2024-03-262024-03-26 13:47:09.444763"53""BR1"5397-3.69422340.4878138534"VALDEBEBAS"null0"LA"1073
"2024-03-26 08:…2024-03-262024-03-26 08:16:07.762153"5563""67"1604-3.68662440.4765243230"BARRIO PEÑAGRA…90"LA"1322
" ], "text/plain": [ "shape: (1_136_086, 14)\n", "┌─────────────┬────────────┬─────────────┬──────┬───┬─────────────┬────────┬─────────┬─────────────┐\n", "│ PK ┆ date ┆ datetime ┆ bus ┆ … ┆ MinimunFreq ┆ isHead ┆ dayType ┆ estimateArr │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ ┆ uency ┆ --- ┆ --- ┆ ive │\n", "│ str ┆ date ┆ datetime[μs ┆ str ┆ ┆ --- ┆ u8 ┆ str ┆ --- │\n", "│ ┆ ┆ ] ┆ ┆ ┆ i64 ┆ ┆ ┆ i64 │\n", "╞═════════════╪════════════╪═════════════╪══════╪═══╪═════════════╪════════╪═════════╪═════════════╡\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2592 ┆ … ┆ 6 ┆ 0 ┆ LA ┆ 729 │\n", "│ 19:04:07.83 ┆ ┆ 19:04:07.83 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 0005_B2592… ┆ ┆ 0005 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 8828 ┆ … ┆ null ┆ 0 ┆ LA ┆ 496 │\n", "│ 11:13:06.95 ┆ ┆ 11:13:06.95 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2057_B8828… ┆ ┆ 2057 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2515 ┆ … ┆ 9 ┆ 0 ┆ LA ┆ 918 │\n", "│ 13:39:08.71 ┆ ┆ 13:39:08.71 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 1052_B2515… ┆ ┆ 1052 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 8831 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1159 │\n", "│ 13:41:09.49 ┆ ┆ 13:41:09.49 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 0335_B8831… ┆ ┆ 0335 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 8857 ┆ … ┆ 6 ┆ 0 ┆ LA ┆ 375 │\n", "│ 16:30:07.12 ┆ ┆ 16:30:07.12 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 6711_B8857… ┆ ┆ 6711 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 4737 ┆ … ┆ 4 ┆ 0 ┆ LA ┆ 137 │\n", "│ 19:44:09.47 ┆ ┆ 19:44:09.47 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 4926_B4737… ┆ ┆ 4926 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 2078 ┆ … ┆ 7 ┆ 0 ┆ LA ┆ 922 │\n", "│ 14:26:08.03 ┆ ┆ 14:26:08.03 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 7914_B2078… ┆ ┆ 7914 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 8856 ┆ … ┆ 6 ┆ 0 ┆ LA ┆ 553 │\n", "│ 09:05:08.09 ┆ ┆ 09:05:08.09 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 0547_B8856… ┆ ┆ 0547 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 53 ┆ … ┆ null ┆ 0 ┆ LA ┆ 1073 │\n", "│ 13:47:09.44 ┆ ┆ 13:47:09.44 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 4763_B53_L… ┆ ┆ 4763 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2024-03-26 ┆ 2024-03-26 ┆ 2024-03-26 ┆ 5563 ┆ … ┆ 9 ┆ 0 ┆ LA ┆ 1322 │\n", "│ 08:16:07.76 ┆ ┆ 08:16:07.76 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "│ 2153_B5563… ┆ ┆ 2153 ┆ ┆ ┆ ┆ ┆ ┆ │\n", "└─────────────┴────────────┴─────────────┴──────┴───┴─────────────┴────────┴─────────┴─────────────┘" ] }, "execution_count": 239, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data" ] }, { "cell_type": "code", "execution_count": 230, "metadata": {}, "outputs": [], "source": [ "sample_data_02 = sample_data.group_by(pl.col('bus'),pl.col('line'),pl.col('stop'),pl.col('destination')).count()" ] }, { "cell_type": "code", "execution_count": 231, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "shape: (7_913, 5)
buslinestopdestinationcount
strstri64stru32
"5726""147"29"BARRIO DEL PIL…114
"8835""171"5892"VALDEBEBAS"29
"2550""150"60"VIRGEN CORTIJO…123
"2298""29"214"MANOTERAS"171
"4796""45"66"REINA VICTORIA…207
"577""70"5603"ALSACIA"119
"4794""45"60"REINA VICTORIA…65
"4836""132"1364"HOSPITAL LA PA…108
"539""70"212"ALSACIA"185
"54""BR1"5917"VALDEBEBAS"29
" ], "text/plain": [ "shape: (7_913, 5)\n", "┌──────┬──────┬──────┬──────────────────┬───────┐\n", "│ bus ┆ line ┆ stop ┆ destination ┆ count │\n", "│ --- ┆ --- ┆ --- ┆ --- ┆ --- │\n", "│ str ┆ str ┆ i64 ┆ str ┆ u32 │\n", "╞══════╪══════╪══════╪══════════════════╪═══════╡\n", "│ 5726 ┆ 147 ┆ 29 ┆ BARRIO DEL PILAR ┆ 114 │\n", "│ 8835 ┆ 171 ┆ 5892 ┆ VALDEBEBAS ┆ 29 │\n", "│ 2550 ┆ 150 ┆ 60 ┆ VIRGEN CORTIJO ┆ 123 │\n", "│ 2298 ┆ 29 ┆ 214 ┆ MANOTERAS ┆ 171 │\n", "│ 4796 ┆ 45 ┆ 66 ┆ REINA VICTORIA ┆ 207 │\n", "│ … ┆ … ┆ … ┆ … ┆ … │\n", "│ 577 ┆ 70 ┆ 5603 ┆ ALSACIA ┆ 119 │\n", "│ 4794 ┆ 45 ┆ 60 ┆ REINA VICTORIA ┆ 65 │\n", "│ 4836 ┆ 132 ┆ 1364 ┆ HOSPITAL LA PAZ ┆ 108 │\n", "│ 539 ┆ 70 ┆ 212 ┆ ALSACIA ┆ 185 │\n", "│ 54 ┆ BR1 ┆ 5917 ┆ VALDEBEBAS ┆ 29 │\n", "└──────┴──────┴──────┴──────────────────┴───────┘" ] }, "execution_count": 231, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data_02" ] }, { "cell_type": "code", "execution_count": 232, "metadata": {}, "outputs": [], "source": [ "sample_data_pd = sample_data_02.to_pandas()" ] }, { "cell_type": "code", "execution_count": 233, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
buslinestopdestinationcount
0572614729BARRIO DEL PILAR114
188351715892VALDEBEBAS29
2255015060VIRGEN CORTIJO123
3229829214MANOTERAS171
447964566REINA VICTORIA207
..................
7908577705603ALSACIA119
790947944560REINA VICTORIA65
791048361321364HOSPITAL LA PAZ108
791153970212ALSACIA185
791254BR15917VALDEBEBAS29
\n", "

7913 rows × 5 columns

\n", "
" ], "text/plain": [ " bus line stop destination count\n", "0 5726 147 29 BARRIO DEL PILAR 114\n", "1 8835 171 5892 VALDEBEBAS 29\n", "2 2550 150 60 VIRGEN CORTIJO 123\n", "3 2298 29 214 MANOTERAS 171\n", "4 4796 45 66 REINA VICTORIA 207\n", "... ... ... ... ... ...\n", "7908 577 70 5603 ALSACIA 119\n", "7909 4794 45 60 REINA VICTORIA 65\n", "7910 4836 132 1364 HOSPITAL LA PAZ 108\n", "7911 539 70 212 ALSACIA 185\n", "7912 54 BR1 5917 VALDEBEBAS 29\n", "\n", "[7913 rows x 5 columns]" ] }, "execution_count": 233, "metadata": {}, "output_type": "execute_result" } ], "source": [ "sample_data_pd" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for index,row in sample_data_pd.iterrows():\n", " print(type(str(row[1])))\n", " dataset_aux = sample_data.filter(pl.col('bus')==str(row[0]), pl.col('line')==str(row[1]),pl.col('stop')==int(row[2]), pl.col('destination')==str(row[3]))\n", " print(dataset_aux.head())\n" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# FUNCION \n", "Dado un bus, linea, parada y destino fijos, obtenemos el dataset con la PK, HORA PREDICHA, HORA EXACTA " ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "ROOT_PATH = os.path.dirname(os.path.dirname(os.path.dirname(os.path.abspath(os.getcwd()))))\n", "DATA_PATH = os.path.join(ROOT_PATH, \"data\", \"processed\")\n", "EMT_DATA_PATH = os.path.join(DATA_PATH, \"emt\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "data = pl.scan_csv(os.path.join(EMT_DATA_PATH, \"2024\", \"03\", f\"emt_202403.csv\"))\n", "list_day = ['02','03','04','05','06','07','08','11','12','13','14','15','16','17','18','19','20','21','22','23','24','25','26','27','28','29','30','31']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "random.seed(1234) \n", "day = random.randint(2, 31)\n", "\n", "print(day)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sample_data = pl.scan_csv(os.path.join(EMT_DATA_PATH, \"2024\", \"03\",str(day), f\"emt_202403{str(day)}.csv\"))" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def create_final_dataset(sample_data):\n", " sample_data = sample_data.with_columns((pl.col('datetime').cast(pl.String)+\"_B\"+pl.col('bus').cast(pl.String)+\"_L\"+ pl.col('line').cast(pl.String)+\"_S\"+pl.col('stop').cast(pl.String)).alias('PK'))\n", " \n", " # ETA <2400\n", " sample_data = sample_data.filter(pl.col('estimateArrive')<888888)\n", " sample_data = sample_data.group_by('PK').min()\n", " \n", " sample_data = sample_data.with_columns(pl.col(\"date\").cast(pl.Date),pl.col('line').cast(pl.String),pl.col('isHead').cast(pl.UInt8))\n", " \n", " sample_data = sample_data.with_columns(pl.col('datetime').map_elements(lambda x: datetime.strptime(x, \"%Y-%m-%d %H:%M:%S.%f\")))\n", " \n", " # Rellenamos valores nulos de dayType\n", " sample_data = sample_data.with_columns(pl.when(pl.col('dayType').is_null()).then(pl.col('date').apply(get_type_day)).otherwise(pl.col('dayType')).alias('dayType'))\n", " \n", " # Eliminamos variables\n", " sample_data = sample_data.drop('positionTypeBus','deviation','MaximumFrequency','StartTime','StopTime','strike')\n", " \n", " return sample_data.collect()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "def get_type_day(date):\n", " \n", " day = date.strftime(\"%A\")\n", " \n", " if day in ['Monday','Tuesday','Wednesday','Thursday','Friday']:\n", " \n", " type = 'LA'\n", " elif day == 'Saturday':\n", " type = 'SA'\n", " else:\n", " type = 'FE'\n", " \n", " return type" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "sample_data = create_final_dataset(sample_data)" ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "def calculate_predict_arrival_date(date_datetime,second):\n", " new_date_datetime = date_datetime + timedelta(seconds=second)\n", " \n", " return new_date_datetime" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "def create_auxiliar_dataset(filter_data):\n", " # Creamos la hora predicha\n", " filter_data = filter_data.with_columns(pl.struct(datetime = pl.col('datetime'), estimateArrive = pl.col('estimateArrive').alias('struct')).map_elements(lambda x: calculate_predict_arrival_date(x['datetime'], x['estimateArrive'])).alias('predict_arrival_date'))\n", " \n", " # Creamos los bloques\n", " sample_data_pd = filter_data.to_pandas()\n", " sample_data_pd['bloque_id'] = None\n", "\n", " bloque_actual = 1\n", " for i in range(0, len(sample_data_pd)-1):\n", " if (sample_data_pd['datetime'][i + 1] - sample_data_pd['datetime'][i]) > timedelta(minutes=5) and (sample_data_pd['estimateArrive'][i] < sample_data_pd['estimateArrive'][i + 1]):\n", " sample_data_pd['bloque_id'][i] = bloque_actual\n", " bloque_actual += 1\n", " \n", " else: \n", " sample_data_pd['bloque_id'][i] = bloque_actual\n", " \n", " sample_data_pd.at[sample_data_pd.index[-1], 'bloque_id'] = bloque_actual\n", " \n", " # Cremos el dataset exacto\n", " sample_data_pl = pl.from_pandas(sample_data_pd)\n", " small_sample_data = sample_data_pl.filter(pl.col('estimateArrive')<=60).group_by(pl.col('bus'),pl.col('line'),pl.col('stop'),pl.col('destination'),pl.col('date'),pl.col('bloque_id')).min().with_columns(pl.col('predict_arrival_date').alias('reliable_arrival_date')) \n", " \n", " final_sample_data = sample_data_pl.join(small_sample_data,on=pl.col('bloque_id'),how = 'left') \n", " final_sample_data = final_sample_data.select(pl.col('PK'),pl.col('reliable_arrival_date'),pl.col('predict_arrival_date'),pl.col('bloque_id'),pl.col('estimateArrive'))\n", " \n", " return final_sample_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "venv", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.12" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }