import vortexasdk as v
import pandas as pd
import plotly.express as px
from datetime import datetime
import dateutil.relativedelta
import os
import requests
import json

# ---------------------------------------------------------------------------
# Reference IDs used throughout the notebook.
# To look up other geography IDs, uncomment and run:
#   full_length_df = v.Geographies().search(term=["Northeast As"]).to_df()
#   print(full_length_df.to_string(index=False))
# ---------------------------------------------------------------------------

# Geography IDs
meg = '0427e0f9d52b38c1a98b68c59b8fd80cb1c508e44882e96611e48ef5b140d927'
europe = 'f39d455f5d38907394d6da3a91da4e391f9a34bd6a17e826d6042761067e88f4'
russia = 'b996521be9c996db3560ca234a56286ac38f798d34be229437555fab4f12a6a5'
brazil = '6ac49669e238276d51719480fddfee2e18cf265f43678ed995071d363ee943f5'
gom = '37c8c4eeb730d1cd41f90ca6bf95c923222b0734b1b0336a475acce821f87ebd'
nea = '5600764fc92f8cce630f3994e7af4d900945d54b29f6e4a07fde0c5e17f7d738'

# To look up other product IDs, uncomment and run:
#   product_search = v.Products().search(term=['diesel']).to_df()
#   print(product_search.to_string(index=False))

# Product IDs
crude = '54af755a090118dcf9b0724c9a4e9f14745c26165385ffa7f1445bc768f06f11'
cpp = 'b68cbb746f8b9098c50e2ba36bcad83001a53bd362e9031fb49085d02c36659c'
lpg = '364ccbb996c944055b479810a8e74863267885dc1b01407cb0f00ab26dafe1e1'
diesel_go = 'deda35eb9ca56b54e74f0ff370423f9a8c61cf6a3796fcb18eaeeb32a8c290bb'
# Helper function for pulling vessel position history
api_key = os.environ['VORTEXA_API_KEY']

def extract_vessel_positions(list_of_vessels, start_timestamp, end_timestamp, interval):
    """Query the Vortexa signals API for a batch of vessels.

    Hits three endpoints (positions, draughts, declared destinations) with the
    same time window / interval and returns the three parsed JSON payloads.

    Parameters
    ----------
    list_of_vessels : list of vessel IDs to query.
    start_timestamp, end_timestamp : datetime objects bounding the window.
    interval : signals sampling interval string (e.g. '1d').

    Returns
    -------
    (positions_json, draughts_json, destinations_json) — each a dict whose
    'data' key holds the records.
    """
    base = "https://api.vortexa.com/v6/signals"

    params = {
        'apikey': api_key,
        'time_min': start_timestamp.isoformat(),
        'time_max': end_timestamp.isoformat(),
        'interval': interval,
        'vessel_id': list_of_vessels,
    }

    positions_json = requests.get(url=base + "/vessel-positions", params=params).json()
    draughts_json = requests.get(url=base + "/vessel-draughts", params=params).json()
    destinations_json = requests.get(url=base + "/vessel-declared-destination", params=params).json()

    return positions_json, draughts_json, destinations_json

# Function to group vessels into a specified group size and query each group
def vessel_position_history(group, vessel_class, list_of_vessels, start_timestamp, end_timestamp, interval):
    """Pull position/draught/destination signals for vessels, in batches.

    Either supply `vessel_class` (a Vortexa vessel-class string, resolved to a
    vessel list via the SDK) or pass `vessel_class=None` together with
    `list_of_vessels` (explicit vessel IDs). Vessels are queried in batches of
    `group` to keep each API call small.

    Returns a DataFrame with columns: lat, lon, vessel_id, timestamp, heading,
    speed, draught, declared_destination, declared_eta.
    """
    if vessel_class is not None:  # fixed: identity check instead of `!= None`
        # Obtain vessel information and list of IDs for specified classes
        vessels_df = v.Vessels().search(vessel_classes=vessel_class).to_df()
        vessel_list = list(vessels_df['id'].unique())
    else:
        vessel_list = list_of_vessels

    # Collect each batch in a list and concatenate once at the end:
    # concatenating inside the loop is quadratic in the number of batches.
    batches = []

    # loop through each group of `group` vessels
    for x in range(0, len(vessel_list), group):

        temp_vessels = vessel_list[x:x + group]

        # query signals data for current group of vessels
        data = extract_vessel_positions(list_of_vessels=temp_vessels,
                                        start_timestamp=start_timestamp,
                                        end_timestamp=end_timestamp,
                                        interval=interval)

        positions = pd.DataFrame(data[0]['data'])
        draughts = pd.DataFrame(data[1]['data'])
        destinations = pd.DataFrame(data[2]['data'])

        positions['timestamp'] = pd.to_datetime(positions['timestamp'])
        draughts['start_timestamp'] = pd.to_datetime(draughts['start_timestamp'])
        destinations['timestamp'] = pd.to_datetime(destinations['timestamp'])

        # daily join keys
        positions['date'] = positions['timestamp'].dt.date
        draughts['start_date'] = draughts['start_timestamp'].dt.date
        destinations['date'] = destinations['timestamp'].dt.date

        # keep one declared destination per vessel per day
        destinations = destinations.drop_duplicates(subset=['vessel_id', 'date'])

        # positions + draughts (draught valid from its start_date onward)
        merged = pd.merge(positions, draughts,
                          left_on=['vessel_id', 'date'],
                          right_on=['vessel_id', 'start_date'], how='left')

        # + declared destinations
        merged = pd.merge(merged, destinations, on=['vessel_id', 'date'], how='left')

        merged = merged.fillna('')

        batch = merged[['lat_x', 'lon_x', 'vessel_id', 'timestamp_x',
                        'heading', 'speed', 'value',
                        'declared_destination', 'declared_eta']].copy()

        batch.rename(columns={'lat_x': 'lat', 'lon_x': 'lon',
                              'timestamp_x': 'timestamp', 'value': 'draught'},
                     inplace=True)

        batches.append(batch)

        # if not all vessels done, print progress
        if ((x + group) < len(vessel_list)):
            print((x + group), 'vessels done out of', len(vessel_list), 'at:', datetime.today())
        # if all vessels done, print completion note
        else:
            print(len(vessel_list), 'vessels done out of', len(vessel_list), 'at:', datetime.today())

    # single concat (also safe when vessel_list is empty)
    store = pd.concat(batches) if batches else pd.DataFrame()

    # reset index
    store.reset_index(drop=True, inplace=True)

    return store


def merge_voyages_and_signals(start_y, start_m, start_d, origin, origin_excl, destination, destination_excl, prod, prod_excl, vessels, vessels_excl, unit, status, group, interval):
    """Join Vortexa voyages with daily vessel signals.

    Pulls voyages matching the given filters from (start_y, start_m, start_d)
    to today, pulls position/draught/destination signals for every vessel that
    appears in those voyages, and returns one row per vessel per signal
    timestamp restricted to the vessel's voyage window, deduplicated so that
    overlapping voyages keep the one with the latest START DATE.
    """
    # Set dates
    start = datetime(start_y, start_m, start_d)
    end = datetime.today()

    # query vortexa vessels (needed to map voyage IMOs -> vessel IDs)
    vessels_df = v.Vessels().search().to_df()

    print("starting voyages pull at:", datetime.today())

    # query voyages
    voyages = v.VoyagesSearchEnriched().search(
        time_min=start,
        time_max=end,
        voyage_status=status,
        latest_products=prod,
        latest_products_excluded=prod_excl,
        origins=origin,
        origins_excluded=origin_excl,
        destinations=destination,
        destinations_excluded=destination_excl,
        vessels=vessels,
        vessels_excluded=vessels_excl,
        unit=unit,
        columns=['vessel_name', 'imo', 'voyage_id', 'dwt', 'quantity',
                 'latest_product', 'latest_product_group',
                 'latest_product_category', 'latest_product_grade',
                 'first_origin_port', 'first_origin_country',
                 'first_origin_shipping_region', 'final_destination_port',
                 'final_destination_country', 'final_destination_shipping_region',
                 'start_date', 'end_date', 'voyage_status']).to_df()

    print("finished voyages pull at:", datetime.today())

    # rename columns so the IMO headers match between the two frames
    vessels_df.rename(columns={'imo': 'IMO'}, inplace=True)

    # convert IMOs to numeric values
    voyages['IMO'] = pd.to_numeric(voyages['IMO'])
    vessels_df["IMO"] = pd.to_numeric(vessels_df['IMO'])

    imo_list = list(voyages['IMO'].unique())  # unique IMOs seen in voyages

    # Vessel info for each vessel that appears in the voyages query
    vessels_df_2 = vessels_df[vessels_df['IMO'].isin(imo_list)]

    vessel_ids = list(vessels_df_2['id'].unique())  # IDs for vessels in question

    print(len(vessel_ids), 'vessels to query')

    # merge voyages and vessel info (need to do this to get vessel ID in voyages output)
    merged = pd.merge(voyages, vessels_df_2[['id', 'IMO']], on='IMO', how='left')
    print("starting signals pull at:", datetime.today())

    # query signals
    positions = vessel_position_history(group=group, vessel_class=None,
                                        list_of_vessels=vessel_ids,
                                        start_timestamp=start,
                                        end_timestamp=end,
                                        interval=interval)
    print("finished signals pull at:", datetime.today())

    # convert voyage and signals dates to datetime values
    merged['START DATE'] = pd.to_datetime(merged['START DATE'], utc=True)
    merged['END DATE'] = pd.to_datetime(merged['END DATE'], utc=True)
    positions['date'] = pd.to_datetime(positions['timestamp'])

    # rename columns
    merged.rename(columns={'id': 'vessel_id'}, inplace=True)
    # Signals return short (16-char) vessel IDs; truncate the long SDK IDs to match
    merged['vessel_id'] = merged['vessel_id'].str[0:16]

    # merge on vessel_id
    merged_df = pd.merge(positions, merged, on='vessel_id', how='left')

    # filter rows where timestamp falls within the start and end date of voyages
    filtered_df = merged_df[(merged_df['date'] >= merged_df['START DATE']) & (merged_df['date'] <= merged_df['END DATE'])]

    # select relevant columns
    final_columns = ['lat', 'lon', 'vessel_id', 'timestamp', 'heading', 'speed',
                     'draught', 'declared_destination', 'declared_eta',
                     'VESSEL NAME', 'IMO', 'VOYAGE ID', 'DWT (t)', 'QUANTITY (t)', 'LATEST PRODUCT',
                     'LATEST PRODUCT GROUP', 'LATEST PRODUCT CATEGORY', 'LATEST PRODUCT GRADE',
                     'FIRST ORIGIN PORT', 'FIRST ORIGIN COUNTRY', 'FIRST ORIGIN SHIPPING REGION',
                     'FINAL DESTINATION PORT', 'FINAL DESTINATION COUNTRY',
                     'FINAL DESTINATION SHIPPING REGION', 'START DATE', 'END DATE', 'VOYAGE STATUS']
    final = filtered_df[final_columns]

    # ensure numeric columns are of data type numeric
    final = final.copy()
    final['QUANTITY (t)'] = pd.to_numeric(final['QUANTITY (t)'])
    final['lat'] = pd.to_numeric(final['lat'])
    final['lon'] = pd.to_numeric(final['lon'])
    final['heading'] = pd.to_numeric(final['heading'])
    final['speed'] = pd.to_numeric(final['speed'])

    final['is_cabotage'] = final['FIRST ORIGIN COUNTRY'] == final['FINAL DESTINATION COUNTRY']
    final['no. of vessels'] = 1

    final.reset_index(drop=True, inplace=True)

    # Identify columns for duplicate identification
    duplicate_cols = ['lat', 'lon', 'vessel_id', 'timestamp', 'heading', 'speed', 'declared_eta', 'declared_destination']

    # Group by the columns and keep the row with the highest start_date.
    # .copy() fixes the chained-assignment hazard: the .loc result is a view
    # and the mutations below triggered SettingWithCopyWarning.
    df_unique = final.loc[final.groupby(duplicate_cols)['START DATE'].idxmax()].copy()

    df_unique['timestamp'] = pd.to_datetime(df_unique['timestamp'])

    df_unique.sort_values(by='timestamp', ascending=True, inplace=True)

    # Reset the index for better readability
    df_unique = df_unique.reset_index(drop=True)

    return df_unique
# Pull crude voyages for the aframax/LR2 class since 1 May 2024 and join
# daily position/draught/destination signals for each vessel.
# NOTE: removed a dead v.Vessels().search(vessel_classes='oil_aframax') call
# whose result (`vessels_list`) was never used — the class filter is passed
# directly to merge_voyages_and_signals via `vessels='oil_aframax_lr2'`.

test = merge_voyages_and_signals(start_y=2024, start_m=5, start_d=1,
                                 origin=None, origin_excl=None,
                                 destination=None, destination_excl=None,
                                 prod=crude, prod_excl=None, unit='t',
                                 vessels='oil_aframax_lr2', vessels_excl=None,
                                 status=None, group=100, interval='1d')
| \n", " | lat | \n", "lon | \n", "vessel_id | \n", "timestamp | \n", "heading | \n", "speed | \n", "draught | \n", "declared_destination | \n", "declared_eta | \n", "VESSEL NAME | \n", "... | \n", "FIRST ORIGIN COUNTRY | \n", "FIRST ORIGIN SHIPPING REGION | \n", "FINAL DESTINATION PORT | \n", "FINAL DESTINATION COUNTRY | \n", "FINAL DESTINATION SHIPPING REGION | \n", "START DATE | \n", "END DATE | \n", "VOYAGE STATUS | \n", "is_cabotage | \n", "no. of vessels | \n", "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", "57.006513 | \n", "11.712312 | \n", "c218232be16e8a5b | \n", "2024-05-01 00:00:00+00:00 | \n", "161.7 | \n", "10.9 | \n", "\n", " | EETLL | \n", "2024-05-05T03:00:00.000Z | \n", "MARCH | \n", "... | \n", "India | \n", "India West Coast | \n", "Primorsk (Koivisto) [RU] | \n", "Russia | \n", "Baltic | \n", "2024-02-13 00:00:00+00:00 | \n", "2024-05-14 00:00:00+00:00 | \n", "Ballast | \n", "False | \n", "1 | \n", "
| 1 | \n", "4.993667 | \n", "104.059839 | \n", "183c2df9eeb4b7b3 | \n", "2024-05-01 00:00:00+00:00 | \n", "152.9 | \n", "10.3 | \n", "\n", " | \n", " | \n", " | PACIFIC JEWELS | \n", "... | \n", "Thailand | \n", "Southeast Asia (SEA) | \n", "Cossack, WA [AU] | \n", "\n", " | Oceania | \n", "2024-04-26 00:00:00+00:00 | \n", "2024-05-14 00:00:00+00:00 | \n", "Ballast | \n", "False | \n", "1 | \n", "
| 2 | \n", "-6.247512 | \n", "108.461041 | \n", "2b9f8d4aead350ed | \n", "2024-05-01 00:00:00+00:00 | \n", "64.4 | \n", "0.2 | \n", "\n", " | \n", " | \n", " | DF MYSTRAS | \n", "... | \n", "Gabon | \n", "West Africa (WAf) | \n", "Balongan [ID] | \n", "Indonesia | \n", "Southeast Asia (SEA) | \n", "2024-03-23 00:00:00+00:00 | \n", "2024-05-03 00:00:00+00:00 | \n", "Laden | \n", "False | \n", "1 | \n", "
| 3 | \n", "19.765696 | \n", "-83.432494 | \n", "1d1bc37238bf7c21 | \n", "2024-05-01 00:00:00+00:00 | \n", "132.0 | \n", "12.0 | \n", "\n", " | PACHG | \n", "2024-05-04T11:00:00.000Z | \n", "TORM HERMIA | \n", "... | \n", "United States | \n", "Gulf of Mexico (GoM) | \n", "Chiriqui Grande [PA] | \n", "Panama | \n", "Caribbean | \n", "2024-04-26 00:00:00+00:00 | \n", "2024-05-05 00:00:00+00:00 | \n", "Laden | \n", "False | \n", "1 | \n", "
| 4 | \n", "36.497196 | \n", "14.523806 | \n", "2ea187736a936e2a | \n", "2024-05-01 00:00:00+00:00 | \n", "110.5 | \n", "11.0 | \n", "\n", " | FR FOS > TR CKZ | \n", "2024-05-04T22:00:00.000Z | \n", "SAFEEN ELONA | \n", "... | \n", "France | \n", "Mediterranean (Med) | \n", "Novorossiysk [RU] | \n", "Russia | \n", "Black Sea and Caspian | \n", "2024-04-29 00:00:00+00:00 | \n", "2024-05-10 00:00:00+00:00 | \n", "Ballast | \n", "False | \n", "1 | \n", "
| ... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "... | \n", "
| 35469 | \n", "29.834449 | \n", "123.188475 | \n", "dedf013f49aff7ad | \n", "2024-06-25 00:00:00+00:00 | \n", "10.4 | \n", "0.1 | \n", "\n", " | KOZMINO | \n", "2024-06-29T14:00:00.000Z | \n", "HUIHAI PACIFIC | \n", "... | \n", "China | \n", "Northeast Asia (NEA) | \n", "Kozmino [RU] | \n", "Russia | \n", "Russia Far East | \n", "2024-06-23 00:00:00+00:00 | \n", "2024-07-01 00:00:00+00:00 | \n", "Ballast | \n", "False | \n", "1 | \n", "
| 35470 | \n", "59.183101 | \n", "2.802509 | \n", "d95b68b92c36d55f | \n", "2024-06-25 00:00:00+00:00 | \n", "16.9 | \n", "0.0 | \n", "\n", " | BALDER(NOR) | \n", "2024-06-27T04:00:00.000Z | \n", "SCOTT SPIRIT | \n", "... | \n", "Norway | \n", "Northwest Europe (NWE) | \n", "Liverpool [GB] | \n", "United Kingdom | \n", "Northwest Europe (NWE) | \n", "2024-06-15 00:00:00+00:00 | \n", "2024-06-28 00:00:00+00:00 | \n", "Laden | \n", "False | \n", "1 | \n", "
| 35471 | \n", "-26.694409 | \n", "153.257438 | \n", "258c6e78fd9784c2 | \n", "2024-06-25 00:00:00+00:00 | \n", "145.1 | \n", "0.0 | \n", "\n", " | AU BNE | \n", "2024-06-21T12:00:00.000Z | \n", "ESTEEM COWBOY | \n", "... | \n", "United States | \n", "Gulf of Mexico (GoM) | \n", "Brisbane Qld [AU] | \n", "\n", " | Oceania | \n", "2024-04-28 00:00:00+00:00 | \n", "2024-06-28 00:00:00+00:00 | \n", "Laden | \n", "False | \n", "1 | \n", "
| 35472 | \n", "59.913154 | \n", "28.633176 | \n", "3d0f10239fc55a1d | \n", "2024-06-25 00:00:00+00:00 | \n", "280.0 | \n", "0.0 | \n", "\n", " | RUULU | \n", "2024-06-12T03:00:00.000Z | \n", "ROBON | \n", "... | \n", "Turkey | \n", "Mediterranean (Med) | \n", "Primorsk (Koivisto) [RU] | \n", "Russia | \n", "Baltic | \n", "2024-05-28 00:00:00+00:00 | \n", "2024-06-25 00:00:00+00:00 | \n", "Ballast | \n", "False | \n", "1 | \n", "
| 35473 | \n", "34.301610 | \n", "31.089419 | \n", "fbd923996d68d310 | \n", "2024-06-25 00:00:00+00:00 | \n", "117.0 | \n", "6.3 | \n", "\n", " | LIMASSOL | \n", "2024-06-26T23:00:00.000Z | \n", "FOS DA VINCI | \n", "... | \n", "Turkey | \n", "Mediterranean (Med) | \n", "Sidi Kerir Terminal [EG] | \n", "Egypt | \n", "Mediterranean (Med) | \n", "2024-05-05 00:00:00+00:00 | \n", "2024-07-01 00:00:00+00:00 | \n", "Ballast | \n", "False | \n", "1 | \n", "
35474 rows × 29 columns
\n", "