{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "# Process SCADA and downtime data\n", "\n", "This notebook demonstrates the merging of all four CSV files containing SCADA\n", "and downtime data into single dataframes. Two files are older datasets, and\n", "the other two are newer datasets. Both old and new datasets have most of their\n", "timestamps in common.\n", "\n", "The older SCADA datasets were found to have some errors in the rotor speed\n", "readings. This merging replaces the old errorred data points with the new\n", "ones, and removes incomplete rows.\n", "\n", "This merging ensures the downtime data has the same range as the SCADA data,\n", "and removes incomplete rows." ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "# import libraries\n", "import os\n", "import glob\n", "import itertools\n", "import pandas as pd\n", "import numpy as np" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# create directory to store processed data\n", "os.makedirs(\"data/processed/\", exist_ok=True)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Downtime categories" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# read and view data\n", "data = pd.read_excel(\"data/Melogale Downtime Categories.xlsx\")" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Turbine CategoriesUnnamed: 1Environmental CategoriesUnnamed: 3Grid CategoriesUnnamed: 5Infrastructure CategoriesUnnamed: 7Availability CategoriesUnnamed: 9
0idOLD nameidnameidnameidnameidname
10Unknown0Unknown0Unknown0Unknown0Unknown
21OK1OK1OK1OK1Available/Non-penalising
32Anemometry2High Wind Shutdown2Planned Outage2Planned Outage2NOT available/penalising
43Rotor Brake3Icing3Unplanned Outage3Unplanned OutageNaNNaN
54Main Shaft4TurbulenceNaNNaNNaNNaNNaNNaN
65Gearbox5Work HaltedNaNNaNNaNNaNNaNNaN
76Generator6LightningNaNNaNNaNNaNNaNNaN
87Yaw System7Sector ManagementNaNNaNNaNNaNNaNNaN
98Electrical Controls8Low TemperatureNaNNaNNaNNaNNaNNaN
109Hydraulics11Low Wind ShutdownNaNNaNNaNNaNNaNNaN
1110Electrical System12Shadow casting stopNaNNaNNaNNaNNaNNaN
1211Pitch Control13Ambient temperature highNaNNaNNaNNaNNaNNaN
1312Unlogged manual stop14Cable unwindNaNNaNNaNNaNNaNNaN
1413Customer Stop15WildlifeNaNNaNNaNNaNNaNNaN
1514Noise Constraints16OtherNaNNaNNaNNaNNaNNaN
1615Scheduled MaintenanceNaNNaNNaNNaNNaNNaNNaNNaN
1716TowerNaNNaNNaNNaNNaNNaNNaNNaN
1817RetrofitNaNNaNNaNNaNNaNNaNNaNNaN
1918Cable UnwindNaNNaNNaNNaNNaNNaNNaNNaN
2019HubNaNNaNNaNNaNNaNNaNNaNNaN
2120Rotor BladesNaNNaNNaNNaNNaNNaNNaNNaN
2221Delayed StartupNaNNaNNaNNaNNaNNaNNaNNaN
2322OtherNaNNaNNaNNaNNaNNaNNaNNaN
\n", "
" ], "text/plain": [ " Turbine Categories Unnamed: 1 Environmental Categories \\\n", "0 id OLD name id \n", "1 0 Unknown 0 \n", "2 1 OK 1 \n", "3 2 Anemometry 2 \n", "4 3 Rotor Brake 3 \n", "5 4 Main Shaft 4 \n", "6 5 Gearbox 5 \n", "7 6 Generator 6 \n", "8 7 Yaw System 7 \n", "9 8 Electrical Controls 8 \n", "10 9 Hydraulics 11 \n", "11 10 Electrical System 12 \n", "12 11 Pitch Control 13 \n", "13 12 Unlogged manual stop 14 \n", "14 13 Customer Stop 15 \n", "15 14 Noise Constraints 16 \n", "16 15 Scheduled Maintenance NaN \n", "17 16 Tower NaN \n", "18 17 Retrofit NaN \n", "19 18 Cable Unwind NaN \n", "20 19 Hub NaN \n", "21 20 Rotor Blades NaN \n", "22 21 Delayed Startup NaN \n", "23 22 Other NaN \n", "\n", " Unnamed: 3 Grid Categories Unnamed: 5 \\\n", "0 name id name \n", "1 Unknown 0 Unknown \n", "2 OK 1 OK \n", "3 High Wind Shutdown 2 Planned Outage \n", "4 Icing 3 Unplanned Outage \n", "5 Turbulence NaN NaN \n", "6 Work Halted NaN NaN \n", "7 Lightning NaN NaN \n", "8 Sector Management NaN NaN \n", "9 Low Temperature NaN NaN \n", "10 Low Wind Shutdown NaN NaN \n", "11 Shadow casting stop NaN NaN \n", "12 Ambient temperature high NaN NaN \n", "13 Cable unwind NaN NaN \n", "14 Wildlife NaN NaN \n", "15 Other NaN NaN \n", "16 NaN NaN NaN \n", "17 NaN NaN NaN \n", "18 NaN NaN NaN \n", "19 NaN NaN NaN \n", "20 NaN NaN NaN \n", "21 NaN NaN NaN \n", "22 NaN NaN NaN \n", "23 NaN NaN NaN \n", "\n", " Infrastructure Categories Unnamed: 7 Availability Categories \\\n", "0 id name id \n", "1 0 Unknown 0 \n", "2 1 OK 1 \n", "3 2 Planned Outage 2 \n", "4 3 Unplanned Outage NaN \n", "5 NaN NaN NaN \n", "6 NaN NaN NaN \n", "7 NaN NaN NaN \n", "8 NaN NaN NaN \n", "9 NaN NaN NaN \n", "10 NaN NaN NaN \n", "11 NaN NaN NaN \n", "12 NaN NaN NaN \n", "13 NaN NaN NaN \n", "14 NaN NaN NaN \n", "15 NaN NaN NaN \n", "16 NaN NaN NaN \n", "17 NaN NaN NaN \n", "18 NaN NaN NaN \n", "19 NaN NaN NaN \n", "20 NaN NaN NaN \n", "21 NaN NaN NaN \n", "22 NaN NaN NaN \n", "23 NaN NaN NaN \n", 
"\n", " Unnamed: 9 \n", "0 name \n", "1 Unknown \n", "2 Available/Non-penalising \n", "3 NOT available/penalising \n", "4 NaN \n", "5 NaN \n", "6 NaN \n", "7 NaN \n", "8 NaN \n", "9 NaN \n", "10 NaN \n", "11 NaN \n", "12 NaN \n", "13 NaN \n", "14 NaN \n", "15 NaN \n", "16 NaN \n", "17 NaN \n", "18 NaN \n", "19 NaN \n", "20 NaN \n", "21 NaN \n", "22 NaN \n", "23 NaN " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "# drop first row\n", "data = data.drop([0])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# function to filter data for each category type\n", "catData = {}\n", "\n", "\n", "def categorise_data(cat, number):\n", " catData[cat] = data.filter(\n", " items=[cat+\" Categories\", \"Unnamed: \"+str(number)]\n", " )\n", " catData[cat].rename(\n", " columns={\n", " cat+\" Categories\": \"Category\", \"Unnamed: \"+str(number): \"Name\"\n", " },\n", " inplace=True\n", " )\n", " catData[cat][\"Type\"] = cat\n", " catData[cat].dropna(inplace=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# filtering\n", "categorise_data(\"Turbine\", 1)\n", "categorise_data(\"Environmental\", 3)\n", "categorise_data(\"Grid\", 5)\n", "categorise_data(\"Infrastructure\", 7)\n", "categorise_data(\"Availability\", 9)" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# concatenate data\n", "data = pd.concat(catData.values(), ignore_index=True)" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " 
\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
CategoryNameType
00UnknownTurbine
11OKTurbine
22AnemometryTurbine
33Rotor BrakeTurbine
44Main ShaftTurbine
55GearboxTurbine
66GeneratorTurbine
77Yaw SystemTurbine
88Electrical ControlsTurbine
99HydraulicsTurbine
1010Electrical SystemTurbine
1111Pitch ControlTurbine
1212Unlogged manual stopTurbine
1313Customer StopTurbine
1414Noise ConstraintsTurbine
1515Scheduled MaintenanceTurbine
1616TowerTurbine
1717RetrofitTurbine
1818Cable UnwindTurbine
1919HubTurbine
2020Rotor BladesTurbine
2121Delayed StartupTurbine
2222OtherTurbine
230UnknownEnvironmental
241OKEnvironmental
252High Wind ShutdownEnvironmental
263IcingEnvironmental
274TurbulenceEnvironmental
285Work HaltedEnvironmental
296LightningEnvironmental
307Sector ManagementEnvironmental
318Low TemperatureEnvironmental
3211Low Wind ShutdownEnvironmental
3312Shadow casting stopEnvironmental
3413Ambient temperature highEnvironmental
3514Cable unwindEnvironmental
3615WildlifeEnvironmental
3716OtherEnvironmental
380UnknownGrid
391OKGrid
402Planned OutageGrid
413Unplanned OutageGrid
420UnknownInfrastructure
431OKInfrastructure
442Planned OutageInfrastructure
453Unplanned OutageInfrastructure
460UnknownAvailability
471Available/Non-penalisingAvailability
482NOT available/penalisingAvailability
\n", "
" ], "text/plain": [ " Category Name Type\n", "0 0 Unknown Turbine\n", "1 1 OK Turbine\n", "2 2 Anemometry Turbine\n", "3 3 Rotor Brake Turbine\n", "4 4 Main Shaft Turbine\n", "5 5 Gearbox Turbine\n", "6 6 Generator Turbine\n", "7 7 Yaw System Turbine\n", "8 8 Electrical Controls Turbine\n", "9 9 Hydraulics Turbine\n", "10 10 Electrical System Turbine\n", "11 11 Pitch Control Turbine\n", "12 12 Unlogged manual stop Turbine\n", "13 13 Customer Stop Turbine\n", "14 14 Noise Constraints Turbine\n", "15 15 Scheduled Maintenance Turbine\n", "16 16 Tower Turbine\n", "17 17 Retrofit Turbine\n", "18 18 Cable Unwind Turbine\n", "19 19 Hub Turbine\n", "20 20 Rotor Blades Turbine\n", "21 21 Delayed Startup Turbine\n", "22 22 Other Turbine\n", "23 0 Unknown Environmental\n", "24 1 OK Environmental\n", "25 2 High Wind Shutdown Environmental\n", "26 3 Icing Environmental\n", "27 4 Turbulence Environmental\n", "28 5 Work Halted Environmental\n", "29 6 Lightning Environmental\n", "30 7 Sector Management Environmental\n", "31 8 Low Temperature Environmental\n", "32 11 Low Wind Shutdown Environmental\n", "33 12 Shadow casting stop Environmental\n", "34 13 Ambient temperature high Environmental\n", "35 14 Cable unwind Environmental\n", "36 15 Wildlife Environmental\n", "37 16 Other Environmental\n", "38 0 Unknown Grid\n", "39 1 OK Grid\n", "40 2 Planned Outage Grid\n", "41 3 Unplanned Outage Grid\n", "42 0 Unknown Infrastructure\n", "43 1 OK Infrastructure\n", "44 2 Planned Outage Infrastructure\n", "45 3 Unplanned Outage Infrastructure\n", "46 0 Unknown Availability\n", "47 1 Available/Non-penalising Availability\n", "48 2 NOT available/penalising Availability" ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## Downtime time series" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 
data/Last_six_months_downtime.csv (5367, 12)\n", "1 data/Prior_two_years_downtime.csv (16958, 24)\n" ] } ], "source": [ "# read every downtime CSV into a dict keyed by its position in dtList\n", "# glob returns paths in arbitrary order; sorting keeps dt[0] as the six-month\n", "# file and dt[1] as the two-year file, which the following cells rely on\n", "dt = {}\n", "dtList = sorted(glob.glob(\"data/*downtime*.csv\"))\n", "for num, df in enumerate(dtList):\n", "    dt[num] = pd.read_csv(df)\n", "    print(num, df, dt[num].shape)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtimestamp_starttimestamp_endturbine_idalarm_idGridCategory_idInfrastructureCategory_idEnvironmentalCategory_idTurbineCategory_idAvailabilityCategory_idcommentworkorder_id
07154632016-10-05 11:27:58.0002016-10-05 11:28:37.000185111131Advanced rescue procedures108128.0
17154642016-10-05 11:28:37.0002016-10-05 11:58:15.000185111131Advanced rescue procedures108128.0
27154652016-10-05 11:58:15.0002016-10-05 12:00:04.000185111131Advanced rescue procedures108128.0
37154662016-10-05 12:00:50.0002016-10-05 12:48:00.000185111131Advanced rescue procedures108128.0
47154672016-10-26 04:56:44.0002016-10-26 05:20:00.00018391111181FM1003Yaw CableRewindNaN
\n", "
" ], "text/plain": [ " id timestamp_start timestamp_end turbine_id \\\n", "0 715463 2016-10-05 11:27:58.000 2016-10-05 11:28:37.000 18 \n", "1 715464 2016-10-05 11:28:37.000 2016-10-05 11:58:15.000 18 \n", "2 715465 2016-10-05 11:58:15.000 2016-10-05 12:00:04.000 18 \n", "3 715466 2016-10-05 12:00:50.000 2016-10-05 12:48:00.000 18 \n", "4 715467 2016-10-26 04:56:44.000 2016-10-26 05:20:00.000 18 \n", "\n", " alarm_id GridCategory_id InfrastructureCategory_id \\\n", "0 5 1 1 \n", "1 5 1 1 \n", "2 5 1 1 \n", "3 5 1 1 \n", "4 391 1 1 \n", "\n", " EnvironmentalCategory_id TurbineCategory_id AvailabilityCategory_id \\\n", "0 1 13 1 \n", "1 1 13 1 \n", "2 1 13 1 \n", "3 1 13 1 \n", "4 1 18 1 \n", "\n", " comment workorder_id \n", "0 Advanced rescue procedures 108128.0 \n", "1 Advanced rescue procedures 108128.0 \n", "2 Advanced rescue procedures 108128.0 \n", "3 Advanced rescue procedures 108128.0 \n", "4 FM1003Yaw CableRewind NaN " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt[0].head(5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtimestamp_starttimestamp_endturbine_idalarm_idGridCategory_idInfrastructureCategory_idEnvironmentalCategory_idTurbineCategory_idAvailabilityCategory_id...timestamp_end.1turbine_id.1alarm_id.1GridCategory_id.1InfrastructureCategory_id.1EnvironmentalCategory_id.1TurbineCategory_id.1AvailabilityCategory_id.1comment.1workorder_id.1
0858512015-01-09 01:31:23.0002015-01-09 01:42:07.000638911211...2015-01-09 01:42:07.000638911211NaNNaN
1858522015-01-09 02:01:28.0002015-01-09 02:06:43.000638911211...2015-01-09 02:06:43.000638911211NaNNaN
2858532015-01-09 03:08:50.0002015-01-09 03:20:00.000638911211...2015-01-09 03:20:00.000638911211NaNNaN
3858542015-01-09 04:21:58.0002015-01-09 05:30:00.000638911211...2015-01-09 05:30:00.000638911211NaNNaN
4858552015-01-09 05:43:41.0002015-01-09 05:54:57.000638911211...2015-01-09 05:54:57.000638911211NaNNaN
\n", "

5 rows × 24 columns

\n", "
" ], "text/plain": [ " id timestamp_start timestamp_end turbine_id \\\n", "0 85851 2015-01-09 01:31:23.000 2015-01-09 01:42:07.000 6 \n", "1 85852 2015-01-09 02:01:28.000 2015-01-09 02:06:43.000 6 \n", "2 85853 2015-01-09 03:08:50.000 2015-01-09 03:20:00.000 6 \n", "3 85854 2015-01-09 04:21:58.000 2015-01-09 05:30:00.000 6 \n", "4 85855 2015-01-09 05:43:41.000 2015-01-09 05:54:57.000 6 \n", "\n", " alarm_id GridCategory_id InfrastructureCategory_id \\\n", "0 389 1 1 \n", "1 389 1 1 \n", "2 389 1 1 \n", "3 389 1 1 \n", "4 389 1 1 \n", "\n", " EnvironmentalCategory_id TurbineCategory_id AvailabilityCategory_id ... \\\n", "0 2 1 1 ... \n", "1 2 1 1 ... \n", "2 2 1 1 ... \n", "3 2 1 1 ... \n", "4 2 1 1 ... \n", "\n", " timestamp_end.1 turbine_id.1 alarm_id.1 GridCategory_id.1 \\\n", "0 2015-01-09 01:42:07.000 6 389 1 \n", "1 2015-01-09 02:06:43.000 6 389 1 \n", "2 2015-01-09 03:20:00.000 6 389 1 \n", "3 2015-01-09 05:30:00.000 6 389 1 \n", "4 2015-01-09 05:54:57.000 6 389 1 \n", "\n", " InfrastructureCategory_id.1 EnvironmentalCategory_id.1 \\\n", "0 1 2 \n", "1 1 2 \n", "2 1 2 \n", "3 1 2 \n", "4 1 2 \n", "\n", " TurbineCategory_id.1 AvailabilityCategory_id.1 comment.1 workorder_id.1 \n", "0 1 1 NaN NaN \n", "1 1 1 NaN NaN \n", "2 1 1 NaN NaN \n", "3 1 1 NaN NaN \n", "4 1 1 NaN NaN \n", "\n", "[5 rows x 24 columns]" ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt[1].head(5)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['id', 'timestamp_start', 'timestamp_end', 'turbine_id', 'alarm_id',\n", " 'GridCategory_id', 'InfrastructureCategory_id',\n", " 'EnvironmentalCategory_id', 'TurbineCategory_id',\n", " 'AvailabilityCategory_id', 'comment', 'workorder_id', 'id.1',\n", " 'timestamp_start.1', 'timestamp_end.1', 'turbine_id.1', 'alarm_id.1',\n", " 'GridCategory_id.1', 'InfrastructureCategory_id.1',\n", " 'EnvironmentalCategory_id.1', 
'TurbineCategory_id.1',\n", " 'AvailabilityCategory_id.1', 'comment.1', 'workorder_id.1'],\n", " dtype='object')" ] }, "execution_count": 5, "metadata": {}, "output_type": "execute_result" } ], "source": [ "dt[1].columns" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [], "source": [ "# drop duplicate columns (the \".1\"-suffixed copies in the column listing)\n", "# the dot is escaped and the pattern anchored to the end of the name: a bare\n", "# \".1\" would match any column with a \"1\" preceded by any character\n", "dt[1] = dt[1].drop(columns=dt[1].filter(regex=r\"\\.1$\").columns)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# convert timestamps to datetime data type\n", "for key in dt.keys():\n", "    for col in list(dt[key].filter(regex=\"timestamp\")):\n", "        dt[key][col] = pd.to_datetime(dt[key][col])" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [], "source": [ "# concatenate data\n", "data = pd.concat(dt.values(), join=\"outer\")" ] }, { "cell_type": "code", "execution_count": 9, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
idtimestamp_starttimestamp_endturbine_idalarm_idGridCategory_idInfrastructureCategory_idEnvironmentalCategory_idTurbineCategory_idAvailabilityCategory_idcommentworkorder_id
07154632016-10-05 11:27:582016-10-05 11:28:37185111131Advanced rescue procedures108128.0
17154642016-10-05 11:28:372016-10-05 11:58:15185111131Advanced rescue procedures108128.0
27154652016-10-05 11:58:152016-10-05 12:00:04185111131Advanced rescue procedures108128.0
37154662016-10-05 12:00:502016-10-05 12:48:00185111131Advanced rescue procedures108128.0
47154672016-10-26 04:56:442016-10-26 05:20:0018391111181FM1003Yaw CableRewindNaN
\n", "
" ], "text/plain": [ " id timestamp_start timestamp_end turbine_id alarm_id \\\n", "0 715463 2016-10-05 11:27:58 2016-10-05 11:28:37 18 5 \n", "1 715464 2016-10-05 11:28:37 2016-10-05 11:58:15 18 5 \n", "2 715465 2016-10-05 11:58:15 2016-10-05 12:00:04 18 5 \n", "3 715466 2016-10-05 12:00:50 2016-10-05 12:48:00 18 5 \n", "4 715467 2016-10-26 04:56:44 2016-10-26 05:20:00 18 391 \n", "\n", " GridCategory_id InfrastructureCategory_id EnvironmentalCategory_id \\\n", "0 1 1 1 \n", "1 1 1 1 \n", "2 1 1 1 \n", "3 1 1 1 \n", "4 1 1 1 \n", "\n", " TurbineCategory_id AvailabilityCategory_id comment \\\n", "0 13 1 Advanced rescue procedures \n", "1 13 1 Advanced rescue procedures \n", "2 13 1 Advanced rescue procedures \n", "3 13 1 Advanced rescue procedures \n", "4 18 1 FM1003Yaw CableRewind \n", "\n", " workorder_id \n", "0 108128.0 \n", "1 108128.0 \n", "2 108128.0 \n", "3 108128.0 \n", "4 NaN " ] }, "execution_count": 9, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.head(5)" ] }, { "cell_type": "code", "execution_count": 10, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "(22325, 12)" ] }, "execution_count": 10, "metadata": {}, "output_type": "execute_result" } ], "source": [ "data.shape" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "## SCADA time series" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 data/Last_six_months_SCADA.csv (651600, 17)\n", "1 data/Prior_two_years_SCADA.csv (2550346, 17)\n" ] } ], "source": [ "# old SCADA data\n", "scada = {}\n", "scadaList = glob.glob(\"data/*SCADA.csv\")\n", "for num, df in enumerate(scadaList):\n", " scada[num] = pd.read_csv(df)\n", " print(num, df, scada[num].shape)" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampturbineap_avap_devap_maxreactive_powerws_avws_1ws_2wd_avwd_1wd_2gen_sprs_avnac_pospitchruntime
001/11/2016 15:40380.3757229.46033178.0-9.2927994.0882114.1267624.088211290.4030-17.849820-4.077392797.07120.0294.48020.0600.0
101/11/2016 15:404141.6858053.87635242.01.3393214.7459115.0711024.745911316.6896-15.205220-6.955624814.98140.0323.64520.0600.0
201/11/2016 15:40514.8528517.7993391.0-2.0370412.8860490.0000002.886049259.86340.000000-2.367190780.92070.0262.23120.0600.0
301/11/2016 15:406139.7100063.40798273.0-55.8026104.5178204.5847904.517820282.0574-180.000000-6.154136816.21230.0288.21190.0600.0
401/11/2016 15:40737.4050540.17854161.0-145.5875003.5408813.5408813.369349228.9863-3.396181-4.141267787.75130.0232.38280.0600.0
\n", "
" ], "text/plain": [ " timestamp turbine ap_av ap_dev ap_max reactive_power \\\n", "0 01/11/2016 15:40 3 80.37572 29.46033 178.0 -9.292799 \n", "1 01/11/2016 15:40 4 141.68580 53.87635 242.0 1.339321 \n", "2 01/11/2016 15:40 5 14.85285 17.79933 91.0 -2.037041 \n", "3 01/11/2016 15:40 6 139.71000 63.40798 273.0 -55.802610 \n", "4 01/11/2016 15:40 7 37.40505 40.17854 161.0 -145.587500 \n", "\n", " ws_av ws_1 ws_2 wd_av wd_1 wd_2 gen_sp \\\n", "0 4.088211 4.126762 4.088211 290.4030 -17.849820 -4.077392 797.0712 \n", "1 4.745911 5.071102 4.745911 316.6896 -15.205220 -6.955624 814.9814 \n", "2 2.886049 0.000000 2.886049 259.8634 0.000000 -2.367190 780.9207 \n", "3 4.517820 4.584790 4.517820 282.0574 -180.000000 -6.154136 816.2123 \n", "4 3.540881 3.540881 3.369349 228.9863 -3.396181 -4.141267 787.7513 \n", "\n", " rs_av nac_pos pitch runtime \n", "0 0.0 294.4802 0.0 600.0 \n", "1 0.0 323.6452 0.0 600.0 \n", "2 0.0 262.2312 0.0 600.0 \n", "3 0.0 288.2119 0.0 600.0 \n", "4 0.0 232.3828 0.0 600.0 " ] }, "execution_count": 3, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scada[0].head(5)" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampturbineap_avap_devap_minreactive_powerws_avws_1ws_2wd_avwd_1wd_2gen_sprs_avnac_pospitchruntime
02014-11-01 02:20:008204.682474.51731419.0-38.030385.0292915.0292915.152810204.47852.978069-5.033260836.952112.28072201.50000.0600.0
12014-11-01 02:20:009228.119882.34712400.091.854195.5069185.5069185.225697215.67160.671585-0.585945845.920112.40917215.00000.0600.0
22014-11-01 02:20:0010159.302464.77182302.0-932.910805.4423065.4423065.653325223.25632.3569110.643579822.55470.00000220.90000.0600.0
32014-11-01 02:20:0011119.305531.69469199.0-146.529104.2819904.2819904.095518224.03600.8529581.169835808.556011.84853223.18320.0600.0
42014-11-01 02:20:0012168.368985.72588387.077.636214.9621064.9621064.764067240.3239-3.776284-5.501996942.053813.78513244.10000.0600.0
\n", "
" ], "text/plain": [ " timestamp turbine ap_av ap_dev ap_min reactive_power \\\n", "0 2014-11-01 02:20:00 8 204.6824 74.51731 419.0 -38.03038 \n", "1 2014-11-01 02:20:00 9 228.1198 82.34712 400.0 91.85419 \n", "2 2014-11-01 02:20:00 10 159.3024 64.77182 302.0 -932.91080 \n", "3 2014-11-01 02:20:00 11 119.3055 31.69469 199.0 -146.52910 \n", "4 2014-11-01 02:20:00 12 168.3689 85.72588 387.0 77.63621 \n", "\n", " ws_av ws_1 ws_2 wd_av wd_1 wd_2 gen_sp \\\n", "0 5.029291 5.029291 5.152810 204.4785 2.978069 -5.033260 836.9521 \n", "1 5.506918 5.506918 5.225697 215.6716 0.671585 -0.585945 845.9201 \n", "2 5.442306 5.442306 5.653325 223.2563 2.356911 0.643579 822.5547 \n", "3 4.281990 4.281990 4.095518 224.0360 0.852958 1.169835 808.5560 \n", "4 4.962106 4.962106 4.764067 240.3239 -3.776284 -5.501996 942.0538 \n", "\n", " rs_av nac_pos pitch runtime \n", "0 12.28072 201.5000 0.0 600.0 \n", "1 12.40917 215.0000 0.0 600.0 \n", "2 0.00000 220.9000 0.0 600.0 \n", "3 11.84853 223.1832 0.0 600.0 \n", "4 13.78513 244.1000 0.0 600.0 " ] }, "execution_count": 4, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scada[1].head(5)" ] }, { "cell_type": "code", "execution_count": 52, "metadata": {}, "outputs": [], "source": [ "# rename ap_min in scada[1] to ap_max\n", "scada[1].rename(columns={\"ap_min\": \"ap_max\"}, inplace=True)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [], "source": [ "# fixing rotor speed readings due to errors in data\n", "def fix_rs(c):\n", " if c[\"turbine\"] <= 20:\n", " return c[\"rs_av\"]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "for df in scada.keys():\n", " scada[df][\"rs_av_old\"] = scada[df].apply(fix_rs, axis=1)\n", " scada[df] = scada[df].drop(\"rs_av\", axis=1)" ] }, { "cell_type": "code", "execution_count": 32, "metadata": {}, "outputs": [], "source": [ "# concatenate old datasets\n", "scadaOld = pd.concat(scada.values())" ] }, { "cell_type": 
"code", "execution_count": 33, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampturbineap_avap_devap_maxreactive_powerws_avws_1ws_2wd_avwd_1wd_2gen_spnac_pospitchruntimers_av_old
001/11/2016 15:40380.3757229.46033178.0-9.2927994.0882114.1267624.088211290.4030-17.849820-4.077392797.0712294.48020.0600.00.0
101/11/2016 15:404141.6858053.87635242.01.3393214.7459115.0711024.745911316.6896-15.205220-6.955624814.9814323.64520.0600.00.0
201/11/2016 15:40514.8528517.7993391.0-2.0370412.8860490.0000002.886049259.86340.000000-2.367190780.9207262.23120.0600.00.0
301/11/2016 15:406139.7100063.40798273.0-55.8026104.5178204.5847904.517820282.0574-180.000000-6.154136816.2123288.21190.0600.00.0
401/11/2016 15:40737.4050540.17854161.0-145.5875003.5408813.5408813.369349228.9863-3.396181-4.141267787.7513232.38280.0600.00.0
\n", "
" ], "text/plain": [ " timestamp turbine ap_av ap_dev ap_max reactive_power \\\n", "0 01/11/2016 15:40 3 80.37572 29.46033 178.0 -9.292799 \n", "1 01/11/2016 15:40 4 141.68580 53.87635 242.0 1.339321 \n", "2 01/11/2016 15:40 5 14.85285 17.79933 91.0 -2.037041 \n", "3 01/11/2016 15:40 6 139.71000 63.40798 273.0 -55.802610 \n", "4 01/11/2016 15:40 7 37.40505 40.17854 161.0 -145.587500 \n", "\n", " ws_av ws_1 ws_2 wd_av wd_1 wd_2 gen_sp \\\n", "0 4.088211 4.126762 4.088211 290.4030 -17.849820 -4.077392 797.0712 \n", "1 4.745911 5.071102 4.745911 316.6896 -15.205220 -6.955624 814.9814 \n", "2 2.886049 0.000000 2.886049 259.8634 0.000000 -2.367190 780.9207 \n", "3 4.517820 4.584790 4.517820 282.0574 -180.000000 -6.154136 816.2123 \n", "4 3.540881 3.540881 3.369349 228.9863 -3.396181 -4.141267 787.7513 \n", "\n", " nac_pos pitch runtime rs_av_old \n", "0 294.4802 0.0 600.0 0.0 \n", "1 323.6452 0.0 600.0 0.0 \n", "2 262.2312 0.0 600.0 0.0 \n", "3 288.2119 0.0 600.0 0.0 \n", "4 232.3828 0.0 600.0 0.0 " ] }, "execution_count": 33, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scadaOld.head(5)" ] }, { "cell_type": "code", "execution_count": 35, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "0 data/NS_SCADA_2017_v2.csv (543425, 16)\n", "1 data/NS_SCADA_v2.csv (3064604, 16)\n" ] } ], "source": [ "# new SCADA data\n", "scada = {}\n", "# glob returns paths in arbitrary order; sort them so that scada[0] and\n", "# scada[1] refer to the same files on every run\n", "scadaList = sorted(glob.glob(\"data/NS_SCADA*.csv\"))\n", "for num, path in enumerate(scadaList):\n", "    scada[num] = pd.read_csv(path)\n", "    print(num, path, scada[num].shape)" ] }, { "cell_type": "code", "execution_count": 36, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampturbine_idap_avap_devap_maxreactive_powerws_avws_1ws_2wd_avwd_1wd_2gen_sprs_avnac_pospitch
02017-01-01 00:00:00.0001275.567679.71668455-128.1261005.9501706.0655215.950170347.890903.2166853.490916868.209412.78084344.400000.0
12017-01-01 00:00:00.0002280.952358.14750433-141.4628005.9201395.9201390.011563125.380901.3810444.931978871.991112.73835124.000000.0
22017-01-01 00:00:00.0003307.096654.00394449-72.8269706.4080665.7019106.40806612.70238-9.5184352.202345878.987712.9215410.500000.0
32017-01-01 00:00:00.0004348.8079112.206406331.5845036.2698176.5690556.26981718.58538-6.8219292.269212909.992613.3974716.316180.0
42017-01-01 00:00:00.0005315.882760.93330538-95.8382705.8482610.0000005.848261319.472700.0000005.373057882.635812.97570314.100000.0
\n", "
" ], "text/plain": [ " timestamp turbine_id ap_av ap_dev ap_max \\\n", "0 2017-01-01 00:00:00.000 1 275.5676 79.71668 455 \n", "1 2017-01-01 00:00:00.000 2 280.9523 58.14750 433 \n", "2 2017-01-01 00:00:00.000 3 307.0966 54.00394 449 \n", "3 2017-01-01 00:00:00.000 4 348.8079 112.20640 633 \n", "4 2017-01-01 00:00:00.000 5 315.8827 60.93330 538 \n", "\n", " reactive_power ws_av ws_1 ws_2 wd_av wd_1 \\\n", "0 -128.126100 5.950170 6.065521 5.950170 347.89090 3.216685 \n", "1 -141.462800 5.920139 5.920139 0.011563 125.38090 1.381044 \n", "2 -72.826970 6.408066 5.701910 6.408066 12.70238 -9.518435 \n", "3 1.584503 6.269817 6.569055 6.269817 18.58538 -6.821929 \n", "4 -95.838270 5.848261 0.000000 5.848261 319.47270 0.000000 \n", "\n", " wd_2 gen_sp rs_av nac_pos pitch \n", "0 3.490916 868.2094 12.78084 344.40000 0.0 \n", "1 4.931978 871.9911 12.73835 124.00000 0.0 \n", "2 2.202345 878.9877 12.92154 10.50000 0.0 \n", "3 2.269212 909.9926 13.39747 16.31618 0.0 \n", "4 5.373057 882.6358 12.97570 314.10000 0.0 " ] }, "execution_count": 36, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scada[0].head(5)" ] }, { "cell_type": "code", "execution_count": 37, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampturbine_idap_avap_devap_maxreactive_powerws_avws_1ws_2wd_avwd_1wd_2gen_sprs_avnac_pospitch
02015-01-01 00:00:00.00011478.246269.615117800.18136110.9073910.9073911.467810246.67472.7305454.8841121142.7950016.754220243.94383.475755
12015-01-01 00:00:00.00022237.340146.20532501-8.89993314.1628314.162830.017031211.18323.6175299.5061441155.9110016.886540207.56535.934744
22015-01-01 00:00:00.00032076.400465.80872769334.47450012.8383412.8383413.927340242.88191.8661249.4658781141.4920016.771280241.01542.050259
32015-01-01 00:00:00.00040.0000.000000.00000013.1827613.1827613.569020164.892228.99234035.90561026.761130.481272135.900087.000000
42015-01-01 00:00:00.00051648.039223.36871906251.51540011.8492911.2166511.849290223.8225-68.4558302.7342301146.7210016.831610221.08883.055791
\n", "
" ], "text/plain": [ " timestamp turbine_id ap_av ap_dev ap_max \\\n", "0 2015-01-01 00:00:00.000 1 1478.246 269.6151 1780 \n", "1 2015-01-01 00:00:00.000 2 2237.340 146.2053 2501 \n", "2 2015-01-01 00:00:00.000 3 2076.400 465.8087 2769 \n", "3 2015-01-01 00:00:00.000 4 0.000 0.0000 0 \n", "4 2015-01-01 00:00:00.000 5 1648.039 223.3687 1906 \n", "\n", " reactive_power ws_av ws_1 ws_2 wd_av wd_1 \\\n", "0 0.181361 10.90739 10.90739 11.467810 246.6747 2.730545 \n", "1 -8.899933 14.16283 14.16283 0.017031 211.1832 3.617529 \n", "2 334.474500 12.83834 12.83834 13.927340 242.8819 1.866124 \n", "3 0.000000 13.18276 13.18276 13.569020 164.8922 28.992340 \n", "4 251.515400 11.84929 11.21665 11.849290 223.8225 -68.455830 \n", "\n", " wd_2 gen_sp rs_av nac_pos pitch \n", "0 4.884112 1142.79500 16.754220 243.9438 3.475755 \n", "1 9.506144 1155.91100 16.886540 207.5653 5.934744 \n", "2 9.465878 1141.49200 16.771280 241.0154 2.050259 \n", "3 35.905610 26.76113 0.481272 135.9000 87.000000 \n", "4 2.734230 1146.72100 16.831610 221.0888 3.055791 " ] }, "execution_count": 37, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scada[1].head(5)" ] }, { "cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [], "source": [ "# concatenate new datasets\n", "scadaNew = pd.concat(scada.values())" ] }, { "cell_type": "code", "execution_count": 39, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
timestampturbine_idap_avap_devap_maxreactive_powerws_avws_1ws_2wd_avwd_1wd_2gen_sprs_avnac_pospitch
02017-01-01 00:00:00.0001275.567679.71668455-128.1261005.9501706.0655215.950170347.890903.2166853.490916868.209412.78084344.400000.0
12017-01-01 00:00:00.0002280.952358.14750433-141.4628005.9201395.9201390.011563125.380901.3810444.931978871.991112.73835124.000000.0
22017-01-01 00:00:00.0003307.096654.00394449-72.8269706.4080665.7019106.40806612.70238-9.5184352.202345878.987712.9215410.500000.0
32017-01-01 00:00:00.0004348.8079112.206406331.5845036.2698176.5690556.26981718.58538-6.8219292.269212909.992613.3974716.316180.0
42017-01-01 00:00:00.0005315.882760.93330538-95.8382705.8482610.0000005.848261319.472700.0000005.373057882.635812.97570314.100000.0
\n", "
" ], "text/plain": [ " timestamp turbine_id ap_av ap_dev ap_max \\\n", "0 2017-01-01 00:00:00.000 1 275.5676 79.71668 455 \n", "1 2017-01-01 00:00:00.000 2 280.9523 58.14750 433 \n", "2 2017-01-01 00:00:00.000 3 307.0966 54.00394 449 \n", "3 2017-01-01 00:00:00.000 4 348.8079 112.20640 633 \n", "4 2017-01-01 00:00:00.000 5 315.8827 60.93330 538 \n", "\n", " reactive_power ws_av ws_1 ws_2 wd_av wd_1 \\\n", "0 -128.126100 5.950170 6.065521 5.950170 347.89090 3.216685 \n", "1 -141.462800 5.920139 5.920139 0.011563 125.38090 1.381044 \n", "2 -72.826970 6.408066 5.701910 6.408066 12.70238 -9.518435 \n", "3 1.584503 6.269817 6.569055 6.269817 18.58538 -6.821929 \n", "4 -95.838270 5.848261 0.000000 5.848261 319.47270 0.000000 \n", "\n", " wd_2 gen_sp rs_av nac_pos pitch \n", "0 3.490916 868.2094 12.78084 344.40000 0.0 \n", "1 4.931978 871.9911 12.73835 124.00000 0.0 \n", "2 2.202345 878.9877 12.92154 10.50000 0.0 \n", "3 2.269212 909.9926 13.39747 16.31618 0.0 \n", "4 5.373057 882.6358 12.97570 314.10000 0.0 " ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [ "scadaNew.head(5)" ] } ], "metadata": { "interpreter": { "hash": "9181ee88e849e721a02348715c04c9aa1108ae16ca1904107a83d5bfc178fda8" }, "kernelspec": { "display_name": "Python 3.9.5 64-bit ('env': venv)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.10.4" } }, "nbformat": 4, "nbformat_minor": 4 }