{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Process SCADA and downtime data\n",
"\n",
"This notebook demonstrates the merging of all four CSV files containing SCADA\n",
"and downtime data into single dataframes. Two files are older datasets, and\n",
"the other two are newer datasets. Both old and new datasets have most of their\n",
"timestamps in common.\n",
"\n",
"The older SCADA datasets were found to have some errors in the rotor speed\n",
"readings. This merging replaces the old errorred data points with the new\n",
"ones, and removes incomplete rows.\n",
"\n",
"This merging ensures the downtime data has the same range as the SCADA data,\n",
"and removes incomplete rows."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"# import libraries\n",
"import os\n",
"import glob\n",
"import itertools\n",
"import pandas as pd\n",
"import numpy as np"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"# create directory to store processed data\n",
"os.makedirs(\"data/processed/\", exist_ok=True)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Downtime categories"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
"# read and view data\n",
"data = pd.read_excel(\"data/Melogale Downtime Categories.xlsx\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Turbine Categories | \n",
" Unnamed: 1 | \n",
" Environmental Categories | \n",
" Unnamed: 3 | \n",
" Grid Categories | \n",
" Unnamed: 5 | \n",
" Infrastructure Categories | \n",
" Unnamed: 7 | \n",
" Availability Categories | \n",
" Unnamed: 9 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" id | \n",
" OLD name | \n",
" id | \n",
" name | \n",
" id | \n",
" name | \n",
" id | \n",
" name | \n",
" id | \n",
" name | \n",
"
\n",
" \n",
" 1 | \n",
" 0 | \n",
" Unknown | \n",
" 0 | \n",
" Unknown | \n",
" 0 | \n",
" Unknown | \n",
" 0 | \n",
" Unknown | \n",
" 0 | \n",
" Unknown | \n",
"
\n",
" \n",
" 2 | \n",
" 1 | \n",
" OK | \n",
" 1 | \n",
" OK | \n",
" 1 | \n",
" OK | \n",
" 1 | \n",
" OK | \n",
" 1 | \n",
" Available/Non-penalising | \n",
"
\n",
" \n",
" 3 | \n",
" 2 | \n",
" Anemometry | \n",
" 2 | \n",
" High Wind Shutdown | \n",
" 2 | \n",
" Planned Outage | \n",
" 2 | \n",
" Planned Outage | \n",
" 2 | \n",
" NOT available/penalising | \n",
"
\n",
" \n",
" 4 | \n",
" 3 | \n",
" Rotor Brake | \n",
" 3 | \n",
" Icing | \n",
" 3 | \n",
" Unplanned Outage | \n",
" 3 | \n",
" Unplanned Outage | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 5 | \n",
" 4 | \n",
" Main Shaft | \n",
" 4 | \n",
" Turbulence | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 6 | \n",
" 5 | \n",
" Gearbox | \n",
" 5 | \n",
" Work Halted | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 7 | \n",
" 6 | \n",
" Generator | \n",
" 6 | \n",
" Lightning | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 8 | \n",
" 7 | \n",
" Yaw System | \n",
" 7 | \n",
" Sector Management | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 9 | \n",
" 8 | \n",
" Electrical Controls | \n",
" 8 | \n",
" Low Temperature | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 10 | \n",
" 9 | \n",
" Hydraulics | \n",
" 11 | \n",
" Low Wind Shutdown | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 11 | \n",
" 10 | \n",
" Electrical System | \n",
" 12 | \n",
" Shadow casting stop | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 12 | \n",
" 11 | \n",
" Pitch Control | \n",
" 13 | \n",
" Ambient temperature high | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 13 | \n",
" 12 | \n",
" Unlogged manual stop | \n",
" 14 | \n",
" Cable unwind | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 14 | \n",
" 13 | \n",
" Customer Stop | \n",
" 15 | \n",
" Wildlife | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 15 | \n",
" 14 | \n",
" Noise Constraints | \n",
" 16 | \n",
" Other | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 16 | \n",
" 15 | \n",
" Scheduled Maintenance | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 17 | \n",
" 16 | \n",
" Tower | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 18 | \n",
" 17 | \n",
" Retrofit | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 19 | \n",
" 18 | \n",
" Cable Unwind | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 20 | \n",
" 19 | \n",
" Hub | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 21 | \n",
" 20 | \n",
" Rotor Blades | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 22 | \n",
" 21 | \n",
" Delayed Startup | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 23 | \n",
" 22 | \n",
" Other | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Turbine Categories Unnamed: 1 Environmental Categories \\\n",
"0 id OLD name id \n",
"1 0 Unknown 0 \n",
"2 1 OK 1 \n",
"3 2 Anemometry 2 \n",
"4 3 Rotor Brake 3 \n",
"5 4 Main Shaft 4 \n",
"6 5 Gearbox 5 \n",
"7 6 Generator 6 \n",
"8 7 Yaw System 7 \n",
"9 8 Electrical Controls 8 \n",
"10 9 Hydraulics 11 \n",
"11 10 Electrical System 12 \n",
"12 11 Pitch Control 13 \n",
"13 12 Unlogged manual stop 14 \n",
"14 13 Customer Stop 15 \n",
"15 14 Noise Constraints 16 \n",
"16 15 Scheduled Maintenance NaN \n",
"17 16 Tower NaN \n",
"18 17 Retrofit NaN \n",
"19 18 Cable Unwind NaN \n",
"20 19 Hub NaN \n",
"21 20 Rotor Blades NaN \n",
"22 21 Delayed Startup NaN \n",
"23 22 Other NaN \n",
"\n",
" Unnamed: 3 Grid Categories Unnamed: 5 \\\n",
"0 name id name \n",
"1 Unknown 0 Unknown \n",
"2 OK 1 OK \n",
"3 High Wind Shutdown 2 Planned Outage \n",
"4 Icing 3 Unplanned Outage \n",
"5 Turbulence NaN NaN \n",
"6 Work Halted NaN NaN \n",
"7 Lightning NaN NaN \n",
"8 Sector Management NaN NaN \n",
"9 Low Temperature NaN NaN \n",
"10 Low Wind Shutdown NaN NaN \n",
"11 Shadow casting stop NaN NaN \n",
"12 Ambient temperature high NaN NaN \n",
"13 Cable unwind NaN NaN \n",
"14 Wildlife NaN NaN \n",
"15 Other NaN NaN \n",
"16 NaN NaN NaN \n",
"17 NaN NaN NaN \n",
"18 NaN NaN NaN \n",
"19 NaN NaN NaN \n",
"20 NaN NaN NaN \n",
"21 NaN NaN NaN \n",
"22 NaN NaN NaN \n",
"23 NaN NaN NaN \n",
"\n",
" Infrastructure Categories Unnamed: 7 Availability Categories \\\n",
"0 id name id \n",
"1 0 Unknown 0 \n",
"2 1 OK 1 \n",
"3 2 Planned Outage 2 \n",
"4 3 Unplanned Outage NaN \n",
"5 NaN NaN NaN \n",
"6 NaN NaN NaN \n",
"7 NaN NaN NaN \n",
"8 NaN NaN NaN \n",
"9 NaN NaN NaN \n",
"10 NaN NaN NaN \n",
"11 NaN NaN NaN \n",
"12 NaN NaN NaN \n",
"13 NaN NaN NaN \n",
"14 NaN NaN NaN \n",
"15 NaN NaN NaN \n",
"16 NaN NaN NaN \n",
"17 NaN NaN NaN \n",
"18 NaN NaN NaN \n",
"19 NaN NaN NaN \n",
"20 NaN NaN NaN \n",
"21 NaN NaN NaN \n",
"22 NaN NaN NaN \n",
"23 NaN NaN NaN \n",
"\n",
" Unnamed: 9 \n",
"0 name \n",
"1 Unknown \n",
"2 Available/Non-penalising \n",
"3 NOT available/penalising \n",
"4 NaN \n",
"5 NaN \n",
"6 NaN \n",
"7 NaN \n",
"8 NaN \n",
"9 NaN \n",
"10 NaN \n",
"11 NaN \n",
"12 NaN \n",
"13 NaN \n",
"14 NaN \n",
"15 NaN \n",
"16 NaN \n",
"17 NaN \n",
"18 NaN \n",
"19 NaN \n",
"20 NaN \n",
"21 NaN \n",
"22 NaN \n",
"23 NaN "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
"# drop first row\n",
"data = data.drop([0])"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# function to filter data for each category type\n",
"catData = {}\n",
"\n",
"\n",
"def categorise_data(cat, number):\n",
" catData[cat] = data.filter(\n",
" items=[cat+\" Categories\", \"Unnamed: \"+str(number)]\n",
" )\n",
" catData[cat].rename(\n",
" columns={\n",
" cat+\" Categories\": \"Category\", \"Unnamed: \"+str(number): \"Name\"\n",
" },\n",
" inplace=True\n",
" )\n",
" catData[cat][\"Type\"] = cat\n",
" catData[cat].dropna(inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# filtering\n",
"categorise_data(\"Turbine\", 1)\n",
"categorise_data(\"Environmental\", 3)\n",
"categorise_data(\"Grid\", 5)\n",
"categorise_data(\"Infrastructure\", 7)\n",
"categorise_data(\"Availability\", 9)"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# concatenate data\n",
"data = pd.concat(catData.values(), ignore_index=True)"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Category | \n",
" Name | \n",
" Type | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0 | \n",
" Unknown | \n",
" Turbine | \n",
"
\n",
" \n",
" 1 | \n",
" 1 | \n",
" OK | \n",
" Turbine | \n",
"
\n",
" \n",
" 2 | \n",
" 2 | \n",
" Anemometry | \n",
" Turbine | \n",
"
\n",
" \n",
" 3 | \n",
" 3 | \n",
" Rotor Brake | \n",
" Turbine | \n",
"
\n",
" \n",
" 4 | \n",
" 4 | \n",
" Main Shaft | \n",
" Turbine | \n",
"
\n",
" \n",
" 5 | \n",
" 5 | \n",
" Gearbox | \n",
" Turbine | \n",
"
\n",
" \n",
" 6 | \n",
" 6 | \n",
" Generator | \n",
" Turbine | \n",
"
\n",
" \n",
" 7 | \n",
" 7 | \n",
" Yaw System | \n",
" Turbine | \n",
"
\n",
" \n",
" 8 | \n",
" 8 | \n",
" Electrical Controls | \n",
" Turbine | \n",
"
\n",
" \n",
" 9 | \n",
" 9 | \n",
" Hydraulics | \n",
" Turbine | \n",
"
\n",
" \n",
" 10 | \n",
" 10 | \n",
" Electrical System | \n",
" Turbine | \n",
"
\n",
" \n",
" 11 | \n",
" 11 | \n",
" Pitch Control | \n",
" Turbine | \n",
"
\n",
" \n",
" 12 | \n",
" 12 | \n",
" Unlogged manual stop | \n",
" Turbine | \n",
"
\n",
" \n",
" 13 | \n",
" 13 | \n",
" Customer Stop | \n",
" Turbine | \n",
"
\n",
" \n",
" 14 | \n",
" 14 | \n",
" Noise Constraints | \n",
" Turbine | \n",
"
\n",
" \n",
" 15 | \n",
" 15 | \n",
" Scheduled Maintenance | \n",
" Turbine | \n",
"
\n",
" \n",
" 16 | \n",
" 16 | \n",
" Tower | \n",
" Turbine | \n",
"
\n",
" \n",
" 17 | \n",
" 17 | \n",
" Retrofit | \n",
" Turbine | \n",
"
\n",
" \n",
" 18 | \n",
" 18 | \n",
" Cable Unwind | \n",
" Turbine | \n",
"
\n",
" \n",
" 19 | \n",
" 19 | \n",
" Hub | \n",
" Turbine | \n",
"
\n",
" \n",
" 20 | \n",
" 20 | \n",
" Rotor Blades | \n",
" Turbine | \n",
"
\n",
" \n",
" 21 | \n",
" 21 | \n",
" Delayed Startup | \n",
" Turbine | \n",
"
\n",
" \n",
" 22 | \n",
" 22 | \n",
" Other | \n",
" Turbine | \n",
"
\n",
" \n",
" 23 | \n",
" 0 | \n",
" Unknown | \n",
" Environmental | \n",
"
\n",
" \n",
" 24 | \n",
" 1 | \n",
" OK | \n",
" Environmental | \n",
"
\n",
" \n",
" 25 | \n",
" 2 | \n",
" High Wind Shutdown | \n",
" Environmental | \n",
"
\n",
" \n",
" 26 | \n",
" 3 | \n",
" Icing | \n",
" Environmental | \n",
"
\n",
" \n",
" 27 | \n",
" 4 | \n",
" Turbulence | \n",
" Environmental | \n",
"
\n",
" \n",
" 28 | \n",
" 5 | \n",
" Work Halted | \n",
" Environmental | \n",
"
\n",
" \n",
" 29 | \n",
" 6 | \n",
" Lightning | \n",
" Environmental | \n",
"
\n",
" \n",
" 30 | \n",
" 7 | \n",
" Sector Management | \n",
" Environmental | \n",
"
\n",
" \n",
" 31 | \n",
" 8 | \n",
" Low Temperature | \n",
" Environmental | \n",
"
\n",
" \n",
" 32 | \n",
" 11 | \n",
" Low Wind Shutdown | \n",
" Environmental | \n",
"
\n",
" \n",
" 33 | \n",
" 12 | \n",
" Shadow casting stop | \n",
" Environmental | \n",
"
\n",
" \n",
" 34 | \n",
" 13 | \n",
" Ambient temperature high | \n",
" Environmental | \n",
"
\n",
" \n",
" 35 | \n",
" 14 | \n",
" Cable unwind | \n",
" Environmental | \n",
"
\n",
" \n",
" 36 | \n",
" 15 | \n",
" Wildlife | \n",
" Environmental | \n",
"
\n",
" \n",
" 37 | \n",
" 16 | \n",
" Other | \n",
" Environmental | \n",
"
\n",
" \n",
" 38 | \n",
" 0 | \n",
" Unknown | \n",
" Grid | \n",
"
\n",
" \n",
" 39 | \n",
" 1 | \n",
" OK | \n",
" Grid | \n",
"
\n",
" \n",
" 40 | \n",
" 2 | \n",
" Planned Outage | \n",
" Grid | \n",
"
\n",
" \n",
" 41 | \n",
" 3 | \n",
" Unplanned Outage | \n",
" Grid | \n",
"
\n",
" \n",
" 42 | \n",
" 0 | \n",
" Unknown | \n",
" Infrastructure | \n",
"
\n",
" \n",
" 43 | \n",
" 1 | \n",
" OK | \n",
" Infrastructure | \n",
"
\n",
" \n",
" 44 | \n",
" 2 | \n",
" Planned Outage | \n",
" Infrastructure | \n",
"
\n",
" \n",
" 45 | \n",
" 3 | \n",
" Unplanned Outage | \n",
" Infrastructure | \n",
"
\n",
" \n",
" 46 | \n",
" 0 | \n",
" Unknown | \n",
" Availability | \n",
"
\n",
" \n",
" 47 | \n",
" 1 | \n",
" Available/Non-penalising | \n",
" Availability | \n",
"
\n",
" \n",
" 48 | \n",
" 2 | \n",
" NOT available/penalising | \n",
" Availability | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Category Name Type\n",
"0 0 Unknown Turbine\n",
"1 1 OK Turbine\n",
"2 2 Anemometry Turbine\n",
"3 3 Rotor Brake Turbine\n",
"4 4 Main Shaft Turbine\n",
"5 5 Gearbox Turbine\n",
"6 6 Generator Turbine\n",
"7 7 Yaw System Turbine\n",
"8 8 Electrical Controls Turbine\n",
"9 9 Hydraulics Turbine\n",
"10 10 Electrical System Turbine\n",
"11 11 Pitch Control Turbine\n",
"12 12 Unlogged manual stop Turbine\n",
"13 13 Customer Stop Turbine\n",
"14 14 Noise Constraints Turbine\n",
"15 15 Scheduled Maintenance Turbine\n",
"16 16 Tower Turbine\n",
"17 17 Retrofit Turbine\n",
"18 18 Cable Unwind Turbine\n",
"19 19 Hub Turbine\n",
"20 20 Rotor Blades Turbine\n",
"21 21 Delayed Startup Turbine\n",
"22 22 Other Turbine\n",
"23 0 Unknown Environmental\n",
"24 1 OK Environmental\n",
"25 2 High Wind Shutdown Environmental\n",
"26 3 Icing Environmental\n",
"27 4 Turbulence Environmental\n",
"28 5 Work Halted Environmental\n",
"29 6 Lightning Environmental\n",
"30 7 Sector Management Environmental\n",
"31 8 Low Temperature Environmental\n",
"32 11 Low Wind Shutdown Environmental\n",
"33 12 Shadow casting stop Environmental\n",
"34 13 Ambient temperature high Environmental\n",
"35 14 Cable unwind Environmental\n",
"36 15 Wildlife Environmental\n",
"37 16 Other Environmental\n",
"38 0 Unknown Grid\n",
"39 1 OK Grid\n",
"40 2 Planned Outage Grid\n",
"41 3 Unplanned Outage Grid\n",
"42 0 Unknown Infrastructure\n",
"43 1 OK Infrastructure\n",
"44 2 Planned Outage Infrastructure\n",
"45 3 Unplanned Outage Infrastructure\n",
"46 0 Unknown Availability\n",
"47 1 Available/Non-penalising Availability\n",
"48 2 NOT available/penalising Availability"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Downtime time series"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 data/Last_six_months_downtime.csv (5367, 12)\n",
"1 data/Prior_two_years_downtime.csv (16958, 24)\n"
]
}
],
"source": [
"dt = {}\n",
"dtList = glob.glob(\"data/*downtime*.csv\")\n",
"for num, df in enumerate(dtList):\n",
" dt[num] = pd.read_csv(df)\n",
" print(num, df, dt[num].shape)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" timestamp_start | \n",
" timestamp_end | \n",
" turbine_id | \n",
" alarm_id | \n",
" GridCategory_id | \n",
" InfrastructureCategory_id | \n",
" EnvironmentalCategory_id | \n",
" TurbineCategory_id | \n",
" AvailabilityCategory_id | \n",
" comment | \n",
" workorder_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 715463 | \n",
" 2016-10-05 11:27:58.000 | \n",
" 2016-10-05 11:28:37.000 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 715464 | \n",
" 2016-10-05 11:28:37.000 | \n",
" 2016-10-05 11:58:15.000 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 715465 | \n",
" 2016-10-05 11:58:15.000 | \n",
" 2016-10-05 12:00:04.000 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 715466 | \n",
" 2016-10-05 12:00:50.000 | \n",
" 2016-10-05 12:48:00.000 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 715467 | \n",
" 2016-10-26 04:56:44.000 | \n",
" 2016-10-26 05:20:00.000 | \n",
" 18 | \n",
" 391 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 18 | \n",
" 1 | \n",
" FM1003Yaw CableRewind | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id timestamp_start timestamp_end turbine_id \\\n",
"0 715463 2016-10-05 11:27:58.000 2016-10-05 11:28:37.000 18 \n",
"1 715464 2016-10-05 11:28:37.000 2016-10-05 11:58:15.000 18 \n",
"2 715465 2016-10-05 11:58:15.000 2016-10-05 12:00:04.000 18 \n",
"3 715466 2016-10-05 12:00:50.000 2016-10-05 12:48:00.000 18 \n",
"4 715467 2016-10-26 04:56:44.000 2016-10-26 05:20:00.000 18 \n",
"\n",
" alarm_id GridCategory_id InfrastructureCategory_id \\\n",
"0 5 1 1 \n",
"1 5 1 1 \n",
"2 5 1 1 \n",
"3 5 1 1 \n",
"4 391 1 1 \n",
"\n",
" EnvironmentalCategory_id TurbineCategory_id AvailabilityCategory_id \\\n",
"0 1 13 1 \n",
"1 1 13 1 \n",
"2 1 13 1 \n",
"3 1 13 1 \n",
"4 1 18 1 \n",
"\n",
" comment workorder_id \n",
"0 Advanced rescue procedures 108128.0 \n",
"1 Advanced rescue procedures 108128.0 \n",
"2 Advanced rescue procedures 108128.0 \n",
"3 Advanced rescue procedures 108128.0 \n",
"4 FM1003Yaw CableRewind NaN "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt[0].head(5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" timestamp_start | \n",
" timestamp_end | \n",
" turbine_id | \n",
" alarm_id | \n",
" GridCategory_id | \n",
" InfrastructureCategory_id | \n",
" EnvironmentalCategory_id | \n",
" TurbineCategory_id | \n",
" AvailabilityCategory_id | \n",
" ... | \n",
" timestamp_end.1 | \n",
" turbine_id.1 | \n",
" alarm_id.1 | \n",
" GridCategory_id.1 | \n",
" InfrastructureCategory_id.1 | \n",
" EnvironmentalCategory_id.1 | \n",
" TurbineCategory_id.1 | \n",
" AvailabilityCategory_id.1 | \n",
" comment.1 | \n",
" workorder_id.1 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 85851 | \n",
" 2015-01-09 01:31:23.000 | \n",
" 2015-01-09 01:42:07.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" ... | \n",
" 2015-01-09 01:42:07.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 1 | \n",
" 85852 | \n",
" 2015-01-09 02:01:28.000 | \n",
" 2015-01-09 02:06:43.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" ... | \n",
" 2015-01-09 02:06:43.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 2 | \n",
" 85853 | \n",
" 2015-01-09 03:08:50.000 | \n",
" 2015-01-09 03:20:00.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" ... | \n",
" 2015-01-09 03:20:00.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 3 | \n",
" 85854 | \n",
" 2015-01-09 04:21:58.000 | \n",
" 2015-01-09 05:30:00.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" ... | \n",
" 2015-01-09 05:30:00.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
" 4 | \n",
" 85855 | \n",
" 2015-01-09 05:43:41.000 | \n",
" 2015-01-09 05:54:57.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" ... | \n",
" 2015-01-09 05:54:57.000 | \n",
" 6 | \n",
" 389 | \n",
" 1 | \n",
" 1 | \n",
" 2 | \n",
" 1 | \n",
" 1 | \n",
" NaN | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
5 rows × 24 columns
\n",
"
"
],
"text/plain": [
" id timestamp_start timestamp_end turbine_id \\\n",
"0 85851 2015-01-09 01:31:23.000 2015-01-09 01:42:07.000 6 \n",
"1 85852 2015-01-09 02:01:28.000 2015-01-09 02:06:43.000 6 \n",
"2 85853 2015-01-09 03:08:50.000 2015-01-09 03:20:00.000 6 \n",
"3 85854 2015-01-09 04:21:58.000 2015-01-09 05:30:00.000 6 \n",
"4 85855 2015-01-09 05:43:41.000 2015-01-09 05:54:57.000 6 \n",
"\n",
" alarm_id GridCategory_id InfrastructureCategory_id \\\n",
"0 389 1 1 \n",
"1 389 1 1 \n",
"2 389 1 1 \n",
"3 389 1 1 \n",
"4 389 1 1 \n",
"\n",
" EnvironmentalCategory_id TurbineCategory_id AvailabilityCategory_id ... \\\n",
"0 2 1 1 ... \n",
"1 2 1 1 ... \n",
"2 2 1 1 ... \n",
"3 2 1 1 ... \n",
"4 2 1 1 ... \n",
"\n",
" timestamp_end.1 turbine_id.1 alarm_id.1 GridCategory_id.1 \\\n",
"0 2015-01-09 01:42:07.000 6 389 1 \n",
"1 2015-01-09 02:06:43.000 6 389 1 \n",
"2 2015-01-09 03:20:00.000 6 389 1 \n",
"3 2015-01-09 05:30:00.000 6 389 1 \n",
"4 2015-01-09 05:54:57.000 6 389 1 \n",
"\n",
" InfrastructureCategory_id.1 EnvironmentalCategory_id.1 \\\n",
"0 1 2 \n",
"1 1 2 \n",
"2 1 2 \n",
"3 1 2 \n",
"4 1 2 \n",
"\n",
" TurbineCategory_id.1 AvailabilityCategory_id.1 comment.1 workorder_id.1 \n",
"0 1 1 NaN NaN \n",
"1 1 1 NaN NaN \n",
"2 1 1 NaN NaN \n",
"3 1 1 NaN NaN \n",
"4 1 1 NaN NaN \n",
"\n",
"[5 rows x 24 columns]"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt[1].head(5)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['id', 'timestamp_start', 'timestamp_end', 'turbine_id', 'alarm_id',\n",
" 'GridCategory_id', 'InfrastructureCategory_id',\n",
" 'EnvironmentalCategory_id', 'TurbineCategory_id',\n",
" 'AvailabilityCategory_id', 'comment', 'workorder_id', 'id.1',\n",
" 'timestamp_start.1', 'timestamp_end.1', 'turbine_id.1', 'alarm_id.1',\n",
" 'GridCategory_id.1', 'InfrastructureCategory_id.1',\n",
" 'EnvironmentalCategory_id.1', 'TurbineCategory_id.1',\n",
" 'AvailabilityCategory_id.1', 'comment.1', 'workorder_id.1'],\n",
" dtype='object')"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dt[1].columns"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
"# drop duplicate columns\n",
"dt[1].drop(columns=list(dt[1].filter(regex=\".1\")), inplace=True)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# convert timestamps to datetime data type\n",
"for key in dt.keys():\n",
" for col in list(dt[key].filter(regex=\"timestamp\")):\n",
" dt[key][col] = pd.to_datetime(dt[key][col])"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
"# concatenate data\n",
"data = pd.concat(dt.values(), join=\"outer\")"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" id | \n",
" timestamp_start | \n",
" timestamp_end | \n",
" turbine_id | \n",
" alarm_id | \n",
" GridCategory_id | \n",
" InfrastructureCategory_id | \n",
" EnvironmentalCategory_id | \n",
" TurbineCategory_id | \n",
" AvailabilityCategory_id | \n",
" comment | \n",
" workorder_id | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 715463 | \n",
" 2016-10-05 11:27:58 | \n",
" 2016-10-05 11:28:37 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 715464 | \n",
" 2016-10-05 11:28:37 | \n",
" 2016-10-05 11:58:15 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 715465 | \n",
" 2016-10-05 11:58:15 | \n",
" 2016-10-05 12:00:04 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 715466 | \n",
" 2016-10-05 12:00:50 | \n",
" 2016-10-05 12:48:00 | \n",
" 18 | \n",
" 5 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 13 | \n",
" 1 | \n",
" Advanced rescue procedures | \n",
" 108128.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 715467 | \n",
" 2016-10-26 04:56:44 | \n",
" 2016-10-26 05:20:00 | \n",
" 18 | \n",
" 391 | \n",
" 1 | \n",
" 1 | \n",
" 1 | \n",
" 18 | \n",
" 1 | \n",
" FM1003Yaw CableRewind | \n",
" NaN | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" id timestamp_start timestamp_end turbine_id alarm_id \\\n",
"0 715463 2016-10-05 11:27:58 2016-10-05 11:28:37 18 5 \n",
"1 715464 2016-10-05 11:28:37 2016-10-05 11:58:15 18 5 \n",
"2 715465 2016-10-05 11:58:15 2016-10-05 12:00:04 18 5 \n",
"3 715466 2016-10-05 12:00:50 2016-10-05 12:48:00 18 5 \n",
"4 715467 2016-10-26 04:56:44 2016-10-26 05:20:00 18 391 \n",
"\n",
" GridCategory_id InfrastructureCategory_id EnvironmentalCategory_id \\\n",
"0 1 1 1 \n",
"1 1 1 1 \n",
"2 1 1 1 \n",
"3 1 1 1 \n",
"4 1 1 1 \n",
"\n",
" TurbineCategory_id AvailabilityCategory_id comment \\\n",
"0 13 1 Advanced rescue procedures \n",
"1 13 1 Advanced rescue procedures \n",
"2 13 1 Advanced rescue procedures \n",
"3 13 1 Advanced rescue procedures \n",
"4 18 1 FM1003Yaw CableRewind \n",
"\n",
" workorder_id \n",
"0 108128.0 \n",
"1 108128.0 \n",
"2 108128.0 \n",
"3 108128.0 \n",
"4 NaN "
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.head(5)"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"(22325, 12)"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"data.shape"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## SCADA time series"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 data/Last_six_months_SCADA.csv (651600, 17)\n",
"1 data/Prior_two_years_SCADA.csv (2550346, 17)\n"
]
}
],
"source": [
"# old SCADA data\n",
"scada = {}\n",
"scadaList = glob.glob(\"data/*SCADA.csv\")\n",
"for num, df in enumerate(scadaList):\n",
" scada[num] = pd.read_csv(df)\n",
" print(num, df, scada[num].shape)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timestamp | \n",
" turbine | \n",
" ap_av | \n",
" ap_dev | \n",
" ap_max | \n",
" reactive_power | \n",
" ws_av | \n",
" ws_1 | \n",
" ws_2 | \n",
" wd_av | \n",
" wd_1 | \n",
" wd_2 | \n",
" gen_sp | \n",
" rs_av | \n",
" nac_pos | \n",
" pitch | \n",
" runtime | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 01/11/2016 15:40 | \n",
" 3 | \n",
" 80.37572 | \n",
" 29.46033 | \n",
" 178.0 | \n",
" -9.292799 | \n",
" 4.088211 | \n",
" 4.126762 | \n",
" 4.088211 | \n",
" 290.4030 | \n",
" -17.849820 | \n",
" -4.077392 | \n",
" 797.0712 | \n",
" 0.0 | \n",
" 294.4802 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 01/11/2016 15:40 | \n",
" 4 | \n",
" 141.68580 | \n",
" 53.87635 | \n",
" 242.0 | \n",
" 1.339321 | \n",
" 4.745911 | \n",
" 5.071102 | \n",
" 4.745911 | \n",
" 316.6896 | \n",
" -15.205220 | \n",
" -6.955624 | \n",
" 814.9814 | \n",
" 0.0 | \n",
" 323.6452 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 01/11/2016 15:40 | \n",
" 5 | \n",
" 14.85285 | \n",
" 17.79933 | \n",
" 91.0 | \n",
" -2.037041 | \n",
" 2.886049 | \n",
" 0.000000 | \n",
" 2.886049 | \n",
" 259.8634 | \n",
" 0.000000 | \n",
" -2.367190 | \n",
" 780.9207 | \n",
" 0.0 | \n",
" 262.2312 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 01/11/2016 15:40 | \n",
" 6 | \n",
" 139.71000 | \n",
" 63.40798 | \n",
" 273.0 | \n",
" -55.802610 | \n",
" 4.517820 | \n",
" 4.584790 | \n",
" 4.517820 | \n",
" 282.0574 | \n",
" -180.000000 | \n",
" -6.154136 | \n",
" 816.2123 | \n",
" 0.0 | \n",
" 288.2119 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 01/11/2016 15:40 | \n",
" 7 | \n",
" 37.40505 | \n",
" 40.17854 | \n",
" 161.0 | \n",
" -145.587500 | \n",
" 3.540881 | \n",
" 3.540881 | \n",
" 3.369349 | \n",
" 228.9863 | \n",
" -3.396181 | \n",
" -4.141267 | \n",
" 787.7513 | \n",
" 0.0 | \n",
" 232.3828 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp turbine ap_av ap_dev ap_max reactive_power \\\n",
"0 01/11/2016 15:40 3 80.37572 29.46033 178.0 -9.292799 \n",
"1 01/11/2016 15:40 4 141.68580 53.87635 242.0 1.339321 \n",
"2 01/11/2016 15:40 5 14.85285 17.79933 91.0 -2.037041 \n",
"3 01/11/2016 15:40 6 139.71000 63.40798 273.0 -55.802610 \n",
"4 01/11/2016 15:40 7 37.40505 40.17854 161.0 -145.587500 \n",
"\n",
" ws_av ws_1 ws_2 wd_av wd_1 wd_2 gen_sp \\\n",
"0 4.088211 4.126762 4.088211 290.4030 -17.849820 -4.077392 797.0712 \n",
"1 4.745911 5.071102 4.745911 316.6896 -15.205220 -6.955624 814.9814 \n",
"2 2.886049 0.000000 2.886049 259.8634 0.000000 -2.367190 780.9207 \n",
"3 4.517820 4.584790 4.517820 282.0574 -180.000000 -6.154136 816.2123 \n",
"4 3.540881 3.540881 3.369349 228.9863 -3.396181 -4.141267 787.7513 \n",
"\n",
" rs_av nac_pos pitch runtime \n",
"0 0.0 294.4802 0.0 600.0 \n",
"1 0.0 323.6452 0.0 600.0 \n",
"2 0.0 262.2312 0.0 600.0 \n",
"3 0.0 288.2119 0.0 600.0 \n",
"4 0.0 232.3828 0.0 600.0 "
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"scada[0].head(5)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timestamp | \n",
" turbine | \n",
" ap_av | \n",
" ap_dev | \n",
" ap_min | \n",
" reactive_power | \n",
" ws_av | \n",
" ws_1 | \n",
" ws_2 | \n",
" wd_av | \n",
" wd_1 | \n",
" wd_2 | \n",
" gen_sp | \n",
" rs_av | \n",
" nac_pos | \n",
" pitch | \n",
" runtime | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2014-11-01 02:20:00 | \n",
" 8 | \n",
" 204.6824 | \n",
" 74.51731 | \n",
" 419.0 | \n",
" -38.03038 | \n",
" 5.029291 | \n",
" 5.029291 | \n",
" 5.152810 | \n",
" 204.4785 | \n",
" 2.978069 | \n",
" -5.033260 | \n",
" 836.9521 | \n",
" 12.28072 | \n",
" 201.5000 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2014-11-01 02:20:00 | \n",
" 9 | \n",
" 228.1198 | \n",
" 82.34712 | \n",
" 400.0 | \n",
" 91.85419 | \n",
" 5.506918 | \n",
" 5.506918 | \n",
" 5.225697 | \n",
" 215.6716 | \n",
" 0.671585 | \n",
" -0.585945 | \n",
" 845.9201 | \n",
" 12.40917 | \n",
" 215.0000 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2014-11-01 02:20:00 | \n",
" 10 | \n",
" 159.3024 | \n",
" 64.77182 | \n",
" 302.0 | \n",
" -932.91080 | \n",
" 5.442306 | \n",
" 5.442306 | \n",
" 5.653325 | \n",
" 223.2563 | \n",
" 2.356911 | \n",
" 0.643579 | \n",
" 822.5547 | \n",
" 0.00000 | \n",
" 220.9000 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 2014-11-01 02:20:00 | \n",
" 11 | \n",
" 119.3055 | \n",
" 31.69469 | \n",
" 199.0 | \n",
" -146.52910 | \n",
" 4.281990 | \n",
" 4.281990 | \n",
" 4.095518 | \n",
" 224.0360 | \n",
" 0.852958 | \n",
" 1.169835 | \n",
" 808.5560 | \n",
" 11.84853 | \n",
" 223.1832 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 2014-11-01 02:20:00 | \n",
" 12 | \n",
" 168.3689 | \n",
" 85.72588 | \n",
" 387.0 | \n",
" 77.63621 | \n",
" 4.962106 | \n",
" 4.962106 | \n",
" 4.764067 | \n",
" 240.3239 | \n",
" -3.776284 | \n",
" -5.501996 | \n",
" 942.0538 | \n",
" 13.78513 | \n",
" 244.1000 | \n",
" 0.0 | \n",
" 600.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp turbine ap_av ap_dev ap_min reactive_power \\\n",
"0 2014-11-01 02:20:00 8 204.6824 74.51731 419.0 -38.03038 \n",
"1 2014-11-01 02:20:00 9 228.1198 82.34712 400.0 91.85419 \n",
"2 2014-11-01 02:20:00 10 159.3024 64.77182 302.0 -932.91080 \n",
"3 2014-11-01 02:20:00 11 119.3055 31.69469 199.0 -146.52910 \n",
"4 2014-11-01 02:20:00 12 168.3689 85.72588 387.0 77.63621 \n",
"\n",
" ws_av ws_1 ws_2 wd_av wd_1 wd_2 gen_sp \\\n",
"0 5.029291 5.029291 5.152810 204.4785 2.978069 -5.033260 836.9521 \n",
"1 5.506918 5.506918 5.225697 215.6716 0.671585 -0.585945 845.9201 \n",
"2 5.442306 5.442306 5.653325 223.2563 2.356911 0.643579 822.5547 \n",
"3 4.281990 4.281990 4.095518 224.0360 0.852958 1.169835 808.5560 \n",
"4 4.962106 4.962106 4.764067 240.3239 -3.776284 -5.501996 942.0538 \n",
"\n",
" rs_av nac_pos pitch runtime \n",
"0 12.28072 201.5000 0.0 600.0 \n",
"1 12.40917 215.0000 0.0 600.0 \n",
"2 0.00000 220.9000 0.0 600.0 \n",
"3 11.84853 223.1832 0.0 600.0 \n",
"4 13.78513 244.1000 0.0 600.0 "
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five rows of scada[1]\n",
"scada[1].head()"
]
},
{
"cell_type": "code",
"execution_count": 52,
"metadata": {},
"outputs": [],
"source": [
"# rename ap_min to ap_max in whichever dataset carries that column, instead\n",
"# of relying on the positional key scada[1] (which depends on load order);\n",
"# rebinding avoids mutating a dataframe that was already displayed above\n",
"for key, frame in scada.items():\n",
"    if \"ap_min\" in frame.columns:\n",
"        scada[key] = frame.rename(columns={\"ap_min\": \"ap_max\"})"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"# fixing rotor speed readings due to errors in data\n",
"def fix_rs(c):\n",
" if c[\"turbine\"] <= 20:\n",
" return c[\"rs_av\"]"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# vectorised equivalent of applying fix_rs row by row: keep rs_av where\n",
"# turbine <= 20 and NaN elsewhere, store it as rs_av_old (appended as the\n",
"# last column), then drop the original rs_av column\n",
"for key, frame in scada.items():\n",
"    rs_av_old = frame[\"rs_av\"].where(frame[\"turbine\"] <= 20)\n",
"    scada[key] = frame.assign(rs_av_old=rs_av_old).drop(columns=\"rs_av\")"
]
},
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [],
"source": [
"# stack the old datasets into a single dataframe\n",
"scadaOld = pd.concat([scada[key] for key in scada])"
]
},
{
"cell_type": "code",
"execution_count": 33,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timestamp | \n",
" turbine | \n",
" ap_av | \n",
" ap_dev | \n",
" ap_max | \n",
" reactive_power | \n",
" ws_av | \n",
" ws_1 | \n",
" ws_2 | \n",
" wd_av | \n",
" wd_1 | \n",
" wd_2 | \n",
" gen_sp | \n",
" nac_pos | \n",
" pitch | \n",
" runtime | \n",
" rs_av_old | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 01/11/2016 15:40 | \n",
" 3 | \n",
" 80.37572 | \n",
" 29.46033 | \n",
" 178.0 | \n",
" -9.292799 | \n",
" 4.088211 | \n",
" 4.126762 | \n",
" 4.088211 | \n",
" 290.4030 | \n",
" -17.849820 | \n",
" -4.077392 | \n",
" 797.0712 | \n",
" 294.4802 | \n",
" 0.0 | \n",
" 600.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 01/11/2016 15:40 | \n",
" 4 | \n",
" 141.68580 | \n",
" 53.87635 | \n",
" 242.0 | \n",
" 1.339321 | \n",
" 4.745911 | \n",
" 5.071102 | \n",
" 4.745911 | \n",
" 316.6896 | \n",
" -15.205220 | \n",
" -6.955624 | \n",
" 814.9814 | \n",
" 323.6452 | \n",
" 0.0 | \n",
" 600.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 01/11/2016 15:40 | \n",
" 5 | \n",
" 14.85285 | \n",
" 17.79933 | \n",
" 91.0 | \n",
" -2.037041 | \n",
" 2.886049 | \n",
" 0.000000 | \n",
" 2.886049 | \n",
" 259.8634 | \n",
" 0.000000 | \n",
" -2.367190 | \n",
" 780.9207 | \n",
" 262.2312 | \n",
" 0.0 | \n",
" 600.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 01/11/2016 15:40 | \n",
" 6 | \n",
" 139.71000 | \n",
" 63.40798 | \n",
" 273.0 | \n",
" -55.802610 | \n",
" 4.517820 | \n",
" 4.584790 | \n",
" 4.517820 | \n",
" 282.0574 | \n",
" -180.000000 | \n",
" -6.154136 | \n",
" 816.2123 | \n",
" 288.2119 | \n",
" 0.0 | \n",
" 600.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 01/11/2016 15:40 | \n",
" 7 | \n",
" 37.40505 | \n",
" 40.17854 | \n",
" 161.0 | \n",
" -145.587500 | \n",
" 3.540881 | \n",
" 3.540881 | \n",
" 3.369349 | \n",
" 228.9863 | \n",
" -3.396181 | \n",
" -4.141267 | \n",
" 787.7513 | \n",
" 232.3828 | \n",
" 0.0 | \n",
" 600.0 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp turbine ap_av ap_dev ap_max reactive_power \\\n",
"0 01/11/2016 15:40 3 80.37572 29.46033 178.0 -9.292799 \n",
"1 01/11/2016 15:40 4 141.68580 53.87635 242.0 1.339321 \n",
"2 01/11/2016 15:40 5 14.85285 17.79933 91.0 -2.037041 \n",
"3 01/11/2016 15:40 6 139.71000 63.40798 273.0 -55.802610 \n",
"4 01/11/2016 15:40 7 37.40505 40.17854 161.0 -145.587500 \n",
"\n",
" ws_av ws_1 ws_2 wd_av wd_1 wd_2 gen_sp \\\n",
"0 4.088211 4.126762 4.088211 290.4030 -17.849820 -4.077392 797.0712 \n",
"1 4.745911 5.071102 4.745911 316.6896 -15.205220 -6.955624 814.9814 \n",
"2 2.886049 0.000000 2.886049 259.8634 0.000000 -2.367190 780.9207 \n",
"3 4.517820 4.584790 4.517820 282.0574 -180.000000 -6.154136 816.2123 \n",
"4 3.540881 3.540881 3.369349 228.9863 -3.396181 -4.141267 787.7513 \n",
"\n",
" nac_pos pitch runtime rs_av_old \n",
"0 294.4802 0.0 600.0 0.0 \n",
"1 323.6452 0.0 600.0 0.0 \n",
"2 262.2312 0.0 600.0 0.0 \n",
"3 288.2119 0.0 600.0 0.0 \n",
"4 232.3828 0.0 600.0 0.0 "
]
},
"execution_count": 33,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five rows of the concatenated old data\n",
"scadaOld.head()"
]
},
{
"cell_type": "code",
"execution_count": 35,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"0 data/NS_SCADA_2017_v2.csv (543425, 16)\n",
"1 data/NS_SCADA_v2.csv (3064604, 16)\n"
]
}
],
"source": [
"# new SCADA data\n",
"# glob.glob returns matches in arbitrary order; sort them so the positional\n",
"# keys scada[0], scada[1] refer to the same files on every run\n",
"scada = {}\n",
"scadaList = sorted(glob.glob(\"data/NS_SCADA*.csv\"))\n",
"for num, path in enumerate(scadaList):\n",
"    scada[num] = pd.read_csv(path)\n",
"    # report the key, source file and (rows, columns) of each dataset\n",
"    print(num, path, scada[num].shape)"
]
},
{
"cell_type": "code",
"execution_count": 36,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timestamp | \n",
" turbine_id | \n",
" ap_av | \n",
" ap_dev | \n",
" ap_max | \n",
" reactive_power | \n",
" ws_av | \n",
" ws_1 | \n",
" ws_2 | \n",
" wd_av | \n",
" wd_1 | \n",
" wd_2 | \n",
" gen_sp | \n",
" rs_av | \n",
" nac_pos | \n",
" pitch | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 1 | \n",
" 275.5676 | \n",
" 79.71668 | \n",
" 455 | \n",
" -128.126100 | \n",
" 5.950170 | \n",
" 6.065521 | \n",
" 5.950170 | \n",
" 347.89090 | \n",
" 3.216685 | \n",
" 3.490916 | \n",
" 868.2094 | \n",
" 12.78084 | \n",
" 344.40000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 2 | \n",
" 280.9523 | \n",
" 58.14750 | \n",
" 433 | \n",
" -141.462800 | \n",
" 5.920139 | \n",
" 5.920139 | \n",
" 0.011563 | \n",
" 125.38090 | \n",
" 1.381044 | \n",
" 4.931978 | \n",
" 871.9911 | \n",
" 12.73835 | \n",
" 124.00000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 3 | \n",
" 307.0966 | \n",
" 54.00394 | \n",
" 449 | \n",
" -72.826970 | \n",
" 6.408066 | \n",
" 5.701910 | \n",
" 6.408066 | \n",
" 12.70238 | \n",
" -9.518435 | \n",
" 2.202345 | \n",
" 878.9877 | \n",
" 12.92154 | \n",
" 10.50000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 4 | \n",
" 348.8079 | \n",
" 112.20640 | \n",
" 633 | \n",
" 1.584503 | \n",
" 6.269817 | \n",
" 6.569055 | \n",
" 6.269817 | \n",
" 18.58538 | \n",
" -6.821929 | \n",
" 2.269212 | \n",
" 909.9926 | \n",
" 13.39747 | \n",
" 16.31618 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 5 | \n",
" 315.8827 | \n",
" 60.93330 | \n",
" 538 | \n",
" -95.838270 | \n",
" 5.848261 | \n",
" 0.000000 | \n",
" 5.848261 | \n",
" 319.47270 | \n",
" 0.000000 | \n",
" 5.373057 | \n",
" 882.6358 | \n",
" 12.97570 | \n",
" 314.10000 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp turbine_id ap_av ap_dev ap_max \\\n",
"0 2017-01-01 00:00:00.000 1 275.5676 79.71668 455 \n",
"1 2017-01-01 00:00:00.000 2 280.9523 58.14750 433 \n",
"2 2017-01-01 00:00:00.000 3 307.0966 54.00394 449 \n",
"3 2017-01-01 00:00:00.000 4 348.8079 112.20640 633 \n",
"4 2017-01-01 00:00:00.000 5 315.8827 60.93330 538 \n",
"\n",
" reactive_power ws_av ws_1 ws_2 wd_av wd_1 \\\n",
"0 -128.126100 5.950170 6.065521 5.950170 347.89090 3.216685 \n",
"1 -141.462800 5.920139 5.920139 0.011563 125.38090 1.381044 \n",
"2 -72.826970 6.408066 5.701910 6.408066 12.70238 -9.518435 \n",
"3 1.584503 6.269817 6.569055 6.269817 18.58538 -6.821929 \n",
"4 -95.838270 5.848261 0.000000 5.848261 319.47270 0.000000 \n",
"\n",
" wd_2 gen_sp rs_av nac_pos pitch \n",
"0 3.490916 868.2094 12.78084 344.40000 0.0 \n",
"1 4.931978 871.9911 12.73835 124.00000 0.0 \n",
"2 2.202345 878.9877 12.92154 10.50000 0.0 \n",
"3 2.269212 909.9926 13.39747 16.31618 0.0 \n",
"4 5.373057 882.6358 12.97570 314.10000 0.0 "
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five rows of scada[0]\n",
"scada[0].head()"
]
},
{
"cell_type": "code",
"execution_count": 37,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timestamp | \n",
" turbine_id | \n",
" ap_av | \n",
" ap_dev | \n",
" ap_max | \n",
" reactive_power | \n",
" ws_av | \n",
" ws_1 | \n",
" ws_2 | \n",
" wd_av | \n",
" wd_1 | \n",
" wd_2 | \n",
" gen_sp | \n",
" rs_av | \n",
" nac_pos | \n",
" pitch | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2015-01-01 00:00:00.000 | \n",
" 1 | \n",
" 1478.246 | \n",
" 269.6151 | \n",
" 1780 | \n",
" 0.181361 | \n",
" 10.90739 | \n",
" 10.90739 | \n",
" 11.467810 | \n",
" 246.6747 | \n",
" 2.730545 | \n",
" 4.884112 | \n",
" 1142.79500 | \n",
" 16.754220 | \n",
" 243.9438 | \n",
" 3.475755 | \n",
"
\n",
" \n",
" 1 | \n",
" 2015-01-01 00:00:00.000 | \n",
" 2 | \n",
" 2237.340 | \n",
" 146.2053 | \n",
" 2501 | \n",
" -8.899933 | \n",
" 14.16283 | \n",
" 14.16283 | \n",
" 0.017031 | \n",
" 211.1832 | \n",
" 3.617529 | \n",
" 9.506144 | \n",
" 1155.91100 | \n",
" 16.886540 | \n",
" 207.5653 | \n",
" 5.934744 | \n",
"
\n",
" \n",
" 2 | \n",
" 2015-01-01 00:00:00.000 | \n",
" 3 | \n",
" 2076.400 | \n",
" 465.8087 | \n",
" 2769 | \n",
" 334.474500 | \n",
" 12.83834 | \n",
" 12.83834 | \n",
" 13.927340 | \n",
" 242.8819 | \n",
" 1.866124 | \n",
" 9.465878 | \n",
" 1141.49200 | \n",
" 16.771280 | \n",
" 241.0154 | \n",
" 2.050259 | \n",
"
\n",
" \n",
" 3 | \n",
" 2015-01-01 00:00:00.000 | \n",
" 4 | \n",
" 0.000 | \n",
" 0.0000 | \n",
" 0 | \n",
" 0.000000 | \n",
" 13.18276 | \n",
" 13.18276 | \n",
" 13.569020 | \n",
" 164.8922 | \n",
" 28.992340 | \n",
" 35.905610 | \n",
" 26.76113 | \n",
" 0.481272 | \n",
" 135.9000 | \n",
" 87.000000 | \n",
"
\n",
" \n",
" 4 | \n",
" 2015-01-01 00:00:00.000 | \n",
" 5 | \n",
" 1648.039 | \n",
" 223.3687 | \n",
" 1906 | \n",
" 251.515400 | \n",
" 11.84929 | \n",
" 11.21665 | \n",
" 11.849290 | \n",
" 223.8225 | \n",
" -68.455830 | \n",
" 2.734230 | \n",
" 1146.72100 | \n",
" 16.831610 | \n",
" 221.0888 | \n",
" 3.055791 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp turbine_id ap_av ap_dev ap_max \\\n",
"0 2015-01-01 00:00:00.000 1 1478.246 269.6151 1780 \n",
"1 2015-01-01 00:00:00.000 2 2237.340 146.2053 2501 \n",
"2 2015-01-01 00:00:00.000 3 2076.400 465.8087 2769 \n",
"3 2015-01-01 00:00:00.000 4 0.000 0.0000 0 \n",
"4 2015-01-01 00:00:00.000 5 1648.039 223.3687 1906 \n",
"\n",
" reactive_power ws_av ws_1 ws_2 wd_av wd_1 \\\n",
"0 0.181361 10.90739 10.90739 11.467810 246.6747 2.730545 \n",
"1 -8.899933 14.16283 14.16283 0.017031 211.1832 3.617529 \n",
"2 334.474500 12.83834 12.83834 13.927340 242.8819 1.866124 \n",
"3 0.000000 13.18276 13.18276 13.569020 164.8922 28.992340 \n",
"4 251.515400 11.84929 11.21665 11.849290 223.8225 -68.455830 \n",
"\n",
" wd_2 gen_sp rs_av nac_pos pitch \n",
"0 4.884112 1142.79500 16.754220 243.9438 3.475755 \n",
"1 9.506144 1155.91100 16.886540 207.5653 5.934744 \n",
"2 9.465878 1141.49200 16.771280 241.0154 2.050259 \n",
"3 35.905610 26.76113 0.481272 135.9000 87.000000 \n",
"4 2.734230 1146.72100 16.831610 221.0888 3.055791 "
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five rows of scada[1]\n",
"scada[1].head()"
]
},
{
"cell_type": "code",
"execution_count": 38,
"metadata": {},
"outputs": [],
"source": [
"# stack the new datasets into a single dataframe\n",
"scadaNew = pd.concat([scada[key] for key in scada])"
]
},
{
"cell_type": "code",
"execution_count": 39,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" timestamp | \n",
" turbine_id | \n",
" ap_av | \n",
" ap_dev | \n",
" ap_max | \n",
" reactive_power | \n",
" ws_av | \n",
" ws_1 | \n",
" ws_2 | \n",
" wd_av | \n",
" wd_1 | \n",
" wd_2 | \n",
" gen_sp | \n",
" rs_av | \n",
" nac_pos | \n",
" pitch | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 1 | \n",
" 275.5676 | \n",
" 79.71668 | \n",
" 455 | \n",
" -128.126100 | \n",
" 5.950170 | \n",
" 6.065521 | \n",
" 5.950170 | \n",
" 347.89090 | \n",
" 3.216685 | \n",
" 3.490916 | \n",
" 868.2094 | \n",
" 12.78084 | \n",
" 344.40000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 1 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 2 | \n",
" 280.9523 | \n",
" 58.14750 | \n",
" 433 | \n",
" -141.462800 | \n",
" 5.920139 | \n",
" 5.920139 | \n",
" 0.011563 | \n",
" 125.38090 | \n",
" 1.381044 | \n",
" 4.931978 | \n",
" 871.9911 | \n",
" 12.73835 | \n",
" 124.00000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 2 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 3 | \n",
" 307.0966 | \n",
" 54.00394 | \n",
" 449 | \n",
" -72.826970 | \n",
" 6.408066 | \n",
" 5.701910 | \n",
" 6.408066 | \n",
" 12.70238 | \n",
" -9.518435 | \n",
" 2.202345 | \n",
" 878.9877 | \n",
" 12.92154 | \n",
" 10.50000 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 3 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 4 | \n",
" 348.8079 | \n",
" 112.20640 | \n",
" 633 | \n",
" 1.584503 | \n",
" 6.269817 | \n",
" 6.569055 | \n",
" 6.269817 | \n",
" 18.58538 | \n",
" -6.821929 | \n",
" 2.269212 | \n",
" 909.9926 | \n",
" 13.39747 | \n",
" 16.31618 | \n",
" 0.0 | \n",
"
\n",
" \n",
" 4 | \n",
" 2017-01-01 00:00:00.000 | \n",
" 5 | \n",
" 315.8827 | \n",
" 60.93330 | \n",
" 538 | \n",
" -95.838270 | \n",
" 5.848261 | \n",
" 0.000000 | \n",
" 5.848261 | \n",
" 319.47270 | \n",
" 0.000000 | \n",
" 5.373057 | \n",
" 882.6358 | \n",
" 12.97570 | \n",
" 314.10000 | \n",
" 0.0 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" timestamp turbine_id ap_av ap_dev ap_max \\\n",
"0 2017-01-01 00:00:00.000 1 275.5676 79.71668 455 \n",
"1 2017-01-01 00:00:00.000 2 280.9523 58.14750 433 \n",
"2 2017-01-01 00:00:00.000 3 307.0966 54.00394 449 \n",
"3 2017-01-01 00:00:00.000 4 348.8079 112.20640 633 \n",
"4 2017-01-01 00:00:00.000 5 315.8827 60.93330 538 \n",
"\n",
" reactive_power ws_av ws_1 ws_2 wd_av wd_1 \\\n",
"0 -128.126100 5.950170 6.065521 5.950170 347.89090 3.216685 \n",
"1 -141.462800 5.920139 5.920139 0.011563 125.38090 1.381044 \n",
"2 -72.826970 6.408066 5.701910 6.408066 12.70238 -9.518435 \n",
"3 1.584503 6.269817 6.569055 6.269817 18.58538 -6.821929 \n",
"4 -95.838270 5.848261 0.000000 5.848261 319.47270 0.000000 \n",
"\n",
" wd_2 gen_sp rs_av nac_pos pitch \n",
"0 3.490916 868.2094 12.78084 344.40000 0.0 \n",
"1 4.931978 871.9911 12.73835 124.00000 0.0 \n",
"2 2.202345 878.9877 12.92154 10.50000 0.0 \n",
"3 2.269212 909.9926 13.39747 16.31618 0.0 \n",
"4 5.373057 882.6358 12.97570 314.10000 0.0 "
]
},
"execution_count": 39,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# preview the first five rows of the concatenated new data\n",
"scadaNew.head()"
]
}
],
"metadata": {
"interpreter": {
"hash": "9181ee88e849e721a02348715c04c9aa1108ae16ca1904107a83d5bfc178fda8"
},
"kernelspec": {
"display_name": "Python 3.9.5 64-bit ('env': venv)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.10.4"
}
},
"nbformat": 4,
"nbformat_minor": 4
}