{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Load TSML filters"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"using TSML"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Create artificial data function"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [],
"source": [
"using DataFrames\n",
"using Dates\n",
"using Random\n",
"\n",
"ENV[\"COLUMNS\"]=1000 # for dataframe column size\n",
"\n",
"# Build an artificial dataset: 15-minute timestamps spanning 4 days, with\n",
"# roughly 30% of the values replaced by `missing` at shuffled positions.\n",
"# Returns (X, Y): X is a DataFrame(Date, Value); Y is a random vector of\n",
"# matching length (a stand-in target).\n",
"function generateXY()\n",
" Random.seed!(123)\n",
" gdate = DateTime(2014,1,1):Dates.Minute(15):DateTime(2014,1,5)\n",
" gval = Array{Union{Missing,Float64}}(rand(length(gdate)))\n",
" gmissing = floor(0.30*length(gdate)) |> Integer\n",
" gndxmissing = Random.shuffle(1:length(gdate))[1:gmissing]\n",
" X = DataFrame(Date=gdate,Value=gval)\n",
" X.Value[gndxmissing] .= missing\n",
" Y = rand(length(gdate))\n",
" (X,Y)\n",
"end;"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Generate artificial data with missing values"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"
| Date | Value |
|---|
| DateTime | Float64⍰ |
|---|
10 rows × 2 columns
| 1 | 2014-01-01T00:00:00 | 0.768448 |
|---|
| 2 | 2014-01-01T00:15:00 | 0.940515 |
|---|
| 3 | 2014-01-01T00:30:00 | 0.673959 |
|---|
| 4 | 2014-01-01T00:45:00 | 0.395453 |
|---|
| 5 | 2014-01-01T01:00:00 | missing |
|---|
| 6 | 2014-01-01T01:15:00 | 0.662555 |
|---|
| 7 | 2014-01-01T01:30:00 | 0.586022 |
|---|
| 8 | 2014-01-01T01:45:00 | missing |
|---|
| 9 | 2014-01-01T02:00:00 | 0.26864 |
|---|
| 10 | 2014-01-01T02:15:00 | missing |
|---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& Date & Value\\\\\n",
"\t\\hline\n",
"\t& DateTime & Float64⍰\\\\\n",
"\t\\hline\n",
"\t1 & 2014-01-01T00:00:00 & 0.768448 \\\\\n",
"\t2 & 2014-01-01T00:15:00 & 0.940515 \\\\\n",
"\t3 & 2014-01-01T00:30:00 & 0.673959 \\\\\n",
"\t4 & 2014-01-01T00:45:00 & 0.395453 \\\\\n",
"\t5 & 2014-01-01T01:00:00 & \\\\\n",
"\t6 & 2014-01-01T01:15:00 & 0.662555 \\\\\n",
"\t7 & 2014-01-01T01:30:00 & 0.586022 \\\\\n",
"\t8 & 2014-01-01T01:45:00 & \\\\\n",
"\t9 & 2014-01-01T02:00:00 & 0.26864 \\\\\n",
"\t10 & 2014-01-01T02:15:00 & \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"10×2 DataFrame\n",
"│ Row │ Date │ Value │\n",
"│ │ \u001b[90mDateTime\u001b[39m │ \u001b[90mFloat64⍰\u001b[39m │\n",
"├─────┼─────────────────────┼──────────┤\n",
"│ 1 │ 2014-01-01T00:00:00 │ 0.768448 │\n",
"│ 2 │ 2014-01-01T00:15:00 │ 0.940515 │\n",
"│ 3 │ 2014-01-01T00:30:00 │ 0.673959 │\n",
"│ 4 │ 2014-01-01T00:45:00 │ 0.395453 │\n",
"│ 5 │ 2014-01-01T01:00:00 │ \u001b[90mmissing\u001b[39m │\n",
"│ 6 │ 2014-01-01T01:15:00 │ 0.662555 │\n",
"│ 7 │ 2014-01-01T01:30:00 │ 0.586022 │\n",
"│ 8 │ 2014-01-01T01:45:00 │ \u001b[90mmissing\u001b[39m │\n",
"│ 9 │ 2014-01-01T02:00:00 │ 0.26864 │\n",
"│ 10 │ 2014-01-01T02:15:00 │ \u001b[90mmissing\u001b[39m │"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"(df,outY)=generateXY()\n",
"first(df,10)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use Pipeline and Plotter to plot artificial data"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 4,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Plotter with :interactive=>false produces a static (SVG) figure.\n",
"pltr=Plotter(Dict(:interactive => false))\n",
"\n",
"mypipeline = Pipeline(Dict(\n",
" :transformers => [pltr]\n",
" )\n",
")\n",
"\n",
"fit!(mypipeline, df)\n",
"transform!(mypipeline, df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get statistics including blocks of missing data"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | tstart | tend | sfreq | count | max | min | median | mean | q1 | q2 | q25 | q75 | q8 | q9 | kurtosis | skewness | variation | entropy | autocor | pacf | bmedian | bmean | bq25 | bq75 | bmin | bmax |
|---|
| DateTime | DateTime | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 |
|---|
1 rows × 26 columns
| 1 | 2014-01-01T00:00:00 | 2014-01-05T00:00:00 | 0.249351 | 270 | 0.995414 | 0.000412399 | 0.521184 | 0.505873 | 0.121582 | 0.213152 | 0.279623 | 0.745784 | 0.781425 | 0.870951 | -1.14079 | -0.065312 | 0.546211 | 69.5203 | 0.320605 | 0.312706 | 1.0 | 1.32184 | 1.0 | 2.0 | 1.0 | 3.0 |
|---|
"
],
"text/latex": [
"\\begin{tabular}{r|cccccccccccccccccccccccccc}\n",
"\t& tstart & tend & sfreq & count & max & min & median & mean & q1 & q2 & q25 & q75 & q8 & q9 & kurtosis & skewness & variation & entropy & autocor & pacf & bmedian & bmean & bq25 & bq75 & bmin & bmax\\\\\n",
"\t\\hline\n",
"\t& DateTime & DateTime & Float64 & Int64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64\\\\\n",
"\t\\hline\n",
"\t1 & 2014-01-01T00:00:00 & 2014-01-05T00:00:00 & 0.249351 & 270 & 0.995414 & 0.000412399 & 0.521184 & 0.505873 & 0.121582 & 0.213152 & 0.279623 & 0.745784 & 0.781425 & 0.870951 & -1.14079 & -0.065312 & 0.546211 & 69.5203 & 0.320605 & 0.312706 & 1.0 & 1.32184 & 1.0 & 2.0 & 1.0 & 3.0 \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"1×26 DataFrame\n",
"│ Row │ tstart │ tend │ sfreq │ count │ max │ min │ median │ mean │ q1 │ q2 │ q25 │ q75 │ q8 │ q9 │ kurtosis │ skewness │ variation │ entropy │ autocor │ pacf │ bmedian │ bmean │ bq25 │ bq75 │ bmin │ bmax │\n",
"│ │ \u001b[90mDateTime\u001b[39m │ \u001b[90mDateTime\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
"├─────┼─────────────────────┼─────────────────────┼──────────┼───────┼──────────┼─────────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼──────────┼───────────┼───────────┼─────────┼──────────┼──────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┤\n",
"│ 1 │ 2014-01-01T00:00:00 │ 2014-01-05T00:00:00 │ 0.249351 │ 270 │ 0.995414 │ 0.000412399 │ 0.521184 │ 0.505873 │ 0.121582 │ 0.213152 │ 0.279623 │ 0.745784 │ 0.781425 │ 0.870951 │ -1.14079 │ -0.065312 │ 0.546211 │ 69.5203 │ 0.320605 │ 0.312706 │ 1.0 │ 1.32184 │ 1.0 │ 2.0 │ 1.0 │ 3.0 │"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Statifier computes summary statistics; :processmissing=>true adds the b*\n",
"# columns (presumably stats over contiguous missing-value blocks — compare\n",
"# with the NaN b* columns after imputation further below).\n",
"statfier = Statifier(Dict(:processmissing=>true))\n",
"\n",
"mypipeline = Pipeline(Dict(\n",
" :transformers => [statfier]\n",
" )\n",
")\n",
"\n",
"fit!(mypipeline, df)\n",
"res = transform!(mypipeline, df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Use Pipeline: aggregate, impute, and plot "
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 6,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Aggregate values into hourly bins. DateValNNer (nearest-neighbor imputer)\n",
"# is constructed here but only added to pipelines in later cells.\n",
"valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))\n",
"valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))\n",
"\n",
"mypipeline = Pipeline(Dict(\n",
" :transformers => [valgator,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(mypipeline, df)\n",
"transform!(mypipeline, df)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Try real data"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
"fname = joinpath(dirname(pathof(TSML)),\"../data/testdata.csv\")\n",
"csvreader = CSVDateValReader(Dict(:filename=>fname,:dateformat=>\"dd/mm/yyyy HH:MM\"))\n",
"\n",
"# FIX: joinpath drops all earlier components when a later one is absolute, so\n",
"# the original joinpath(dirname(pathof(TSML)), \"/tmp/testdata_output.csv\")\n",
"# silently resolved to /tmp/testdata_output.csv — and /tmp is not portable.\n",
"# Write the output into the system temp directory explicitly instead.\n",
"outputname = joinpath(tempdir(),\"testdata_output.csv\")\n",
"csvwriter = CSVDateValWriter(Dict(:filename=>outputname))\n",
"\n",
"valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))\n",
"valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))\n",
"stfier = Statifier(Dict(:processmissing=>true))\n",
"outliernicer = Outliernicer(Dict(:dateinterval=>Dates.Hour(1)));"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot real data with missing values"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 8,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Read the CSV, aggregate hourly, and plot; missing values are left as gaps.\n",
"mpipeline1 = Pipeline(Dict(\n",
" :transformers => [csvreader,valgator,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(mpipeline1)\n",
"transform!(mpipeline1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Get statistics including blocks of missing data"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | tstart | tend | sfreq | count | max | min | median | mean | q1 | q2 | q25 | q75 | q8 | q9 | kurtosis | skewness | variation | entropy | autocor | pacf | bmedian | bmean | bq25 | bq75 | bmin | bmax |
|---|
| DateTime | DateTime | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 |
|---|
1 rows × 26 columns
| 1 | 2014-01-01T00:00:00 | 2015-01-01T00:00:00 | 0.999886 | 3830 | 18.8 | 8.5 | 10.35 | 11.557 | 9.9 | 10.0 | 10.0 | 12.3 | 13.0 | 16.0 | 0.730635 | 1.41283 | 0.200055 | -1.09145e5 | 4.39315 | 1.04644 | 5.0 | 10.5589 | 3.0 | 6.0 | 1.0 | 2380.0 |
|---|
"
],
"text/latex": [
"\\begin{tabular}{r|cccccccccccccccccccccccccc}\n",
"\t& tstart & tend & sfreq & count & max & min & median & mean & q1 & q2 & q25 & q75 & q8 & q9 & kurtosis & skewness & variation & entropy & autocor & pacf & bmedian & bmean & bq25 & bq75 & bmin & bmax\\\\\n",
"\t\\hline\n",
"\t& DateTime & DateTime & Float64 & Int64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64\\\\\n",
"\t\\hline\n",
"\t1 & 2014-01-01T00:00:00 & 2015-01-01T00:00:00 & 0.999886 & 3830 & 18.8 & 8.5 & 10.35 & 11.557 & 9.9 & 10.0 & 10.0 & 12.3 & 13.0 & 16.0 & 0.730635 & 1.41283 & 0.200055 & -1.09145e5 & 4.39315 & 1.04644 & 5.0 & 10.5589 & 3.0 & 6.0 & 1.0 & 2380.0 \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"1×26 DataFrame\n",
"│ Row │ tstart │ tend │ sfreq │ count │ max │ min │ median │ mean │ q1 │ q2 │ q25 │ q75 │ q8 │ q9 │ kurtosis │ skewness │ variation │ entropy │ autocor │ pacf │ bmedian │ bmean │ bq25 │ bq75 │ bmin │ bmax │\n",
"│ │ \u001b[90mDateTime\u001b[39m │ \u001b[90mDateTime\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
"├─────┼─────────────────────┼─────────────────────┼──────────┼───────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼──────────┼──────────┼───────────┼────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┤\n",
"│ 1 │ 2014-01-01T00:00:00 │ 2015-01-01T00:00:00 │ 0.999886 │ 3830 │ 18.8 │ 8.5 │ 10.35 │ 11.557 │ 9.9 │ 10.0 │ 10.0 │ 12.3 │ 13.0 │ 16.0 │ 0.730635 │ 1.41283 │ 0.200055 │ -1.09145e5 │ 4.39315 │ 1.04644 │ 5.0 │ 10.5589 │ 3.0 │ 6.0 │ 1.0 │ 2380.0 │"
]
},
"execution_count": 9,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mpipeline1 = Pipeline(Dict(\n",
" :transformers => [csvreader,valgator,stfier]\n",
" )\n",
")\n",
"\n",
"fit!(mpipeline1)\n",
"respipe1 = transform!(mpipeline1)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Try imputing and get statistics"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
" | tstart | tend | sfreq | count | max | min | median | mean | q1 | q2 | q25 | q75 | q8 | q9 | kurtosis | skewness | variation | entropy | autocor | pacf | bmedian | bmean | bq25 | bq75 | bmin | bmax |
|---|
| DateTime | DateTime | Float64 | Int64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 | Float64 |
|---|
1 rows × 26 columns
| 1 | 2014-01-01T00:00:00 | 2015-01-01T00:00:00 | 0.999886 | 8761 | 18.8 | 8.5 | 10.0 | 11.1362 | 9.95 | 10.0 | 10.0 | 11.5 | 12.0 | 14.95 | 2.37274 | 1.87452 | 0.187997 | -2.36714e5 | 4.47886 | 1.06917 | NaN | NaN | NaN | NaN | NaN | NaN |
|---|
"
],
"text/latex": [
"\\begin{tabular}{r|cccccccccccccccccccccccccc}\n",
"\t& tstart & tend & sfreq & count & max & min & median & mean & q1 & q2 & q25 & q75 & q8 & q9 & kurtosis & skewness & variation & entropy & autocor & pacf & bmedian & bmean & bq25 & bq75 & bmin & bmax\\\\\n",
"\t\\hline\n",
"\t& DateTime & DateTime & Float64 & Int64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64 & Float64\\\\\n",
"\t\\hline\n",
"\t1 & 2014-01-01T00:00:00 & 2015-01-01T00:00:00 & 0.999886 & 8761 & 18.8 & 8.5 & 10.0 & 11.1362 & 9.95 & 10.0 & 10.0 & 11.5 & 12.0 & 14.95 & 2.37274 & 1.87452 & 0.187997 & -2.36714e5 & 4.47886 & 1.06917 & NaN & NaN & NaN & NaN & NaN & NaN \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"1×26 DataFrame\n",
"│ Row │ tstart │ tend │ sfreq │ count │ max │ min │ median │ mean │ q1 │ q2 │ q25 │ q75 │ q8 │ q9 │ kurtosis │ skewness │ variation │ entropy │ autocor │ pacf │ bmedian │ bmean │ bq25 │ bq75 │ bmin │ bmax │\n",
"│ │ \u001b[90mDateTime\u001b[39m │ \u001b[90mDateTime\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mInt64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │ \u001b[90mFloat64\u001b[39m │\n",
"├─────┼─────────────────────┼─────────────────────┼──────────┼───────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼──────────┼──────────┼───────────┼────────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┼─────────┤\n",
"│ 1 │ 2014-01-01T00:00:00 │ 2015-01-01T00:00:00 │ 0.999886 │ 8761 │ 18.8 │ 8.5 │ 10.0 │ 11.1362 │ 9.95 │ 10.0 │ 10.0 │ 11.5 │ 12.0 │ 14.95 │ 2.37274 │ 1.87452 │ 0.187997 │ -2.36714e5 │ 4.47886 │ 1.06917 │ NaN │ NaN │ NaN │ NaN │ NaN │ NaN │"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# With DateValNNer inserted, missing values are imputed before Statifier, so\n",
"# the missing-block (b*) statistics come back as NaN in the result below.\n",
"mpipeline2 = Pipeline(Dict(\n",
" :transformers => [csvreader,valgator,valnner,stfier]\n",
" )\n",
")\n",
"\n",
"fit!(mpipeline2)\n",
"respipe2 = transform!(mpipeline2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot imputed data"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"mpipeline2 = Pipeline(Dict(\n",
" :transformers => [csvreader,valgator,valnner,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(mpipeline2)\n",
"transform!(mpipeline2)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"### Monotonicer"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [],
"source": [
"regularfile = joinpath(dirname(pathof(TSML)),\"../data/typedetection/regular.csv\")\n",
"monofile = joinpath(dirname(pathof(TSML)),\"../data/typedetection/monotonic.csv\")\n",
"dailymonofile = joinpath(dirname(pathof(TSML)),\"../data/typedetection/dailymonotonic.csv\")\n",
"\n",
"regularfilecsv = CSVDateValReader(Dict(:filename=>regularfile,:dateformat=>\"dd/mm/yyyy HH:MM\"))\n",
"monofilecsv = CSVDateValReader(Dict(:filename=>monofile,:dateformat=>\"dd/mm/yyyy HH:MM\"))\n",
"dailymonofilecsv = CSVDateValReader(Dict(:filename=>dailymonofile,:dateformat=>\"dd/mm/yyyy HH:MM\"))\n",
"\n",
"valgator = DateValgator(Dict(:dateinterval=>Dates.Hour(1)))\n",
"valnner = DateValNNer(Dict(:dateinterval=>Dates.Hour(1)))\n",
"# NOTE: stfier was defined twice in this cell; the redundant second\n",
"# (identical) definition has been removed.\n",
"stfier = Statifier(Dict(:processmissing=>true))\n",
"mono = Monotonicer(Dict())\n",
"outliernicer = Outliernicer(Dict(:dateinterval=>Dates.Hour(1)));"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot of monotonic data"
]
},
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"monopipeline = Pipeline(Dict(\n",
" :transformers => [monofilecsv,valgator,valnner,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(monopipeline)\n",
"transform!(monopipeline)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot after normalization of monotonic data"
]
},
{
"cell_type": "code",
"execution_count": 14,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 14,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"monopipeline = Pipeline(Dict(\n",
" :transformers => [monofilecsv,valgator,valnner,mono,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(monopipeline)\n",
"transform!(monopipeline)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot with Monotonicer and Outliernicer"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"monopipeline = Pipeline(Dict(\n",
" :transformers => [monofilecsv,valgator,valnner,mono,outliernicer,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(monopipeline)\n",
"transform!(monopipeline)\n"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot of daily monotonic"
]
},
{
"cell_type": "code",
"execution_count": 16,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dailymonopipeline = Pipeline(Dict(\n",
" :transformers => [dailymonofilecsv,valgator,valnner,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(dailymonopipeline)\n",
"transform!(dailymonopipeline)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot of daily monotonic data with Monotonicer"
]
},
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dailymonopipeline = Pipeline(Dict(\n",
" :transformers => [dailymonofilecsv,valgator,valnner,mono,pltr]\n",
" )\n",
")\n",
"fit!(dailymonopipeline)\n",
"transform!(dailymonopipeline)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot of daily monotonic with Monotonicer and Outliernicer"
]
},
{
"cell_type": "code",
"execution_count": 18,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 18,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"dailymonopipeline = Pipeline(Dict(\n",
" :transformers => [dailymonofilecsv,valgator,valnner,mono,outliernicer,pltr]\n",
" )\n",
")\n",
"fit!(dailymonopipeline)\n",
"transform!(dailymonopipeline)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot regular TS after monotonic normalization"
]
},
{
"cell_type": "code",
"execution_count": 19,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 19,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"regpipeline = Pipeline(Dict(\n",
" :transformers => [regularfilecsv,valgator,valnner,mono,pltr]\n",
" )\n",
")\n",
"\n",
"fit!(regpipeline)\n",
"transform!(regpipeline)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Plot of regular TS with outlier normalization"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [
{
"data": {
"image/svg+xml": [
"\n",
"\n"
]
},
"execution_count": 20,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"regpipeline = Pipeline(Dict(\n",
" :transformers => [regularfilecsv,valgator,valnner,mono,outliernicer,pltr]\n",
" )\n",
")\n",
"fit!(regpipeline)\n",
"transform!(regpipeline)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## TS Discovery by automatic data type classification"
]
},
{
"cell_type": "code",
"execution_count": 21,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"getting stats of AirOffTemp1.csv\n",
"getting stats of AirOffTemp2.csv\n",
"getting stats of AirOffTemp3.csv\n",
"getting stats of Energy1.csv\n",
"getting stats of Energy10.csv\n",
"getting stats of Energy2.csv\n",
"getting stats of Energy3.csv\n",
"getting stats of Energy4.csv\n",
"getting stats of Energy6.csv\n",
"getting stats of Energy7.csv\n",
"getting stats of Energy8.csv\n",
"getting stats of Energy9.csv\n",
"getting stats of Pressure1.csv\n",
"getting stats of Pressure3.csv\n",
"getting stats of Pressure4.csv\n",
"getting stats of Pressure6.csv\n",
"getting stats of RetTemp11.csv\n",
"getting stats of RetTemp21.csv\n",
"getting stats of RetTemp41.csv\n",
"getting stats of RetTemp51.csv\n",
"getting stats of AirOffTemp4.csv\n",
"getting stats of AirOffTemp5.csv\n",
"getting stats of Energy5.csv\n",
"getting stats of Pressure5.csv\n",
"getting stats of RetTemp31.csv\n",
"loading model from file: /Users/ppalmes/.julia/packages/TSML/lqjQn/src/../data/realdatatsclassification/model/juliarfmodel.serialized\n"
]
},
{
"data": {
"text/html": [
" | fname | predtype |
|---|
| String | SubStrin… |
|---|
5 rows × 2 columns
| 1 | AirOffTemp4.csv | AirOffTemp |
|---|
| 2 | AirOffTemp5.csv | AirOffTemp |
|---|
| 3 | Energy5.csv | Energy |
|---|
| 4 | Pressure5.csv | Pressure |
|---|
| 5 | RetTemp31.csv | Energy |
|---|
"
],
"text/latex": [
"\\begin{tabular}{r|cc}\n",
"\t& fname & predtype\\\\\n",
"\t\\hline\n",
"\t& String & SubStrin…\\\\\n",
"\t\\hline\n",
"\t1 & AirOffTemp4.csv & AirOffTemp \\\\\n",
"\t2 & AirOffTemp5.csv & AirOffTemp \\\\\n",
"\t3 & Energy5.csv & Energy \\\\\n",
"\t4 & Pressure5.csv & Pressure \\\\\n",
"\t5 & RetTemp31.csv & Energy \\\\\n",
"\\end{tabular}\n"
],
"text/plain": [
"5×2 DataFrame\n",
"│ Row │ fname │ predtype │\n",
"│ │ \u001b[90mString\u001b[39m │ \u001b[90mSubStrin…\u001b[39m │\n",
"├─────┼─────────────────┼────────────┤\n",
"│ 1 │ AirOffTemp4.csv │ AirOffTemp │\n",
"│ 2 │ AirOffTemp5.csv │ AirOffTemp │\n",
"│ 3 │ Energy5.csv │ Energy │\n",
"│ 4 │ Pressure5.csv │ Pressure │\n",
"│ 5 │ RetTemp31.csv │ Energy │"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"using TSML: TSClassifier\n",
"# Seed before fitting: TSClassifier trains a random-forest model\n",
"# (:num_trees below; the loaded model file is named juliarfmodel.serialized).\n",
"Random.seed!(12)\n",
"\n",
"trdirname = joinpath(dirname(pathof(TSML)),\"../data/realdatatsclassification/training\")\n",
"tstdirname = joinpath(dirname(pathof(TSML)),\"../data/realdatatsclassification/testing\")\n",
"modeldirname = joinpath(dirname(pathof(TSML)),\"../data/realdatatsclassification/model\")\n",
"\n",
"tscl = TSClassifier(Dict(:trdirectory=>trdirname,\n",
" :tstdirectory=>tstdirname,\n",
" :modeldirectory=>modeldirname,\n",
" :feature_range => 6:20,\n",
" :num_trees=>10)\n",
")\n",
"\n",
"fit!(tscl)\n",
"dfresults = transform!(tscl)"
]
},
{
"cell_type": "code",
"execution_count": 23,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"80.0"
]
},
"execution_count": 23,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"apredict = dfresults.predtype\n",
"fnames = dfresults.fname\n",
"# FIX: the named-capture angle brackets were stripped from this regex (likely\n",
"# by an HTML rendering pass), leaving the invalid pattern\n",
"# r\"(?[A-Z _ - a-z]+)(?\\d*).(?\\w+)\". Restore the (?<name>...) group names —\n",
"# mymatch[:dtype] below depends on them — and escape the literal dot that\n",
"# separates the base name from the file extension.\n",
"myregex = r\"(?<dtype>[A-Z _ - a-z]+)(?<num>\\d*)\\.(?<ext>\\w+)\"\n",
"mtypes = map(fnames) do fname\n",
"    mymatch = match(myregex, fname)\n",
"    mymatch[:dtype]\n",
"end\n",
"\n",
"# Accuracy (%): predictions agreeing with the type encoded in the filename.\n",
"sum(mtypes .== apredict)/length(mtypes) * 100"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"@webio": {
"lastCommId": null,
"lastKernelId": null
},
"kernelspec": {
"display_name": "Julia 1.2.0",
"language": "julia",
"name": "julia-1.2"
},
"language_info": {
"file_extension": ".jl",
"mimetype": "application/julia",
"name": "julia",
"version": "1.2.0"
}
},
"nbformat": 4,
"nbformat_minor": 2
}