#------------------------------ Loading the Packages -------------------------------------
set.seed(1234)
#Setting the required packages
pkgs <- c("shiny", "shinydashboard", "shinyWidgets",
"plotly", "caret",
"dplyr", "data.table", "lubridate", "reshape2",
"DT", "knitr", "kableExtra",
"datasets"
)
for(pkg in pkgs){
if(!(pkg %in% rownames(installed.packages()))){
install.packages(pkg, dependencies = TRUE)
}
lapply(pkg, FUN = function(X) {
do.call("require", list(X))
})
}
#------------------------------ Loading Functions -------------------------------------
col_num <- function(df){
if(ncol(df)%%3 !=0){
x <- ncol(df)%/%3 +1
} else {x <- ncol(df)%/%3}
return (x)
}
# Checking the available memory (RAM)
get_free_ram <- function(){
if(Sys.info()[["sysname"]] == "Windows"){
x <- system2("wmic", args = "OS get FreePhysicalMemory /Value", stdout = TRUE)
x <- x[grepl("FreePhysicalMemory", x)]
x <- gsub("FreePhysicalMemory=", "", x, fixed = TRUE)
x <- gsub("\r", "", x, fixed = TRUE)
as.integer(x)
} else {
stop("Only supported on Windows OS")
}
}
#------------------------------ Confusion Matrix Function -------------------------------------
cm_fun <- function(train, test){
cm_train <- cm_test <- data.frame(matrix(NA, ncol = dim(train)[1], nrow = dim(train)[2]))
for(i in 1:(dim(train)[1])){
cm_train[i,] <- train[i,]
cm_test[i,] <- test[i,]
}
colnames(cm_train) <- colnames(train)
colnames(cm_test) <- colnames(test)
cm_train <- data.frame(rownames(train), cm_train)
names(cm_train) <- c("Prediction", colnames(train))
cm_test <- data.frame(rownames(test), cm_test)
names(cm_test) <- c("Prediction", colnames(test))
cm_df <- rbind(cm_train, cm_test)
colnames(cm_df) <- c("Prediction", colnames(train))
options(knitr.table.format = "html")
table_out <- knitr::kable(cm_df) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE) %>%
group_rows("Training Set", 1, dim(train)[1]) %>%
group_rows("Testing Set", (dim(train)[1] + 1), dim(train)[1] * 2) %>%
add_header_above(c("", "Reference" = dim(train)[1] ))
return(table_out)
}
cm_fun_v <- function(train, test, valid){
cm_train <- cm_test <- cm_valid <- data.frame(matrix(NA, ncol = dim(train)[1], nrow = dim(train)[2]))
for(i in 1:(dim(train)[1])){
cm_train[i,] <- train[i,]
cm_test[i,] <- test[i,]
cm_valid[i,] <- valid[i,]
}
colnames(cm_train) <- colnames(train)
colnames(cm_test) <- colnames(test)
colnames(cm_valid) <- colnames(valid)
cm_train <- data.frame(rownames(train), cm_train)
names(cm_train) <- c("Prediction", colnames(train))
cm_valid <- data.frame(rownames(valid), cm_valid)
names(cm_valid) <- c("Prediction", colnames(valid))
cm_test <- data.frame(rownames(test), cm_test)
names(cm_test) <- c("Prediction", colnames(test))
cm_df <- rbind(cm_train, cm_valid, cm_test)
colnames(cm_df) <- c("Prediction", colnames(train))
options(knitr.table.format = "html")
table_out <- knitr::kable(cm_df) %>%
kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
full_width = FALSE) %>%
group_rows("Training Set", 1, dim(train)[1]) %>%
group_rows("Validation Set", (dim(train)[1] + 1), dim(train)[1] * 2) %>%
group_rows("Testing Set", (dim(train)[1] * 2 + 1), dim(train)[1] * 3) %>%
add_header_above(c("", "Reference" = dim(train)[1] ))
return(table_out)
}
#------------------------------ Accuracy Matrix Function -------------------------------------
accuracy_fun <- function(train, test){
a_df <- data.frame(matrix(0, ncol = 2, nrow = 4))
names(a_df) <- c("Training", "Testing")
a_df[1,] <- c(train$overall[1], test$overall[1])
a_df[2,] <- c(paste("[", round(train$overall[3], 2), ", ", round(train$overall[4], 2), "]", sep = "") ,
paste("[", round(test$overall[3], 2),", ", round(test$overall[4], 2), "]", sep = ""))
a_df[3,] <- c(train$overall[2], test$overall[2])
a_df[4,] <- c(train$overall[2], test$overall[2])
}
#------------------------------ Creating list of the installed packages datasets -------------------------------------
r_dataset <- NULL
pack_list <- installed.packages()
packages.list <- as.data.frame(installed.packages(), stringsAsFactors = FALSE)
d <- data(package = packages.list$Package)
#d <- data(package = pack_list[,"Package"])
dataset.df <- data.frame(package = d$results[,"Package"], dataset = d$results[,"Item"] ,
space = regexpr(" ",d$results[,"Item"]),
stringsAsFactors = FALSE )
dataset.df$dataset.fixed <- ifelse(dataset.df$space != -1,
substr(dataset.df$dataset, 1, (dataset.df$space - 1)),
dataset.df$dataset)
installed_datasets <- as.list(paste(dataset.df$package,"-" ,dataset.df$dataset.fixed, sep = " "))
#------------------------------ Creating list of the avilable data frames/matrices/ time series -------------------------------------
df_list <- c(names(which(sapply(.GlobalEnv, is.data.frame))),
names(which(sapply(.GlobalEnv, is.matrix))),
names(which(sapply(.GlobalEnv, is.data.table)))
)
ts_list <- c(names(which(sapply(.GlobalEnv, is.ts))))
#------------------------------ Server Function -------------------------------------
server <- function(input, output,session) {
#------------------------------ input_df - set initial parameters -------------------------------------
input_df <- reactiveValues(counter = 0,
data_name = NULL,
ts_obj = NULL,
mts_obj = NULL,
df_list = NULL,
df_class = NULL,
names_list = NULL,
df = NULL,
class = NULL,
var_summary = NULL)
#------------------------------ Data tab 1 summary boxes -------------------------------------
output$installed_datasets <- renderValueBox({
valueBox(
length(prev_table$r_datasets), "Installed R Datasets Available", icon = icon("folder-open"),
color = "green"
)
})
output$in_memory_df <- renderValueBox({
valueBox(
length(prev_table$data_frame_list), "In Memory Data Frame", icon = icon("superscript"),
color = "light-blue"
)
})
output$load_datasets <- renderValueBox({
valueBox(
ifelse(is.null(input_df$df_list), 0, length(input_df$df_list)), "Loaded Datasets", icon = icon("list"),
color = "maroon"
)
})
#------------------------------ Data tab 2 summary boxes -------------------------------------
output$data_name <- renderValueBox({
valueBox(
input$select_df, input_df$class, icon = icon("folder-open"),
color = "green"
)
})
output$num_var <- renderValueBox({
valueBox(
ifelse(is.ts(input_df$df),frequency(input_df$df), ncol(input_df$df)),
ifelse(is.ts(input_df$df),"Frequency", "Variables"),
icon = icon("superscript"),
color = "light-blue"
)
})
output$num_obs <- renderValueBox({
valueBox(
ifelse(is.ts(input_df$df),length(input_df$df),nrow(input_df$df)), "Observations", icon = icon("list"),
color = "maroon"
)
})
#------------------------------ Selecting the Data Input -------------------------------------
prev_table <- reactiveValues(inputs_list = NULL, # Get the list of avilable dataset to load
data_frame_list = df_list, # List of avilable dataframes in memory
time_series_list = ts_list, # List of avilable time series in memory
r_datasets = installed_datasets, # List of avilable datasets within the installed packages
file_name = NULL, # If loading csv file, the name of the file
file_path = NULL, # If loading csv file, the path of the file
class = NULL, # Identify the class of the selected dataset
df_name = NULL # The name of the selected dataset
)
observeEvent(input$data_source,{
#------------------------------ Loading from data frame or package -------------------------------------
prev_table$inputs_list <- switch(input$data_source,
"data_frame" = {# Case I - load in memory data frames
# If threre is no any data frame available in memory
if(length(prev_table$data_frame_list) == 0){
showModal(modalDialog(
title = "Warning - No Data Frame",
HTML(paste("There is no any data frame avialable",
"to load in the R Global Environment",
sep = "
")
), size = "s"
))
df_return_list <- NA
# Set the condition for the load button
output$load_flag <- reactive('0')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
} else { # Otherwise return the list of available data frames in memory
df_return_list <- prev_table$data_frame_list
# Set the condition for the load button
output$load_flag <- reactive('1')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
}
df_return_list
},
"time_series" = {# Case II - load in memory time series
# If threre is no any data frame available in memory
if(length(prev_table$time_series_list) == 0){
showModal(modalDialog(
title = "Warning - No Time Series Data",
HTML(paste("There is no any time series data avialable",
"to load in the R Global Environment",
sep = "
")
), size = "s"
))
df_return_list <- NA
# Set the condition for the load button
output$load_flag <- reactive('0')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
} else { # Otherwise return the list of available time series in memory
df_return_list <- prev_table$time_series_list
# Set the condition for the load button
output$load_flag <- reactive('1')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
}
df_return_list
},
"inst_pack" = {# Case III - load datasets from installed packages
# If threre is no any dataset available in the installed packages
if(length(prev_table$r_datasets) == 0){
showModal(modalDialog(
title = "Warning - No Datasets",
HTML(paste("There is no any dataset avialable",
"to load from the installed R packages",
sep = "
")
), size = "s"
))
dataset_list <- NA
output$load_flag <- reactive('0')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
} else {
dataset_list <- prev_table$r_datasets
output$load_flag <- reactive('1')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
}
dataset_list
}
)
})
#------------------------------ Setting the csv file path-------------------------------------
observeEvent(input$file1,{
output$load_flag <- reactive('0')
inFile <- input$file1
if(!is.null(inFile$datapath)){
prev_table$file_name <- inFile$name
prev_table$file_path <- inFile$datapath
output$load_flag <- reactive('2')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
} else{
output$load_flag <- reactive('0')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
}
})
#------------------------------ Loading from data frame or package -------------------------------------
# Feed the list of data frames and
#avialable datasets to the menue selection
output$df_list <- renderUI({
if(input$data_source == "data_frame" ) {
selectInput("df_to_load", "Select Data Frame",
choices = prev_table$inputs_list )
} else if(input$data_source == "time_series" ) {
selectInput("df_to_load", "Select Series",
choices = prev_table$inputs_list )
} else if(input$data_source == "inst_pack" ){
selectInput("df_to_load", "Select Dataset",
choices = prev_table$inputs_list )
}
})
# Load the data according to the user selection
df_tbl_view <- reactive({
prev_table$class <- NULL
if(input$data_source == "data_frame" & length(prev_table$data_frame_list) != 0){
df_view <- NULL
prev_table$df_name <- input$df_to_load
df_view <- get(input$df_to_load)
if(length(class(df_view)) > 1 & "data.frame" %in% class(df_view)){
prev_table$class <- "data.frame"
df_view <- as.data.frame(df_view)
} else if(length(class(df_view)) > 1){
prev_table$class <- class(df_view)[1]
df_view <- as.data.frame(df_view)
} else{
prev_table$class <- class(df_view)
df_view <- as.data.frame(df_view)
}
} else if(input$data_source == "time_series" & length(prev_table$time_series_list) != 0){
df_view <- NULL
prev_table$df_name <- input$df_to_load
input_df$ts_obj <- get(input$df_to_load)
df_view <- get(input$df_to_load)
if(is.mts(df_view)){
df_view <- data.frame(date=time(df_view), as.matrix(df_view))
} else if(is.ts(df_view)){
df_view <- data.frame(date=time(df_view), as.matrix(df_view))
names(df_view) <- c("date", prev_table$df_name)
}
if(length(class(input_df$ts_obj)) > 1 & "ts" %in% class(input_df$ts_obj)){
prev_table$class <- "ts"
} else if(length(class(input_df$ts_obj)) > 1){
prev_table$class <- class(input_df$ts_obj)[1]
} else{
prev_table$class <- class(input_df$ts_obj)
}
# Loading from installed package
} else if(input$data_source == "inst_pack" & length(prev_table$r_datasets) != 0){
df_view <- NULL
dataset_name <- NULL
dataset_name <- substr(input$df_to_load,
regexpr("-", input$df_to_load) + 2,
nchar(trimws(input$df_to_load)))
package_name <- substr(input$df_to_load,
1, (regexpr("-", input$df_to_load) - 2)
)
if(!paste("package:", package_name, sep = "") %in% search()){
p <- NULL
p <- as.list(package_name)
do.call("require", p)
}
# Loading the selected dataset
prev_table$df_name <- dataset_name
if(!is.na(dataset_name)){
if(dataset_name != "NA"){
df_view <- try(get(dataset_name), silent = TRUE)
if(class(df_view) == "try-error" & !is.na(dataset_name)){
showModal(modalDialog(
title = "Warning - Cannot Load the Dataset",
HTML(paste("Cannot Load the Dataset:",
"- The package index name is not match the package name",
"- or the dataset format cannot be loaded",
sep = "
")
), size = "s"
))
output$load_flag <- reactive('0')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
}
}
}
if(class(df_view) != "try-error"){
output$load_flag <- reactive('2')
outputOptions(output, "load_flag", suspendWhenHidden = FALSE)
if(is.mts(df_view)){
input_df$mts_obj <- df_view
df_view <- data.frame(date=time(df_view), as.matrix(df_view))
prev_table$class <- "mts"
} else if(is.ts(df_view)){
input_df$ts_obj <- df_view
df_view <- data.frame(date=time(df_view), as.matrix(df_view))
names(df_view) <- c("date", prev_table$df_name)
prev_table$class <- "ts"
} else if(any(class(df_view) %in% c("data.frame","matrix", "data.table", "table"))){
if(length(class(df_view)) > 1 & "data.frame" %in% class(df_view)){
prev_table$class <- "data.frame"
df_view <- as.data.frame(df_view)
} else if(length(class(df_view)) > 1){
prev_table$class <- class(df_view)[1]
df_view <- as.data.frame(df_view)
} else{
prev_table$class <- class(df_view)
df_view <- as.data.frame(df_view)
}
}
}
} else if(input$data_source == "import" & !is.null(prev_table$file_path)){
df_view <- NULL
prev_table$class <- NULL
prev_table$df_name <- substr(prev_table$file_name,1,regexpr(".", prev_table$file_name, fixed = T)-1)
df_view <- read.csv(prev_table$file_path, stringsAsFactors = FALSE)
prev_table$class <- class(df_view)
} else {
df_view <- NULL
}
return(df_view)
})
# View of the data
output$view_table <- DT::renderDataTable(
df_tbl_view(),
server = FALSE,
rownames = FALSE,
options = list(pageLength = 10,
lengthMenu = c(10, 25, 50))
)
#------------------------------ Loading a selected dataset -------------------------------------
observeEvent(input$load, {
name <- prev_table$df_name
type <- NULL
type <- ifelse(prev_table$class == "data.frame", "Data Frame",
ifelse(prev_table$class == "ts", "Time Series",
ifelse(prev_table$class == "mts", "Multiple Time Series",
ifelse(prev_table$class == "matrix", "Matrix",
prev_table$class ))))
if(!name %in% input_df$data_name){
input_df$data_name <- c(input_df$data_name, name)
if(is.null(input_df$loaded_table)){
input_df$loaded_table <- data.frame(name = name,
var = ncol(df_tbl_view()),
row = nrow(df_tbl_view()),
class = type,
stringsAsFactors = FALSE)
} else {
temp <- data.frame(name = name,
var = ncol(df_tbl_view()),
row = nrow(df_tbl_view()),
class = type,
stringsAsFactors = FALSE)
input_df$loaded_table <- rbind(input_df$loaded_table,temp)
temp <- NULL
}
if(is.null(input_df$df_list)){
if(prev_table$class != "ts"){
input_df$df_list <- list(df_tbl_view())
} else {
input_df$df_list <- list(input_df$ts_obj)
}
input_df$df_class <- list(type)
} else {
if(prev_table$class != "ts"){
input_df$df_list[[length(input_df$df_list) + 1]] <- df_tbl_view()
} else {
input_df$df_list[[length(input_df$df_list) + 1]] <- input_df$ts_obj
}
input_df$df_class[[length(input_df$df_list)]] <- type
}
names(input_df$df_list)[length(input_df$df_list)] <- name
input_df$names_list <- names(input_df$df_list)
} else{
if(prev_table$class != "ts"){
input_df$df_list[[which(names(input_df$df_list) == name)]] <- df_tbl_view()
} else {
input_df$df_list[[which(names(input_df$df_list) == name)]] <- input_df$ts_obj
}
input_df$df_class[[which(names(input_df$df_list) == name)]] <- type
}
})
#------------------------------ Setting the condition for the "Remove" button -------------------------------------
observeEvent(input_df$loaded_table,{
if(is.null(input_df$loaded_table)){
output$loaded_table_flag <- reactive("0")
outputOptions(output, "loaded_table_flag", suspendWhenHidden = FALSE)
} else {
output$loaded_table_flag <- reactive("1")
outputOptions(output, "loaded_table_flag", suspendWhenHidden = FALSE)
}
})
#------------------------------ Activate the "Remove" button -------------------------------------
observeEvent(input$remove,{
if(length(input_df$df_list)>1){
input_df$df_list[[input$list_loaded_df_rows_selected]] <- NULL
input_df$df_class[[input$list_loaded_df_rows_selected]] <- NULL
input_df$loaded_table <- input_df$loaded_table[-input$list_loaded_df_rows_selected,]
input_df$data_name <- names(input_df$df_list)
input_df$names_list <- input_df$data_name
} else {
input_df$df_list <- NULL
input_df$loaded_table <- NULL
input_df$data_name <- NULL
input_df$names_list <- NULL
input_df$df_class <- NULL
input_df$names_list <- "NA"
output$loaded_table_flag <- reactive("0")
outputOptions(output, "loaded_table_flag", suspendWhenHidden = FALSE)
}
})
#------------------------------ Loaded dataset table -------------------------------------
output$list_loaded_df <- DT::renderDataTable(
data.frame(input_df$loaded_table),
colnames = c("Dataset Name", "Num. of Variables", "Num. of Obs", "Data Type"),
selection = list(selected = 1, mode = 'single'),
options = list(pageLength = 10,
lengthMenu = c(10, 25, 50))
)
#------------------------------ DATA TAB 2 -------------------------------------
observeEvent({
input_df$names_list
},{
output$loaded_ds_list <- renderUI({
selectInput("select_df", "Select Dataset",
choices = input_df$names_list
)
})
})
observeEvent(input$select_df, {
if(!is.null(input$select_df)){
input_df$df <- (
input_df$df_list[[which(names(input_df$df_list) == input$select_df)]]
)
input_df$class <- input_df$df_class[[which(names(input_df$df_list) == input$select_df)]]
output$data_tab2_table <- DT::renderDataTable(
data.frame(input_df$df),selection = list(selected = 1, mode = 'single'),
options = list(pageLength = 10,
lengthMenu = c(10, 25, 50))
)
} else{
input_df$df <- NULL
input_df$class <- NULL
output$data_tab2_table <- NULL
}
})
#------------------------------ Data tab 2 - Data Prep -------------------------------------
#------------------------------ Data tab 2 - Creating Variables Table -------------------------------------
observeEvent({input$data_option
input_df$df
input$select_df
}, {
if(!is.ts(input_df$df)){
if(input$data_option == "var_attr" &
!is.null(input_df$df) &
!is.null(input_df$loaded_table)
){
var.names <- names(input_df$df)
var.class <- NULL
for(i in 1:ncol(input_df$df)){
if(length(class(input_df$df[,i])) > 1){
if("factor" %in% class(input_df$df[,i])){
var.class <- c(var.class, "factor")
} else {
var.class <- c(var.class, "NA")
}
} else {
var.class <- c(var.class, class(input_df$df[,i])[1])
}
}
input_df$var_summary <- data.frame(var.names, var.class, stringsAsFactors = FALSE)
names(input_df$var_summary) <- c("Name", "Class")
output$data_tab2_var <- DT::renderDataTable(
input_df$var_summary,
server = FALSE, rownames = FALSE,
selection = list(selected = 1, mode = 'single'),
options = list(lengthMenu = c(5, 10, 15, 20), pageLength = 10, dom = 'p')
)
}
} else {
output$data_tab2_ts <- renderPlotly({
if(!input$ts_plot_log){
plot_ly( x = time(input_df$df), y = input_df$df, type = "scatter", mode = input$ts_prep_mode)
} else if(input$ts_plot_log){
plot_ly( x = time(input_df$df),
y = log(input_df$df, base = exp(1)), type = "scatter", mode = input$ts_prep_mode) %>%
layout(title = "Log Transformation")
}
})
}
})
output$class_df_flag <- reactive({
ifelse(is.ts(input_df$df), TRUE, FALSE)
})
outputOptions(output, "class_df_flag", suspendWhenHidden = FALSE)
#------------------------------ Data tab 2 - Creating Variable Summary -------------------------------------
observeEvent({input$data_tab2_var_rows_selected
input$select_df
input_df$df},{
if(!is.ts(input_df$df)){
r1 <- input$data_tab2_var_rows_selected
if(is.numeric(input_df$df[, r1]) | is.integer(input_df$df[, r1])){
var.mean <- mean(input_df$df[, r1], na.rm = TRUE)
var.min <- min(input_df$df[, r1], na.rm = TRUE)
var.max <- max(input_df$df[, r1], na.rm = TRUE)
var.median <- median(input_df$df[, r1], na.rm = TRUE)
var.sd <- sd(input_df$df[, r1])
summary.vec <- c(var.mean, var.min, var.max, var.median, var.sd)
var_s <- data.frame(summary.vec)
names(var_s) <- names(input_df$df)[r1]
row.names(var_s) <- c("Mean", "Min", "Max", "Median", "Standard Deviation")
p <- plot_ly(y = ~ input_df$df[, r1], type = "box", name = names(input_df$df)[r1],
boxpoints = "all", jitter = 0.3,
pointpos = -1.8)%>%
layout(yaxis = list(title = "Range"))
} else if(is.factor(input_df$df[, r1])){
var.n.levels <- length(levels(input_df$df[, r1]))
var.levels <- NULL
for(i in 1:var.n.levels){var.levels <- c(var.levels,levels(input_df$df[, r1])[i])}
var_s <- c(var.n.levels)
var_s <- data.frame(var_s)
row.names(var_s) <- c("Number of Levels")
names(var_s) <- names(input_df$df)[r1]
factor.df <- group_by(input_df$df, get(names(input_df$df)[r1])) %>%
summarise(count = n())
names(factor.df) <- c(names(names(input_df$df)[r1]), "Count")
p <- plot_ly(data = factor.df, name = "Levels",
x = ~ get(names(factor.df)[1]),
y = ~ get(names(factor.df)[2]),
type = "bar") %>%
layout(yaxis = list(title = "Count"),
xaxis = list(title = "Levels"))
} else if(is.Date(input_df$df[, r1])){
var_s <- NULL
var_s <- data.frame(c(as.character(min(input_df$df[, r1])),
as.character(max(input_df$df[, r1]))), row.names = c("Start/Min Date", "End/Max Date"))
names(var_s) <- names(input_df$df)[r1]
p <- NULL
}
# render the data summary into table
output$data_tab2_var_summary <- renderTable(var_s, rownames = TRUE)
output$data_tab2_summary_plot <- renderPlotly(p)
} else {
ts_table <- data.frame(c(paste(start(input_df$df), collapse = "-"),
paste(end(input_df$df), collapse = "-"),
min(input_df$df, na.rm = TRUE),
max(input_df$df, na.rm = TRUE),
round(sd(input_df$df, na.rm = TRUE),2)),
row.names = c("Start Date",
"End Date", "Min Value",
"Max Value","Standard Deviation"))
names(ts_table) <- input$select_df
output$ts_table <- renderTable(ts_table, rownames = TRUE)
}
})
#------------------------------ Data tab 2 - Midifing Variables Attributes -------------------------------------
observeEvent(input$var_modify,{
if(!is.ts(input_df$df)){
r2 <- input$data_tab2_var_rows_selected
input_df$df[,r2] <- switch(input$class_selection,
"numeric" = as.numeric(input_df$df[,r2]),
"factor" = as.factor(input_df$df[,r2]),
"character" = as.character(input_df$df[,r2]),
"date" = {eval(parse(text =
paste("lubridate::",
input$date_format,
"('",
as.character(input_df$df[,input$data_tab2_var_rows_selected]),
"')",
sep = "")))
}
)
input_df$df_list[[which(names(input_df$df_list) == input$select_df)]] <- input_df$df
}
})
observeEvent({input$date_format
input$data_tab2_var_rows_selected
input$class_selection
input$select_df
},{
if(!is.ts(input_df$df)){
new.date <- input_df$df[1,input$data_tab2_var_rows_selected]
new.date <- as.character(new.date)
output$date_prev <- renderPrint(eval(parse(text =
paste("lubridate::",
input$date_format,
"('",
new.date[1],
"')",
sep = "")))
)
}
})
observeEvent(input$tabs,{
if(input$tabs != "data1" & is.null(input_df$df_list)){
showModal(modalDialog(
title = "Warning - No Loaded Dataset",
HTML(paste("There is no any loaded dataset ",
"Please select input and load it",
sep = "
")
), size = "s"
))
}
})
#------------------------------ Data tab 2 - End -------------------------------------
#------------------------------ Visualization Tab Start -------------------------------------
# Selecting the Dataset
# Setting reactive values
vis_df <- reactiveValues(df = NULL,
class = NULL,
var_factor = NULL,
var_numeric = NULL,
var_date = NULL)
# Setting the data selection
observeEvent({
input_df$names_list
},{
output$loaded_ds_list_vis <- renderUI({
selectInput("select_df_vis", "Select Dataset",
choices = input_df$names_list
)
})
})
observeEvent({
input$var_modify
input$select_df_vis
}, {
if(!is.null(input$select_df_vis)){
vis_df$df <- (
input_df$df_list[[which(names(input_df$df_list) == input$select_df_vis)]]
)
vis_df$class <- input_df$df_class[[which(names(input_df$df_list) == input$select_df_vis)]]
vis_df$var_numeric <- vis_df$var_factor <- NULL
if(!is.ts(vis_df$df)){
for(i in 1:ncol(vis_df$df)){
if(is.factor(vis_df$df[,i])){
vis_df$var_factor <- c(vis_df$var_factor, names(vis_df$df)[i])
} else if(is.numeric(vis_df$df[,i]) | is.integer(vis_df$df[,i])){
vis_df$var_numeric <- c(vis_df$var_numeric,names(vis_df$df)[i])
}
}
}
} else{
vis_df$df <- NULL
vis_df$class <- NULL
vis_df$var_factor <- NULL
vis_df$var_numeric <- NULL
}
})
observeEvent({input$var_modify
input$select_df_vis},{
if(!is.null(vis_df$var_numeric) & !is.ts(vis_df$df)){
###################### NEED TO ADD CASE FOR ONLY ONE VARIABE !!!!!!
if(length(vis_df$var_numeric) == 1 ){
output$vis_plot_type <- renderUI({
selectInput("plot_type", "Select the Plot Type",
choices = list("Boxplot" = "box",
"Histogram" = "hist",
"Density" = "density"))
})
output$vis_one_var <- renderUI({
selectInput("plot_var", "Select a Variable",
choices = vis_df$var_numeric,
selected = vis_df$var_numeric[1]
)
})
output$vis_factor <- renderUI({
if(!is.null(vis_df$var_factor)){
selectInput(
"plot_factor", "Add Categorical Variable",
choices = c("None", as.character(vis_df$var_factor))
)
} else {
selectInput(
"plot_factor", "Add Categorical Variable",
choices = "NA"
)
}
})
} else if(length(vis_df$var_numeric) > 1 ){
output$vis_plot_type <- renderUI({
selectInput("plot_type", "Select the Plot Type",
choices = list("Scatter" = "scatter",
"Line" = "line",
"Boxplot" = "box",
"Histogram" = "hist",
"Density" = "density",
"Correlation" = "cor"))
})
output$vis_one_var <- renderUI({
selectInput("plot_var", "Select a Variable",
choices = vis_df$var_numeric,
selected = vis_df$var_numeric[1]
)
})
output$vis_x <- renderUI({
selectInput("plot_x", "Select the X Axis",
choices = vis_df$var_numeric,
selected = vis_df$var_numeric[1]
)
})
output$vis_y <- renderUI({
selectInput(
"plot_y", "Select the Y Axis",
choices = vis_df$var_numeric,
selected = vis_df$var_numeric[2]
)
})
output$vis_factor <- renderUI({
if(!is.null(vis_df$var_factor)){
selectInput(
"plot_factor", "Add Categorical Variable",
choices = c("None", as.character(vis_df$var_factor))
)
} else {
selectInput(
"plot_factor", "Add Categorical Variable",
choices = "NA"
)
}
})
}
} else if(is.null(vis_df$var_numeric) & !is.ts(vis_df$df)){
output$vis_x <- renderUI({
selectInput("plot_x", "Select Variables",
choices = "No Available Numeric Variables"
)
})
} else if(is.ts(vis_df$df)){
output$vis_plot_type <- renderUI({
selectInput("plot_type", "Select the Plot Type",
choices = list("Scatter" = "scatter",
"Line" = "line",
"Boxplot" = "box",
"Seasonal Plot" = "seasonal_plot",
"Lags Plot" = "lags_plot"))
})
}
})
observeEvent({input$var_modify
input$plot_factor
input$plot_var
input$plot_x
input$plot_y
input$plot_type
vis_df$df
input$select_df_vis
},{
output$main_plot <- renderPlotly({
if(!is.ts(vis_df$df)){
p <- x <- y <- color <- NULL
if(length(vis_df$var_numeric) > 1){
y <- vis_df$df[,input$plot_y]
} else if(length(vis_df$var_numeric) == 1){
y <- NA
}
if(input$plot_type == "box" | input$plot_type == "density"){
x <- vis_df$df[, input$plot_var]
} else {
x <- vis_df$df[,input$plot_x]
}
if(input$plot_factor != "None" & input$plot_factor != "NA" & !is.null(input$plot_factor)){
color <- vis_df$df[,input$plot_factor]
type <- vis_df$df[,input$plot_factor]
} else {
color <- NULL
type <- input$plot_var
}
p <- switch(input$plot_type,
"scatter" = {
plot_ly(x = x, y = y, color = color) %>%
layout(xaxis = list(title = input$plot_x),
yaxis = list(title = input$plot_y))
},
"line" = {
plot_ly(x = x, y = y, mode = "lines", color = NULL)%>%
layout(xaxis = list(title = input$plot_x),
yaxis = list(title = input$plot_y))
},
"box" = {
plot_ly(y = x, type = "box", color = color,
name = names(vis_df$df)[which(names(vis_df$df) == input$plot_factor)],
boxpoints = "all", jitter = 0.3,
pointpos = -1.8)%>%
layout(yaxis = list(title = names(vis_df$df)[which(names(vis_df$df) == input$plot_x)]),
xaxis = list(title = "")
)
},
"hist" = {
p_hist <- NULL
if(input$plot_factor == "None" | input$plot_factor == "NA"){
p_hist <- plot_ly(x = vis_df$df[,input$plot_var], type = "histogram")
} else if(input$plot_factor != "None" &
input$plot_factor != "NA" &
!is.null(input$plot_factor)){
plot_list <- l <- NULL
for(l1 in levels(vis_df$df[,input$plot_factor])){
hist.sub.df <- subset(vis_df$df, vis_df$df[,input$plot_factor] == l1)
l <- length(plot_list)
plot_list[[l + 1]] <- plot_ly(hist.sub.df,
x = hist.sub.df[,input$plot_var],
name = l1) %>%
layout(xaxis = list(title = l1),
title = input$plot_var)
}
p_hist <- subplot(plot_list, titleX = TRUE, shareX = TRUE) %>%
hide_legend()
}
p_hist
},
"density" = {
plot_den <- NULL
if(input$plot_factor == "None" | input$plot_factor == "NA"){
dens <- density(x)
dens.df <- data.frame(x = dens$x, y = dens$y)
min_y <- 0
max_y <- max(dens.df$y)
plot_den <- plot_ly(data = dens.df, x = ~x,
y = ~y)
} else if(input$plot_factor != "None" &
input$plot_factor != "NA" &
!is.null(input$plot_factor)){
plot_list_den <- l <- NULL
for(l2 in levels(vis_df$df[, input$plot_factor])){
df.den <- subset(vis_df$df,
vis_df$df[, input$plot_factor] == l2)
l <- length(plot_list_den)
dens <- density(df.den[,input$plot_var])
dens.df <- data.frame(x = dens$x, y = dens$y)
plot_list_den[[l + 1]] <- plot_ly(data = dens.df,
x = ~x,
y = ~y)%>%
layout(xaxis = list(title = l2),
title = input$plot_var)
}
plot_den <- subplot(plot_list_den, titleX = TRUE, shareX = TRUE)%>%
hide_legend()
}
plot_den
},
"cor" = {
c <- NULL
c <- round(cor(vis_df$df[, which(colnames(vis_df$df) %in% vis_df$var_numeric)]), 3)
plot_ly(x = vis_df$var_numeric, y = vis_df$var_numeric, z = c,
key = c, type = "heatmap", source = "heatplot")
}
)
} else if(is.ts(vis_df$df)){
ts.df <- data.frame(dec_left = floor(time(vis_df$df)),
dec_right = round((time(vis_df$df) - floor(time(vis_df$df))) *
frequency(vis_df$df) + 1),
value = as.numeric(vis_df$df))
p <- switch(input$plot_type,
"line" = {
plot_ly( x = time(vis_df$df), y = vis_df$df, type = "scatter", mode = "line")
},
"scatter" = {
plot_ly( x = time(vis_df$df), y = vis_df$df, type = "scatter")
},
"box" = {
plot_ly(data = ts.df, y = ~ value ,
color = ~ as.factor(dec_right),
type = "box",
boxpoints = "all", jitter = 0,
pointpos = -1.8)
},
"seasonal_plot" = {
if(frequency(vis_df$df) == 1){
p <- plot_ly()
showModal(modalDialog(
title = "Warning - Seasonal Plot is Not Available",
HTML(paste("Seasonal plot is not available",
"for time series object with yearly frequancy",
sep = "
")
), size = "s"
))
p
} else {
ts.df_wide <- reshape2::dcast(ts.df, dec_right ~ dec_left )
p <- plot_ly()
for(f in 2:ncol(ts.df_wide)){
p <- p %>% add_trace(x = ts.df_wide[,1], y = ts.df_wide[,f],
name = paste("time", names(ts.df_wide)[f], sep = " " ),
mode = "line")
}
p
}
},
"lags_plot" = {
lag <- NULL
lag_plots <- NULL
max.lags <- 12
for(g in 1:max.lags){
if(g == 1){
lag <- c(NA, ts.df$value[- nrow(ts.df)])
} else {
lag <- c(NA,lag[-nrow(ts.df)])
}
lag_plots[[g]] <- plot_ly(x = lag, y = ts.df$value,
name = paste("Lag", g, sep = " ")) %>%
layout(xaxis = list(title = paste("Lag", g, sep = " "),
range = c( min(na.omit(as.numeric(lag))),
max(na.omit(as.numeric(lag))))),
yaxis = list(title = paste("Series", sep = ""),
range = c( min(na.omit(as.numeric(ts.df$value))),
max(na.omit(as.numeric(ts.df$value))))),
title = paste(input$select_df_vis,"Series vs Lags", sep = " "),
annotations = list(
# x = median(na.omit(as.numeric(lag))),
# y = median(na.omit(as.numeric(ts.df$value))),
showarrow = FALSE,
# arrowhead = 4,
# arrowsize = 0.5,
# ax = 20,
# ay = -20,
xref = paste("x", g, sep = ""),
yref = paste("y", g, sep = ""),
text = paste("Lag", g, sep = " "))
)
}
subplot(lag_plots,
titleX = FALSE, titleY = TRUE,
shareX = FALSE, shareY = FALSE,
margin = 0.05,
nrows = ceiling(length(lag_plots) / 3))%>%
hide_legend()
}
)
}
})
return(p)
})
output$class_df_flag_vis <- reactive({
ifelse(is.ts(vis_df$df), TRUE, FALSE)
})
outputOptions(output, "class_df_flag_vis", suspendWhenHidden = FALSE)
#------------------------------ Regression and Classification Models -------------------------------------
models_df <- reactiveValues(df = NULL, # Load the selected data frame
var_list = NULL, # Create a variable list
independent_var = NULL, # Create the independent variables list
var_dep_class = NULL # The class of the dependent variable
)
# Select the dataset
observeEvent({
input$var_modify
input_df$names_list
},{
if(length(input_df$names_list[which(input_df$df_class == "Data Frame")]) == 0){
output$models1_df_list <- renderUI({
output$model_tab_input <- reactive("0")
outputOptions(output, "model_tab_input", suspendWhenHidden = FALSE)
models_df$var_list <- models_df$df <- NULL
showModal(modalDialog(
title = "Warning - No Available Data Frame",
HTML(paste("No available data frame in the platform",
"Use the Data tab to load data",
sep = "
")
), size = "s"
))
output$models1_df_list <- renderUI({
selectInput("models1_select_df", "Select Dataset",
choices = "NA"
)
})
})
} else if(length(input_df$names_list[which(input_df$df_class == "Data Frame")]) > 0){
output$model_tab_input <- reactive("1")
outputOptions(output, "model_tab_input", suspendWhenHidden = FALSE)
output$models1_df_list <- renderUI({
selectInput("models1_select_df", "Select Dataset",
choices = input_df$names_list[which(input_df$df_class == "Data Frame")]
)
})
}
})
# Update the dataset selection
observeEvent({
input$var_modify
input$models1_select_df
},{
if(length(input_df$names_list[which(input_df$df_class == "Data Frame")]) > 0){
output$model_tab_input <- reactive("1")
outputOptions(output, "model_tab_input", suspendWhenHidden = FALSE)
models_df$df <- input_df$df_list[[which(input_df$names_list == input$models1_select_df)]]
} else {
output$model_tab_input <- reactive("0")
outputOptions(output, "model_tab_input", suspendWhenHidden = FALSE)
models_df$df <- NULL
}
})
# Dependent variable
observeEvent({
input$var_modify
input$models1_select_df
}, {
if(!is.null(models_df$df)){
models_df$var_list <- names(models_df$df)
output$model_tab_ind <- reactive("1")
outputOptions(output, "model_tab_ind", suspendWhenHidden = FALSE)
output$models1_var_list <- renderUI({
selectInput("models1_select_var", "Select the Dependent Variable",
choices = c("Select Variable",models_df$var_list)
)
})
} else if(is.null(models_df$df)){
models_df$var_list <- NULL
output$model_tab_ind <- reactive("0")
outputOptions(output, "model_tab_ind", suspendWhenHidden = FALSE)
}
})
# Independent variable
observeEvent(input$models1_select_var, {
if(input$models1_select_var != "Select Variable"){
models_df$var_dep_class <- class(models_df$df[,which(names(models_df$df) == input$models1_select_var)])
models_df$independent_var <- setdiff(names(models_df$df), c(input$models1_select_var, "name"))
output$model_tab_ind <- reactive("1")
outputOptions(output, "model_tab_ind", suspendWhenHidden = FALSE)
output$models1_independent_list <- renderUI({
pickerInput(inputId = "models1_independent",
label = "Select the Independent Variable",
choices = models_df$independent_var, options = list(`actions-box` = TRUE),
multiple = TRUE,
selected = models_df$independent_var)
})
} else if(input$models1_select_var == "Select Variable"){
models_df$independent_var <- NULL
models_df$var_dep_class <- NULL
output$model_tab_ind <- reactive("0")
outputOptions(output, "model_tab_ind", suspendWhenHidden = FALSE)
}
})
observeEvent({
models_df$var_dep_class
input$models1_select_var
},{
if(!is.null(models_df$var_dep_class)){
if(is.factor(models_df$df[,which(names(models_df$df) == input$models1_select_var)])){
if(length(levels(models_df$df[,which(names(models_df$df) == input$models1_select_var)])) == 2){
output$model_binomial <- reactive("1") # set condition for binomial model
outputOptions(output, "model_binomial", suspendWhenHidden = FALSE)
h2o_df$binomial <- NULL
h2o_df$binomial <- "binomial"
} else if(length(levels(models_df$df[,which(names(models_df$df) == input$models1_select_var)])) > 2){
output$model_binomial <- reactive("2") # set condition for multinomial model
outputOptions(output, "model_binomial", suspendWhenHidden = FALSE)
h2o_df$binomial <- NULL
h2o_df$binomial <- "multinomial"
} else {
output$model_binomial <- reactive("0") # not engough levels for binomial/multinomial
outputOptions(output, "model_binomial", suspendWhenHidden = FALSE)
}
output$dep_var_class <- reactive("1") # flag for factor variable
outputOptions(output, "dep_var_class", suspendWhenHidden = FALSE)
} else if (models_df$var_dep_class == "numeric" |
models_df$var_dep_class == "integer") {
output$dep_var_class <- reactive("2") # flag for numeric/integer variable
outputOptions(output, "dep_var_class", suspendWhenHidden = FALSE)
#
output$model_binomial <- reactive("0") # reseting the binomial flag
outputOptions(output, "model_binomial", suspendWhenHidden = FALSE)
}
}
})
#------------------------------ H2O Connection -------------------------------------
h2o_df <- reactiveValues(status = FALSE,
num_cpus = NULL,
free_mem = NULL,
df = NULL,
x = NULL,
y = NULL,
train = NULL,
test = NULL,
valid = NULL,
model = NULL,
binomial = NULL)
observeEvent( input$model_package,{
if("H2O" %in% input$model_package & !h2o_df$status){
if(!"h2o" %in% installed.packages()){
showModal(modalDialog(
title = "Warning - H2O is not Available",
HTML(paste("The H2O package is not installed.",
"Please install the package to continue.",
"More infromation is available here - https://www.h2o.ai/download/",
sep = "
")
), size = "s"
))
output$h2o_flag <- reactive("0")
outputOptions(output, "h2o_flag", suspendWhenHidden = FALSE)
} else {
require(h2o)
try(h2o.init(nthreads=-1,
max_mem_size = paste(ceiling(get_free_ram()/1024^2),"g", sep = "")),
silent = TRUE)
if(h2o.clusterIsUp()){
output$h2o_flag <- reactive("1")
outputOptions(output, "h2o_flag", suspendWhenHidden = FALSE)
h2o_df$status <- TRUE
cluster_status <- h2o.clusterStatus()
h2o_df$free_mem <- as.numeric(cluster_status$free_mem)
h2o_df$num_cpus <- as.numeric(cluster_status$num_cpus)
} else {
showModal(modalDialog(
title = "Warning - H2O is not Connect",
HTML(paste("Couldn't connect to H2O cluster,",
"please check in R if the package installed",
sep = "
")
), size = "s"
))
output$h2o_flag <- reactive("0")
outputOptions(output, "h2o_flag", suspendWhenHidden = FALSE)
}
}
} else if(!"H2O" %in% input$model_package & h2o_df$status){
try(h2o.shutdown(prompt=FALSE), silent = TRUE)
h2o_df$status <- FALSE
h2o_df$free_mem <- NULL
h2o_df$num_cpus <- NULL
output$h2o_flag <- reactive("0")
outputOptions(output, "h2o_flag", suspendWhenHidden = FALSE)
}
})
output$h2o_status_box <- renderValueBox({
valueBox(
ifelse(h2o_df$status, "Connected","Disconnected" ), "H2O Status", icon = icon("signal"),
color = ifelse(h2o_df$status, "green","red" )
)
})
output$h2o_cluster_mem <- renderValueBox({
valueBox(
paste(round((h2o_df$free_mem / 1024^3), 2), "GB", sep = ""),
"H2O Cluster Total Memory", icon = icon("microchip"),
color = "maroon"
)
})
output$h2o_cpu <- renderValueBox({
valueBox(
h2o_df$num_cpus,
"Number of CPUs in Use", icon = icon("microchip"),
color = "light-blue"
)
})
observeEvent(input$h2o_run_class, {
h2o.removeAll()
# Check if there are any ordered factor
ordered_factor <- NULL
ordered_factor <- which(lapply(models_df$df, is.ordered) == TRUE)
if(length(ordered_factor) > 0){
if(input$models1_select_var == colnames(models_df$df)[ordered_factor]){
showModal(modalDialog(
title = "Warning - Ordered Factor",
HTML(paste("H2O doesn't support ordered factor class.",
"Please select different dependent variable",
sep = "
")
), size = "s"
))
h2o_df$df <- NULL
}else if(input$models1_select_var != colnames(models_df$df)[ordered_factor]){
showModal(modalDialog(
title = "Warning - Ordered Factor",
HTML(paste("H2O doesn't support ordered factor class.",
paste("the variable '",
colnames(models_df$df)[ordered_factor],
"' will be exclude", sep = ""),
sep = "
")
), size = "s"
))
h2o_df$df <- as.h2o(models_df$df[, -ordered_factor])
}} else if(length(ordered_factor) == 0){
h2o_df$df <- as.h2o(models_df$df)
}
if(!is.null(h2o_df$df)){
h2o_df$y <- h2o_df$x <- h2o_df$model <- NULL
h2o_df$train <- h2o_df$test <- h2o_df$valid <- NULL
h2o_df$y <- match(input$models1_select_var, names(h2o_df$df))
h2o_df$x <- match(input$models1_independent, names(h2o_df$df))
n_folds <- NULL
if(input$nfolds_flag){
n_folds <- input$nfolds
} else {
n_folds <- 0
}
if(input$h2o_validation){
splits <- h2o.splitFrame(
data = h2o_df$df,
ratios = c(input$h2o_split_v[1],(input$h2o_split_v[2] - input$h2o_split_v[1])),
destination_frames = c("train", "valid", "test"), seed = 1234
)
h2o_df$train <- splits[[1]]
h2o_df$valid <- splits[[2]]
h2o_df$test <- splits[[3]]
if(input$binomial_models == "h2o_rf"){
# h2o_df$model <- NULL
h2o_df$model <- h2o.randomForest(
training_frame = h2o_df$train,
validation_frame = h2o_df$valid,
x = h2o_df$x,
y = h2o_df$y,
nfolds = n_folds,
ntrees = input$h2o_rf_ntree,
max_depth = input$h2o_rf_max_depth,
col_sample_rate_change_per_level = input$h2o_rf_col_sample_rate_change_per_level,
col_sample_rate_per_tree = input$h2o_rf_col_sample_rate_per_tree,
sample_rate = input$h2o_rf_sample_rate,
histogram_type = input$rf_histogram_type
)
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
valid_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$valid))
valid_df <- as.data.frame(h2o_df$valid[, h2o_df$y])
valid_cm <- confusionMatrix(valid_pred$predict, valid_df[,1])
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun_v(train = train_cm$table, valid = valid_cm$table, test = test_cm$table)
}
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# RMSE plot with validation set
output$rmse_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_rmse,
type = "scatter", mode = "lines+markers", name = "Training") %>%
add_trace(x = ~number_of_trees, y = ~ validation_rmse,
type = "scatter", mode = "lines+markers", name = "Validation")%>%
layout(
title = "Random Forest - RMSE Score History",
yaxis = list(title = "RMSE", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Classification error plot with validation set
output$classification_error_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_classification_error,
type = "scatter", mode = "lines+markers", name = "Training") %>%
add_trace(x = ~number_of_trees, y = ~ validation_classification_error,
type = "scatter", mode = "lines+markers", name = "Validation")%>%
layout(
title = "Random Forest - Classification Error Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Logloss plot with validation set
output$logloss_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_logloss,
type = "scatter", mode = "lines+markers", name = "Training") %>%
add_trace(x = ~number_of_trees, y = ~ validation_logloss,
type = "scatter", mode = "lines+markers", name = "Validation")%>%
layout(
title = "Random Forest - Logloss Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
var_imp <- h2o.varimp(h2o_df$model)
var_imp <- var_imp[order(var_imp$scaled_importance),]
var_order <- var_imp$variable
var_imp$variable <- factor(var_imp$variable, levels = var_order)
plot_ly(data = var_imp, y = ~ variable, x = ~ round(scaled_importance,2),
type = "bar", orientation = 'h'
) %>%
layout(
title = "Random Forest - Variable Importance",
yaxis = list(title = ""),
xaxis = list(title = "Scaled Importance"),
margin = list(l = 155)
)
})
} else if(input$binomial_models == "h2o_gbm"){
# h2o_df$model <- NULL
h2o_df$model <- h2o.gbm(
training_frame = h2o_df$train,
validation_frame = h2o_df$valid,
x = h2o_df$x,
y = h2o_df$y,
nfolds = n_folds,
ntrees = input$h2o_gbm_ntree,
max_depth = input$h2o_gbm_max_depth,
learn_rate = input$h2o_gbm_learn_rate,
learn_rate_annealing = input$h2o_gbm_learn_rate_annealing,
min_rows = input$h2o_gbm_min_rows,
min_split_improvement = input$h2o_gbm_min_split_improvement,
histogram_type = input$gbm_histogram_type
)
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
valid_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$valid))
valid_df <- as.data.frame(h2o_df$valid[, h2o_df$y])
valid_cm <- confusionMatrix(valid_pred$predict, valid_df[,1])
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun_v(train = train_cm$table, valid = valid_cm$table, test = test_cm$table)
}
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# RMSE plot with validation set
output$rmse_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_rmse,
type = "scatter", mode = "lines+markers", name = "Training") %>%
add_trace(x = ~number_of_trees, y = ~ validation_rmse,
type = "scatter", mode = "lines+markers", name = "Validation")%>%
layout(
title = "GBM - RMSE Score History",
yaxis = list(title = "RMSE", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Classification error plot with validation set
output$classification_error_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_classification_error,
type = "scatter", mode = "lines+markers", name = "Training") %>%
add_trace(x = ~number_of_trees, y = ~ validation_classification_error,
type = "scatter", mode = "lines+markers", name = "Validation")%>%
layout(
title = "GBM - Classification Error Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Logloss plot with validation set
output$logloss_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_logloss,
type = "scatter", mode = "lines+markers", name = "Training") %>%
add_trace(x = ~number_of_trees, y = ~ validation_logloss,
type = "scatter", mode = "lines+markers", name = "Validation")%>%
layout(
title = "GBM - Logloss Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
var_imp <- h2o.varimp(h2o_df$model)
var_imp <- var_imp[order(var_imp$scaled_importance),]
var_order <- var_imp$variable
var_imp$variable <- factor(var_imp$variable, levels = var_order)
plot_ly(data = var_imp, y = ~ variable, x = ~ round(scaled_importance,2),
type = "bar", orientation = 'h'
) %>%
layout(
title = "GBM - Variable Importance",
yaxis = list(title = ""),
xaxis = list(title = "Scaled Importance"),
margin = list(l = 155)
)
})
} else if(input$binomial_models == "h2o_dl"){
# h2o_df$model <- NULL
if(input$h2o_dl_num_hidden == 1){
hidden <- c(input$h2o_dl_layer1)
} else if(input$h2o_dl_num_hidden == 2){
hidden <- c(input$h2o_dl_layer1, input$h2o_dl_layer2)
} else if(input$h2o_dl_num_hidden == 3){
hidden <- c(input$h2o_dl_layer1, input$h2o_dl_layer2, input$h2o_dl_layer3)
} else if(input$h2o_dl_num_hidden == 4){
hidden <- c(input$h2o_dl_layer1, input$h2o_dl_layer2, input$h2o_dl_layer3, input$h2o_dl_layer4)
}
h2o_df$model <- h2o.deeplearning(
training_frame = h2o_df$train,
validation_frame = h2o_df$valid,
x = h2o_df$x,
y = h2o_df$y,
nfolds = n_folds,
hidden = hidden,
epochs = input$h2o_dl_epochs,
l1 = input$h2o_dl_l1,
l2 = input$h2o_dl_l2
)
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
valid_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$valid))
valid_df <- as.data.frame(h2o_df$valid[, h2o_df$y])
valid_cm <- confusionMatrix(valid_pred$predict, valid_df[,1])
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun_v(train = train_cm$table, valid = valid_cm$table, test = test_cm$table)
}
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# RMSE plot with validation set
output$rmse_plot <- renderPlotly({
# plot_ly(data = sh, x = ~number_of_trees, y = ~ training_rmse,
# type = "scatter", mode = "lines+markers", name = "Training") %>%
# add_trace(x = ~number_of_trees, y = ~ validation_rmse,
# type = "scatter", mode = "lines+markers", name = "Validation")%>%
# layout(
# title = "GBM - RMSE Score History",
# yaxis = list(title = "RMSE", domain = c(0, 0.95)),
# xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
# )
})
# Classification error plot with validation set
output$classification_error_plot <- renderPlotly({
# plot_ly(data = sh, x = ~number_of_trees, y = ~ training_classification_error,
# type = "scatter", mode = "lines+markers", name = "Training") %>%
# add_trace(x = ~number_of_trees, y = ~ validation_classification_error,
# type = "scatter", mode = "lines+markers", name = "Validation")%>%
# layout(
# title = "GBM - Classification Error Score History",
# yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
# xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
# )
})
# Logloss plot with validation set
output$logloss_plot <- renderPlotly({
# plot_ly(data = sh, x = ~number_of_trees, y = ~ training_logloss,
# type = "scatter", mode = "lines+markers", name = "Training") %>%
# add_trace(x = ~number_of_trees, y = ~ validation_logloss,
# type = "scatter", mode = "lines+markers", name = "Validation")%>%
# layout(
# title = "GBM - Logloss Score History",
# yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
# xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
# )
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
var_imp <- h2o.varimp(h2o_df$model)
var_imp <- var_imp[order(var_imp$scaled_importance),]
var_order <- var_imp$variable
var_imp$variable <- factor(var_imp$variable, levels = var_order)
plot_ly(data = var_imp, y = ~ variable, x = ~ round(scaled_importance,2),
type = "bar", orientation = 'h'
) %>%
layout(
title = "Deep Learning - Variable Importance",
yaxis = list(title = ""),
xaxis = list(title = "Scaled Importance"),
margin = list(l = 155)
)
})
} else if(input$binomial_models == "h2o_glm"){
if(input$h2o_glm_lambda_search){
lambda_search <- TRUE
lambda_min_ratio <- input$h2o_glm_lambda_min_ratio
nlambdas <- input$h2o_glm_nlambdas
} else {
lambda_search <- FALSE
lambda_min_ratio <- NULL
nlambdas <- NULL
}
h2o_df$model <- h2o.glm(
training_frame = h2o_df$train,
validation_frame = h2o_df$valid,
x = h2o_df$x,
y = h2o_df$y,
family = h2o_df$binomial,
alpha = input$h2o_glm_alpha,
solver = input$h2o_glm_solver,
max_iterations = input$h2o_glm_max_iterations,
lambda_search = lambda_search,
lambda_min_ratio = lambda_min_ratio,
nlambdas = nlambdas
)
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
valid_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$valid))
valid_df <- as.data.frame(h2o_df$valid[, h2o_df$y])
valid_cm <- confusionMatrix(valid_pred$predict, valid_df[,1])
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun_v(train = train_cm$table, valid = valid_cm$table, test = test_cm$table)
}
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# RMSE plot with validation set
output$rmse_plot <- renderPlotly({
NULL
})
# Classification error plot with validation set
output$classification_error_plot <- renderPlotly({
NULL
})
# Logloss plot with validation set
output$logloss_plot <- renderPlotly({
NULL
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
NULL
})
}
# If not using validation
} else if(!input$h2o_validation){
splits <- h2o.splitFrame(
data = h2o_df$df,
ratios = c(input$h2o_split),
destination_frames = c("train", "test"), seed = 1234
)
h2o_df$train <- splits[[1]]
h2o_df$test <- splits[[2]]
if(input$binomial_models == "h2o_rf"){
h2o_df$model <- h2o.randomForest(
training_frame = h2o_df$train,
x = h2o_df$x,
y = h2o_df$y,
nfolds = n_folds,
ntrees = input$h2o_rf_ntree,
max_depth = input$h2o_rf_max_depth,
histogram_type = input$rf_histogram_type,
col_sample_rate_change_per_level = input$h2o_rf_col_sample_rate_change_per_level,
col_sample_rate_per_tree = input$h2o_rf_col_sample_rate_per_tree,
sample_rate = input$h2o_rf_sample_rate
)
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
print("2")
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun(train = train_cm$table, test = test_cm$table)
}
# RMSE plot without validation set
output$rmse_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_rmse,
type = "scatter", mode = "lines+markers", name = "Training") %>%
layout(
title = "Random Forest - RMSE Score History",
yaxis = list(title = "RMSE", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Classification error plor without validation set
output$classification_error_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_classification_error,
type = "scatter", mode = "lines+markers", name = "Training") %>%
layout(
title = "Random Forest - Classification Error Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Logloss plot without validation set
output$logloss_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_logloss,
type = "scatter", mode = "lines+markers", name = "Training") %>%
layout(
title = "Random Forest - Logloss Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
var_imp <- h2o.varimp(h2o_df$model)
var_imp <- var_imp[order(var_imp$scaled_importance),]
var_order <- var_imp$variable
var_imp$variable <- factor(var_imp$variable, levels = var_order)
plot_ly(data = var_imp, y = ~ variable, x = ~ round(scaled_importance,2),
type = "bar", orientation = 'h'
) %>%
layout(
title = "Random Forest - Variable Importance",
yaxis = list(title = ""),
xaxis = list(title = "Scaled Importance"),
margin = list(l = 155)
)
})
} else if(input$binomial_models == "h2o_gbm"){
# h2o_df$model <- NULL
h2o_df$model <- h2o.gbm(
training_frame = h2o_df$train,
x = h2o_df$x,
y = h2o_df$y,
nfolds = n_folds,
ntrees = input$h2o_gbm_ntree,
max_depth = input$h2o_gbm_max_depth,
learn_rate = input$h2o_gbm_learn_rate,
learn_rate_annealing = input$h2o_gbm_learn_rate_annealing,
min_rows = input$h2o_gbm_min_rows,
min_split_improvement = input$h2o_gbm_min_split_improvement,
histogram_type = input$gbm_histogram_type
)
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun(train = train_cm$table, test = test_cm$table)
}
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# RMSE plot with validation set
output$rmse_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_rmse,
type = "scatter", mode = "lines+markers", name = "Training") %>%
layout(
title = "GBM - RMSE Score History",
yaxis = list(title = "RMSE", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Classification error plot with validation set
output$classification_error_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_classification_error,
type = "scatter", mode = "lines+markers", name = "Training") %>%
layout(
title = "GBM - Classification Error Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Logloss plot with validation set
output$logloss_plot <- renderPlotly({
plot_ly(data = sh, x = ~number_of_trees, y = ~ training_logloss,
type = "scatter", mode = "lines+markers", name = "Training") %>%
layout(
title = "GBM - Logloss Score History",
yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
)
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
var_imp <- h2o.varimp(h2o_df$model)
var_imp <- var_imp[order(var_imp$scaled_importance),]
var_order <- var_imp$variable
var_imp$variable <- factor(var_imp$variable, levels = var_order)
plot_ly(data = var_imp, y = ~ variable, x = ~ round(scaled_importance,2),
type = "bar", orientation = 'h'
) %>%
layout(
title = "GBM - Variable Importance",
yaxis = list(title = ""),
xaxis = list(title = "Scaled Importance"),
margin = list(l = 155)
)
})
} else if(input$binomial_models == "h2o_glm"){
if(input$h2o_glm_lambda_search){
lambda_search <- TRUE
lambda_min_ratio <- input$h2o_glm_lambda_min_ratio
nlambdas <- input$h2o_glm_nlambdas
} else {
lambda_search <- FALSE
lambda_min_ratio <- NULL
nlambdas <- NULL
}
h2o_df$model <- h2o.glm(
training_frame = h2o_df$train,
x = h2o_df$x,
y = h2o_df$y,
family = h2o_df$binomial,
alpha = input$h2o_glm_alpha,
solver = input$h2o_glm_solver,
max_iterations = input$h2o_glm_max_iterations,
lambda_search = lambda_search,
lambda_min_ratio = lambda_min_ratio,
nlambdas = nlambdas
)
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun(train = train_cm$table, test = test_cm$table)
}
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# RMSE plot with validation set
output$rmse_plot <- renderPlotly({
NULL
})
# Classification error plot with validation set
output$classification_error_plot <- renderPlotly({
NULL
})
# Logloss plot with validation set
output$logloss_plot <- renderPlotly({
NULL
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
NULL
})
}else if(input$binomial_models == "h2o_dl"){
# h2o_df$model <- NULL
if(input$h2o_dl_num_hidden == 1){
hidden <- c(input$h2o_dl_layer1)
} else if(input$h2o_dl_num_hidden == 2){
hidden <- c(input$h2o_dl_layer1, input$h2o_dl_layer2)
} else if(input$h2o_dl_num_hidden == 3){
hidden <- c(input$h2o_dl_layer1, input$h2o_dl_layer2, input$h2o_dl_layer3)
} else if(input$h2o_dl_num_hidden == 4){
hidden <- c(input$h2o_dl_layer1, input$h2o_dl_layer2, input$h2o_dl_layer3, input$h2o_dl_layer4)
}
h2o_df$model <- h2o.deeplearning(
training_frame = h2o_df$train,
x = h2o_df$x,
y = h2o_df$y,
nfolds = n_folds,
hidden = hidden,
epochs = input$h2o_dl_epochs,
l1 = input$h2o_dl_l1,
l2 = input$h2o_dl_l2
)
# Setting the confusion matrix output
train_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$train))
train_df <- as.data.frame(h2o_df$train[, h2o_df$y])
train_cm <- confusionMatrix(train_pred$predict, train_df[,1])
test_pred <- as.data.frame(h2o.predict(
object = h2o_df$model,
newdata = h2o_df$test))
test_df <- as.data.frame(h2o_df$test[, h2o_df$y])
test_cm <- confusionMatrix(test_pred$predict, test_df[,1])
output$cm_table <- function(){
cm_fun(train = train_cm$table, test = test_cm$table)
}
sh <- as.data.frame(h2o.scoreHistory(h2o_df$model))
# RMSE plot with validation set
output$rmse_plot <- renderPlotly({
# plot_ly(data = sh, x = ~number_of_trees, y = ~ training_rmse,
# type = "scatter", mode = "lines+markers", name = "Training") %>%
# add_trace(x = ~number_of_trees, y = ~ validation_rmse,
# type = "scatter", mode = "lines+markers", name = "Validation")%>%
# layout(
# title = "GBM - RMSE Score History",
# yaxis = list(title = "RMSE", domain = c(0, 0.95)),
# xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
# )
})
# Classification error plot with validation set
output$classification_error_plot <- renderPlotly({
# plot_ly(data = sh, x = ~number_of_trees, y = ~ training_classification_error,
# type = "scatter", mode = "lines+markers", name = "Training") %>%
# add_trace(x = ~number_of_trees, y = ~ validation_classification_error,
# type = "scatter", mode = "lines+markers", name = "Validation")%>%
# layout(
# title = "GBM - Classification Error Score History",
# yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
# xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
# )
})
# Logloss plot with validation set
output$logloss_plot <- renderPlotly({
# plot_ly(data = sh, x = ~number_of_trees, y = ~ training_logloss,
# type = "scatter", mode = "lines+markers", name = "Training") %>%
# add_trace(x = ~number_of_trees, y = ~ validation_logloss,
# type = "scatter", mode = "lines+markers", name = "Validation")%>%
# layout(
# title = "GBM - Logloss Score History",
# yaxis = list(title = "Classification Error", domain = c(0, 0.95)),
# xaxis = list(title = "Number of Trees", domain = c(0, 0.95))
# )
})
# Variable importance plot
output$var_imp_plot <- renderPlotly({
var_imp <- h2o.varimp(h2o_df$model)
var_imp <- var_imp[order(var_imp$scaled_importance),]
var_order <- var_imp$variable
var_imp$variable <- factor(var_imp$variable, levels = var_order)
plot_ly(data = var_imp, y = ~ variable, x = ~ round(scaled_importance,2),
type = "bar", orientation = 'h'
) %>%
layout(
title = "Deep Learning - Variable Importance",
yaxis = list(title = ""),
xaxis = list(title = "Scaled Importance"),
margin = list(l = 155)
)
})
}
}
}
})
#------------------------------ Server Function - End -------------------------------------
}
#------------------------------ UI Function -------------------------------------
ui <- dashboardPage(
dashboardHeader(),
#------------------------------ Side Bar Function -------------------------------------
dashboardSidebar(
sidebarMenu(id = "tabs",
menuItem("Data", tabName = "data", icon = icon("table"), startExpanded = TRUE,
menuSubItem("Data", tabName = "data1"),
menuSubItem("Data Prep", tabName = "data2")
),
menuItem("Visualization", icon = icon("bar-chart-o"), tabName = "vis"),
menuItem("Models", icon = icon("cog"), tabName = "models",
menuSubItem("Regression & Classification", tabName = "models1")
)
)
),
#------------------------------ Dashboard Body -------------------------------------
dashboardBody(
#------------------------------ Tags Style -------------------------------------
tags$style(type="text/css",
".shiny-output-error { visibility: hidden; }",
".shiny-output-error:before { visibility: hidden; }"
),
#------------------------------ Tabs Start -------------------------------------
tabItems(
#------------------------------ Tabs Data Start-------------------------------------
tabItem(tabName = "data1",
#------------------------------ Tabs Data - fluid page start -------------------------------------
fluidPage(
#------------------------------ Tabs Data - fluid row 1 -------------------------------------
fluidRow(
infoBoxOutput("installed_datasets"),
infoBoxOutput("in_memory_df"),
infoBoxOutput("load_datasets")
),
#------------------------------ Tabs Data - fluid row 2 -------------------------------------
fluidRow(
box(
width = 4, height = 100,
selectInput('data_source', 'Select Data Source',
list(
"R Data Frame" = "data_frame",
"R Time Series" = "time_series",
"Installed Package Dataset" = "inst_pack",
"Import CSV File" = "import"
))
),
box(width = 4, height = 100,
conditionalPanel(condition = "input.data_source.includes('data_frame') || input.data_source.includes('inst_pack') || input.data_source.includes('time_series')",
uiOutput("df_list")
),
conditionalPanel(condition = "input.data_source == 'import'",
dropdownButton(
fileInput('file1', 'Choose CSV File',
accept=c('text/csv',
'text/comma-separated-values,text/plain',
'.csv')),
awesomeCheckbox(inputId = "csv_header",
label = "Header",
value = TRUE),
radioButtons('sep', 'Separator',
c(Comma=',',
Semicolon=';',
Tab='\t'),
','),
radioButtons('quote', 'Quote',
c(None='',
'Double Quote'='"',
'Single Quote'="'"),
'"'),
circle = TRUE, status = "danger",
icon = icon("file-text", lib = "font-awesome"), width = "300px",
tooltip = tooltipOptions(title = "Click to set csv file parameters !")
)
)
),
box(width = 4, height = 100,
conditionalPanel(condition = "(output.load_flag == '2' && input.data_source == 'inst_pack') || (output.load_flag == '2' && input.data_source == 'import' ) || (output.load_flag == '1' && input.data_source == 'data_frame' ) || ( output.load_flag == '1' && input.data_source == 'inst_pack') || (output.load_flag == '1' && input.data_source == 'time_series')",
actionButton("load", "Load")
),
conditionalPanel(condition = "output.loaded_table_flag == '1'",
actionButton("remove", "Remove")
)
)
),
fluidRow(
box(width = 7, title = "Preview Table",
div(style = 'overflow-x: scroll',
DT::dataTableOutput('view_table'))
),
box(width = 5, title = "Loaded Datasets",
div(style = 'overflow-x: scroll',
DT::dataTableOutput('list_loaded_df'))
)
)
#------------------------------ Tabs Data - fluid row 2 -------------------------------------
)
#------------------------------ Tabs Data - fluid page end -------------------------------------
),
#------------------------------ Tabs Data2 Start-------------------------------------
tabItem(tabName = "data2",
fluidPage(
fluidRow(
conditionalPanel(condition = "output.loaded_table_flag == '1'",
infoBoxOutput("data_name"),
infoBoxOutput("num_var"),
infoBoxOutput("num_obs")
)
),
fluidRow(
conditionalPanel(condition = "output.loaded_table_flag == '1'",
box(width = 4, title = "Select Dataset",
uiOutput("loaded_ds_list"),
conditionalPanel(condition = "output.loaded_table_flag == '1' && output.class_df_flag == false ",
selectInput('data_option', 'Select Option',
list(
"Variables Attributes" = "var_attr",
"Data Summarise" = "data_summary",
"Reshape Options" = "data_reshape"
))
),
conditionalPanel(condition = "output.loaded_table_flag == '1' && output.class_df_flag == false && input.data_option == 'var_attr'",
radioButtons("class_selection", label = "Variables Modification",
choices = list(Numeric = "numeric", Factor = "factor",
Character = "character",
Date = "date"),
selected = "numeric"),
conditionalPanel(condition = "input.class_selection == 'date' && output.loaded_table_flag == '1' && output.class_df_flag == false && input.data_option == 'var_attr'",
selectInput('date_format', "Select the Date Format",
list(
YMD = "ymd",
YDM = "ydm",
MYD = "myd",
MDY = "mdy",
DMY = "dmy",
DYM = "dym"
)),
#titlePanel(h5("Date Preview")),
tags$h5("Date Preview"),
verbatimTextOutput("date_prev")
),
actionButton("var_modify", "Modify")
),
conditionalPanel(condition = "output.loaded_table_flag == '1' && output.class_df_flag == true ",
tableOutput("ts_table")
)
)),
conditionalPanel(condition = "output.loaded_table_flag == '1' && output.class_df_flag == false ",
box(width = 4, title = "List of Variables",
DT::dataTableOutput("data_tab2_var")
),
box(width = 4, title = "Variable Summary",
plotlyOutput("data_tab2_summary_plot",height = 200),
tableOutput("data_tab2_var_summary")
)
),
conditionalPanel(condition = "output.loaded_table_flag == '1' && output.class_df_flag == true ",
box(width = 8, title = "Time Series Plot",
dropdownButton(
tags$h3("List of Input"),
materialSwitch(inputId = "ts_plot_log", label = "Log Transformation",
status = "primary", right = FALSE),
awesomeRadio(inputId = "ts_prep_mode",
label = "Radio buttons",
choices = c("lines","lines+markers", "markers")
, selected = "lines"),
circle = TRUE, status = "danger", icon = icon("gear"), width = "200px",
tooltip = tooltipOptions(title = "Plot Setting")
),
plotlyOutput("data_tab2_ts")
)
)
),
fluidRow(
conditionalPanel(condition = "output.loaded_table_flag == '1'",
div(style = 'overflow-x: scroll',
DT::dataTableOutput("data_tab2_table"))
)
)
)
),
#------------------------------ Tabs Data End-------------------------------------
#------------------------------ Tabs Visualization Start-------------------------------------
tabItem(tabName = "vis",
conditionalPanel(condition = "output.loaded_table_flag == '1'",
fluidPage(
fluidRow(
box(width = 2,
uiOutput("loaded_ds_list_vis"),
uiOutput("vis_plot_type")
),
conditionalPanel(condition = "output.loaded_table_flag == '1' && output.class_df_flag_vis == false && input.plot_type != 'cor' ",
box(width = 2,
conditionalPanel(condition = "input.plot_type != 'density' && input.plot_type != 'box' && input.plot_type != 'hist'",
uiOutput("vis_x"),
uiOutput("vis_y")
),
conditionalPanel(condition = "input.plot_type == 'density' || input.plot_type == 'hist' || input.plot_type == 'box'",
uiOutput("vis_one_var")
)
),
conditionalPanel(condition = "input.plot_type == 'scatter' || input.plot_type == 'box'|| input.plot_type == 'hist' || input.plot_type == 'density'",
box(width = 2,
uiOutput("vis_factor"))
)
)
),
fluidRow(
box(width = 12, title = "plot",
plotlyOutput("main_plot")
)
)
)
)
),
#------------------------------ Tabs Visualization End-------------------------------------
#------------------------------ Tabs Classification Start-------------------------------------
tabItem(tabName = "models1",
fluidRow(conditionalPanel(condition = "input.model_package.includes('H2O')",
infoBoxOutput("h2o_status_box"),
infoBoxOutput("h2o_cpu"),
infoBoxOutput("h2o_cluster_mem")
)
),
fluidRow(
box(width = 2, title = "Model Inputs",
conditionalPanel(condition = "input.model_package.includes('H2O')",
uiOutput("models1_df_list"),
conditionalPanel(condition = "output.model_tab_input == '1' || output.model_tab_input == '2'",
uiOutput("models1_var_list"),
uiOutput("models1_independent_list")
)
),
awesomeCheckboxGroup(inputId = "model_package",
label = "Set Packages",
choices = c("H2O"), selected = NULL,
inline = TRUE)
),
conditionalPanel(condition = "output.dep_var_class == '1' && output.h2o_flag == '1'",
tabBox(
title = "Model Setting & Output", width = 10,
id = "class_setting", height = "500px",
tabPanel("Model Setting",
fluidRow(
box(width = 3, title = "Model Setting",
selectInput("binomial_models", "Select Classification Model",
choices = c("Deep Learning (H2O)" = "h2o_dl",
"GLM (H2O)" = "h2o_glm",
"GBM (H2O)" = "h2o_gbm",
"Random Forest (H2O)" = "h2o_rf")
),
materialSwitch(inputId = "h2o_validation",
label = "Add Validation Partition",
status = "primary", right = FALSE),
conditionalPanel(condition = "input.h2o_validation == true",
sliderInput("h2o_split_v", "Set the Training/Testing/Validation Partitions:",
min = 0.05, max = 1,
value = c(0.6,0.8))),
conditionalPanel(condition = "input.h2o_validation == false",
sliderInput("h2o_split", "Set the Training/Testing Partitions:",
min = 0.05, max = 1,
value = 0.7)),
materialSwitch(inputId = "nfolds_flag",
label = "N-fold Cross-Validation",
status = "primary", right = FALSE),
conditionalPanel(condition = "input.nfolds_flag == true",
sliderInput("nfolds", "Set the Number of folds:",
min = 3, max = 10, step = 1,
value = 5))
),
box(width = 3, title = "Model Tuning",
conditionalPanel( condition = "input.binomial_models == 'h2o_rf'",
dropdownButton(
tags$h4("More Tunning Parameters"),
selectInput("rf_histogram_type", "Optimal Split Histogram Type",
choices = c("AUTO" = "AUTO",
"UniformAdaptive" = "UniformAdaptive",
"Random" = "Random",
"QuantilesGlobal" = "QuantilesGlobal",
"RoundRobin" = "RoundRobin"
),
selected = "AUTO"
),
sliderInput("h2o_rf_col_sample_rate_change_per_level", "Column Sample Rate Change Per Level",
min = 0, max = 2,
value = 1, step = 0.01
),
sliderInput("h2o_rf_col_sample_rate_per_tree", "Column Sample Rate Per Tree",
min = 0, max = 1,
value = 1, step = 0.01
),
sliderInput("h2o_rf_sample_rate", "Row Sampling Rate",
min = 0, max = 1,
value = 0.632, step = 0.01
),
circle = TRUE, status = "danger", icon = icon("gear"), width = "300px",
tooltip = tooltipOptions(title = "More Tunning ")
),
sliderInput("h2o_rf_ntree", "Number of Trees",
min = 25, max = 1000,
value = 50, step = 25),
sliderInput("h2o_rf_max_depth", "Maximum Tree Depth",
min = 1, max = 30,
value = 20
)
),
conditionalPanel( condition = "input.binomial_models == 'h2o_gbm'",
dropdownButton(
tags$h4("More Tunning Parameters"),
selectInput("gbm_histogram_type", "Optimal Split Histogram Type",
choices = c("AUTO" = "AUTO",
"UniformAdaptive" = "UniformAdaptive",
"Random" = "Random",
"QuantilesGlobal" = "QuantilesGlobal",
"RoundRobin" = "RoundRobin"
),
selected = "AUTO"
),
sliderInput("h2o_gbm_learn_rate", "Learning Rate",
min = 0, max = 1,
value = 0.1
),
sliderInput("h2o_gbm_learn_rate_annealing", "Learning Rate",
min = 0, max = 1,
value = 1
),
sliderInput("h2o_gbm_min_rows", "Min. Rows",
min = 5, max = 20,
value = 1
),
sliderInput("h2o_gbm_min_split_improvement", "Min. Split Improvement",
min = 1e-10, max = 1e-3,
value = 1e-10
),
circle = TRUE, status = "danger", icon = icon("gear"), width = "300px",
tooltip = tooltipOptions(title = "More Tunning ")
),
sliderInput("h2o_gbm_ntree", "Number of Trees",
min = 25, max = 1000,
value = 50, step = 25),
sliderInput("h2o_gbm_max_depth", "Maximum Tree Depth",
min = 1, max = 30,
value = 5
)
),
fluidRow(
conditionalPanel( condition = "input.binomial_models == 'h2o_dl'",
fluidRow(
column(width = 2,
dropdownButton(
tags$h4("Layer Setting"),
sliderInput("h2o_dl_num_hidden", "Number of Hidden Layers",
min = 1, max = 4,
value = 2, step = 1
),
conditionalPanel(condition = "input.h2o_dl_num_hidden == 1",
sliderInput("h2o_dl_layer1", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
)
),
conditionalPanel(condition = "input.h2o_dl_num_hidden == 2",
sliderInput("h2o_dl_layer1", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
),
sliderInput("h2o_dl_layer2", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
)
),
conditionalPanel(condition = "input.h2o_dl_num_hidden == 3",
sliderInput("h2o_dl_layer1", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
),
sliderInput("h2o_dl_layer2", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
),
sliderInput("h2o_dl_layer3", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
)
),
conditionalPanel(condition = "input.h2o_dl_num_hidden == 4",
sliderInput("h2o_dl_layer1", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
),
sliderInput("h2o_dl_layer2", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
),
sliderInput("h2o_dl_layer3", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
),
sliderInput("h2o_dl_layer4", "Number of Hidden Layers",
min = 1, max = 1000,
value = 200, step = 1
)
),
circle = TRUE, status = "danger", icon = icon("gear"), width = "300px",
tooltip = tooltipOptions(title = "Layer Setting ")
)),
column(width = 2, offset = 2,
dropdownButton(
tags$h4("Layer Setting"),
circle = TRUE, status = "danger", icon = icon("gear"), width = "300px",
tooltip = tooltipOptions(title = "Early Stop")
)))
),
sliderInput("h2o_dl_epochs", "Number of Epochs",
min = 1, max = 10000,
value = 10, step = 1),
sliderInput("h2o_dl_l1", "L1 Regularization",
min = 0, max = 1,
value = 0, step = 1e-5
),
sliderInput("h2o_dl_l2", "L2 Regularization",
min = 0, max = 1,
value = 0, step = 1e-5
)
),
conditionalPanel( condition = "input.binomial_models == 'h2o_glm'",
dropdownButton(
tags$h4("More Tunning Parameters"),
selectInput("glm_solver", "Solver",
choices = c("AUTO" = "AUTO",
"IRLSM" = "IRLSM",
"L_BFGS" = "L_BFGS",
"COORDINATE_DESCENT" = "COORDINATE_DESCENT",
"COORDINATE_DESCENT_NAIVE" = "COORDINATE_DESCENT_NAIVE"
),
selected = "AUTO"
),
sliderInput("h2o_glm_max_iterations", "Solver Max Iterations",
min = 10, max = 1000,
value = 50, step = 10),
circle = TRUE, status = "danger", icon = icon("gear"), width = "300px",
tooltip = tooltipOptions(title = "More Tunning ")
),
sliderInput("h2o_glm_alpha", "Alpha Parameter",
min = 0, max = 1,
value = 0.5, step = 0.01),
materialSwitch(inputId = "h2o_glm_lambda_search",
label = "Lambda Search",
status = "primary", right = FALSE,
value = TRUE),
conditionalPanel(condition = "input.h2o_glm_lambda_search == true",
sliderInput("h2o_glm_lambda_min_ratio", "Lambda Min. Ratio",
min = 0.0001, max = 0.001,
value = 0.0001, step = 0.0001),
sliderInput("h2o_glm_nlambdas", "Number of Lambdas",
min = 10, max = 200,
value = 100, step = 1)
)
),
actionButton("h2o_run_class", "Run Model")
),
box(width = 3, title = "Confusion Matrix",
tableOutput("cm_table"))
)
),
tabPanel("Variable Importance", plotlyOutput("var_imp_plot")),
tabPanel("RMSE", plotlyOutput("rmse_plot")),
tabPanel("Classification Error", plotlyOutput("classification_error_plot")),
tabPanel("Logloss", plotlyOutput("logloss_plot"))
)
)
)
)
#------------------------------ Tabs Classification End-------------------------------------
)
)
)
#------------------------------ Call the App -------------------------------------
runApp(list(ui = ui, server = server), launch.browser = TRUE)