#! /usr/bin/env Rscript

.suppress <- suppressPackageStartupMessages
.suppress(library(neuroim2))
.suppress(library(neurosurf))
.suppress(library(rMVPA))
.suppress(library(optparse))
.suppress(library(caret))
.suppress(library(futile.logger))
.suppress(library(io))
.suppress(library(doParallel))


## should be able to use "test_subset" without test_data
## that means: train only on one subset and test only on another subset within single design

## if train_design only, the "test_subset" subsets the the train_design. Check to make sure no overlap in columns.
## if test_design provided, the test design must have matching test_data

## should output importance maps for methods that supply them (e.g. sda)


option_list <- list(
                    make_option(c("-t", "--train_design"), type="character", help="the file name of the training design table"),
                    make_option("--test_design", type="character", help="the file name of the test design table"),
                    make_option("--train_data", type="character", help="the name of the training data file as (4D .nii file)"), 
                    make_option("--test_data", type="character", help="the name of the testing data file as (4D .nii file)"),  
                    make_option(c("-n", "--normalize"), action="store_true", type="logical", help="center and scale each image volume"),
                    make_option(c("-m", "--model"), type="character", help="name of the classifier model"),
                    make_option(c("-a", "--mask"), type="character", help="name of binary image mask file (.nii format)"),
                    make_option(c("-p", "--pthreads"), type="numeric", help="the number of parallel threads"),
                    make_option(c("-l", "--label_column"), type="character", help="the name of the column in the design file containing the training labels"),
                    make_option(c("--test_label_column"), type="character", help="the name of the column in the test design file containing the test labels"),
                    make_option(c("-o", "--output"), type="character", help="the name of the output folder where results will be placed"),
                    make_option(c("-b", "--block_column"), type="character", help="the name of the column in the design file indicating the block variable used for cross-validation"),
                    make_option(c("-g", "--tune_grid"), type="character", help="string containing grid parameters in the following form: a=\\(1,2\\), b=\\('one', 'two'\\)"),
                    make_option(c("--tune_length"), type="numeric", help="an integer denoting the number of levels for each model tuning parameter"),
                    make_option(c("--save_predictors"), type="logical", action="store_true", help="save model fits (one per ROI) for predicting new data sets (default is FALSE)"),
                    make_option(c("--skip_if_folder_exists"), type="logical", action="store_true", help="skip, if output folder already exists"),
                    ## should be skip_if_folder_exists or "overwrite_folder"
                    make_option(c("--class_metrics"), type="logical", help="write out performance metrics for each class in multiclass settings"),
                    make_option(c("--ensemble_predictor"), type="logical", help="predictor is based on average prediction of all cross-validated runs"),
                    make_option(c("-c", "--config"), type="character", help="name of configuration file used to specify program parameters"))


oparser <- OptionParser(usage = "MVPA_Regional.R [options]", option_list=option_list)
opt <- parse_args(oparser, positional_arguments=TRUE)
args <- opt$options

flog.info("command line args are ", args, capture=TRUE)

## set up configuration 
config <- rMVPA:::initialize_configuration(args)
## set default parameters
config <- rMVPA:::initialize_standard_parameters(config, args, "mvpa_regional")


## Regional Specific Params
rMVPA:::set_arg("save_predictors", config, args, FALSE)


config$tune_grid <- rMVPA:::initialize_tune_grid(args, config)
config_params <- as.list(config)

flog.info("initializing design structure")
config$design <- rMVPA:::initialize_design(config)
design <- config$design


flog.info("loading training data: %s", config$train_data)


if (config$data_mode == "image") {
  region_mask <- rMVPA:::load_mask(config)
  mask_volume <- as(region_mask, "LogicalNeuroVol")
  dataset <- rMVPA:::initialize_image_data(config, region_mask)
  dataset <- list(dataset)
  names(dataset) <- ""
  flog.info("image mask contains %s voxels", sum(mask_volume))
  flog.info("image mask contains %s regions", length(table(region_mask)) - 1)
} else if (config$data_mode == "surface") {
  dataset <- rMVPA:::initialize_surface_data(config)
} else {
  flog.error("unrecognized data_mode: %s", config$data_mode)
}

print(dataset)
row_indices <- which(config$train_subset)

flog.info("number of trials: %s", length(row_indices))
flog.info("max trial index: %s", max(row_indices))


#config$ROIVolume <- loadMask(config)

# if (!is.null(config$roi_subset)) {
#   form <- try(eval(parse(text=config$roi_subset)))
#   if (inherits(form, "try-error")) {
#     flog.error("could not parse roi_subset parameter: %s", config$roi_subset)
#     stop()
#   }
#   
#   if (class(form) != "formula") {
#     flog.error("roi_subset argument must be an expression that starts with a ~ character")
#     stop()
#   }
#   
#   res <- as.logical(eval(form[[2]], list(x=config$ROIVolume)))
#   
#   config$ROIVolume[!res] <- 0
#   flog.info("roi_subset contains %s voxels", sum(config$ROIVolume > 0))
#   
#   if (sum(config$ROIVolume) <= 1) {
#     flog.info("ROI must contain more than one voxel, aborting.")
#     stop()
#   }
# }


#config$maskVolume <- as(Reduce("+", lapply(config$ROIVolume, function(roivol) as(roivol, "LogicalBrainVolume"))), "LogicalBrainVolume")
#config <- initializeData(config)

if (!is.null(config$custom_performance)) {
  flog.info("custom performance function provided: ", config$custom_performance)
}


crossval <- rMVPA:::initialize_crossval(config, design)
feature_selector <- rMVPA:::initialize_feature_selection(config)

if (is.numeric(design$y_train)) {
  flog.info("labels are continuous, running a regression analysis.")
} else {
  flog.info("labels are categorical, running a classification analysis.")
}

write_output <- function(res, name="", output, data_mode="image") {
  if (data_mode == "image") {
    for (i in 1:length(res)) {
      out <- paste0(output, "/", names(res)[i], "_", name, ".nii")
      ##TODO hacky
      if (inherits(res[[i]], "NeuroVol")) {
        write_vol(res[[i]], out)  
      } else if (inherits(res[[i]], "NeuroVec")) {
        write_vec(res[[i]], out) 
      }
      #write_vol(res[[i]], out)  
    }
  } else if (data_mode == "surface") {
    for (i in 1:length(res)) {
      out <- paste0(output, "/", names(res)[i], "_", name)
      neurosurf::writeSurfaceData(res[[i]], out)  
    }
  } else {
    stop(paste("wrong data_mode:", data_mode))
  }
}

output <- rMVPA:::make_output_dir(config$output)

for (i in 1:length(dataset)) {
  dset <- dataset[[i]]
  dname <- names(dataset)[i]
  
  if (names(dataset)[i] != "") {
    flog.info("running mvpa_regional for %s dataset: ", names(dataset)[i])
  } else {
    flog.info("running mvpa_regional")
  }
  
  mvals <- as.vector(dset$mask)
  
  flog.info("number of regions is %s", length(table(mvals[mvals != 0])))
  
  mvpa_mod <- rMVPA:::load_mvpa_model(config, dset, design,crossval,feature_selector)
  
  flog.info("mvpa model: ", mvpa_mod, capture=TRUE)
  
  regional_res <- rMVPA:::run_regional(mvpa_mod, region_mask, return_fits=TRUE)
  
  print(regional_res$performance)
  
  write_output(regional_res$vol_results, name=names(dataset)[i], output, data_mode=config$data_mode)
  
  out_perf <-
    if (dname != "")
      paste0(paste0(output, paste0("/", dname, "_performance_table.txt")))
  else paste0(output, "/" , "performance_table.txt")
  
  out_pred <-
    if (dname != "")
      paste0(paste0(output, paste0("/", dname, "_prediction_table.txt")))
  else paste0(output, "/" , "prediction_table.txt")
  
  write.table(regional_res$performance, out_perf, row.names=FALSE, quote=FALSE)
  
  write.table(regional_res$prediction_table, out_pred, row.names=FALSE, quote=FALSE)
  
}