#Developer: Befekadu Taddesse WOLDEGIORGIS (befekadu.woldegiorgis@umanitoba.ca, or taddessebefekadu@gmail.com) # A simplified version of R script to run a parallel computation job that calculates the 95 % confidence interval of parameter uncertainty. #Assumption is made that the posterior parameter set is already generated (e.g. using Bayesian inference), and quasi-random parametersets are sampled from the posterior parameter distributions. library(parallel) library(iterators) library(foreach) library(doParallel) library(bigmemory) #Initialize a pseudorandom number generator for reproducibility set.seed(123456789) #Save the session info sessionInfo() #Read the quasi random parameter sets sampled from the posterior distributions of the model parameters par_set <- read.table("Resampled_parset_HPC_test.csv",sep=",",header = T) str(par_set) #Create a file-backed shared memory file to write matrix data to bigmemory_shared_object_par <- big.matrix(nrow = 112, ncol = 6575, type = "double" , init = 0, backingfile = "bigmemory_par_uncert_hydr_HPC_presntation.bin", descriptorfile = "bigmemory_par_uncert_hydr_HPC_presntation.desc") bigmemory_shared_object_par <- attach.big.matrix("bigmemory_par_uncert_hydr_HPC_presntation.desc") parameter_uncertainty <- data.frame(seq(from=1, to = ncol(bigmemory_shared_object_par),by = 1)) names(parameter_uncertainty) <- c("timestep") parameter_uncertainty$Date <- seq(as.POSIXct("2003-01-01",format = "%Y-%m-%d"),as.POSIXct("2020-12-31",format = "%Y-%m-%d"),by = "day") parameter_uncertainty$mean <- parameter_uncertainty$timestep parameter_uncertainty$stdev <- parameter_uncertainty$timestep parameter_uncertainty$Lower_limit <- parameter_uncertainty$timestep parameter_uncertainty$Upper_limit <- parameter_uncertainty$timestep parameter_uncertainty$median <- parameter_uncertainty$timestep predictive_uncertainty <- data.frame(seq(from=1, to = ncol(bigmemory_shared_object_par),by = 1)) names(predictive_uncertainty) <- c("timestep") predictive_uncertainty$Date <- seq(as.POSIXct("2003-01-01",format = "%Y-%m-%d"),as.POSIXct("2020-12-31",format = "%Y-%m-%d"),by = "day") predictive_uncertainty$mean <- predictive_uncertainty$timestep predictive_uncertainty$stdev <- predictive_uncertainty$timestep predictive_uncertainty$Lower_limit <- predictive_uncertainty$timestep predictive_uncertainty$Upper_limit <- predictive_uncertainty$timestep predictive_uncertainty$median <- predictive_uncertainty$timestep #Define a matrix for postprocessing. parameter_uncertainty_bigmemory <- big.matrix(nrow = 6575, ncol = 5, type = "double" , init = 0, backingfile = "parameter_uncertainty_bigmemory_postproc_HPC_presntation.bin", descriptorfile = "parameter_uncertainty_bigmemory_postproc_HPC_presntation.desc") #Specify the number of cores to be used for parallel processing no_cores <- 16 #Create the clusters cl<-parallel:: makeCluster(no_cores) registerDoParallel(cl) #Export the variables that are defined outside the cluster to make them available for the parallel processes clusterExport(cl=cl, c('par_set','parameter_uncertainty')) clusterEvalQ(cl, { .libPaths("/home/mehariy/R/x86_64-pc-linux-gnu-library/4.2") library(bigmemory, help, pos = 2, lib.loc = "/home/mehariy/R/x86_64-pc-linux-gnu-library/4.2") }) for (i in seq(1,dim(par_set)[1]/no_cores)) { print(paste("Simulation",i*no_cores,"of",dim(par_set)[1],sep = " ")) #Initiate the parallel process foreach(id = 1:no_cores) %dopar% { bigmemory_shared_object_par <- attach.big.matrix("bigmemory_par_uncert_hydr_HPC_presntation.desc") #automated name of the sub-folders. Here, assumption is made that the subfolders are already created and the necessary files/symbolic links are created folder_i <- as.character(paste0("MdCrk",id)) #Edit the parameters for the parallel runs rowi <- (i-1)*no_cores+id this.par.set <- list(rep(par_set[rowi,1],3), rep(par_set[rowi,2],3), rep(par_set[rowi,3],3), rep(par_set[rowi,4],3), rep(par_set[rowi,5],3), rep(par_set[rowi,6],3), rep(par_set[rowi,7],3), rep(par_set[rowi,8],3), rep(par_set[rowi,9],2), par_set[rowi,10], rep(par_set[rowi,11],2), par_set[rowi,12], par_set[rowi,13], rep(par_set[rowi,14],3), rep(par_set[rowi,15],2), rep(par_set[rowi,16],2)) #Organize the parameters in a temporary file ("model.in.file") for the parameter editor Fortran application to read from it and edit the model parameter file accordingly. model.in.file <-as.character(paste0("MdCrk",id,"/model.in")) this.par.set <- do.call(rbind, lapply(this.par.set, function(x) paste(x,collapse=" "))) #The row names of the parameters to be edited are defined as follows. Each row represents the values of a parameter for each class (e.g. as many as the number of land uses for a land use-dependent parameter) model.in.rows<- c("mactrinfx","srratex", "wcfcx","mactrsmx", "rrcs1x","macratex", "rrcs2x","mperc1x", "kcx","epotdistx","albx", "rivvelx","lpx","wcep1x","tredax", "cmltx") write.table(this.par.set,file=model.in.file,quote=FALSE, row.names=model.in.rows,col.names=FALSE) # Run the shell script wrapper that modifies the parameters and runs the model. bfname_parEdit_sim<-as.character(paste0("./run_par_edit_and_run_the_model_",id,".sh")) #These shell script wrapers are assumed to exist in the main project directory (not the subfolders). An example of its contents are provided in the presentation slides. They must be made executable before running. system(bfname_parEdit_sim) #Read the output file for each parallel process output.rch.file<-as.character(paste0(folder_i,"/0000005.txt")) output_MeadowCrk <- read.table(output.rch.file, sep = "\t", header = F, skip = 2) names(output_MeadowCrk)<-c("Date", "prec", "temp", "cout", "totP") output_MeadowCrk$Date <- as.POSIXct(output_MeadowCrk$Date,format = "%Y-%m-%d") #Initialize the sliding window index in the parset data for the new loop of the paralle process sim_i_initial <- (i-1)*no_cores #The streamflow was saved to the outputfile with a column name "cout". ModY_hydr <- output_MeadowCrk$cout #Store the streamflow data simulated by each parallel process to the file-backed shared memory matrix bigmemory_shared_object_par[sim_i_initial + id,] <- as.numeric(ModY_hydr) #Delete the log files (if not deleted, the number of files can exceed the disc cota) mydir <- paste0("/scratch/mehariy/HPC_test","/MdCrk",id) delfiles <- dir(path=mydir ,pattern="hyss*") file.remove(file.path(mydir, delfiles)) delfiles2 <- dir(path=mydir ,pattern="tests*") file.remove(file.path(mydir, delfiles2)) } } #Calculate the moments of the simulations for each time step using a parallel computation to speed up the process for (i in seq(1,411)) { print(paste0("Moment calculation ",i/411*100," %")) if(i==411){ no_cores <- 15 } foreach(id = 1:no_cores) %dopar% { bigmemory_shared_object_par <- attach.big.matrix("bigmemory_par_uncert_hydr_HPC_presntation.desc") parameter_uncertainty_bigmemory <- attach.big.matrix("parameter_uncertainty_bigmemory_postproc_HPC_presntation.desc") col_moment <- (i-1)*16 + id n_row <- nrow(bigmemory_shared_object_par[,]) parameter_uncertainty_bigmemory[col_moment,1] <- mean(unlist(bigmemory_shared_object_par[1:n_row,col_moment]),na.rm = TRUE) parameter_uncertainty_bigmemory[col_moment,2] <- sd(unlist(bigmemory_shared_object_par[1:n_row,col_moment]),na.rm = TRUE) parameter_uncertainty_bigmemory[col_moment,3] <- quantile(unlist(bigmemory_shared_object_par[1:n_row,col_moment]), 0.025,na.rm = TRUE) parameter_uncertainty_bigmemory[col_moment,4] <- quantile(unlist(bigmemory_shared_object_par[1:n_row,col_moment]), 0.975, na.rm = TRUE) parameter_uncertainty_bigmemory[col_moment,5] <- median(unlist(bigmemory_shared_object_par[1:n_row,col_moment]), na.rm = TRUE) } } str(parameter_uncertainty_bigmemory) parameter_uncertainty[,-c(1,2)] <- parameter_uncertainty_bigmemory[,] write.table(parameter_uncertainty,"par_uncert_hydro_HPC_test.csv", sep = ",", col.names = T,row.names = F) stopCluster(cl)