#rm(list = ls(all = TRUE)) #install the following packages if required! if(!("sp" %in% rownames(installed.packages()))) install.packages("sp") if(!("INLA" %in% rownames(installed.packages()))) install.packages("INLA", repos=c(getOption("repos"), INLA="https://inla.r-inla-download.org/R/stable"), dep=TRUE) if(!("bigmemory" %in% rownames(installed.packages()))) install.packages("bigmemory") if(!("ade4" %in% rownames(installed.packages()))) install.packages("ade4") if(!("hash" %in% rownames(installed.packages()))) install.packages("hash") if(!("speedglm" %in% rownames(installed.packages()))) install.packages("speedglm") if(!("stringi" %in% rownames(installed.packages()))) install.packages("stringi") if(!("biglm" %in% rownames(installed.packages()))) install.packages("biglm") if(!("glmnet" %in% rownames(installed.packages()))) install.packages("glmnet") if(!("BAS" %in% rownames(installed.packages()))) install.packages("https://github.com/aliaksah/EMJMCMC2016/blob/master/examples/BAS%20archive/BAS_0.91.tar.gz?raw=true", repos = NULL) if(!("BAS" %in% rownames(installed.packages()))) install.packages("BAS") if(!("MASS" %in% rownames(installed.packages()))) install.packages("MASS") if(!("parallel" %in% rownames(installed.packages()))) install.packages("parallel") if(!("stats" %in% rownames(installed.packages()))) install.packages("stats") #if(!("inline" %in% rownames(installed.packages()))) # install.packages("inline") if(!("RCurl" %in% rownames(installed.packages()))) install.packages("RCurl") if(!("gnlm" %in% rownames(installed.packages()))) install.packages("gnlm") #library(inline) library(gnlm) library(glmnet) library(biglm) library(hash) library(sp) library(INLA) library(parallel) library(bigmemory) library(MASS) library(ade4) library(BAS)# should be version 0.9 !!!! otherwise some dependencies might not work! library(stringi) require(stats) #includes <- '#include ' #code <- 'int wstat; while (waitpid(-1, &wstat, WNOHANG) > 0) {};' #wait <- cfunction(body=code, includes=includes, convention='.C') m<-function(a,b)a*b estimate.bas.glm <- function(formula, data, family, prior, logn) { #only poisson and binomial families are currently adopted X <- model.matrix(object = formula,data = data) out <- bayesglm.fit(x = X, y = data[,1], family=family,coefprior=prior) # use dic and aic as bic and aic correspondinly return(list(mlik = out$logmarglik,waic = -(out$deviance + 2*out$rank) , dic = -(out$deviance + logn*out$rank),summary.fixed =list(mean = coefficients(out)))) } factorial<-function(x) ifelse(x<=170,gamma(abs(x) + 1),gamma(171)) estimate.logic.glm <- function(formula, data, family, n, m, r = 1) { X <- model.matrix(object = formula,data = data) out <- bayesglm.fit(x = X, y = data[,51], family=family,coefprior=aic.prior()) p <- out$rank fmla.proc<-as.character(formula)[2:3] fobserved <- fmla.proc[1] fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam <-stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj<-(stri_count_fixed(str = fparam, pattern = "&")) sj<-sj+(stri_count_fixed(str = fparam, pattern = "|")) sj<-sj+1 Jprior <- sum(log(factorial(sj)/((m^sj)*2^(2*sj-2)))) mlik = (-(out$deviance + log(n)*(out$rank)) + 2*(Jprior))/2+n if(mlik==-Inf) mlik = -10000 return(list(mlik = mlik,waic = -(out$deviance + 2*out$rank) , dic = -(out$deviance + log(n)*out$rank),summary.fixed =list(mean = coefficients(out)))) } #define the function estimating parameters of a given Bernoulli logic regression with Jeffrey's prior estimate.logic.bern = function(formula = NULL, data, family = binomial(), n=1000, m=50, r = 1,k.max=21) { if(length(formula)==0) return(list(mlik = -10000 + rnorm(1,0,1),waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) if(is.na(formula)) return(list(mlik = -10000 + rnorm(1,0,1),waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) out = glm(formula = formula,data = data, family=family) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc=as.character(formula)[2:3] fobserved = fmla.proc[1] fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 Jprior = sum(log(factorial(sj)/((m^sj)*2^(2*sj-2)))) mlik = (-(out$deviance + log(n)*(out$rank)) + 2*(Jprior))/2+n if(mlik==-Inf) mlik = -10000 return(list(mlik = mlik,waic = -(out$deviance + 2*out$rank) , dic = -(out$deviance + log(n)*out$rank),summary.fixed =list(mean = coefficients(out)))) } #define the function estimating parameters of a given Bernoulli logic regression with robust g prior estimate.logic.bern.tCCH = function(formula = NULL,y.id = 51, data, n=1000, m=50, r = 1, p.a = 1, p.b = 2, p.r = 1.5, p.s = 0, p.v=-1, p.k = 1,k.max=21) { if(length(formula)==0) return(list(mlik = -10000 + rnorm(1,0,1),waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) if(is.na(formula)) return(list(mlik = -10000 + rnorm(1,0,1),waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) X = scale(model.matrix(object = formula,data = data),center = T,scale = F) X[,1] = 1 fmla.proc=as.character(formula)[2:3] out = glm(formula = as.formula(paste0(fmla.proc[1],"~X+0")),data=data,family = binomial()) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } beta=coef(out)[-1] if(length(which(is.na(beta)))>0) { return(list(mlik = -10000 + rnorm(1,0,1),waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 p.v = (n+1)/(p+1) sout = summary(out) J.a.hat = 1/sout$cov.unscaled[1,1] if(length(beta)>0&&length(beta)==(dim(sout$cov.unscaled)[1]-1)&&length(which(is.na(beta)))==0) { Q = t(beta)%*%solve(sout$cov.unscaled[-1,-1])%*%beta }else{ return(list(mlik = -10000 + rnorm(1,0,1),waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } Jprior = sum(log(factorial(sj)/((m^sj)*2^(2*sj-2)))) mlik = (logLik(out)- 0.5*log(J.a.hat) - 0.5*p*log(p.v) -0.5*Q/p.v + log(beta((p.a+p)/2,p.b/2)) + log(phi1(p.b/2,p.r,(p.a+p.b+p)/2,(p.s+Q)/2/p.v,1-p.k))+Jprior + p*log(r)+n) if(is.na(mlik)||mlik==-Inf) mlik = -10000+ rnorm(1,0,1) return(list(mlik = mlik,waic = AIC(out) , dic = BIC(out),summary.fixed =list(mean = coefficients(out)))) } #define the function estimating parameters of a given Gaussian logic regression with robust g prior estimate.logic.lm.tCCH = function(formula = NULL, data, n=1000, m=50, r = 1, p.a = 1, p.b = 2, p.r = 1.5, p.s = 0, p.v=-1, p.k = 1,k.max=21) { if(is.null(formula)) return(list(mlik = -10000,waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) fmla.proc=as.character(formula)[2:3] fobserved = fmla.proc[1] if(fmla.proc[2]=="-1") return(list(mlik = -10000,waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) out = lm(formula = formula,data = data) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 Jprior = prod(factorial(sj)/((m^sj)*2^(2*sj-2))) p.v = (n+1)/(p+1) R.2 = summary(out)$r.squared mlik = (-0.5*p*log(p.v) -0.5*(n-1)*log(1-(1-1/p.v)*R.2) + log(beta((p.a+p)/2,p.b/2)) - log(beta(p.a/2,p.b/2)) + log(phi1(p.b/2,(n-1)/2,(p.a+p.b+p)/2,p.s/2/p.v,R.2/(p.v-(p.v-1)*R.2))) - hypergeometric1F1(p.b/2,(p.a+p.b)/2,p.s/2/p.v,log = T)+log(Jprior) + p*log(r)+n) if(mlik==-Inf||is.na(mlik)||is.nan(mlik)) mlik = -10000 return(list(mlik = mlik,waic = AIC(out)-n , dic = BIC(out)-n,summary.fixed =list(mean = coef(out)))) } #define the function estimating parameters of a given Gaussian logic regression with Jeffrey's prior estimate.logic.lm = function(formula= NULL, data, n, m, r = 1,k.max=21) { if(is.null(formula)) return(list(mlik = -10000,waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) out = lm(formula = formula,data = data) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc=as.character(formula)[2:3] fobserved = fmla.proc[1] fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 Jprior = prod(factorial(sj)/((m^sj)*2^(2*sj-2))) mlik = (-BIC(out)+2*log(Jprior) + 2*p*log(r)+n)/2 if(mlik==-Inf) mlik = -10000 return(list(mlik = mlik,waic = AIC(out)-n , dic = BIC(out)-n,summary.fixed =list(mean = coef(out)))) } #define the function simplifying logical expressions at the end of the search simplifyposteriors.infer=function(X,posteriors,th=0.0000001,thf=0.5, resp) { todel = which(posteriors[,2]0) posteriors=posteriors[-todel,] rhash=hash() for(i in 1:length(posteriors[,1])) { expr=posteriors[i,1] #print(expr) res=model.matrix(data=X,object = as.formula(paste0(resp,"~",expr))) res[,1]=res[,1]-res[,2] ress=c(stri_flatten(res[,1],collapse = ""),stri_flatten(res[,2],collapse = ""),posteriors[i,2],expr) ress[1] = stri_sub(ress[1],from = 1,to = 9999) if(!(ress[1] %in% values(rhash)||(ress[2] %in% values(rhash)))) rhash[[ress[1]]]=ress else { if(ress[1] %in% keys(rhash)) { rhash[[ress[1]]][3]= (as.numeric(rhash[[ress[1]]][3]) + as.numeric(ress[3])) if(stri_length(rhash[[ress[1]]][4])>stri_length(expr)) rhash[[ress[1]]][4]=expr } else { rhash[[ress[2]]][3]= (as.numeric(rhash[[ress[2]]][3]) + as.numeric(ress[3])) if(stri_length(rhash[[ress[2]]][4])>stri_length(expr)) rhash[[ress[2]]][4]=expr } } } res=as.data.frame(t(values(rhash)[c(3,4),])) res$V1=as.numeric(as.character(res$V1)) tokeep = which(res$V1>thf) if(length(tokeep)>0) { res=res[tokeep,] }else warning(paste0("No features with posteriors above ",thf,". Returning everything")) res=res[order(res$V1, decreasing = T),] clear(rhash) rm(rhash) tokeep = which(res[,1]>1) if(length(tokeep)>0) res[tokeep,1]=1 colnames(res)=c("posterior","feature") rownames(res) = NULL return(res) } #define a function performing the map step for a given thread runpar.infer=function(vect) { ret = NULL tryCatch({ set.seed(as.integer(vect$cpu)) do.call(runemjmcmc, vect[1:(length(vect)-5)]) vals=values(hashStat) fparam=mySearch$fparam cterm=max(vals[1,],na.rm = T) ppp=mySearch$post_proceed_results_hash(hashStat = hashStat) post.populi=sum(exp(values(hashStat)[1,][1:vect$NM]-cterm),na.rm = T) betas = NULL mliks = NULL if(vect$save.beta){ #get the modes of beta coefficients for the explored models Nvars=mySearch$Nvars linx =mySearch$Nvars+4 lHash=length(hashStat) mliks = values(hashStat)[which((1:(lHash * linx)) %% linx == 1)] betas = values(hashStat)[which((1:(lHash * linx)) %% linx == 4)] for(i in 1:(Nvars-1)) { betas=cbind(betas,values(hashStat)[which((1:(lHash * linx)) %% linx == (4+i))]) } betas=cbind(betas,values(hashStat)[which((1:(lHash * linx)) %% linx == (0))]) } preds = NULL if(vect$predict) { preds=mySearch$forecast.matrix.na(link.g = vect$link, covariates = (vect$test),betas = betas,mliks.in = mliks)$forecast } ret = list(post.populi = post.populi, p.post = ppp$p.post, cterm = cterm, preds = preds, fparam = fparam, betas = betas, mliks = mliks ) if(length(cterm)==0){ vect$cpu=as.integer(vect$cpu)+as.integer(runif(1,1,10000)) if(vect$cpu<50000) ret = runpar.infer(vect) else ret = NULL } },error = function(err){ print(paste0("error in thread", vect[length(vect)])) print(err) vect$cpu=as.integer(vect$cpu)+as.integer(runif(1,1,10000)) if(vect$cpu<50000) ret = runpar.infer(vect) else ret =err },finally = { #clear(hashStat) #rm(hashStat) #rm(vals) gc() return(ret) }) } #define the function estimating parameters of a given Bernoulli logic regression with Jeffrey's prior estimate.logic.bern = function(formula, data, family = binomial(), n=1000, m=50, r = 1,k.max=21) { if(is.null(formula)) return(list(mlik = -10000 + rnorm(1,0,1),waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) out = glm(formula = formula,data = data, family=family) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc=as.character(formula)[2:3] fobserved = fmla.proc[1] fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 Jprior = sum(log(factorial(sj)/((m^sj)*2^(2*sj-2)))) mlik = (-(out$deviance + log(n)*(out$rank)) + 2*(Jprior))/2+n if(mlik==-Inf) mlik = -10000 return(list(mlik = mlik,waic = -(out$deviance + 2*out$rank) , dic = -(out$deviance + log(n)*out$rank),summary.fixed =list(mean = coefficients(out)))) } #define the function estimating parameters of a given Bernoulli logic regression with robust g prior estimate.logic.bern.tCCH = function(formula = NULL,y.id = 51, data, n=1000, m=50, r = 1, p.a = 1, p.b = 2, p.r = 1.5, p.s = 0, p.v=-1, p.k = 1,k.max=21) { if(is.null(formula)) return(list(mlik = -10000 + rnorm(1,0,1),waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) X = scale(model.matrix(object = formula,data = data),center = T,scale = F) X[,1] = 1 fmla.proc=as.character(formula)[2:3] out = glm(formula = as.formula(paste0(fmla.proc[1],"~X+0")),data=data,family = binomial()) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } beta=coef(out)[-1] if(length(which(is.na(beta)))>0) { return(list(mlik = -10000 + rnorm(1,0,1),waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 p.v = (n+1)/(p+1) sout = summary(out) J.a.hat = 1/sout$cov.unscaled[1,1] if(length(beta)>0&&length(beta)==(dim(sout$cov.unscaled)[1]-1)&&length(which(is.na(beta)))==0) { Q = t(beta)%*%solve(sout$cov.unscaled[-1,-1])%*%beta }else{ return(list(mlik = -10000 + rnorm(1,0,1),waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } Jprior = sum(log(factorial(sj)/((m^sj)*2^(2*sj-2)))) mlik = (logLik(out)- 0.5*log(J.a.hat) - 0.5*p*log(p.v) -0.5*Q/p.v + log(beta((p.a+p)/2,p.b/2)) + log(phi1(p.b/2,p.r,(p.a+p.b+p)/2,(p.s+Q)/2/p.v,1-p.k))+Jprior + p*log(r)+n) if(is.na(mlik)||mlik==-Inf) mlik = -10000+ rnorm(1,0,1) return(list(mlik = mlik,waic = AIC(out) , dic = BIC(out),summary.fixed =list(mean = coefficients(out)))) } #define the function estimating parameters of a given Gaussian logic regression with robust g prior estimate.logic.lm.tCCH = function(formula = NULL, data, n=1000, m=50, r = 1, p.a = 1, p.b = 2, p.r = 1.5, p.s = 0, p.v=-1, p.k = 1,k.max=21) { if(is.null(formula)) return(list(mlik = -10000,waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) fmla.proc=as.character(formula)[2:3] fobserved = fmla.proc[1] if(fmla.proc[2]=="-1") return(list(mlik = -10000,waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) out = lm(formula = formula,data = data) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 Jprior = prod(factorial(sj)/((m^sj)*2^(2*sj-2))) p.v = (n+1)/(p+1) R.2 = summary(out)$r.squared mlik = (-0.5*p*log(p.v) -0.5*(n-1)*log(1-(1-1/p.v)*R.2) + log(beta((p.a+p)/2,p.b/2)) - log(beta(p.a/2,p.b/2)) + log(phi1(p.b/2,(n-1)/2,(p.a+p.b+p)/2,p.s/2/p.v,R.2/(p.v-(p.v-1)*R.2))) - hypergeometric1F1(p.b/2,(p.a+p.b)/2,p.s/2/p.v,log = T)+log(Jprior) + p*log(r)+n) if(mlik==-Inf||is.na(mlik)||is.nan(mlik)) mlik = -10000 return(list(mlik = mlik,waic = AIC(out)-n , dic = BIC(out)-n,summary.fixed =list(mean = coef(out)))) } #define the function estimating parameters of a given Gaussian logic regression with Jeffrey's prior estimate.logic.lm.jef = function(formula= NULL, data, n, m, r = 1,k.max=21) { if(is.null(formula)) return(list(mlik = -10000,waic =10000 , dic = 10000,summary.fixed =list(mean = 1))) out = lm(formula = formula,data = data) p = out$rank if(p>k.max) { return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = 0))) } fmla.proc=as.character(formula)[2:3] fobserved = fmla.proc[1] fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]=stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam =stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj=(stri_count_fixed(str = fparam, pattern = "&")) sj=sj+(stri_count_fixed(str = fparam, pattern = "|")) sj=sj+1 Jprior = prod(factorial(sj)/((m^sj)*2^(2*sj-2))) mlik = (-BIC(out)+2*log(Jprior) + 2*p*log(r)+n)/2 if(mlik==-Inf) mlik = -10000 return(list(mlik = mlik,waic = AIC(out)-n , dic = BIC(out)-n,summary.fixed =list(mean = coef(out)))) } LogicRegr = function(formula, data, family = "Gaussian",prior = "J",report.level = 0.5, d = 20, cmax = 5, kmax = 20, p.and = 0.9, p.not = 0.05, p.surv = 0.1,ncores = -1, n.mods = 1000 ,advanced = list(presearch = T,locstop = F ,estimator = estimate.logic.bern.tCCH,estimator.args = list(data = data.example,n = 1000, m = 50,r=1),recalc_margin = 250, save.beta = F,interact = T,relations = c("","lgx2","cos","sigmoid","tanh","atan","erf"),relations.prob =c(0.4,0.0,0.0,0.0,0.0,0.0,0.0),interact.param=list(allow_offsprings=1,mutation_rate = 300,last.mutation = 5000, max.tree.size = 1, Nvars.max = 100,p.allow.replace=0.9,p.allow.tree=0.2,p.nor=0.2,p.and = 1),n.models = 10000,unique = T,max.cpu = 4,max.cpu.glob = 4,create.table = F,create.hash = T,pseudo.paral = T,burn.in = 50,outgraphs=F,print.freq = 1000,advanced.param = list( max.N.glob=as.integer(10), min.N.glob=as.integer(5), max.N=as.integer(3), min.N=as.integer(1), printable = F))) { data.example = data advanced$formula = formula advanced$data = data advanced$interact.param$Nvars.max = d advanced$interact.param$ max.tree.size= cmax - 1 advanced$interact.param$p.and = p.and advanced$interact.param$p.nor = p.not advanced$interact.param$p.allow.tree = p.surv if(!prior %in% c("J","G")) { warning("Wrong prior supplied. J (for Jeffrey's) and G (for robust g) priors are allowd only. Setting J as default.") prior = "J" } if(!family %in% c("Gaussian","Bernoulli")) { warning("Wrong familty supplied. Gaussian and Bernoulli families are allowd only. Setting Gaussian as default.") family = "Gaussian" } if(family == "Gaussian") { if(prior == "J") { advanced$estimator = estimate.logic.lm.jef advanced$estimator.args = list(data = data, n = dim(data)[1], m =stri_count_fixed(as.character(formula)[3],"+"),k.max = kmax) }else{ advanced$estimator = estimate.logic.lm.tCCH advanced$estimator.args = list(data = data, n = dim(data)[1], m =stri_count_fixed(as.character(formula)[3],"+"),k.max = kmax) } }else{ if(prior == "J") { advanced$estimator = estimate.logic.bern advanced$estimator.args = list(data = data, n = dim(data)[1], m =stri_count_fixed(as.character(formula)[3],"+"),k.max = kmax) }else{ advanced$estimator = estimate.logic.bern.tCCH advanced$estimator.args = list(data = data, n = dim(data)[1], m =stri_count_fixed(as.character(formula)[3],"+"),k.max = kmax) } } if(ncores<1) ncores = detectCores() return(pinferunemjmcmc(n.cores = ncores,report.level = report.level, simplify = T, num.mod.best = n.mods, predict = F, runemjmcmc.params = advanced)) } mcgmjpar = function(X,FUN,mc.cores) mclapply(X= X,FUN = FUN,mc.preschedule = T,mc.cores = mc.cores) mcgmjpse = function(X,FUN,mc.cores) lapply(X,FUN) pinferunemjmcmc = function(n.cores = 4, mcgmj = mcgmjpar, report.level = 0.5, simplify = F, num.mod.best = 1000, predict = F, test.data = 1, link.function = function(z)z, runemjmcmc.params) { if(predict) { runemjmcmc.params$save.beta = T if(length(test.data)==0) { warning("Test data is not provided. No predictions will be made!") } } params = list(runemjmcmc.params)[rep(1,n.cores)] for(i in 1:n.cores) { params[[i]]$test = test.data params[[i]]$link = link.function params[[i]]$predict = predict params[[i]]$NM = num.mod.best params[[i]]$cpu=i } M = n.cores #results = runpar.infer(params[[1]]) results=mcgmj(X = params,FUN = runpar.infer,mc.cores = n.cores) #print(results) #clean up gc() #prepare the data structures for final analysis of the runs compmax = runemjmcmc.params$interact.param$Nvars.max + 1 resa=array(data = 0,dim = c(compmax,M*3)) post.popul = array(0,M) max.popul = array(0,M) nulls=NULL not.null=1 #check which threads had non-zero exit status for(k in 1:M) { if(length(results[[k]])<=1||length(results[[k]]$cterm)==0||length(results[[k]]$p.post)!=runemjmcmc.params$interact.param$Nvars.max) { nulls=c(nulls,k) #warning(paste0("Thread ",k,"did not converge or was killed by OS!")) next } else { not.null = k } } if(length(nulls) == M) { warning("All threads did not converge or gave an error! Returning stats from the threads only!") return(list(feat.stat = NULL,predictions = NULL,allposteriors = NULL, threads.stats = results)) } #for all of the successful runs collect the results into the corresponding data structures for(k in 1:M) { if(k %in% nulls) { results[[k]]=results[[not.null]] } max.popul[k]=results[[k]]$cterm post.popul[k]=results[[k]]$post.populi resa[,k*3-2]=c(results[[k]]$fparam,"Post.Gen.Max") resa[,k*3-1]=c(results[[k]]$p.post,results[[k]]$cterm) resa[,k*3]=rep(post.popul[k],length(results[[k]]$p.post)+1) } #renormalize estimates of the marginal inclusion probabilities #based on all of the runs ml.max=max(max.popul) post.popul=post.popul*exp(-ml.max+max.popul) p.gen.post=post.popul/sum(post.popul) #perform BMA of the redictions across the runs pred = NULL if(predict){ pred = results[[1]]$preds*p.gen.post[1] if(M > 1) { for(i in 2:M) { pred=pred+results[[i]]$preds*p.gen.post[i] } } } hfinal=hash() for(ii in 1:M) { resa[,ii*3]=p.gen.post[ii]*as.numeric(resa[,ii*3-1]) resa[length(resa[,ii*3]),ii*3]=p.gen.post[ii] if(p.gen.post[ii]>0) { for(jj in 1:(length(resa[,ii*3])-1)) { if(resa[jj,ii*3]>0) { if(as.integer(has.key(hash = hfinal,key =resa[jj,ii*3-2]))==0) hfinal[[resa[jj,ii*3-2]]]=as.numeric(resa[jj,ii*3]) else hfinal[[resa[jj,ii*3-2]]]=hfinal[[resa[jj,ii*3-2]]]+as.numeric(resa[jj,ii*3]) } } } } posteriors=values(hfinal) clear(hfinal) #delete the unused further variables rm(hfinal) rm(resa) rm(post.popul) rm(max.popul) #simplify the found trees and their posteriors posteriors=as.data.frame(posteriors) posteriors=data.frame(X=row.names(posteriors),x=posteriors$posteriors) posteriors$X=as.character(posteriors$X) res1 = NULL if(simplify){ res1=simplifyposteriors.infer(X = runemjmcmc.params$data,posteriors = posteriors, thf = report.level,resp = as.character(runemjmcmc.params$formula)[2]) rownames(res1) = NULL res1$feature = as.character(res1$feature) } posteriors=posteriors[order(posteriors$x, decreasing = T),] colnames(posteriors)=c("feature","posterior") rm(params) gc() return(list(feat.stat = cbind(res1$feature,res1$posterior),predictions = pred,allposteriors = posteriors, threads.stats = results)) } estimate.logic.lm <- function(formula, data, n, m, r = 1) { out <- lm(formula = formula,data = data) p <- out$rank fmla.proc<-as.character(formula)[2:3] fobserved <- fmla.proc[1] fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam <-stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] sj<-(stri_count_fixed(str = fparam, pattern = "&")) sj<-sj+(stri_count_fixed(str = fparam, pattern = "|")) sj<-sj+1 Jprior <- prod(factorial(sj)/((m^sj)*2^(2*sj-2))) #tn<-sum(stri_count_fixed(str = fmla.proc[2], pattern = "I(")) mlik = (-BIC(out)+2*log(Jprior) + 2*p*log(r)+n)/2 if(mlik==-Inf) mlik = -10000 return(list(mlik = mlik,waic = AIC(out)-n , dic = BIC(out)-n,summary.fixed =list(mean = coef(out)))) } #estimate elastic nets estimate.elnet <- function(formula,response, data, family,alpha) { X <- model.matrix(object = formula,data = data) if(dim(X)[2]<=2) return(list(mlik = -10000,waic = 10000 , dic = 10000,summary.fixed =list(mean = array(0,dim=dim(X)[2])))) out <- glmnet(x=X[,-1], y = data[[response]], family=family, a=alpha) dout<- as.numeric(-deviance(out)[[out$dim[2]]]) return(list(mlik = dout,waic = -dout , dic = -dout,summary.fixed =list(mean = coef(out)[,out$dim[2]]))) } # use dic and aic as bic and aic correspondinly estimate.speedglm <- function(formula, data, family, prior, logn) # weird behaviour, bad control of singularity { X <- model.matrix(object = formula,data = data) out <- speedglm.wfit(y = data[,1], X = X, intercept=FALSE, family=family,eigendec = T, method = "Cholesky") if(prior == "AIC") return(list(mlik = -out$aic ,waic = -(out$deviance + 2*out$rank) , dic = -(out$RSS),summary.fixed =list(mean = out$coefficients))) if(prior=="BIC") return(list(mlik = -out$RSS-logn*out$rank ,waic = -(out$deviance + 2*out$rank) , dic = -(out$RSS+logn*out$rank),summary.fixed =list(mean = out$coefficients))) } estimate.bigm <- function(formula, data, family, prior,n, maxit = 2,chunksize = 1000000) # nice behaviour { out <- bigglm(data = data, family=family,formula = formula, sandwich = F,maxit = maxit, chunksize = chunksize) if(prior == "AIC") return(list(mlik = -AIC(out,k = 2) ,waic = AIC(out,k = 2) , dic = AIC(out,k = n),summary.fixed =list(mean = coef(out)))) if(prior=="BIC") return(list(mlik = -AIC(out,k = n) ,waic = AIC(out, k = 2) , dic = AIC(out,k = n),summary.fixed =list(mean = coef(out)))) } sigmoid<- function(x) { return(1/(1+(exp(-x)))) } erf <- function(x) { return(2 * pnorm(x * sqrt(2)) - 1) } estimate.bas.lm <- function(formula, data, prior, n, g = 0) { out <- lm(formula = formula,data = data) # 1 for aic, 2 bic prior, else g.prior p <- out$rank if(prior == 1) { ss<-sum(out$residuals^2) logmarglik <- -0.5*(log(ss)+2*p) } else if(prior ==2) { ss<-sum(out$residuals^2) logmarglik <- -0.5*(log(ss)+log(n)*p) } else { Rsquare <- summary(out)$r.squared #logmarglik = .5*(log(1.0 + g) * (n - p -1) - log(1.0 + g * (1.0 - Rsquare)) * (n - 1))*(p!=1) logmarglik = .5*(log(1.0 + g) * (n - p) - log(1.0 + g * (1.0 - Rsquare)) * (n - 1))*(p!=1) } # use dic and aic as bic and aic correspondinly return(list(mlik = logmarglik,waic = AIC(out) , dic = BIC(out),summary.fixed =list(mean = coef(out)))) } estimate.inla <- function(formula, args) { out<-NULL tryCatch(capture.output({ out <- do.call(inla, c(args,formula = formula))})) if(is.null(out)) return(list(mlik = -10000,waic = 10000 , dic = 10000, summary.fixed =list(mean = NULL))) # use dic and aic as bic and aic correspondinly coef<-out$summary.fixed$mode coef[1]<-coef[1]+out$summary.hyperpar$mode[1] return(list(mlik = out$mlik[1],waic = out$waic[1] , dic = out$dic[1], summary.fixed =list(mean = coef))) } estimate.inla.poisson <- function(formula, data,r = 1.0/200.0,logn=log(200.0), relat=c("*","+","cos","sigmoid","tanh","atan","sin","erf")) { out<-NULL capture.output({tryCatch(capture.output({ fmla.proc<-as.character(formula)[2:3] sj<-sum(stri_count_fixed(str = fmla.proc[2],pattern = relat)) out <-inla(family = "poisson",silent = 2L,data = data,formula = formula,control.compute = list(dic = TRUE, waic = TRUE, mlik = TRUE)) }))}) if(is.null(out)) return(list(mlik = -10000+log(r)*(sj),waic = 10000 , dic = 10000, summary.fixed =list(mean = NULL))) if(length(out$waic[1]$waic)==0) return(list(mlik = -10000+log(r)*(sj),waic = 10000 , dic = 10000, summary.fixed =list(mean = NULL))) # use dic and aic as bic and aic correspondinly coef<-out$summary.fixed$mode #coef[1]<-coef[1]+out$summary.hyperpar$mode[1] return(list(mlik = out$mlik[1]+log(r)*(sj),waic = out$waic[1]$waic , dic = out$dic[1]$dic, summary.fixed =list(mean = coef))) } estimate.gamma.cpen <- function(formula, data,r = 1.0/1000.0,logn=log(1000.0),relat=c("cos","sigmoid","tanh","atan","sin","erf")) { fparam<-NULL fmla.proc<-as.character(formula)[2:3] fobserved <- fmla.proc[1] fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam <-stri_split_fixed(str = fmla.proc[2],pattern = "+I",omit_empty = F)[[1]] sj<-(stri_count_fixed(str = fparam, pattern = "*")) sj<-sj+(stri_count_fixed(str = fparam, pattern = "+")) for(rel in relat) sj<-sj+(stri_count_fixed(str = fparam, pattern = rel)) #sj<-sj+1 tryCatch(capture.output({ out <- glm(formula = formula,data = data, family = gaussian) # 1 for aic, 2 bic prior, else g.prior mlik = (-(BIC(out) -2*log(r)*sum(sj))+1000)/2 waic = (out$deviance + 2*out$rank)+10000 dic = (out$deviance + logn*out$rank)+10000 summary.fixed =list(mean = coefficients(out)) }, error = function(err) { print(err) mlik = -10000 waic = 10000 dic = 10000 summary.fixed =list(mean = array(0,dim=length(fparam))) })) return(list(mlik = mlik,waic = waic , dic = dic,summary.fixed =summary.fixed)) } parallelize<-function(X,FUN) { max.cpu <- length(X) cl <-makeCluster(max.cpu ,type = paral.type,outfile = "")#outfile = "" clusterEvalQ(cl = cl,expr = c(library(INLA),library(bigmemory))) if(exists("statistics")) { clusterExport(cl=cl, "statistics") clusterEvalQ(cl,{statistics <- attach.resource(statistics);1}) } res.par <- parLapply(cl = cl, X, FUN) stopCluster(cl) return(res.par) } estimate.glm <- function(formula, data, family, prior, n=1, g = 0) { out <- glm(formula = formula, family = family, data = data) # 1 for aic, 2 bic prior, else g.prior p <- out$rank if(prior == 1) { logmarglik <- -AIC(out) } else if(prior ==2) { logmarglik <- -BIC(out) } else { Rsquare <- summary(out)$r.squared #logmarglik = .5*(log(1.0 + g) * (n - p -1) - log(1.0 + g * (1.0 - Rsquare)) * (n - 1))*(p!=1) logmarglik = .5*(log(1.0 + g) * (n - p) - log(1.0 + g * (1.0 - Rsquare)) * (n - 1))*(p!=1) } # use dic and aic as bic and aic correspondinly return(list(mlik = logmarglik,waic = AIC(out) , dic = BIC(out),summary.fixed =list(mean = coef(out)))) } simplify.formula<-function(fmla,names) { fmla.proc<-as.character(fmla)[2:3] fobserved <- fmla.proc[1] fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = " ",replacement = "") fmla.proc[2]<-stri_replace_all(str = fmla.proc[2],fixed = "\n",replacement = "") fparam <- names[which(names %in% stri_split_fixed(str = fmla.proc[2],pattern = "+",omit_empty = F)[[1]] )] return(list(fparam = fparam,fobserved = fobserved)) } simplifyposteriors<-function(X,posteriors,th=0.0001,thf=0.2, resp) { posteriors<-posteriors[-which(posteriors[,2]stri_length(expr)) rhash[[ress[1]]][4]<-expr } } } res<-as.data.frame(t(values(rhash)[c(3,4),])) res$V1<-as.numeric(as.character(res$V1)) res<-res[which(res$V1>thf),] res<-res[order(res$V1, decreasing = T),] clear(rhash) rm(rhash) res[which(res[,1]>1),1]<-1 colnames(res)<-c("posterior","tree") return(res) } do.call.emjmcmc<-function(vect) { set.seed(as.integer(vect$cpu)) do.call(runemjmcmc, vect[1:vect$simlen]) vals<-values(hashStat) fparam<-mySearch$fparam cterm<-max(vals[1,],na.rm = T) ppp<-mySearch$post_proceed_results_hash(hashStat = hashStat) post.populi<-sum(exp(values(hashStat)[1,][1:vect$NM]-cterm),na.rm = T) clear(hashStat) #rm(hashStat) rm(vals) gc() return(list(post.populi = post.populi, p.post = ppp$p.post, cterm = cterm, fparam = fparam)) } parall.gmj <- function(X,M=16,preschedule = F) mclapply(X = X, FUN = do.call.emjmcmc,mc.preschedule = preschedule, mc.cores = M,mc.cleanup = T) # a function that creates an EMJMCMC2016 object with specified values of some parameters and default values of other parameters runemjmcmc<-function(formula, data, secondary = vector(mode="character", length=0), latnames="", estimator,estimator.args = "list",n.models,p.add.default = 1,p.add = 0.5, unique = F,save.beta=F, locstop.nd = F, latent="",max.cpu=4,max.cpu.glob=2,create.table=T, hash.length = 20, presearch=T, locstop =F ,pseudo.paral = F,interact = F,deep.method =1,relations = c("","sin","cos","sigmoid","tanh","atan","erf"),relations.prob =c(0.4,0.1,0.1,0.1,0.1,0.1,0.1),gen.prob = c(1,10,5,1,0),pool.cross = 0.9,p.epsilon = 0.0001, del.sigma = 0.5,pool.cor.prob = F, interact.param=list(allow_offsprings=2,mutation_rate = 100,last.mutation=2000, max.tree.size = 10000, Nvars.max = 100, p.allow.replace = 0.7,p.allow.tree=0.1,p.nor=0.3,p.and = 0.7), prand = 0.01,keep.origin = T, sup.large.n = 5000, recalc_margin = 2^10, create.hash=F,interact.order=1,burn.in=1, eps = 10^6, max.time = 120,max.it = 25000, print.freq = 100,outgraphs=F,advanced.param=NULL, distrib_of_neighbourhoods=t(array(data = c(7.6651604,16.773326,14.541629,12.839445,2.964227,13.048343,7.165434, 0.9936905,15.942490,11.040131,3.200394,15.349051,5.466632,14.676458, 1.5184551,9.285762,6.125034,3.627547,13.343413,2.923767,15.318774, 14.5295380,1.521960,11.804457,5.070282,6.934380,10.578945,12.455602, 6.0826035,2.453729,14.340435,14.863495,1.028312,12.685017,13.806295),dim = c(7,5))), distrib_of_proposals = c(76.91870,71.25264,87.68184,60.55921,15812.39852)) { #first create the object assign("data.example",data, envir=globalenv()) variables <- simplify.formula(formula,names(data.example)) assign("fparam.example",variables$fparam, envir=globalenv()) assign("fobserved.example",variables$fobserved, envir=globalenv()) #for(i in 1:length(fparam.example)) #{ # fparam.example[i]<<-paste("I(",variables$fparam[i],")",sep = "") #} fparam.tmp<- as.vector(sapply(FUN = paste,"I(",variables$fparam,")",sep="")[,1]) if(latnames[1]!="") fparam.example<<-c(fparam.tmp,latnames) else fparam.example<<-fparam.tmp #print(fparam.tmp) #print(fparam.example) assign("mySearch",EMJMCMC2016(), envir=globalenv()) if(length(secondary)>0) mySearch$filtered <<- sapply(FUN = paste,"I(",secondary,")",sep="") mySearch$estimator <<- estimator mySearch$latnames <<- latnames mySearch$estimator.args <<- estimator.args mySearch$latent.formula <<- latent mySearch$save.beta <<- save.beta mySearch$prand<<-prand mySearch$p.add<<-array(p.add,length(fparam.example)) mySearch$p.add.default<<-p.add.default mySearch$recalc.margin <<- as.integer(recalc_margin) mySearch$max.cpu <<- as.integer(max.cpu) mySearch$locstop.nd <<- locstop.nd mySearch$pool.cor.prob<<-pool.cor.prob mySearch$sup.large.n<<-as.integer(sup.large.n) mySearch$max.cpu.glob <<- as.integer(max.cpu.glob) mySearch$deep.method <<- as.integer(deep.method) if(interact) { mySearch$allow_offsprings <<- as.integer(interact.param$allow_offsprings) mySearch$mutation_rate <<- as.integer(interact.param$mutation_rate) mySearch$Nvars.max <<- as.integer(interact.param$Nvars.max) mySearch$max.tree.size <<- as.integer(interact.param$max.tree.size) mySearch$p.allow.replace <<- interact.param$p.allow.replace mySearch$p.allow.tree <<- interact.param$p.allow.tree mySearch$p.epsilon <<- p.epsilon mySearch$keep.origin<<- keep.origin mySearch$sigmas<<-relations mySearch$sigmas.prob<<-relations.prob mySearch$del.sigma<<-del.sigma mySearch$pool.cross<<-pool.cross mySearch$gen.prob<<-gen.prob mySearch$p.nor <<- interact.param$p.nor mySearch$p.and <<- interact.param$p.and mySearch$last.mutation <<- as.integer(interact.param$last.mutation) } if(!is.null(advanced.param)) { mySearch$max.N.glob<<-as.integer(advanced.param$max.N.glob) mySearch$min.N.glob<<-as.integer(advanced.param$min.N.glob) mySearch$max.N<<-as.integer(advanced.param$max.N) mySearch$min.N<<-as.integer(advanced.param$min.N) mySearch$printable.opt<<-advanced.param$printable } #distrib_of_proposals = с(0,0,0,0,10) if(exists("hashStat")) { clear(hashStat) remove(hashStat,envir=globalenv()) } if(exists("statistics1")) { remove(statistics,envir=globalenv() ) remove(statistics1,envir=globalenv()) } if(exists("hash.keys1")) { remove(hash.keys,envir=globalenv()) remove(hash.keys1,envir=globalenv()) } if(create.table) { if(pseudo.paral) mySearch$parallelize <<- lapply #carry the search (training out) assign("statistics1",big.matrix(nrow = 2 ^min((length(fparam.example)),hash.length)+1, ncol = 16+length(fparam.example)*save.beta,init = NA, type = "double"), envir=globalenv()) assign("statistics",describe(statistics1), envir=globalenv()) mySearch$g.results[4,1]<<-0 mySearch$g.results[4,2]<<-0 mySearch$p.add <<- array(data = 0.5,dim = length(fparam.example)) if((length(fparam.example))>20) { mySearch$hash.length<<-as.integer(hash.length) mySearch$double.hashing<<-T hash.keys1 <<- big.matrix(nrow = 2 ^(hash.length)+1, ncol = length(fparam.example),init = 0, type = "char") hash.keys <<- describe(hash.keys1) } }else if(create.hash) { assign("hashStat",hash(), envir=globalenv()) mySearch$parallelize <<- lapply mySearch$hash.length<<-as.integer(20) mySearch$double.hashing<<-F } # now as the object is created run the algorithm initsol=rbinom(n = length(fparam.example),size = 1,prob = 0.5) if(unique) resm<-mySearch$modejumping_mcmc(list(varcur=initsol,locstop=locstop,presearch=presearch,statid=5, distrib_of_proposals =distrib_of_proposals,distrib_of_neighbourhoods=distrib_of_neighbourhoods, eps = eps, trit = n.models*100, trest = n.models, burnin = burn.in, max.time = max.time, maxit = max.it, print.freq = print.freq)) else resm<-mySearch$modejumping_mcmc(list(varcur=initsol,locstop=locstop,presearch=presearch,statid=5, distrib_of_proposals =distrib_of_proposals,distrib_of_neighbourhoods=distrib_of_neighbourhoods, eps = eps, trit = n.models, trest = n.models*100, burnin = burn.in, max.time = max.time, maxit = max.it, print.freq = print.freq)) ppp<-1 print("MJMCMC is completed") if(create.table) { print("Post Proceed Results") ppp<-mySearch$post_proceed_results(statistics1 = statistics1) truth = ppp$p.post # make sure it is equal to Truth column from the article truth.m = ppp$m.post truth.prob = ppp$s.mass ordering = sort(ppp$p.post,index.return=T) print("pi truth") sprintf("%.10f",truth[ordering$ix]) sprintf(fparam.example[ordering$ix]) } else if(create.hash) { print("Post Proceed Results") ppp<-mySearch$post_proceed_results_hash(hashStat = hashStat) truth = ppp$p.post # make sure it is equal to Truth column from the article truth.m = ppp$m.post truth.prob = ppp$s.mass ordering = sort(ppp$p.post,index.return=T) print("pi truth") sprintf("%.10f",truth[ordering$ix]) sprintf(fparam.example[ordering$ix]) } if(outgraphs) { par(mar = c(10,4,4,2) + 4.1) barplot(resm$bayes.results$p.post,density = 46,border="black",main = "Marginal Inclusion (RM)",ylab="Probability",names.arg = mySearch$fparam,las=2) barplot(resm$p.post,density = 46,border="black",main = "Marginal Inclusion (MC)",ylab="Probability",names.arg = mySearch$fparam,las=2) } return(ppp) } # add plot(ppp), summary(ppp), print(ppp), ppp as a class itself, coef(ppp) EMJMCMC2016 <- setRefClass(Class = "EMJMCMC2016", fields = list(estimator.args = "list", max.cpu = "integer", objective = "integer", p.prior = "numeric", min.N = "integer", estimator = "function", parallelize = "function", parallelize.global = "function", parallelize.hyper = "function", min.N.randomize = "integer", max.N.randomize = "integer", type.randomize = "integer", thin_rate = "integer", aa = "numeric", cc = "numeric", printable.opt = "logical", fobserved = "vector", switch.type = "integer", n.size ="integer", deep.method = "integer", sup.large.n = "integer", LocImprove = "array", max.N = "integer", save.beta = "logical", keep.origin = "logical", recalc.margin = "numeric", max.N.glob = "integer", min.N.glob = "integer", max.cpu.glob = "integer", max.cpu.hyper = "integer", p.add.default="numeric", switch.type.glob = "integer", isobsbinary = "array", filtered = "vector", fparam = "vector", latnames = "vector", fparam.pool = "vector", p.add = "array", latent.formula = "character", Nvars = "integer", seed = "integer", M.nd = "integer", locstop.nd = "logical", pool.cor.prob = "logical", M.mcmc = "integer", SA.param = "list", p.epsilon = "numeric", p.allow.tree = "numeric", p.nor = "numeric", p.and = "numeric", sigmas.prob ="numeric", del.sigma = "numeric", pool.cross = "numeric", gen.prob = "numeric", sigmas = "vector", p.allow.replace = "numeric", max.tree.size = "integer", double.hashing = "logical", prand = "numeric", hash.length = "integer", update.marg.mc = "logical", Nvars.max = "integer", Nvars.init = "integer", last.mutation = "integer", allow_offsprings = "integer", mutation_rate = "integer", g.results = "big.matrix" ), methods = list( #class constructor initialize = function(estimator.function = inla, estimator.args.list = list(family = "gaussian",data = data.example, control.fixed=list(prec=list(default= 0.00001),prec.intercept = 0.00001,mean=list(default= 0),mean.intercept = 0), control.family = list(hyper = list(prec = list(prior = "loggamma",param = c(0.00001,0.00001),initial = 0))), control.compute = list(dic = TRUE, waic = TRUE, mlik = TRUE)), search.args.list = NULL,latent.formula = "") { estimator <<- estimator.function estimator.args <<- estimator.args.list latent.formula <<- latent.formula g.results <<- big.matrix(nrow = 4,ncol = 2) g.results[1,1]<- -Inf g.results[1,2]<- 1 g.results[2,1]<- Inf g.results[2,2]<- 1 g.results[3,1]<- Inf g.results[3,2]<- 1 g.results[4,1]<- 0 g.results[4,2]<- 0 if(is.null(search.args.list)) { max.cpu <<- as.integer(Nvars*0.05 + 1) objective <<- as.integer(1) if(Sys.info()['sysname']=="Windows") { parallelize <<- lapply parallelize.global <<- lapply parallelize.hyper <<- lapply } else { parallelize <<- mclapply parallelize.global <<- mclapply parallelize.hyper <<- mclapply } Nvars <<- as.integer(length(fparam.example)) min.N <<- as.integer(Nvars/6) min.N.glob <<- as.integer(Nvars/3) max.N.glob <<- as.integer(Nvars/2) max.N <<- as.integer(Nvars/5) switch.type.glob <<- as.integer(2) min.N.randomize <<- as.integer(1) max.N.randomize <<- as.integer(1) deep.method<<- as.integer(1) type.randomize <<- as.integer(3) pool.cor.prob<<-F prand<<-0.01 max.cpu.glob <<- as.integer(Nvars*0.05 + 1) max.cpu.hyper <<- as.integer(2) sup.large.n<<-as.integer(1000) save.beta <<- FALSE filtered<<-vector(mode="character", length=0) printable.opt <<- FALSE keep.origin<<-FALSE thin_rate<<- as.integer(-1) p.allow.tree <<- 0.6 p.epsilon<<- 0.0001 latnames<<-"" p.add.default<<-1 p.allow.replace <<- 0.3 sigmas<<-c("","sin","cos","sigmoid","tanh","atan","erf") sigmas.prob<<-c(0.4,0.1,0.1,0.1,0.1,0.1,0.1) del.sigma<<-0.5 pool.cross<<-0.9 gen.prob<<-c(1,1,1,1,1) p.nor <<- 0.3 p.and <<- 0.7 max.tree.size<<- as.integer(15) Nvars.max <<- as.integer(Nvars) Nvars.init <<- as.integer(Nvars) allow_offsprings <<- as.integer(0) mutation_rate <<- as.integer(100) locstop.nd <<-FALSE double.hashing <<- (Nvars > 20) hash.length <<- as.integer(25) filtered<<-vector(mode="character", length=0) aa <<- 0.9 cc <<- 0.0 M.nd <<- as.integer(min(Nvars,100)) M.mcmc <<- as.integer(5) SA.param <<- list(t.min = 0.0001,t.init = 10, dt = 3, M = as.integer(min(Nvars/5+1,20))) fobserved <<- fobserved.example switch.type <<- as.integer(2) n.size <<- as.integer(10) LocImprove <<- as.array(c(50,50,50,50,150)) isobsbinary <<- as.array(0:(length(fparam.example)-1)) fparam <<- fparam.example fparam.pool <<- fparam.example p.add <<- array(data = 0.5,dim = Nvars) if(exists("statistics")) { recalc.margin <<- 2^Nvars }else if(exists("statistics1")) { recalc.margin <<- 2^Nvars }else { recalc.margin <<- 2^Nvars } last.mutation<<-as.integer(2^(Nvars/2)*0.01) seed <<- as.integer(runif(n = 1,min = 1,max = 10000)) p.prior <<- runif(n = Nvars, min = 0.5,max = 0.5) } else { max.cpu <<- as.integer(search.args.list$max.cpu) objective <<- as.integer(search.args.list$objective) parallelize <<- search.args.list$parallelize latnames <<- search.args.list$latnames parallelize.global <<- search.args.list$parallelize.global parallelize.hyper <<- search.args.list$parallelize.hyper p.prior <<- search.args.list$p.prior min.N <<- as.integer(search.args.list$min.N) printable.opt <<- search.args.list$printable.opt min.N.glob <<- as.integer(search.args.list$min.N.glob) max.N.glob <<- as.integer(search.args.list$max.N.glob) switch.type.glob <<- as.integer(search.args.list$switch.type.glob) min.N.randomize <<- as.integer(search.args.list$min.N.randomize) max.N.randomize <<- as.integer(search.args.list$max.N.randomize) type.randomize <<- as.integer(search.args.list$type.randomize) max.cpu.glob <<- as.integer(search.args.list$max.cpu.glob) locstop.nd <<-search.args.list$locstop.nd max.cpu.hyper <<- as.integer(search.args.list$max.cpu.hyper) save.beta <<- search.args.list$save.beta aa <<- search.args.list$lambda.a prand<<-search.args.list$prand p.add.default<<-search.args.list$ p.add.default sup.large.n<<-search.args.list$sup.large.n thin_rate <-search.args.list$thin_rate keep.origin<<-search.args.list$keep.origin cc <<- search.args.list$lambda.c pool.cor.prob<<-search.args.list$pool.cor.prob M.nd <<- as.integer(search.args.list$stepsGreedy) M.mcmc <<- as.integer(search.args.list$stepsLocMCMC) SA.param <<- search.args.list$SA.params fobserved <<- search.args.list$fobserved switch.type <<- as.integer(search.args.list$fswitch.type) n.size <<- as.integer(search.args.list$n.size) LocImprove <<-search.args.list$prior.optimizer.freq max.N <<- as.integer(search.args.list$max.N) fparam <<- search.args.list$fparam fparam.pool <<- search.args.list$fparam isobsbinary <<- as.array(0:(length(fparam)-1)) p.add <<- as.array(search.args.list$p.add) recalc.margin <<- search.args.list$recalc.margin Nvars <<- as.integer(length(fparam)) seed <<- search.args.list$seed max.tree.size<<- as.integer(search.args.list$max.tree.size) Nvars.max <<- as.integer(search.args.list$Nvars.max) Nvars.init <<- as.integer(search.args.list$Nvars) allow_offsprings <<- as.integer(search.args.list$allow_offsprings) mutation_rate <<- as.integer(search.args.list$mutation_rate) p.allow.tree <<- search.args.list$p.allow.tree p.epsilon <<-search.args.list$p.epsilon p.allow.replace <<- search.args.list$p.allow.replace last.mutation<<-as.integer(search.args.list$last.mutation) p.nor <<- search.args.list$p.nor p.and <<- search.args.list$p.and deep.method<<-as.integer(search.args.list$deep.method) sigmas<<-search.args.list$sigmas sigmas.prob<<-search.args.list$sigmas.prob del.sigma<<-search.args.list$del.sigma pool.cross<<-search.args.list$pool.cross gen.prob<<-search.args.list$gen.prob double.hashing <<- search.args.list$double.hashing hash.length <<- as.integer(search.args.list$hash.length) } }, #transform binary numbers to decimal bittodec.alt = function(bit) #transform a binary vector into a natural number to correspond between vector of solutions and storage array { n<-length(bit) dec <- 0 for(i in 1:n) { j<-n-i dec <- dec + ((2)^j)*bit[i] } return(dec) }, bittodec = function(bit) #transform a binary vector into a natural number to correspond between vector of solutions and storage array { if(!double.hashing){ n<-length(bit) dec <- 0 for(i in 1:n) { j<-n-i dec <- dec + ((2)^j)*bit[i] } return(dec) } else { if(exists("statistics1")) { hash.level<-0 dec<- hashing(bit)+1 #print(dec) sum.one<- sum(bit)*which.max(bit) + sum(bit[Nvars -7:Nvars+1])*Nvars jjj<-1 while(!add.key(dec,bit,sum.one,T)) { hash.level<-hash.level+1 bit1<- dectobit.alt(2654435761*(dec+97*sum.one+hash.level*36599)+hash.level*59+hash.level) dec<- hashing(bit1)+1 #jjj<-jjj+1 #print(dec) } #print(jjj) dec <- dec - 1 } else if(exists("statistics")) { hash.level<-0 dec<- hashing(bit)+1 sum.one<- sum(bit)*which.max(bit) + sum(bit[Nvars -7:Nvars +1])*Nvars while(!add.key(dec,bit,sum.one,F)) { hash.level<-hash.level+1 bit1<- dectobit.alt(2654435761*(dec+97*sum.one+hash.level*36599)+hash.level*59+hash.level) dec<- hashing(bit1)+1 #print(dec) } dec <- dec - 1 } return(dec) } }, add.key = function(dec,bit,sum.one,levl) { lb<-length(bit) if(dec>2^hash.length) return(FALSE) if(levl) { if(is.na(statistics1[dec,1])) { statistics1[dec,16]<-sum.one hash.keys1[dec,]<-bit return(TRUE) } if(is.na(statistics1[dec,16])) { statistics1[dec,16]<-sum.one hash.keys1[dec,]<-bit#c(array(0,dim = Nvars-lb),bit) return(TRUE) } if(statistics1[dec,16]!=sum.one) return(FALSE) i<-1 #print(lb) lb <- length(bit) while(i<=lb&&(hash.keys1[dec,Nvars-i+1])==bit[lb - i +1]) i<-i+1 return((i-1)==lb) } else { if(is.na(statistics[dec,1])) { statistics[dec,16]<-sum.one hash.keys[dec,]<-bit return(TRUE) } if(is.na(statistics[dec,16])) { statistics[dec,16]<-sum.one hash.keys[dec,]<-bit#c(array(0,dim = Nvars-lb),bit) return(TRUE) } if(statistics[dec,16]!=sum.one) return(FALSE) i<-1 lb <- length(bit) while(i<=lb&&(hash.keys[dec,Nvars-i+1])==bit[lb - i +1]) i<-i+1 return((i-1)==lb) } }, hashing = function(bit)# a hash function to find where to place the key in the hash { n<-length(bit) if(n tol) { x <- x*x # update the index if (x >= 2) { x <- x/2 y <- y + b f <- log(exp(f)+exp(fb)) } # scale for the next bit b <- b/2 fb <- (fb-log(2)) } return(list(y = y,z = lx -1, f = f)) }, #transform decimal numbers to binary dectobit = function(dec) #transform a natural number into a binary vector to correspond between vector of solutions and storage array { if(!double.hashing){ if(dec == 0) return(0) q<-dec bin<-NULL while(q!=0) { r<-q/2 q=floor(r) bin<-c(as.integer(2*(r-q)),bin) } return(bin) } return(dehash(dec+1)) }, dectobit.alt = function(dec) #transform a natural number into a binary vector to correspond between vector of solutions and storage array { if(dec == 0) return(0) q<-dec bin<-NULL while(q!=0) { r<-q/2 q=floor(r) bin<-c(as.integer(2*(r-q)),bin) } return(bin) }, dehash=function(dec) { if(exists("hash.keys1")) return(hash.keys1[dec,]) if(exists("hash.keys")) return(hash.keys[dec,]) return(NULL) }, #calculate move probabilities calculate.move.logprobabilities = function(varold, varnew, switch.type,min.N,max.N) { if(switch.type == 1) # random size random N(x) { #warning("This option should not be chosen for randomization unless p.add == 0.5 ", call. = FALSE) min.N = max.N log.mod.switch.prob <- log(1/(max.N - min.N +1)) # probability of having that many differences KK<-sum(abs(varold-varnew)) log.mod.switch.prob <- 0 #always the same probabilities for moves within thenighbourhood for p=0.5 log.mod.switchback.prob <- 0 } else if(switch.type == 2) #fixed N(x) inverse operator { if(min.N!=max.N) { #warning("min.N should be equal to max.N in swap type neighbourhoods min.N:=max.N", call. = FALSE) min.N = max.N } log.mod.switch.prob <- log(1/(max.N - min.N +1)) KK<-max.N log.mod.switch.prob <- log.mod.switch.prob + KK*log(factorial(Nvars - KK + 1)/factorial(Nvars)) log.mod.switchback.prob <- log.mod.switch.prob }else if(switch.type == 3) # random sized inverse N(x) { log.mod.switch.prob <- log(1/(max.N - min.N +1)) KK<-sum(abs(varold-varnew)) log.mod.switch.prob <- log.mod.switch.prob + KK*log(factorial(Nvars - KK + 1)/factorial(Nvars)) log.mod.switchback.prob <- log.mod.switch.prob }else if(switch.type == 4) # fixed N(x) for reverse from type 2 swaps { if(min.N!=max.N) { #warning("min.N should be equal to max.N in swap type neighbourhoods min.N:=max.N", call. = FALSE) min.N = max.N } log.mod.switch.prob <- log(1/(max.N - min.N +1)) KK<-max.N log.mod.switch.prob <- log.mod.switch.prob + KK*log(factorial(Nvars - KK + 1)/factorial(Nvars)) log.mod.switchback.prob <- log.mod.switch.prob }else if(switch.type > 4) { log.mod.switch.prob <- log(x = 1) log.mod.switchback.prob <-log(x = 1) } return(list(log.switch.forw.prob = log.mod.switch.prob, log.switch.back.prob = log.mod.switchback.prob)) }, #build a new model to draw from the old one return selection probabilities buildmodel= function(varcur.old,statid, shift.cpu,max.cpu,switch.type,min.N,max.N,changeble.coord = NULL) { vect<- vector(length = max.cpu,mode = "list") shift<-0 for(cpu in 1:(max.cpu)) { set.seed(runif(1,1,10000), kind = NULL, normal.kind = NULL) if(!is.null(varcur.old)&&length(varcur.old==Nvars)) { varcur.old[which(is.na(varcur.old))]<-1 varcur<-varcur.old }else { varcur<-rbinom(n = (Nvars),size = 1,0.5) varcur.old<-varcur } changeble <-FALSE if(!is.null(changeble.coord)) { changeble <- TRUE } if(switch.type == 1) # random size random N(x) # try avoiding when doing mcmc rather than optimization { log.mod.switch.prob <- log(1/(max.N - min.N +1)) KK<-floor(runif(n=1, min.N, max.N + 0.999999999)) log.mod.switch.prob <- log.mod.switch.prob + KK*log(factorial(Nvars - KK + 1)/factorial(Nvars)) log.mod.switchback.prob <- log.mod.switch.prob change.buf <- array(data = 0,dim = Nvars) if(changeble){ for(ttt in 1:KK) { iid<-floor(runif(n = 1,min = 1,max = Nvars+0.999999999)) if(changeble.coord[iid]==1) next change.buf[iid] = 1 varcur[iid] <-rbinom(n = 1,size = 1,prob =round(p.add[iid],digits = 8)) log.mod.switch.prob = 0#log.mod.switch.prob + log(dbinom(x = varcur[iid],size = 1,prob = p.add[iid])) # this is one of the pathes to get there in general log.mod.switchback.prob = 0# log.mod.switchback.prob + log(dbinom(x = 1-varcur[iid],size = 1,prob = p.add[iid])) # but this is one of the ways to get back only } }else { iid<-floor(runif(n = KK,min = 1,max = Nvars+0.999999999)) change.buf[iid] = 1 varcur[iid] <-rbinom(n = KK,size = 1,prob = round(p.add,digits = 8)) } }else if(switch.type == 2) # fixed sized inverse N(x) { if(min.N!=max.N) { #warning("min.N should be equal to max.N in swap type neighbourhoods min.N:=max.N", call. = FALSE) min.N <- max.N } log.mod.switch.prob <- log(1/(max.N - min.N +1)) KK<-max.N log.mod.switch.prob <- log.mod.switch.prob + KK*log(factorial(Nvars - KK + 1)/factorial(Nvars)) log.mod.switchback.prob <- log.mod.switch.prob change.buf <- array(data = 0,dim = Nvars) if(changeble){ for(ttt in 1:KK) { iid<-floor(runif(n = 1,min = 1,max = Nvars+0.999999999)) if(change.buf[iid]==1 || changeble.coord[iid]==1) { KK<-KK+1 next } change.buf[iid] = 1 varcur[iid] = 1-varcur[iid] } }else { iid<-floor(runif(n = KK,min = 1,max = Nvars+0.999999999)) change.buf[iid] = 1 varcur[iid] = 1-varcur[iid] } }else if(switch.type == 3) # random sized inverse N(x) { log.mod.switch.prob <- log(1/(max.N - min.N +1)) KK<-floor(runif(n=1, min.N, max.N + 0.999999999)) log.mod.switch.prob <- log.mod.switch.prob + KK*log(factorial(Nvars - KK + 1)/factorial(Nvars)) log.mod.switchback.prob <- log.mod.switch.prob change.buf <- array(data = 0,dim = Nvars) if(changeble){ for(ttt in 1:KK) { iid<-floor(runif(n = 1,min = 1,max = Nvars+0.999999999)) if(change.buf[iid]==1 || changeble.coord[iid]==1) { KK<-KK+1 next } change.buf[iid] = 1 varcur[iid] = 1-varcur[iid] } }else { iid<-floor(runif(n = KK,min = 1,max = Nvars+0.999999999)) change.buf[iid] = 1 varcur[iid] = 1-varcur[iid] } }else if(switch.type == 4) # fixed N(x) for reverse from type 2 swaps { if(min.N!=max.N) { #warning("min.N should be equal to max.N in swap type neighbourhoods min.N:=max.N", call. = FALSE) min.N <- max.N } log.mod.switch.prob <- log(1/(max.N - min.N +1)) KK<-max.N log.mod.switch.prob <- log.mod.switch.prob + KK*log(factorial(Nvars - KK + 1)/factorial(Nvars)) log.mod.switchback.prob <- log.mod.switch.prob ids <- which(changeble.coord == 0) change.buf <- array(data = 0,dim = Nvars) if(changeble){ for(ttt in KK) { iid<-floor(runif(n = 1,min = 1,max = Nvars+0.999999999)) if(change.buf[iid]==1 || changeble.coord[iid]==1) { KK<-KK+1 next } change.buf[iid] = 1 varcur[iid] = 1-varcur[iid] }} else { iid<-floor(runif(n = KK,min = 1,max = Nvars+0.999999999)) change.buf[iid] = 1 varcur[iid] = 1-varcur[iid] } }else if(switch.type == 5) { change.buf <- array(data = 0,dim = (Nvars)) log.mod.switch.prob<-0 log.mod.switchback.prob <-0 add<-0 if(cpu+shift>Nvars) { shift<-1 - cpu varcur.old<-rep(0,times = Nvars) } if(varcur.old[cpu+shift]!=1) { varcur<-varcur.old varcur[cpu+shift]<-1 }else { if(cpu+shift=Nvars) { shift <- (Nvars - cpu) break } } varcur<-varcur.old varcur[cpu+shift]<-1 } }else if(switch.type == 6) { change.buf <- array(data = 0,dim = (Nvars)) log.mod.switch.prob<-0 log.mod.switchback.prob <-0 add<-0 if(cpu+shift>Nvars) { shift<-1 - cpu varcur.old<-rep(1,times = Nvars) } if(varcur.old[cpu+shift]!=0) { varcur<-varcur.old varcur[cpu+shift]<-0 }else { if(cpu+shift=Nvars) { shift <- (Nvars - cpu) break } } varcur<-varcur.old varcur[cpu+shift]<-0 } }else if(switch.type == 7) { change.buf <- array(data = 0,dim = (Nvars)) log.mod.switch.prob<-0 log.mod.switchback.prob <-0 vec<-dectobit(cpu + shift.cpu) varcur<-c(array(0,dim = (Nvars -length(vec))),vec) #issues here }else if(switch.type == 8) { log.mod.switch.prob <-0 log.mod.switchback.prob <-0 varcur<-varcur.old change.buf <- array(data = 0,dim = Nvars) #if(printable.opt)print("type 8 invoked") #if(printable.opt)print(varcur) }else if(switch.type==9) { log.mod.switch.prob <-0 log.mod.switchback.prob <-0 change.buf <- array(data = 1,dim = Nvars) changevar<- rbinom(n = Nvars,size = 1,prob=prand) varcur<-varcur.old varcur[which(changevar==1)]<-(1-varcur[which(changevar==1)]) log.mod.switch.prob<-prand^length(which(changevar==1)) }else{ log.mod.switch.prob <- 0 log.mod.switchback.prob <-0 change.buf <- array(data = 1,dim = Nvars) varcur <-rbinom(n = Nvars,size = 1,prob = round(p.add,digits = 8)) #if(printable.opt)print("type 8 invoked") #if(printable.opt)print(varcur) } # for(g in 1:max(isobsbinary)) # { # if(length(varcur[which(isobsbinary == g && varcur %in% c(1,3))])==length(which(isobsbinary == g))) # varcur[which(isobsbinary == g && varcur %in% c(1,3))[1]]=0 # if(length(varcur[which(isobsbinary == g && varcur %in% c(2,3))])==length(which(isobsbinary == g))) # varcur[which(isobsbinary == g && varcur %in% c(2,3))[1]]=0 # } covobs <- if(fparam[1]=="Const")fparam[which(varcur[-1] == 1)+1]else fparam[which(varcur==1)] #obsconst<-2*as.integer((varcur[1] %in% c(1,3)) || length(covobs) == 0 || (length(covobs)==length(which(isobsbinary != 0))) ) -1 obsconst<-ifelse(fparam[1]=="Const",2*as.integer((varcur[1])) -1,1) id<-bittodec(varcur) id<-id+1 if(ifelse(exists("statistics1"),is.na(statistics1[id,1]),ifelse(exists("statistics"),is.na(statistics[id,1,]),ifelse(exists("hashStat"),!has.key(hash = hashStat,key = paste(varcur,collapse = "")),TRUE))))#||TRUE) { # formula <- NULL # formula <- as.formula(ifelse(length(covobs)>0,(stri_join(stri_flatten(fobserved[1]), " ~ ",obsconst,"+", stri_flatten(covobs, collapse=" + "), latent.formula)),(stri_join(stri_flatten(fobserved[1]), " ~ ",obsconst, latent.formula)))) # # if(is.null(formula)){ # formula <- as.formula(ifelse(length(covobs)>0,(stri_join(stri_flatten(fobserved[1]), " ~ ",obsconst,"+", stri_flatten(covobs, collapse=" + "))),(stri_join(stri_flatten(fobserved[1]), " ~ ",obsconst)))) # # } formula <- NULL capture.output({withRestarts(tryCatch(capture.output({formula <- as.formula(paste(paste(fobserved[1]), " ~ ",obsconst,ifelse(length(covobs)>0," + ",""), paste(covobs, collapse=" + "), latent.formula)) })), abort = function(){onerr<-TRUE;fm<-NULL})}) if(is.null(formula)){ formula <- as.formula(paste(paste(fobserved[1]), " ~ ",obsconst,ifelse(length(covobs)>0," + ",""), paste(covobs, collapse=" + "))) } }else { formula <- NULL } vect[[cpu]]<-list(formula = formula, varcur = varcur, statid = statid, changed = change.buf, log.mod.switch.prob = log.mod.switch.prob , log.mod.switchback.prob = log.mod.switchback.prob ) } return(vect) }, #fit the given model by means of the specified estimator fitmodel = function(model) { if(!is.null(model)) { fm<-NULL id<-bittodec(model$varcur) if(is.null(id)) id = 0 id<-id+1 if(exists("statistics1")){ if(is.na(statistics1[id,1])) { #if(printable.opt)print("Invoked from EMJMCMC environment") onerr<-FALSE #if(printable.opt)print("INLA internal error") statistics1[id,c(2,3)]<-100000 statistics1[id,1]<--100000 statistics1[id,4:14]<-0 #prior = "normal",param = c(model$beta.mu.prior,model$beta.tau.prior)) # capture.output({withRestarts(tryCatch(capture.output({fm<-inla(formula = model$formula,family = "binomial",Ntrials = data$total_bases,data = data,control.fixed = list(mean = list(default = model$beta.mu.prior),mean.intercept = model$beta.mu.prior, prec = list(default = model$beta.tau.prior), prec.intercept = model$beta.tau.prior) ,control.compute = list(dic = model$dic.t, waic = model$waic.t, mlik = model$mlik.t)) # })), abort = function(){onerr<-TRUE;fm<-NULL})}) # fit the modal, get local improvements # capture.output({withRestarts(tryCatch(capture.output({fm<-do.call(estimator, c(estimator.args, model$formula)) })), abort = function(){onerr<-TRUE;fm<-NULL})}) # fit the model, get local improvements if(!is.null(fm)) { statistics1[id,2]<-fm$waic[[1]] statistics1[id,1]<-fm$mlik[[1]] statistics1[id,3]<-fm$dic[[1]] if(save.beta) { if(fparam[1]=="Const") { inxx<-which(model$varcur==1) if(length(inxx)==length(fm$summary.fixed$mean)) statistics1[id,15+inxx]<-fm$summary.fixed$mean }else { inxx<-c(0,which(model$varcur==1)) if(length(inxx)==length(fm$summary.fixed$mean)) statistics1[id,16+inxx]<-fm$summary.fixed$mean } } if(fm$waic[[1]]g.results[1,1] && !is.na(fm$mlik[[1]])) { g.results[1,1]<-fm$mlik[[1]] g.results[1,2]<-as.integer(id) } if(fm$dic[[1]]g.results[1,1] && !is.na(fm$mlik[[1]])) { g.results[1,1]<-fm$mlik[[1]] g.results[1,2]<-as.integer(id) } if(fm$dic[[1]]Nvars) idd<- as.character(paste(c(model$varcur,array(0,Nvars.max-Nvars)),collapse = "")) else idd<- as.character(paste(c(model$varcur),collapse = "")) if(!has.key(key = idd,hash = hashStat)) { #if(printable.opt)print("Invoked from EMJMCMC hash table environment") onerr<-FALSE #if(printable.opt)print("INLA internal error") #prior = "normal",param = c(model$beta.mu.prior,model$beta.tau.prior)) # capture.output({withRestarts(tryCatch(capture.output({fm<-inla(formula = model$formula,family = "binomial",Ntrials = data$total_bases,data = data,control.fixed = list(mean = list(default = model$beta.mu.prior),mean.intercept = model$beta.mu.prior, prec = list(default = model$beta.tau.prior), prec.intercept = model$beta.tau.prior) ,control.compute = list(dic = model$dic.t, waic = model$waic.t, mlik = model$mlik.t)) # })), abort = function(){onerr<-TRUE;fm<-NULL})}) # fit the modal, get local improvements # capture.output({withRestarts(tryCatch(capture.output({fm<-do.call(estimator, c(estimator.args, model$formula)) })), abort = function(){onerr<-TRUE;fm<-NULL})}) # fit the modal, get local improvements if(!save.beta) { hashBuf<-array(data = NA,dim = 3) }else { if(allow_offsprings==0||Nvars.max1) # { # inxx<-which(model$varcur==1) # if(length(inxx)==length(fm$summary.fixed$mean)) # statistics[id,14+inxx]<-fm$summary.fixed$mean # } if(fm$waic[[1]]g.results[1,1] && !is.na(fm$mlik[[1]])) { g.results[1,1]<-fm$mlik[[1]] g.results[1,2]<-(id) } if(fm$dic[[1]]g.results[1,1] && !is.na(fm$mlik[[1]])) { g.results[1,1]<-fm$mlik[[1]] g.results[1,2]<-(id) } if(fm$dic[[1]]mlikglob)) #update the parameter of interest { if(printable.opt)print(paste("locMTMCMC update waic.glob = ", waiccand)) if(printable.opt)print(paste("locMTMCMC update waic.glob.mlik = ", mlikglob)) mlikglob<-mlikcand waicglob<-waiccand varglob<-varcand if(cluster) modglob<-fm } g1 <- waiccur if(waiccur == Inf) { g1 = 1 } p.select.y[mod_id]<-(mlikcand + vect[[mod_id]]$log.mod.switchback.prob+log(lambda(c = cc, alpha = aa, g1 = -g1, g2 = -waiccand,g.domain.pos = FALSE))) # correct for different criteria later if(is.na(p.select.y[mod_id])) p.select.y[mod_id] <- 0 if(is.infinite(p.select.y[mod_id]) || p.select.y[mod_id]>100000000) { #if(printable.opt)print(paste("very large log.w.y detected ",p.select.y[mod_id])) p.select.y[mod_id] <- 100000000 } } max.p.select.y <- max(p.select.y) p.select.y<-p.select.y-max.p.select.y if(printable.opt)print(paste("max log.w.y is ",max.p.select.y,"normilized log.w.n.y is ", paste(p.select.y,collapse = ", "))) ID<-sample.int(n = max.cpu,size = 1,prob = exp(p.select.y)) if(printable.opt)print(paste("cand ",ID," selected")) varcand<-vect[[ID]]$varcur if(cluster) { waiccand<-res.par[[ID]]$waic mlikcand<-res.par[[ID]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } #p.Q.cand<- p.select.y[ID]/sum(p.select.y) if(printable.opt)print("do reverse step") p.select.z <- array(data = 0.01, dim = max.cpu) if(max.cpu!=1) { vect1<-buildmodel(max.cpu = max.cpu -1,varcur.old = varcand,statid = model$statid,switch.type = switch.type, min.N = min.N, max.N = max.N) cluster<-TRUE flag1<-0 for(mod_id in 1:(max.cpu-1)) { if(is.null(vect1[[mod_id]]$formula)) { flag1<-flag1+1 } } if(flag1==(max.cpu-1)) { cluster<-FALSE if(printable.opt)print("!!!!MTMCMC reverse models already estimated!!!!") }else { res.par.back <- parallelize(X = vect1,FUN = .self$fitmodel) } for(mod_id in 1:(max.cpu-1)) { if(cluster) { if(is.null(fm)&&(is.na(res.par.back[[mod_id]]$waic))) { if(printable.opt)print("locMTMCMC Model Fit Error!?") next } } varcand.b<-vect1[[mod_id]]$varcur if(cluster) { waiccand.b<-res.par.back[[mod_id]]$waic mlikcand.b<-res.par.back[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand.b)+1 waiccand.b<-statistics1[iidd,2] mlikcand.b<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand.b,collapse = "") waiccand.b<-values(hashStat[iidd])[2] mlikcand.b<-values(hashStat[iidd])[1] } if((mlikcand.b>mlikglob)) { if(printable.opt)print(paste("locMTMCMC update waic.glob = ", waiccand.b)) if(printable.opt)print(paste("locMTMCMC update waic.glob.mlik = ", mlikcand.b)) mlikglob<-mlikcand.b waicglob<-waiccand.b varglob<-varcand.b if(cluster) modglob<-fm } g1 = waiccand if(waiccand == Inf) { g1 = 1 } p.select.z[mod_id]<-(mlikcand.b+vect1[[mod_id]]$log.mod.switchback.prob+(lambda(c = cc, alpha = aa, g1 = -g1, g2 = -waiccand.b,g.domain.pos = FALSE))) # correct for different criteria later if(is.na(p.select.z[mod_id])) p.select.z[mod_id]=0 if(is.infinite(p.select.z[mod_id]) || p.select.z[mod_id] > 100000000) { #if(printable.opt)print(paste("very large log.w.y detected ",p.select.z[mod_id])) p.select.z[mod_id] <- 100000000 } } } if( waiccur == Inf) { g1 = 1 } p.select.z[max.cpu] <- (mlikcur+vect[[ID]]$log.mod.switch.prob+(lambda(c = cc, alpha = aa, g1 = -g1, g2 = -waiccand,g.domain.pos = FALSE))) if(is.na(p.select.z[mod_id])) p.select.z[mod_id]=0 if(is.infinite(p.select.z[mod_id]) || p.select.z[mod_id] > 100000000) { #if(printable.opt)print(paste("very large log.w.y detected ",p.select.z[mod_id])) p.select.z[mod_id] <- 100000000 } max.p.select.z <- max(p.select.z) p.select.z<-p.select.z-max.p.select.z if(printable.opt)print(paste("max log.w.z is ",max.p.select.z,"normilized log.w.n.z is ", paste(p.select.z,collapse = ", "))) if(log(runif(n = 1,min = 0,max = 1)) < (log(sum(exp(p.select.y)))-log(sum(exp(p.select.z)))) + max.p.select.y - max.p.select.z ) { mlikcur<-mlikcand if(printable.opt)print(paste("locMTMCMC update ratcur = ", mlikcand)) if(printable.opt)print(paste("locMTMCMC accept move with ", waiccand)) varcur<-varcand waiccur<-waiccand } }),abort = function(){fm<-fmb;closeAllConnections();options(error=traceback); onerr<-TRUE}) } #if(printable.opt)print("FINISH LOCAL MTMCMC") #!#if(printable.opt)print(points) if(model$reverse == FALSE) { if(is.null(varcur)) { #if(printable.opt)print("No moves acceoted in the procedure") varcur<-varcand waiccur<-waiccand varcur<-varcand } vect<-buildmodel(max.cpu = 1,varcur.old = varcur,statid = model$statid,switch.type = type.randomize,min.N = min.N.randomize,max.N = max.N.randomize) varcur<-vect[[1]]$varcur #if(printable.opt)print(varcur) cluster<-TRUE if(is.null(vect[[1]]$formula)) { cluster<-FALSE if(printable.opt)print("!!!!MTMCMC reverse models already estimated!!!!") }else { mod<-fitmodel(vect[[1]]) } if(cluster) { waiccur<-mod$waic mlikcur<-mod$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcur)+1 waiccur<-statistics1[iidd,2] mlikcur<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcur,collapse = "") waiccur<-values(hashStat[iidd])[2] mlikcur<-values(hashStat[iidd])[1] } # incorporate what happens for the backward optimization model.prob<-vect[[1]]$log.mod.switch.prob model.prob.fix<-vect[[1]]$log.mod.switchback.prob }else # incorporate what happens for the reverse move { if(is.null(varcur)) { #if(printable.opt)print("No moves acceoted in the reverse procedure") varcur<-varcand waiccur<-waiccand } model.probs<-calculate.move.logprobabilities(varold = varcur, varnew = model$varold,switch.type = type.randomize,min.N = min.N.randomize,max.N = max.N.randomize) model.prob<-model.probs$log.switch.forw.prob model.prob.fix<-model.probs$log.switch.back.prob } if(is.null(varcur)) { #if(printable.opt)print("NO VARCUR OBTAINED") varcur<-i waiccur<-Inf varcur<-model$varold } return(list(varcur = varcur, waiccur = waiccur, mlikcur = mlikcur, log.prob.cur = model.prob,log.prob.fix = model.prob.fix, varglob = varglob, waicglob = waicglob, mlikglob = mlikglob, modglob = modglob)) }, #local simulated annealing optimization learnlocalSA=function(model) { t.min<-SA.param$t.min t<-SA.param$t.init dt<-SA.param$dt M<-SA.param$M varcand<-model$varcur varcurb<-model$varcur varcur<-model$varcur varglob<-model$varcur varglob<-NULL modglob<-NULL probcur<-1 probrevcur<-1 first.prob <- 1 fm<-NULL fmb<-NULL mlikcur<- model$mlikcur waiccur<-model$waiccur # estimate large jump in a reverse move # estimate large jump in a reverse move if((model$reverse && !model$sa2) || is.infinite(mlikcur)) { vectbg<-buildmodel(max.cpu = 1,varcur.old = varcur,statid = model$statid,switch.type=8, min.N = min.N,max.N = max.N) if(!is.null(vectbg[[1]]$formula)) { bgmod <- lapply(X = vectbg,FUN = .self$fitmodel) waiccur<-bgmod[[1]]$waic mlikcur<-bgmod[[1]]$mlik } else if(exists("statistics1")) { iidd<-bittodec(varcur)+1 waiccur<-statistics1[iidd,2] mlikcur<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcur,collapse = "") waiccur<-values(hashStat[iidd])[2] mlikcur<-values(hashStat[iidd])[1] } } if(printable.opt)print(paste("Begin with ", mlikcur)) mlikglob<- mlikcur mlikcand<- mlikcur waiccand<- waiccur waicglob<- waiccur waiccur<- waiccur while(t>t.min) { if(printable.opt)print(paste("anneal to ",t)) t.new<-t*exp(-dt) if(model$reverse == TRUE && t.new <= t.min) { M<-M -1 } for(m in 1:M) { withRestarts(tryCatch({ mmax.cpu = max.cpu if(model$switch.type == 5) { mmax.cpu = Nvars - sum(varcur) }else if(model$switch.type == 6) { mmax.cpu = sum(varcur) } if(mmax.cpu == 0) mmax.cpu = 1 vect<-buildmodel(max.cpu = mmax.cpu,varcur.old = varcur,statid = model$statid,switch.type=model$switch.type, min.N = min.N,max.N = max.N) cluster<-TRUE flag1<-0 for(mod_id in 1:mmax.cpu) { if(is.null(vect[[mod_id]]$formula)) { flag1<-flag1+1 } } if(flag1==mmax.cpu) { cluster<-FALSE if(printable.opt)print("!!!!SA Models already estimated!!!!") }else { res.par <- parallelize(X = vect,FUN = .self$fitmodel) } for(mod_id in 1:mmax.cpu) { if(cluster) { fmb<-fm fm<-res.par[[mod_id]] waiccand<-Inf if(is.null(fm)&&(is.na(res.par[[mod_id]]$waic))) { varcand<-varcurb if(printable.opt)print("SA Model Fit Error!?") next } } varcand<-vect[[mod_id]]$varcur if(cluster) { waiccand<-res.par[[mod_id]]$waic mlikcand<-res.par[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-stri_paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } if(objective==0) { objcand<-waiccand objcur<-waiccur objglob<-waicglob }else { objcand<- -mlikcand objcur<- -mlikcur objglob<- -mlikglob } if(t == SA.param$t.init && mod_id == 1 && m == 2) { delta<-objcand - objcur first.prob <- vect[[mod_id]]$log.mod.switchback.prob + log(punif(q = exp(x = delta/t),min = 0,max = 1)) } if(objcand0) { withRestarts(tryCatch({ if(printable.opt)print(paste("proceed with layer",layer)) if(printable.opt)print(paste("current solution is",as.character(varcand))) vect<-buildmodel(max.cpu = layer,varcur.old = varcurb,statid = model$statid, switch.type = 5,min.N = min.N, max.N = max.N) if(printable.opt)print(paste("finish preparing models at layer",layer)) cluster<-TRUE flag1<-0 for(mod_id in 1:layer) { if(is.null(vect[[mod_id]]$formula)) { flag1<-flag1+1 } } if(flag1==layer) { cluster<-FALSE if(printable.opt)print("!!!!forward Models already estimated!!!!") }else { res.par <- parallelize(X = vect,FUN = .self$fitmodel) } if(printable.opt)print(paste("end forward optimizing at layer",layer)) for(mod_id in 1:layer) { if(cluster){ fmb<-fm fm<-res.par[[mod_id]] waiccand<-Inf if(is.null(fm)&&(is.na(res.par[[mod_id]]$waic))) { varcand<-varcurb if(printable.opt)print("forward Model Fit Error!?") next } } varcand<-vect[[mod_id]]$varcur if(cluster) { waiccand<-res.par[[mod_id]]$waic mlikcand<-res.par[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } if(objective==0) { objcand<-waiccand objcur<-waiccur objglob<-waicglob }else { objcand<- -mlikcand objcur<- -mlikcur objglob<- -mlikglob } if(objcand<=objcur || mod_id ==1) { if(printable.opt)print(paste("forward accept with ", objcand)) objcur<-objcand varcurb<-varcand waiccur<-waiccand varcur<-varcand mlikcur<-mlikcand if(objcur0) { withRestarts(tryCatch({ if(printable.opt)print(paste("backward proceed with layer",layer)) if(printable.opt)print(paste("current backward solution is",as.character(varcand))) vect<-buildmodel(max.cpu = layer,varcur.old = varcurb,statid = model$statid, switch.type = 6,min.N = min.N, max.N = max.N) if(printable.opt)print(paste("finish backward preparing models at layer",layer)) cluster<-TRUE flag1<-0 for(mod_id in 1:layer) { if(is.null(vect[[mod_id]]$formula)) { flag1<-flag1+1 } } if(flag1==layer) { cluster<-FALSE if(printable.opt)print("!!!!backward Models already estimated!!!!") }else { res.par <- parallelize(X = vect,FUN = .self$fitmodel) } if(printable.opt)print(paste("end backward optimizing at layer",layer)) for(mod_id in 1:layer) { if(cluster){ fmb<-fm fm<-res.par[[mod_id]] waiccand<-Inf if(is.null(fm)&&(is.na(res.par[[mod_id]]$waic))) { varcand<-varcurb if(printable.opt)print("backward Model Fit Error!?") next } } varcand<-vect[[mod_id]]$varcur if(cluster) { waiccand<-res.par[[mod_id]]$waic mlikcand<-res.par[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } if(objective==0) { objcand<-waiccand objcur<-waiccur objglob<-waicglob }else { objcand<- -mlikcand objcur<- -mlikcur objglob<- -mlikglob } if(objcand<=objcur|| mod_id ==1) { if(printable.opt)print(paste("backward accept with ", objcand)) objcur<-objcand varcurb<-varcand waiccur<-waiccand varcur<-varcand mlikcur<-mlikcand if(objcur30) if(printable.opt)print("Finishing the procedure might well take forever!") varcand<-array(0,Nvars) varcurb<-varcand varglob<-varcand varglob<-NULL modglob<-NULL mlikglob<- model$mlikcur mlikcur<- model$mlikcur waiccand<- model$waiccur waicglob<-model$waiccur waiccur<-model$waiccur waiccurb<-model$waiccur fm<-NULL fmb<-NULL ubs<-as.integer(bittodec(array(1,Nvars)) + 1) ub<-model$ub totit<-as.integer(ubs/ub) + 1 if(model$totalitubs) { ub<-ubs - ub*(i-1)-1 if(printable.opt)print(paste("last ",ub," iterations to complete")) varcurb<-varcand } withRestarts(tryCatch({ vect<-buildmodel(max.cpu = ub,varcur.old = varcurb,statid = model$statid,switch.type = 7, shift.cpu = model$ub*(i-1),min.N = min.N,max.N = max.N) if(printable.opt)print(paste("proceed with full ecumeration")) if(printable.opt)print(paste("current solution is",as.character(varcand))) cluster<-TRUE flag1<-0 for(mod_id in 1:ub) { if(is.null(vect[[mod_id]]$formula)) { flag1<-flag1+1 } } if(flag1==ub) { cluster<-FALSE if(printable.opt)print("!!!!full models already estimated!!!!") }else { res.par <- parallelize(X = vect,FUN = .self$fitmodel) } if(printable.opt)print(paste("end optimizing full ecumeration")) for(mod_id in 1:ub) { if(cluster){ fmb<-fm fm<-res.par[[mod_id]] waiccand<-Inf if(is.null(fm)&&(is.na(res.par[[mod_id]]$waic))) { varcand<-varcurb if(printable.opt)print("full Model Fit Error!?") next } } varcand<-vect[[mod_id]]$varcur if(cluster) { waiccand<-res.par[[mod_id]]$waic mlikcand<-res.par[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } if(objective==0) { objcand<-waiccand objcur<-waiccur objglob<-waicglob }else { objcand<- -mlikcand objcur<- -mlikcur objglob<- -mlikglob } if(objcand<=objcur) { if(printable.opt)print(paste("full accept with ", objcand)) objcur<-objcand varcurb<-varcand waiccur<-waiccand varcur<-varcand mlikcur<-mlikcand if(objcurmlikcur) { mlikcur<-fff$mlikglob varcur<-fff$varglob waiccur<-fff$waicglob } } else { if(fff$waicglobmlikcur) { mlikcur<-bbb$mlikglob varcur<-bbb$varglob waiccur<-bbb$waicglob } } else { if(bbb$waicglob0) { vec<-dectobit(g.results[1,2]-1) varcur<-c(array(0,dim = (Nvars -length(vec))),vec) waiccur<-g.results[2,1] waicglob<- g.results[2,1] mlikcur<- g.results[1,1] mlikglob<- g.results[1,1] ratcur<- g.results[1,1] print(paste("initial solution is set with mlik of ",mlikcur)) }else{ vec<-rbinom(n = Nvars,size = 1,prob = 0.5) # generate an initial solution varcur<-c(array(0,dim = (Nvars -length(vec))),vec) } }else if(length(glob.model$varcur[which(glob.model$varcur %in% c(0,1))])==Nvars) { varcur<-glob.model$varcur } else { if(printable.opt)print("Incorrect initial solution set be the user, a random one is generated") vec<-rbinom(n = Nvars,size = 1,prob = 0.5) # generate an initial solution varcur<-c(array(0,dim = (Nvars -length(vec))),vec) } varcurb<-varcur varglob<-varcur modglob<-NULL p1 = array(data = 0.0001,dim = Nvars) p2 = array(data = 0.9999,dim = Nvars) j<-0 j.a<-0 p.post<-array(data = 1,dim = Nvars) waiccur<-Inf waicglob<-Inf mlikcur<- -Inf mlikglob<- -Inf ratcur<- -Inf fm<-NULL eps.emp<-normprob(p1,p2) max.cpu.buf<-max.cpu delta.time <- 0 LocImprove<-0 LocNeighbor<-0 max.cpu.buf<-max.cpu.glob while((eps.emp>=glob.model$eps || j<= glob.model$maxit || j <= glob.model$burnin) && delta.time < glob.model$max.time && g.results[4,1]<= glob.model$trit && g.results[4,2]<= glob.model$trest) { p1<-p.post/acc_moves set.seed(runif(n = 1, min = 1, max = seed*100), kind = NULL, normal.kind = NULL) LocImprove <- (sample.int(n = 5,size = 1,prob = distrib_of_proposals) - 1) LocNeighbor<-(sample.int(n = 7,size = 1,prob = distrib_of_neighbourhoods[LocImprove+1,])) switch.type.glob.buf = LocNeighbor switch.type.buf = LocNeighbor if(LocNeighbor == 7) { switch.type.glob.buf = 9 switch.type.buf = 9 } #if(printable.opt)print(LocImprove) j<-j+1 j.a<-j.a+1 if(j%%glob.model$print.freq == 0) { print(paste(j," iterations completed up to now after ",delta.time," cpu minutes"," best MLIK found ",g.results[1,1] ," current mlik found ",mlikcur, "current acceptance ratio ",acc_moves/j.a)) } if(j%%100==0) seed = runif(n = 1,min = 0,max = 100000) # the small part of the code to be upgraded at least slightly if(allow_offsprings %in% c(1,2) && j%%mutation_rate == 0 && (j<=last.mutation || Nvars!=Nvars.max)) { if(Nvars>Nvars.max || j==mutation_rate) { #do the stuff here if(j==mutation_rate) fparam.pool<<-c(fparam.pool,filtered) to.del <- which(p.add < p.allow.tree) if(length(to.del)==Nvars) to.del<-to.del[-sample.int(n = Nvars,size = sample.int(n=Nvars-1,size = 1),prob = p.add+p.epsilon)] if(length(to.del)0) { clear(hashStat) #rm(hashStat) gc() #hashStat<<-hash(keys=keysarr.new,values=as.list(data.frame((values.new)))) hashStat<<-hash() fparam<<-fparam[-to.del] Nvars<<-length(fparam) Nvars.init<<-Nvars p.add<<-p.add[-to.del] p.post<-array(data = 1,dim = Nvars) #print(paste("mutation happended ",proposal," tree added")) varcurb<-varcurb[1:Nvars] varcand<-varcurb[1:Nvars] varglob<-varcurb[1:Nvars] p1 <- array(0,dim = (Nvars)) p2 <- array(1,dim = (Nvars)) acc_moves<-1 j.a<-1 } } else { if(Nvars>=Nvars.max) { idmut<-(which(p.add[(Nvars.init+1):Nvars] <= p.allow.replace) + Nvars.init) lidmut<-length(idmut) #maximal number of covariates that can die out if(lidmut>0) { p.del<-(lidmut - sum(p.add[idmut]))/lidmut lidmut<-rbinom(n = 1,size = lidmut,prob = p.del) } }else { idmut<-(Nvars+1):Nvars.max lidmut<-Nvars.max-Nvars } for(idel in 1:lidmut){ p.del<-1-(sum(p.add))/Nvars mother<-ifelse(runif(n = 1,min = 0,max = 1)<=p.del,fparam[which(rmultinom(n = 1,size = 1,prob = p.add/2)==1)],fparam[runif(n = 1,min = 1,max=Nvars.init)]) ltreem<-stri_length(mother) mother<-stri_sub(mother,from=2, to = ltreem) if(allow_offsprings==1) sjm<-sum(stri_count_fixed(str = mother, pattern = c("&","|"))) else sjm<-sum(stri_count_fixed(str = mother, pattern = c("+","*"))) if(sjm<=max.tree.size) { #p.del<-1-(sum(p.add))/Nvars father<-ifelse(runif(n = 1,min = 0,max = 1)<=p.del,fparam.pool[runif(n = 1,min = 1,max=length(fparam.pool))],fparam[which(rmultinom(n = 1,size = 1,prob = p.add/2)==1)]) ltreef<-stri_length(father) father<-stri_sub(father,from=2, to = ltreef) if(allow_offsprings==1) sjf<-sum(stri_count_fixed(str = father, pattern = c("&","|"))) else sjf<-sum(stri_count_fixed(str = father, pattern = c("+","*"))) if(sjm+sjf+1<=max.tree.size) { if(allow_offsprings==1) { if(!grepl(father, mother,fixed = T)&&!grepl(mother, father,fixed = T)) { proposal<-stri_paste(paste(ifelse(runif(n = 1,min = 0,max = 1)1) { t.d<-sample.int(size = 1,n = (max(sjm,sjf)+1)) if(sjm>=sjf) { loc<-c(1,stri_locate_all(str = mother,regex = "\\&|\\||\\*|\\+")[[1]][,1],stri_length(mother)) proposal<-stri_paste(stri_sub(mother,from = 1,to = loc[t.d]-1),stri_sub(mother,from = (loc[t.d+1]+(t.d==1)),to = stri_length(mother))) }else { loc<-c(1,stri_locate_all(str = father,regex = "\\&|\\||\\*|\\+")[[1]][,1],stri_length(father)) proposal<-stri_paste(stri_sub(father,from = 1,to = loc[t.d]-1),stri_sub(father,from = (loc[t.d+1]+(t.d==1)),to = stri_length(father))) } diffs<-(stri_count_fixed(str = proposal, pattern = "(")-stri_count_fixed(str = proposal, pattern = ")")) if(diffs>0) proposal<-stri_paste(proposal,stri_paste(rep(")",diffs),collapse = ""),collapse = "") if(diffs<0) proposal<-stri_paste(stri_paste(rep("(",-diffs),collapse = ""),proposal,collapse = "") proposal<-stri_paste("I",proposal) #print(paste("&&&&&&",mother,"ssss",father)) } else proposal<-stri_paste("I",mother) } #proposal<-stri_paste("I",mother) } else { proposal<-stri_paste(paste(ifelse(runif(n = 1,min = 0,max = 1)=sjf) { loc<-c(1,stri_locate_all(str = mother,regex = "\\&|\\||\\*|\\+")[[1]][,1],stri_length(mother)) proposal<-stri_paste(stri_sub(mother,from = 1,to = loc[t.d]-1),stri_sub(mother,from = (loc[t.d+1]+(t.d==1)),to = stri_length(mother))) }else { loc<-c(1,stri_locate_all(str = father,regex = "\\&|\\||\\*|\\+")[[1]][,1],stri_length(father)) proposal<-stri_paste(stri_sub(father,from = 1,to = loc[t.d]-1),stri_sub(father,from = (loc[t.d+1]+(t.d==1)),to = stri_length(father))) } diffs<-(stri_count_fixed(str = proposal, pattern = "(")-stri_count_fixed(str = proposal, pattern = ")")) if(diffs>0) proposal<-stri_paste(proposal,stri_paste(rep(")",diffs),collapse = ""),collapse = "") if(diffs<0) proposal<-stri_paste(stri_paste(rep("(",-diffs),collapse = ""),proposal,collapse = "") proposal<-stri_paste("I",proposal) #print(paste("!!!!!",mother,"ssss",father)) } #maybe check correlations here if( (!(proposal %in% fparam)) && Nvars0) { id.replace <- to.del[round(runif(n = 1,min = 1,max = lto.del))] if(printable.opt) print(paste("mutation happended ",proposal," tree replaced ", fparam[id.replace])) fparam[id.replace]<<-proposal keysarr <- as.array(keys(hashStat)) p.add[id.replace]<<-p.allow.replace for(jjj in 1:length(keysarr)) { if(stri_sub(keysarr[jjj],from = id.replace, to = id.replace)=="1") { del(x = keysarr[jjj],hash = hashStat) } } } } } } varcurb<-c(varcurb,array(1,dim = (Nvars -length(varcurb)))) varcand<-c(varcand,array(1,dim = (Nvars -length(varcand)))) varglob<-c(varglob,array(1,dim = (Nvars -length(varglob)))) p.post<- array(1,dim = (Nvars)) p1 = c(p1,array(0,dim = (Nvars -length(p1)))) p2 = c(p1,array(1,dim = (Nvars -length(p1)))) acc_moves<-1 j.a<-1 } } else if(allow_offsprings == 3 && j%%mutation_rate == 0 && (j<=last.mutation || Nvars!=Nvars.max)) { #if(latnames[1]!="") # perform preliminary filtration here if(Nvars>Nvars.max || j==mutation_rate) { #pool.cor.prob = T #do the stuff here if(j==mutation_rate) { fparam.pool<<-unique(c(fparam.pool,filtered)) if(!pool.cor.prob) pool.probs<-array(data = 1/length(fparam.pool),dim = length(fparam.pool)) else{ fobserved.cleaned<-fobserved fobserved.cleaned<-stri_replace(str = fobserved.cleaned,fixed = "I(",replacement = "") fobserved.cleaned<-stri_replace(str = fobserved.cleaned,fixed = ")",replacement = "") fparam.pool.cleaned<-fparam.pool fparam.pool.cleaned<-stri_replace(str = fparam.pool.cleaned,fixed = "I(",replacement = "") fparam.pool.cleaned<-stri_replace(str = fparam.pool.cleaned,fixed = ")",replacement = "") pool.probs<-abs(cor(estimator.args$data[[fobserved.cleaned]],estimator.args$data[,which(fparam.pool.cleaned %in% names(estimator.args$data))]))+p.epsilon rm(fobserved.cleaned) rm(fparam.pool.cleaned) #print(pool.probs[1:100]) } } to.del <- which(p.add < p.allow.tree) if(length(to.del)==Nvars) to.del<-to.del[-sample.int(n = Nvars,size = sample.int(n=Nvars-1,size = 1),prob = p.add+p.epsilon)] if(length(to.del)0) { clear(hashStat) #rm(hashStat) #gc() #hashStat<<-hash(keys=keysarr.new,values=as.list(data.frame((values.new)))) #rm(hashStat) hashStat<<-hash() #clear(hashStat) #hashStat<<-hash() fparam<<-fparam[-to.del] #if(!keep.origin) # pool.probs[which(fparam.pool %in% fparam)]<-1 Nvars<<-length(fparam) Nvars.init<<-Nvars p.add<<-p.add[-to.del] p.post<-array(data = 1,dim = Nvars) #print(paste("mutation happended ",proposal," tree added")) varcurb<-varcurb[-to.del] varcand<-varcurb[-to.del] varglob<-varcurb[-to.del] p1 <- array(0,dim = (Nvars)) p2 <- array(1,dim = (Nvars)) acc_moves<-1 j.a<-1 } } else { if(Nvars>=Nvars.max) { if(keep.origin) { idmut<-(which(p.add[(Nvars.init+1):Nvars] <= p.allow.replace) + Nvars.init) lidmut<-length(idmut) #maximal number of covariates that can die out if(lidmut>0) { p.del<-(lidmut - sum(p.add[idmut]))/lidmut lidmut<-rbinom(n = 1,size = lidmut,prob = p.del) } }else{ idmut<-which(p.add <= p.allow.replace) lidmut<-length(idmut) #maximal number of covariates that can die out if(lidmut>0) { p.del<-(lidmut - sum(p.add[idmut]))/lidmut lidmut<-rbinom(n = 1,size = lidmut,prob = p.del) } } #if(lidmut>0) #{ # p.del<-(lidmut - sum(p.add[idmut]))/lidmut # lidmut<-rbinom(n = 1,size = lidmut,prob = p.del) #} }else { idmut<-(Nvars+1):Nvars.max lidmut<-Nvars.max-Nvars } # now having chosen the candidates to be deleted we can propose new variables idel<-1 repsd = 1 while(idel <= lidmut && repsd <=lidmut*100){ repsd = repsd +1 #gen.prob<-c(1,1,1,1,1)#just uniform for now action.type <- sample.int(n = 5,size = 1,prob = gen.prob) if(action.type==1) { # mutation (add a leave not in the search space) proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1,prob = pool.probs)] }else if(action.type==2){ # crossover type of a proposal # generate a mother #actvars<-which(varcurb==1) mother<-ifelse(runif(n = 1,min = 0,max = 1)<=pool.cross,fparam[sample.int(n =Nvars, size =1,prob = p.add+p.epsilon)],fparam.pool[sample.int(n = length(fparam.pool),size =1,prob = pool.probs)]) ltreem<-stri_length(mother) mother<-stri_sub(mother,from=1, to = ltreem) #sjm<-sum(stri_count_fixed(str = mother, pattern = c("+","*"))) # generate a father father<-ifelse(runif(n = 1,min = 0,max = 1)<=pool.cross,fparam[sample.int(n = Nvars, size =1,prob = p.add+p.epsilon)],fparam.pool[sample.int(n = length(fparam.pool),size =1,prob = pool.probs)]) ltreef<-stri_length(father) father<-stri_sub(father,from=1, to = ltreef) #sjf<-sum(stri_count_fixed(str = father, pattern = c("+","*"))) proposal<-stri_paste("I(",stri_paste(mother,father,sep="*"),")",sep = "") }else if(action.type==3){ proposal<-ifelse(runif(n = 1,min = 0,max = 1)<=pool.cross,fparam[sample.int(n = Nvars,size =1, prob = p.add+p.epsilon)],fparam.pool[sample.int(n = length(fparam.pool),size =1,prob = pool.probs)]) proposal<-stri_paste("I(",sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)],"(",proposal,"))",sep = "") }else if(action.type==4){ # select a subset for the projection spad = sum(p.add) actvars <- which(rbinom(n = length(fparam),size = 1,prob = p.add/spad+p.epsilon)==1) if(length(actvars)<=1) { proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1)] }else{ capture.output(withRestarts(tryCatch({ #print(as.formula(stri_paste(fobserved,"~ 1 +",paste0(fparam[actvars],collapse = "+")))) # get the projection coefficients as the posterior mode of the fixed effects if(deep.method == 1){ bet.act <- do.call(.self$estimator, c(estimator.args,as.formula(stri_paste(fobserved,"~ 1 +",paste0(fparam[actvars],collapse = "+")))))$summary.fixed$mean nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 bet.act<-round(bet.act, digits = 8) bet.act<-stri_paste("m(",bet.act,",",sep = "") # make a projection bet.act<-stri_paste(bet.act,c("1",fparam[actvars]),")",sep = "") bet.act<-stri_paste("I(",bet.act,")",sep = "") proposal<-stri_paste("I(",stri_paste(bet.act,collapse = "+"),")",collapse = "") proposal<-stri_paste("I(",sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)],"(",proposal,"))",sep = "") #print(proposal) }else if(deep.method == 2){ cursigma = sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)] bet.act = gnlr(y=data.example[[fobserved]], distribution = estimator.args$distribution, mu = as.formula(stri_paste("~",estimator.args$link,"(",cursigma,"(","b0 +",paste0("b",1:length(actvars),"*",fparam[actvars],collapse = "+"),"))")), pmu = rep(0,length(actvars)+1))$coefficients nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 #bet.act<-round(bet.act, digits = 2) bet.act<-round(bet.act, digits = 8) bet.act<-stri_paste("m(",bet.act,",",sep = "") # make a projection bet.act<-stri_paste(bet.act,c("1",fparam[actvars]),")",sep = "") bet.act<-stri_paste("I(",bet.act,")",sep = "") proposal<-stri_paste("I(",stri_paste(bet.act,collapse = "+"),")",collapse = "") proposal<-stri_paste("I(",cursigma,"(",proposal,"))",sep = "") }else if(deep.method == 3){ cursigma = sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)] forstr = stri_paste("~",estimator.args$link,"(",cursigma,"(","m(0,1) +",paste0("m(",rnorm(length(actvars),0,1),",",fparam[actvars],")",collapse = "+"),"))") forstr = stri_replace_all_fixed(str = forstr,pattern = c("m(-"),replacement = "m(") forstr = stri_replace_all_fixed(str = forstr,pattern = c("m("),replacement = "m(b_") #print(as.formula(forstr)) nlrr = gnlr(y=data.example[[fobserved]], distribution = estimator.args$distribution, mu = as.formula(forstr), pmu = rnorm(stri_count_fixed(str = forstr,pattern = "m("),0,0.0001))#$coefficients beg.rep = stri_locate_all(str = forstr,fixed = "m(b")[[1]][,2] end.rep = stri_locate_all(str = forstr,fixed = "," )[[1]][,2] bet.act = nlrr$coefficients nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 bet.act<-round(bet.act, digits = 8) torepl = stri_sub(str = forstr,from = beg.rep,to = end.rep) for(i in 1:length(bet.act)) forstr = stri_replace_all_fixed(str = forstr,pattern =torepl[i],replacement = stri_paste(bet.act[i],",")) forstr = stri_replace_all_fixed(str = forstr,pattern =paste0("~",estimator.args$link),replacement = "I") proposal<-forstr } else{ bet.act <- rnorm(n = (length(actvars)+1),mean = 0,sd = 1) nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 bet.act<-round(bet.act, digits = 8) bet.act<-stri_paste("m(",bet.act,",",sep = "") # make a projection bet.act<-stri_paste(bet.act,c("1",fparam[actvars]),")",sep = "") bet.act<-stri_paste("I(",bet.act,")",sep = "") proposal<-stri_paste("I(",stri_paste(bet.act,collapse = "+"),")",collapse = "") proposal<-stri_paste("I(",sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)],"(",proposal,"))",sep = "") } }, error = function(err) { #print(err) proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1)] },finally = { proposal = proposal }))) #print(proposal) if(is.na(proposal)) { print(fparam[actvars]) print(actvars) print(bet.act) } } }else if(action.type==5) { #reduce an operator fparam[idel] #print("reduction") #print(idel) if(idel>length(fparam)) { proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1)] }else{ cpm<-sum(stri_count_fixed(str = fparam[idel], pattern = c("*"))) if(length(cpm)==0) cpm<-0 if(cpm>0) { t.d<-sample.int(size = 1,n = (cpm)) #print(fparam[idel]) loc<-c(1,stri_locate_all(str = fparam[idel],regex = "\\*")[[1]][,1],stri_length(fparam[idel])) proposal<-stri_paste(stri_sub(fparam[idel],from = 1,to = loc[t.d]-1+2*(t.d==1)),stri_sub(fparam[idel],from = (loc[t.d+1]+(t.d==1)),to = stri_length(fparam[idel]))) if(runif(n = 1,min = 0,max = 1)so){ proposal<-stri_paste(stri_paste("I",rep("(",sc-so), collapse = ''),proposal) }else if(scmax.tree.size) { proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1)] } add<-T ids.lat<-integer(0) if(latnames[1]!="") { ids.lat<-which(fparam %in% latnames) if(sum(stri_count_fixed(str = proposal,pattern = latnames))>0) add<-F } if(length(ids.lat)==0) ids.lat<-Nvars.max +1 #print(add) #print(proposal) if(proposal %in% latnames) { if((proposal %in% fparam[ids.lat])) add<-F }else{ if(add){ withRestarts(tryCatch(capture.output({ bet.act <- do.call(.self$estimator, c(estimator.args,as.formula(stri_paste(fobserved,"~ 1 +",paste0(c(fparam[-ids.lat],proposal),collapse = "+")))))$summary.fixed$mean if(is.na(bet.act[length(fparam[-ids.lat])+2])&& (action.type!=4&&gen.prob[2]==0||gen.prob[2]!=0)) { add<-F }else { idel<-idel+1 } }, error = function(err) { add<-F },finally = {} ))) } } #print(add) #print("next") if(add & Nvars0) { id.replace <- to.del[round(runif(n = 1,min = 1,max = lto.del))] if(printable.opt) print(paste("mutation happended ",proposal," tree replaced ", fparam[id.replace])) fparam[id.replace]<<-proposal keysarr <- as.array(keys(hashStat)) p.add[id.replace]<<-p.allow.replace for(jjj in 1:length(keysarr)) { if(stri_sub(keysarr[jjj],from = id.replace, to = id.replace)=="1") { del(x = keysarr[jjj],hash = hashStat) } } } } } } if(p.add.default<1) p.add<<-array(p.add.default,Nvars) varcurb<-c(varcurb,array(1,dim = (Nvars -length(varcurb)))) varcand<-c(varcand,array(1,dim = (Nvars -length(varcand)))) varglob<-c(varglob,array(1,dim = (Nvars -length(varglob)))) p.post<- array(1,dim = (Nvars)) p1 = c(p1,array(0,dim = (Nvars -length(p1)))) p2 = c(p1,array(1,dim = (Nvars -length(p1)))) acc_moves<-1 j.a<-1 }else if(allow_offsprings == 4 && j%%mutation_rate == 0 && (j<=last.mutation || Nvars!=Nvars.max)) { add.buf<-F # perform preliminary filtration here if(Nvars>Nvars.max || j==mutation_rate) { on.suggested <- 1 preaccepted<-F #do the stuff here if(j==mutation_rate) { fparam.pool<<-unique(c(fparam.pool,filtered)) if(!pool.cor.prob) pool.probs<-array(data = 1/length(fparam.pool),dim = length(fparam.pool)) else{ fobserved.cleaned<-fobserved fobserved.cleaned<-stri_replace(str = fobserved.cleaned,fixed = "I(",replacement = "") fobserved.cleaned<-stri_replace(str = fobserved.cleaned,fixed = ")",replacement = "") fparam.pool.cleaned<-fparam.pool fparam.pool.cleaned<-stri_replace(str = fparam.pool.cleaned,fixed = "I(",replacement = "") fparam.pool.cleaned<-stri_replace(str = fparam.pool.cleaned,fixed = ")",replacement = "") pool.probs<-abs(cor(estimator.args$data[[fobserved.cleaned]],estimator.args$data[,which(fparam.pool.cleaned %in% names(estimator.args$data))]))+p.epsilon rm(fobserved.cleaned) rm(fparam.pool.cleaned) #print(pool.probs[1:100]) } } to.del <- which(p.add < p.allow.tree) if(length(to.del)==Nvars) to.del<-to.del[-sample.int(n = Nvars,size = sample.int(n=Nvars-1,size = 1),prob = p.add+p.epsilon)] if(length(to.del)0) { clear(hashStat) #rm(hashStat) #gc() #hashStat<<-hash(keys=keysarr.new,values=as.list(data.frame((values.new)))) clear(hashStat) #hashStat<<-hash() fparam<<-fparam[-to.del] if(!keep.origin) pool.probs[which(fparam.pool %in% fparam)]<-1 Nvars<<-length(fparam) Nvars.init<<-Nvars p.add<<-p.add[-to.del] p.post<-array(data = 1,dim = Nvars) #print(paste("mutation happended ",proposal," tree added")) varcurb<-varcurb[-to.del] varcand<-varcurb[-to.del] varglob<-varcurb[-to.del] p1 <- array(0,dim = (Nvars)) p2 <- array(1,dim = (Nvars)) acc_moves<-1 j.a<-1 super.backward = F } } else { if(Nvars>=Nvars.max) { # delete those that are not in the active model with probability 0.5 each if(keep.origin) { idmut<-(which(p.add[(Nvars.init+1):Nvars] <= p.allow.replace) + Nvars.init) lidmut<-length(idmut) #maximal number of covariates that can die out if(lidmut>0) { p.del<-(lidmut - sum(p.add[idmut]))/lidmut lidmut<-rbinom(n = 1,size = lidmut,prob = p.del) } }else{ idmut<-which(p.add <= p.allow.replace) lidmut<-length(idmut) #maximal number of covariates that can die out if(lidmut>0) { p.del<-(lidmut - sum(p.add[idmut]))/lidmut lidmut<-rbinom(n = 1,size = lidmut,prob = p.del) } } #mod.id.old<-runif(n = 1000,min = 1,max = 2^Nvars) if(on.suggested%%2==1){ if(preaccepted) { log.mod.switch.prob.back<-sum(abs(varcurb.buf-varcurb)) eqal.things<-which(varcurb.buf==varcurb) log.mod.switch.prob.back<-log.mod.switch.prob.back+length(which(fparam[eqal.things]!=tmp.buf.1[eqal.things])) log.mod.switch.prob.back<-log.mod.switch.prob.back^prand if(printable.opt) print(paste0("afteracceptance ratio ",log.mod.switch.prob.back/log.mod.switch.prob)) if(runif(1,0,1)<=log.mod.switch.prob.back/log.mod.switch.prob) { if(printable.opt) print("preaccepted mutation is accepted") mlikcur<-mlikcur.buf.2 varcurb<-varcurb.buf.2 fparam<<-tmp.buf.2 p.add<<-p.add.buf.2 }else { if(printable.opt) print("preaccepted mutation rejected") fparam<<-tmp.buf.1 p.add<<-p.add.buf.1 mlikcur<-mlikcur.buf varcurb<-varcurb.buf } preaccepted<-F } tmp.buf<-fparam[which(varcurb==1)] tmp.buf.1<-fparam p.add.buf.1<-p.add # hashStat.buf.1<-copy(hashStat) vect<-buildmodel(max.cpu = 1,varcur.old = varcurb,statid = -1,min.N = Nvars,max.N = Nvars,switch.type = 9) res.par <- lapply(X = vect,FUN = .self$fitmodel) mlikcur<-res.par[[1]]$mlik varcurb<-vect[[1]]$varcur mlikcur.buf<-mlikcur varcurb.buf<-varcurb if(printable.opt) print("OLDMOD") mso=(mlikcur)#(sum(unlist(lapply(res.par, function(x) exp(x$mlik))))) if(printable.opt) print(mso) }else if(on.suggested%%2==0){ #tmp.buf2<-fparam[which(varcurb==1)] tmp.buf.2<-fparam p.add.buf.2<-p.add vect<-buildmodel(max.cpu = 1,varcur.old = varcurb,statid = -1,min.N = Nvars,max.N = Nvars,switch.type = 9) log.mod.switch.prob<-vect[[1]]$log.mod.switch.prob res.par <- lapply(X = vect,FUN = .self$fitmodel) #print(mlikcur) mlikcur<-res.par[[1]]$mlik varcurb<-vect[[1]]$varcur vect<-buildmodel(max.cpu = 1,varcur.old = varcurb,statid = -1,min.N = Nvars,max.N = Nvars,switch.type = 9) log.mod.switch.prob<-vect[[1]]$log.mod.switch.prob res.par <- lapply(X = vect,FUN = .self$fitmodel) #print(mlikcur) mlikcur<-res.par[[1]]$mlik varcurb<-vect[[1]]$varcur mlikcur.buf.2<-mlikcur varcurb.buf.2<-varcurb msn=(mlikcur)#(sum(unlist(lapply(res.par, function(x) exp(x$mlik))))) if(printable.opt) print("NEWMOD") if(printable.opt) print(msn) if(msn == Inf && mso == Inf ||msn == 0 && mso == 0) { msn<-1 mso<-1 }#*log((sum(tmp.buf%in%fparam)==length(tmp.buf))) if(log(runif(1,0,1))>=((msn)-(mso))) { if(printable.opt)print("proposal is rejected") fparam<<-tmp.buf.1 p.add<<-p.add.buf.1 mlikcur<-mlikcur.buf varcurb<-varcurb.buf preaccepted<-F #on.suggested<-on.suggested+1 }else{ if(printable.opt)print("mutation is preaccepted") #fparam<<-tmp.buf.1 #p.add<<-p.add.buf.1 #lidmut<-0 preaccepted<-T } } on.suggested<-on.suggested+1 }else { idmut<-(Nvars+1):Nvars.max lidmut<-Nvars.max-Nvars } # now having chosen the candidates to be deleted we can propose new variables idel<-1 while(idel <= lidmut){ #gen.prob<-c(1,1,1,1,1)#just uniform for now action.type <- sample.int(n = 5,size = 1,prob = gen.prob) if(action.type==1) { # mutation (add a leave not in the search space) proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1,prob = pool.probs)] }else if(action.type==2){ # crossover type of a proposal # generate a mother #actvars<-which(varcurb==1) mother<-ifelse(runif(n = 1,min = 0,max = 1)<=pool.cross,fparam[sample.int(n =Nvars, size =1,prob = p.add+p.epsilon)],fparam.pool[sample.int(n = length(fparam.pool),size =1,prob = pool.probs)]) ltreem<-stri_length(mother) mother<-stri_sub(mother,from=1, to = ltreem) #sjm<-sum(stri_count_fixed(str = mother, pattern = c("+","*"))) # generate a father father<-ifelse(runif(n = 1,min = 0,max = 1)<=pool.cross,fparam[sample.int(n = Nvars, size =1,prob = p.add+p.epsilon)],fparam.pool[sample.int(n = length(fparam.pool),size =1,prob = pool.probs)]) ltreef<-stri_length(father) father<-stri_sub(father,from=1, to = ltreef) #sjf<-sum(stri_count_fixed(str = father, pattern = c("+","*"))) proposal<-stri_paste("I(",stri_paste(mother,father,sep="*"),")",sep = "") }else if(action.type==3){ proposal<-ifelse(runif(n = 1,min = 0,max = 1)<=pool.cross,fparam[sample.int(n = Nvars,size =1, prob = p.add+p.epsilon)],fparam.pool[sample.int(n = length(fparam.pool),size =1,prob = pool.probs)]) proposal<-stri_paste("I(",sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)],"(",proposal,"))",sep = "") }else if(action.type==4){ # select a sparse subset for the projection spad = sum(p.add) actvars <- which(rbinom(n = length(fparam),size = 1,prob = p.add/spad+p.epsilon)==1) #actvars <- which(rbinom(n = Nvars,size = 1,prob = p.add+p.epsilon)==1) if(length(actvars)<=1) { proposal <- fparam[1] }else{ capture.output(withRestarts(tryCatch({ #print(as.formula(stri_paste(fobserved,"~ 1 +",paste0(fparam[actvars],collapse = "+")))) # get the projection coefficients as the posterior mode of the fixed effects if(deep.method == 1){ bet.act <- do.call(.self$estimator, c(estimator.args,as.formula(stri_paste(fobserved,"~ 1 +",paste0(fparam[actvars],collapse = "+")))))$summary.fixed$mean nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 bet.act<-round(bet.act, digits = 8) bet.act<-stri_paste("m(",bet.act,",",sep = "") # make a projection bet.act<-stri_paste(bet.act,c("1",fparam[actvars]),")",sep = "") bet.act<-stri_paste("I(",bet.act,")",sep = "") proposal<-stri_paste("I(",stri_paste(bet.act,collapse = "+"),")",collapse = "") proposal<-stri_paste("I(",sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)],"(",proposal,"))",sep = "") #print(proposal) }else if(deep.method == 2){ cursigma = sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)] bet.act = gnlr(y=data.example[[fobserved]], distribution = estimator.args$distribution, mu = as.formula(stri_paste("~",estimator.args$link,"(",cursigma,"(","b0 +",paste0("b",1:length(actvars),"*",fparam[actvars],collapse = "+"),"))")), pmu = rep(0,length(actvars)+1))$coefficients nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 #bet.act<-round(bet.act, digits = 2) bet.act<-round(bet.act, digits = 8) bet.act<-stri_paste("m(",bet.act,",",sep = "") # make a projection bet.act<-stri_paste(bet.act,c("1",fparam[actvars]),")",sep = "") bet.act<-stri_paste("I(",bet.act,")",sep = "") proposal<-stri_paste("I(",stri_paste(bet.act,collapse = "+"),")",collapse = "") proposal<-stri_paste("I(",cursigma,"(",proposal,"))",sep = "") }else if(deep.method == 3){ cursigma = sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)] forstr = stri_paste("~",estimator.args$link,"(",cursigma,"(","m(0,1) +",paste0("m(",rnorm(length(actvars),0,1),",",fparam[actvars],")",collapse = "+"),"))") forstr = stri_replace_all_fixed(str = forstr,pattern = c("m(-"),replacement = "m(") forstr = stri_replace_all_fixed(str = forstr,pattern = c("m("),replacement = "m(b_") #print(as.formula(forstr)) nlrr = gnlr(y=data.example[[fobserved]], distribution = estimator.args$distribution, mu = as.formula(forstr), pmu = rnorm(stri_count_fixed(str = forstr,pattern = "m("),0,0.0001))#$coefficients beg.rep = stri_locate_all(str = forstr,fixed = "m(b")[[1]][,2] end.rep = stri_locate_all(str = forstr,fixed = "," )[[1]][,2] bet.act = nlrr$coefficients nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 bet.act<-round(bet.act, digits = 8) torepl = stri_sub(str = forstr,from = beg.rep,to = end.rep) for(i in 1:length(bet.act)) forstr = stri_replace_all_fixed(str = forstr,pattern =torepl[i],replacement = stri_paste(bet.act[i],",")) forstr = stri_replace_all_fixed(str = forstr,pattern =paste0("~",estimator.args$link),replacement = "I") proposal<-forstr } else{ bet.act <- rnorm(n = (length(actvars)+1),mean = 0,sd = 1) nab<-which(is.na(bet.act)) if(length(nab)>0) bet.act[nab]<-0 bet.act<-round(bet.act, digits = 8) bet.act<-stri_paste("m(",bet.act,",",sep = "") # make a projection bet.act<-stri_paste(bet.act,c("1",fparam[actvars]),")",sep = "") bet.act<-stri_paste("I(",bet.act,")",sep = "") proposal<-stri_paste("I(",stri_paste(bet.act,collapse = "+"),")",collapse = "") proposal<-stri_paste("I(",sigmas[sample.int(n = length(sigmas),size=1,replace = F,prob = sigmas.prob)],"(",proposal,"))",sep = "") } }, error = function(err) { #print(err) proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1)] },finally = { proposal = proposal }))) #print(proposal) if(is.na(proposal)) { print(fparam[actvars]) print(actvars) print(bet.act) } } }else if(action.type==5) { #reduce an operator fparam[idel] #print("reduction") #print(idel) if(idel>length(fparam)) { proposal<-fparam[1] }else{ cpm<-sum(stri_count_fixed(str = fparam[idel], pattern = c("*"))) if(length(cpm)==0) cpm<-0 if(cpm>0) { t.d<-sample.int(size = 1,n = (cpm)) #print(fparam[idel]) loc<-c(1,stri_locate_all(str = fparam[idel],regex = "\\*")[[1]][,1],stri_length(fparam[idel])) proposal<-stri_paste(stri_sub(fparam[idel],from = 1,to = loc[t.d]-1+2*(t.d==1)),stri_sub(fparam[idel],from = (loc[t.d+1]+(t.d==1)),to = stri_length(fparam[idel]))) if(runif(n = 1,min = 0,max = 1)so){ proposal<-stri_paste(stri_paste("I",rep("(",sc-so), collapse = ''),proposal) }else if(scmax.tree.size || length(sj)==0) proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1)] if(is.na(proposal)) proposal<-fparam.pool[sample.int(n=length(fparam.pool),size = 1)] add<-T tryCatch(capture.output({ bet.act <- do.call(.self$estimator, c(estimator.args,as.formula(stri_paste(fobserved,"~ 1 +",paste0(c(fparam,proposal),collapse = "+")))))$summary.fixed$mean if(is.na(bet.act[length(fparam)+2])) { add<-F }else { idel<-idel+1 } }, error = function(err) { add<-F })) if(add & Nvars0) { id.replace <- to.del[round(runif(n = 1,min = 1,max = lto.del))] #if(printable.opt) #print(paste("mutation suggested ",proposal," tree replaced ", fparam[id.replace])) fparam[id.replace]<<-proposal keysarr <- as.array(keys(hashStat)) p.add[id.replace]<<-p.allow.replace for(jjj in 1:length(keysarr)) { if(stri_sub(keysarr[jjj],from = id.replace, to = id.replace)=="1") { del(x = keysarr[jjj],hash = hashStat) } } } } } } varcurb<-c(varcurb,array(1,dim = (Nvars -length(varcurb)))) varcand<-c(varcand,array(1,dim = (Nvars -length(varcand)))) varglob<-c(varglob,array(1,dim = (Nvars -length(varglob)))) p.post<- array(1,dim = (Nvars)) p1 = c(p1,array(0,dim = (Nvars -length(p1)))) p2 = c(p1,array(1,dim = (Nvars -length(p1)))) acc_moves<-1 j.a<-1 }else if(allow_offsprings > 0 && j%%mutation_rate == 0 && j>last.mutation) { recalc.margin = 2^Nvars } #withRestarts(tryCatch({ varcur<-varcurb if(LocImprove<=3) { vect<-buildmodel(max.cpu = 1,varcur.old = varcurb,statid = 4 + LocImprove,min.N = min.N.glob,max.N = max.N.glob,switch.type = switch.type.glob.buf) max.cpu.buf = 1 }else { vect<-buildmodel(max.cpu = max.cpu.glob,varcur.old = varcurb,statid = 4 + LocImprove,min.N = min.N,max.N = max.N,switch.type = switch.type.glob.buf) max.cpu.buf = max.cpu.glob } cluster<-TRUE flag1<-0 for(mod_id in 1:max.cpu.buf) { if(is.null(vect[[mod_id]]$formula)) { flag1<-flag1+1 } } if(flag1==max.cpu.glob) { cluster<-FALSE if(printable.opt)print("!!!!Models already estimated!!!!") }else { if(max.cpu.glob > 1) res.par <- parallelize(X = vect,FUN = .self$fitmodel) else res.par <- lapply(X = vect,FUN = .self$fitmodel) } if(LocImprove>3) { if(printable.opt)print("!!!!Proceed with no local improvements!!!!") p.select.y <- array(data = 0, dim = max.cpu.glob) for(mod_id in 1:max.cpu.glob) { if(cluster) { fm<-res.par[[mod_id]] if(is.null(fm)&&(is.na(res.par[[mod_id]]$waic))) { varcand<-varcurb if(printable.opt)print("GlobMTMCMC Model Fit Error!?") next } } varcand<-vect[[mod_id]]$varcur if(cluster) { waiccand<-res.par[[mod_id]]$waic mlikcand<-res.par[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } if((mlikcand>mlikglob)) #update the parameter of interest { if(printable.opt)print(paste("GlobMTMCMC update waic.glob = ", waiccand)) if(printable.opt)print(paste("GlobMTMCMC update mlik.glob = ", mlikglob)) mlikglob<-mlikcand waicglob<-waiccand varglob<-varcand if(cluster) modglob<-fm } g1 <- waiccur if(waiccur == Inf) { g1 = 1 } p.select.y[mod_id]<-(mlikcand + vect[[mod_id]]$log.mod.switchback.prob+log(lambda(c = cc, alpha = aa, g1 = -g1, g2 = -waiccand,g.domain.pos = FALSE))) # correct for different criteria later if(is.na(p.select.y[mod_id])) p.select.y[mod_id] <- 0 if(is.infinite(p.select.y[mod_id]) || p.select.y[mod_id]>100000000) { #if(printable.opt)print(paste("very large log.w.y detected ",p.select.y[mod_id])) p.select.y[mod_id] <- 100000000 } } max.p.select.y <- max(p.select.y) p.select.y<-p.select.y-max.p.select.y if(printable.opt)print(paste("max log.w.y is ",max.p.select.y,"normilized log.w.n.y is ", paste(p.select.y,collapse = ", "))) ID<-sample.int(n = max.cpu.glob,size = 1,prob = exp(p.select.y)) if(printable.opt)print(paste("cand ",ID," selected")) varcand<-vect[[ID]]$varcur if(cluster) { waiccand<-res.par[[ID]]$waic mlikcand<-res.par[[ID]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } #p.Q.cand<- p.select.y[ID]/sum(p.select.y) if(printable.opt)print("do reverse step") p.select.z <- array(data = 0.01, dim = max.cpu.glob) if(max.cpu.glob!=1) { if(switch.type.glob.buf==5) cstm<-6 else if(switch.type.glob.buf==6) { cstm<-5 } else { cstm <- switch.type.glob.buf } vect1<-buildmodel(max.cpu = max.cpu.glob -1,varcur.old = varcand,statid = 4 + LocImprove,switch.type = cstm, min.N = min.N, max.N = max.N) cluster<-TRUE flag1<-0 for(mod_id in 1:(max.cpu.glob-1)) { if(is.null(vect1[[mod_id]]$formula)) { flag1<-flag1+1 } } if(flag1==(max.cpu.glob-1)) { cluster<-FALSE if(printable.opt)print("!!!!MTMCMC reverse models already estimated!!!!") }else { res.par.back <- parallelize(X = vect1,FUN = .self$fitmodel) } for(mod_id in 1:(max.cpu.glob-1)) { if(cluster) { if(is.null(fm)&&(is.na(res.par.back[[mod_id]]$waic))) { if(printable.opt)print("locMTMCMC Model Fit Error!?") next } } varcand.b<-vect1[[mod_id]]$varcur if(cluster) { waiccand.b<-res.par.back[[mod_id]]$waic mlikcand.b<-res.par.back[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand.b)+1 waiccand.b<-statistics1[iidd,2] mlikcand.b<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand.b,collapse = "") waiccand.b<-values(hashStat[iidd])[2] mlikcand.b<-values(hashStat[iidd])[1] } if((mlikcand.b>mlikglob)) { if(printable.opt)print(paste("GlobMTMCMC update waic.glob = ", waiccand.b)) if(printable.opt)print(paste("GlobMTMCMC update mlik.glob = ", mlikcand.b)) mlikglob<-mlikcand.b waicglob<-waiccand.b varglob<-varcand.b if(cluster) modglob<-fm } g1 = waiccand if(waiccand == Inf) { g1 = 1 } p.select.z[mod_id]<-(mlikcand.b+vect1[[mod_id]]$log.mod.switchback.prob+(lambda(c = cc, alpha = aa, g1 = -g1, g2 = -waiccand.b,g.domain.pos = FALSE))) # correct for different criteria later if(is.na(p.select.z[mod_id])) p.select.z[mod_id]=0 if(is.infinite(p.select.z[mod_id]) || p.select.z[mod_id] > 100000000) { #if(printable.opt)print(paste("very large log.w.y detected ",p.select.z[mod_id])) p.select.z[mod_id] <- 100000000 } } } if( waiccur == Inf) { g1 = 1 } p.select.z[max.cpu.glob] <- (mlikcur+vect[[ID]]$log.mod.switch.prob+(lambda(c = cc, alpha = aa, g1 = -g1, g2 = -waiccand,g.domain.pos = FALSE))) if(is.na(p.select.z[mod_id])) p.select.z[mod_id]=0 if(is.infinite(p.select.z[mod_id]) || p.select.z[mod_id] > 100000000) { #if(printable.opt)print(paste("very large log.w.y detected ",p.select.z[mod_id])) p.select.z[mod_id] <- 100000000 } max.p.select.z <- max(p.select.z) p.select.z<-p.select.z-max.p.select.z if(printable.opt)print(paste("max log.w.z is ",max.p.select.z,"normilized log.w.n.z is ", paste(p.select.z,collapse = ", "))) if(log(runif(n = 1,min = 0,max = 1)) < (log(sum(exp(p.select.y)))-log(sum(exp(p.select.z)))) + max.p.select.y - max.p.select.z ) { mlikcur<-mlikcand ratcur<-mlikcand if(printable.opt)print(paste("global MTMCMC update ratcur = ", mlikcand)) if(printable.opt)print(paste("global MTMCMC accept move with ", waiccand)) varcurb<-varcand waiccur<-waiccand id<-bittodec(varcurb)+1 acc_moves<-acc_moves+1 if(j0) { distrib_of_proposals[5]<-distrib_of_proposals[5] - 1 } }else { if(cluster) { fm<-res.par[[mod_id]] if(is.null(fm)&&(is.na(res.par[[mod_id]]$waic))) { varcand<-varcurb if(printable.opt)print("EMJMCMC Model Fit Error!?") next } } varcand<-vect[[mod_id]]$varcur if(cluster) { waiccand<-res.par[[mod_id]]$waic mlikcand<-res.par[[mod_id]]$mlik }else if(exists("statistics1")) { iidd<-bittodec(varcand)+1 waiccand<-statistics1[iidd,2] mlikcand<-statistics1[iidd,1] }else if(exists("hashStat")) { iidd<-paste(varcand,collapse = "") waiccand<-values(hashStat[iidd])[2] mlikcand<-values(hashStat[iidd])[1] } varcur<-varcand } # try local improvements if(LocImprove<=3) { # sa improvements if(LocImprove == 0 || LocImprove == 3) { if(printable.opt)print("Try SA imptovements") buf.change <- array(data = 1,dim = Nvars) buf.change[which(varcur - varcurb!=0)]=0 #buf.opt[floor(runif(n = n.size,min = 1,max = Nvars+0.999999))] = 1 if(objective ==0) { objold<-waiccur objcur<-waiccand }else { objold<- -mlikcur objcur<- -mlikcand } model = list(statid = 4 + LocImprove, switch.type = switch.type.buf, change = buf.change,mlikcur = mlikcur, varcur = varcur,varold = varcurb, objcur = objcur,objold = objold, sa2 = ifelse(LocImprove == 3,TRUE,FALSE) ,reverse=FALSE) SA.forw<-learnlocalSA(model) ratcand<-SA.forw$mlikcur if(LocImprove == 0) { ids = which(buf.change == 1) model$varcur <- SA.forw$varcur model$waiccur<-SA.forw$waiccur model$varcur[ids] = 1 - model$varcur[ids] model$mlikcur<- -Inf model$waiccur<- Inf #estimate the jump!!! if(objective ==0) { model$objold<-waiccur }else { model$objold<- -ratcand } model$reverse = TRUE if(switch.type.buf==5) { model$switch.type <- 6 } else if(switch.type.buf==6) { model$switch.type <- 5 } SA.back<-learnlocalSA(model) } if(LocImprove == 0) { thact<-sum(ratcand, - ratcur, - SA.forw$log.prob.cur,SA.forw$log.prob.fix,SA.back$log.prob.cur, - SA.back$log.prob.fix,na.rm=T) if(log(runif(n = 1,min = 0,max = 1))<=thact) { ratcur<-ratcand mlikcur<-ratcand if(printable.opt)print(paste("update ratcur through SA = ", ratcur)) varcurb<-SA.forw$varcur acc_moves<-acc_moves+1 id<-bittodec(varcurb)+1 if(j0) { distrib_of_proposals[1]<-distrib_of_proposals[1] - 1 } if((SA.back$mlikglob0) { distrib_of_proposals[4]<-distrib_of_proposals[4] - 1 } } if((SA.forw$mlikglob1) { distrib_of_proposals[2]<-distrib_of_proposals[2] - 1 } if((MTMCMC.forw$mlikglob1) { distrib_of_proposals[3]<-distrib_of_proposals[3] - 1 } if((GREEDY.forw$mlikglobglob.model$burnin && j%%as.integer(thin_rate)==0) #carry out smart thinning { if(!is.null(varcurb)) { p.post<- (p.post + varcurb) thin_count <- thin_count +1 id<-bittodec(varcurb)+1 if(exists("statistics1")) { statistics1[id,4]<-statistics1[id,4] + 1 } } } } accept_old <- acc_moves p2<-p.post/acc_moves if(j>glob.model$burnin && (recalc.margin >= 2^Nvars) && sum(p2)!=0) { p.add <<- as.array(p2) } eps.emp<-normprob(p1,p2) etm <- proc.time() delta.time <- (etm[3] - stm[3])/60.0 } if(is.null(modglob)) { vect<-buildmodel(max.cpu = 1,varcur.old = varglob,statid = 3, switch.type = 8) res.par <- lapply(X = vect, FUN = fitmodel) modglob<-res.par[[1]]$fm } #!#print the results if(printable.opt)print(paste(j," iterations completed in total")) #if(printable.opt)print(paste("WAIC.glob = ", waicglob)) etm <- proc.time() tt<-(etm[3]-stm[3])/60.0 acc_ratio <- acc_moves/j.a if(printable.opt)print(paste(j," moves proposed in total, ", acc_moves," of them accepted, acceptance ratio is ",acc_ratio)) if(printable.opt)print(paste("posterior distribution ofproposals is", distrib_of_proposals)) if(exists("statistics1")) { bayes.res<-post_proceed_results(statistics1) m.post<-statistics1[,4]/j.a }else if(exists("hashStat")) { bayes.res<-post_proceed_results_hash(hashStat) m.post<-NULL } else { bayes.res<-NULL m.post<-NULL } return(list(model=modglob, vars=varglob, waic = waicglob, mlik = mlikglob, time=(tt), freqs = distrib_of_proposals, acc_ratio = acc_ratio, p.post = p.post/acc_moves, m.post = m.post, p.post.freq = p.post, eps=eps.emp, bayes.results = bayes.res)) }, #save big.data results, if the latter are available save_results_csv = function(statistics1, filename) { write.big.matrix(x = statistics1,filename = filename) }, #vizualize the results graphically if the latter are available visualize_results = function(statistics1, template, mds_size, crit,draw_dist = FALSE) { ids = NULL if(crit$mlik){ ids<-c(ids,which(statistics1[,1]==-100000)) ids<-c(ids,which(statistics1[,1]==-Inf))} #ids<-c(ids,which(statistics1[,1]>0)) if(crit$waic){ ids<-c(ids,which(statistics1[,2]==Inf)) ids<-c(ids,which(statistics1[,3]==Inf))} if(crit$dic){ ids<-c(ids,which(is.na(statistics1[,3])))} if(length(ids)!=0) { if(crit$mlik) mlik.lim<-c(min(statistics1[-ids,1],na.rm = TRUE),max(statistics1[-ids,1],na.rm = TRUE)) if(crit$waic) waic.lim<-c(min(statistics1[-ids,2],na.rm = TRUE),max(statistics1[-ids,2],na.rm = TRUE)) if(crit$dic) dic.lim<-c(min(statistics1[-ids,3],na.rm = TRUE),max(statistics1[-ids,3],na.rm = TRUE)) }else { if(crit$mlik) mlik.lim<-c(min(statistics1[,1],na.rm = TRUE),max(statistics1[,1],na.rm = TRUE)) if(crit$waic) waic.lim<-c(min(statistics1[,2],na.rm = TRUE),max(statistics1[,2],na.rm = TRUE)) if(crit$dic) dic.lim<-c(min(statistics1[,3],na.rm = TRUE),max(statistics1[,3],na.rm = TRUE)) } norm<-0.5*sqrt(sum(statistics1[,4],na.rm = TRUE)) if(crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_legend.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_legend.jpg",sep = "")) plot(xlab = "posterior probability of a visit found", ylab="visit type",c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(7,7,7,-1,-1,-1,-1),ylim = c(0,9), xlim=c(0,1),pch=19, col = 7,cex= c(2,2,2,0,0,0,0)) points(c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(6,6,6,-1,-1,-1,-1),pch=8, col = 5,cex= c(1,2,3,0,0,0,0)) points(c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(1,1,1,-1,-1,-1,-1),pch=2, col = 2,cex= c(1,2,3,0,0,0,0)) points(c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(2,2,2,-1,-1,-1,-1),pch=3, col = 3,cex= c(1,2,3,0,0,0,0)) points(c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(3,3,3,-1,-1,-1,-1),pch=4, col = 4,cex= c(1,2,3,0,0,0,0)) points(c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(4,4,4,-1,-1,-1,-1),pch=6, col = 6,cex=c(1,2,3,0,0,0,0)) points(c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(5,5,5,-1,-1,-1,-1),pch=1, col = 1,cex= c(1,2,3,0,0,0,0)) points(c(0.1,0.2,0.3,0.4,0.5,0.6,0.7),c(8,8,8,-1,-1,-1,-1),pch=19, col = 2,cex= c(2,2,2,0,0,0,0)) legend(x = 0.35,y = 1.5,legend = "- SA 1 local optimization accepted",bty="n" ) legend(x = 0.35,y = 2.5,legend = "- MTMCMC local optimization accepted",bty="n" ) legend(x = 0.35,y = 3.5,legend = "- GREEDY local optimization accepted",bty="n" ) legend(x = 0.35,y = 4.5,legend = "- SA 2 local optimization accepted",bty="n" ) legend(x = 0.35,y = 5.5,legend = "- NO local optimization accepted",bty="n" ) legend(x = 0.35,y = 6.5,legend = "- Totally accepted",bty="n" ) legend(x = 0.35,y = 7.5,legend = "- Totally explored",bty="n" ) legend(x = 0.35,y = 8.5,legend = "- Bayes formula based posterior",bty="n" ) dev.off() if(printable.opt)print(paste("drawing ",workdir,template,"_mlik.jpg",sep = "")) jpeg(file=paste(workdir,template,"_mlik.jpg",sep = "")) plot(ylim = mlik.lim, xlab = "model_id", ylab="MLIK" , statistics1[,1],pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,1],pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,1],pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,1],pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,1],pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,1],pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,1],pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$waic) { if(printable.opt)print(paste("drawing ",workdir,template,"_waic.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_waic.jpg",sep = "")) plot(ylim = waic.lim, xlab = "model_id", ylab="WAIC" ,statistics1[,2],pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,2],pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,2],pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,2],pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,2],pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,2],pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,2],pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$dic) { if(printable.opt)print(paste("drawing ",workdir,template,"_dic.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_dic.jpg",sep = "")) plot(ylim = dic.lim, xlab = "model_id", ylab="DIC" ,statistics1[,3],pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,3],pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,3],pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,3],pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,3],pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,3],pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,3],pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$waic && crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_mlik-waic.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_mlik-waic.jpg",sep = "")) plot(ylim = mlik.lim, xlim = waic.lim,xlab = "WAIC", ylab="MLIK" ,statistics1[,2],statistics1[,1], pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,2],statistics1[,1],pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,2],statistics1[,1],pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,2],statistics1[,1],pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,2],statistics1[,1],pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,2],statistics1[,1],pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,2],statistics1[,1],pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$dic && crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_mlik-dic.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_mlik-dic.jpg",sep = "")) plot(ylim = mlik.lim, xlim = dic.lim,xlab = "DIC", ylab="MLIK" ,statistics1[,3],statistics1[,1], pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,3],statistics1[,1],pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,3],statistics1[,1],pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,3],statistics1[,1],pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,3],statistics1[,1],pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,3],statistics1[,1],pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,3],statistics1[,1],pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$dic && crit$waic) { if(printable.opt)print(paste("drawing ",workdir,template,"_waic-dic.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_waic-dic.jpg",sep = "")) plot(ylim = waic.lim, xlim = dic.lim,xlab = "WAIC", ylab="DIC" ,statistics1[,3],statistics1[,2], pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,3],statistics1[,2],pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,3],statistics1[,2],pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,3],statistics1[,2],pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,3],statistics1[,2],pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,3],statistics1[,2],pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,3],statistics1[,2],pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } norm1<-(sum(statistics1[,4],na.rm = TRUE)) xyz<-which(!is.na(statistics1[,1])) xyz<-intersect(xyz,which(statistics1[,1]!=-10000)) moddee<-which(statistics1[,1]==max(statistics1[,1],na.rm = TRUE))[1] zyx<-array(data = NA,dim = 2 ^(Nvars)+1) nconsum<-sum(exp(-statistics1[moddee,1]+statistics1[xyz,1]),na.rm = TRUE) if( nconsum > 0) { zyx[xyz]<-exp(statistics1[xyz,1]-statistics1[moddee,1])/nconsum y.post.lim<-c(0,max(zyx[xyz])) }else{ zyx[xyz]<-statistics1[xyz,3]/norm1 y.post.lim<-c(0,NaN) } if(is.nan(y.post.lim[2])) y.post.lim[2]<-max(statistics1[,3]/norm1,na.rm = TRUE) if(is.nan(y.post.lim[2])) y.post.lim[2]<-1 if(crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_Pr(MID).jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_Pr(MID).jpg",sep = "")) plot(xlab = "model_id", ylab="Pr(M(model_id)ID)",ylim = y.post.lim, statistics1[,4]/norm1,pch=19, col = 7,cex= 3*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,4]/norm1,pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,4]/norm1,pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,4]/norm1,pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,4]/norm1,pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,4]/norm1,pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,4]/norm1,pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) points(zyx,pch=20, col = 2,cex= 2) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$waic && crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_waic-Pr(MID).jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_waic-Pr(MID).jpg",sep = "")) plot(xlim = waic.lim, ylim = y.post.lim,xlab = "WAIC(M)", ylab="Pr(MID)",statistics1[,2],statistics1[,4]/norm1, pch=19, col = 7,cex= 3*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,2],statistics1[,4]/norm1,pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,2],statistics1[,4]/norm1,pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,2],statistics1[,4]/norm1,pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,2],statistics1[,4]/norm1,pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,2],statistics1[,4]/norm1,pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,2],statistics1[,4]/norm1,pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) points(statistics1[,2],zyx,pch=20, col = 2,cex= 2) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$dic && crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_dic-Pr(MID).jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_dic-Pr(MID).jpg",sep = "")) plot(xlim = dic.lim, ylim = y.post.lim,xlab = "dic(M)", ylab="Pr(MID)",statistics1[,3],statistics1[,4]/norm1, pch=19, col = 7,cex= 3*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,3],statistics1[,4]/norm1,pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/norm+1,0)) points(statistics1[,3],statistics1[,4]/norm1,pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/norm+1,0)) points(statistics1[,3],statistics1[,4]/norm1,pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/norm+1,0)) points(statistics1[,3],statistics1[,4]/norm1,pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/norm+1,0)) points(statistics1[,3],statistics1[,4]/norm1,pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/norm+1,0)) points(statistics1[,3],statistics1[,4]/norm1,pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/norm+1,0)) points(statistics1[,3],zyx,pch=20, col = 2,cex= 2) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$mlik) { jpeg(file=paste(workdir,template,"_mlik-Pr(MID).jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ plot(xlim = mlik.lim,ylim = y.post.lim, xlab = "MLIK", ylab="Pr(MID)",statistics1[,1],statistics1[,4]/norm1, pch=19, col = 7,cex= 3*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(statistics1[,1],statistics1[,4]/norm1,pch=8, col = ifelse(statistics1[,4]>0,5,0),cex= ifelse(statistics1[,4]>0,statistics1[,4]/statistics1[,3]*3 +1,0)) points(statistics1[,1],statistics1[,4]/norm1,pch=2, col = ifelse(statistics1[,10]>0,2,0),cex= ifelse(statistics1[,10]>0,statistics1[,10]/statistics1[,4]*3 +1,0)) points(statistics1[,1],statistics1[,4]/norm1,pch=3, col = ifelse(statistics1[,11]>0,3,0),cex= ifelse(statistics1[,11]>0,statistics1[,11]/statistics1[,4]*3 +1,0)) points(statistics1[,1],statistics1[,4]/norm1,pch=4, col = ifelse(statistics1[,12]>0,4,0),cex= ifelse(statistics1[,12]>0,statistics1[,12]/statistics1[,4]*3 +1,0)) points(statistics1[,1],statistics1[,4]/norm1,pch=6, col = ifelse(statistics1[,13]>0,6,0),cex= ifelse(statistics1[,13]>0,statistics1[,13]/statistics1[,4]*3 +1,0)) points(statistics1[,1],statistics1[,4]/norm1,pch=1, col = ifelse(statistics1[,14]>0,1,0),cex= ifelse(statistics1[,14]>0,statistics1[,14]/statistics1[,4]*3 +1,0)) points(statistics1[,1],zyx,pch=20, col = 2,cex= 2) dev.off() })), abort = function(){onerr<-TRUE})}) } if(draw_dist) { if(printable.opt)print("Calculating distance matrix, may take a significant amount of time, may also produce errors if your machine does not have enough memory") capture.output({withRestarts(tryCatch(capture.output({ lldd<-2^(Nvars)+1 moddee<-which(zyx==max(zyx,na.rm = TRUE)) iidr<-which(statistics1[moddee,1]==max(statistics1[moddee,1],na.rm = TRUE)) iidr<-which(statistics1[moddee[iidr],3]==max(statistics1[moddee[iidr],3],na.rm = TRUE)) moddee<-moddee[iidr] if(length(moddee)>1) moddee<-moddee[1] vec<-dectobit.alt(moddee-1) varcur<-c(array(0,dim = (Nvars -length(vec))),vec) df = data.frame(varcur) for(i in 1:(lldd-1)) { if(i==moddee) { next }else { vec<-dectobit.alt(i-1) varcur<-c(array(0,dim = (Nvars -length(vec))),vec) df<-cbind(df,varcur) #colnames(x = df)[i] <- paste("solution ",i) } } df<-t(df) x<-dist(x = df,method = "binary") dists<-c(0,x[1:lldd-1]) #length(dists) #which(dists==0) })), abort = function(){onerr<-TRUE})}) if(crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_distance-mlik.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_distance-MLIK.jpg",sep = "")) plot(xlab = "x = |M* - M| = distance from the main mode", ylab="MLIK(M)",ylim = mlik.lim,y=c(statistics1[moddee,1],statistics1[-moddee,1]),x=dists,pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(y=c(statistics1[moddee,1],statistics1[-moddee,1]),x=dists,pch=8, col = ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,5,0),cex= ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,c(statistics1[moddee,4],statistics1[-moddee,4])/norm+1,0)) points(y=c(statistics1[moddee,1],statistics1[-moddee,1]),x=dists,pch=2, col = ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,2,0),cex= ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,c(statistics1[moddee,10],statistics1[-moddee,10])/norm+1,0)) points(y=c(statistics1[moddee,1],statistics1[-moddee,1]),x=dists,pch=3, col = ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,3,0),cex= ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,c(statistics1[moddee,11],statistics1[-moddee,11])/norm+1,0)) points(y=c(statistics1[moddee,1],statistics1[-moddee,1]),x=dists,pch=4, col = ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,4,0),cex= ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,c(statistics1[moddee,12],statistics1[-moddee,12])/norm+1,0)) points(y=c(statistics1[moddee,1],statistics1[-moddee,1]),x=dists,pch=6, col = ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,6,0),cex= ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,c(statistics1[moddee,13],statistics1[-moddee,13])/norm+1,0)) points(y=c(statistics1[moddee,1],statistics1[-moddee,1]),x=dists,pch=1, col = ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,1,0),cex= ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,c(statistics1[moddee,14],statistics1[-moddee,14])/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$waic) { if(printable.opt)print(paste("drawing ",workdir,template,"_distance-waic.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_distance-waic.jpg",sep = "")) plot(xlab = "x = |M* - M| = distance from the main mode", ylab="WAIC(M)",ylim = waic.lim,y=c(statistics1[moddee,2],statistics1[-moddee,2]),x=dists,pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(y=c(statistics1[moddee,2],statistics1[-moddee,2]),x=dists,pch=8, col = ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,5,0),cex= ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,c(statistics1[moddee,4],statistics1[-moddee,4])/norm+1,0)) points(y=c(statistics1[moddee,2],statistics1[-moddee,2]),x=dists,pch=2, col = ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,2,0),cex= ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,c(statistics1[moddee,10],statistics1[-moddee,10])/norm+1,0)) points(y=c(statistics1[moddee,2],statistics1[-moddee,2]),x=dists,pch=3, col = ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,3,0),cex= ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,c(statistics1[moddee,11],statistics1[-moddee,11])/norm+1,0)) points(y=c(statistics1[moddee,2],statistics1[-moddee,2]),x=dists,pch=4, col = ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,4,0),cex= ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,c(statistics1[moddee,12],statistics1[-moddee,12])/norm+1,0)) points(y=c(statistics1[moddee,2],statistics1[-moddee,2]),x=dists,pch=6, col = ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,6,0),cex= ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,c(statistics1[moddee,13],statistics1[-moddee,13])/norm+1,0)) points(y=c(statistics1[moddee,2],statistics1[-moddee,2]),x=dists,pch=1, col = ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,1,0),cex= ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,c(statistics1[moddee,14],statistics1[-moddee,14])/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$dic) { if(printable.opt)print(paste("drawing ",workdir,template,"_distance-dic.jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_distance-dic.jpg",sep = "")) plot(xlab = "x = |M* - M| = distance from the main mode", ylab="DIC(M)",ylim = dic.lim,y=c(statistics1[moddee,3],statistics1[-moddee,3]),x=dists,pch=19, col = 7,cex= 1*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(y=c(statistics1[moddee,3],statistics1[-moddee,3]),x=dists,pch=8, col = ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,5,0),cex= ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,c(statistics1[moddee,4],statistics1[-moddee,4])/norm+1,0)) points(y=c(statistics1[moddee,3],statistics1[-moddee,3]),x=dists,pch=2, col = ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,2,0),cex= ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,c(statistics1[moddee,10],statistics1[-moddee,10])/norm+1,0)) points(y=c(statistics1[moddee,3],statistics1[-moddee,3]),x=dists,pch=3, col = ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,3,0),cex= ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,c(statistics1[moddee,11],statistics1[-moddee,11])/norm+1,0)) points(y=c(statistics1[moddee,3],statistics1[-moddee,3]),x=dists,pch=4, col = ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,4,0),cex= ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,c(statistics1[moddee,12],statistics1[-moddee,12])/norm+1,0)) points(y=c(statistics1[moddee,3],statistics1[-moddee,3]),x=dists,pch=6, col = ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,6,0),cex= ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,c(statistics1[moddee,13],statistics1[-moddee,13])/norm+1,0)) points(y=c(statistics1[moddee,3],statistics1[-moddee,3]),x=dists,pch=1, col = ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,1,0),cex= ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,c(statistics1[moddee,14],statistics1[-moddee,14])/norm+1,0)) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_distance-Pr(MID).jpg",sep = "")) capture.output({withRestarts(tryCatch(capture.output({ jpeg(file=paste(workdir,template,"_distance-Pr(MID).jpg",sep = "")) plot(xlab = "x = |M* - M| = distance from the main mode",ylim = y.post.lim, ylab="Pr(MID)",y=c(statistics1[moddee,4],statistics1[-moddee,4])/norm1,x=dists,pch=19, col = 7,cex= 3*((statistics1[,9]+statistics1[,5]+statistics1[,6]+statistics1[,7]+statistics1[,8])>0)) points(y=c(statistics1[moddee,4],statistics1[-moddee,4])/norm1,x=dists,pch=8, col = ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,5,0),cex= ifelse(c(statistics1[moddee,4],statistics1[-moddee,4])>0,c(statistics1[moddee,4],statistics1[-moddee,4])/norm+1,0)) points(y=c(statistics1[moddee,4],statistics1[-moddee,4])/norm1,x=dists,pch=2, col = ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,2,0),cex= ifelse(c(statistics1[moddee,10],statistics1[-moddee,10])>0,c(statistics1[moddee,10],statistics1[-moddee,10])/norm+1,0)) points(y=c(statistics1[moddee,4],statistics1[-moddee,4])/norm1,x=dists,pch=3, col = ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,3,0),cex= ifelse(c(statistics1[moddee,11],statistics1[-moddee,11])>0,c(statistics1[moddee,11],statistics1[-moddee,11])/norm+1,0)) points(y=c(statistics1[moddee,4],statistics1[-moddee,4])/norm1,x=dists,pch=4, col = ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,4,0),cex= ifelse(c(statistics1[moddee,12],statistics1[-moddee,12])>0,c(statistics1[moddee,12],statistics1[-moddee,12])/norm+1,0)) points(y=c(statistics1[moddee,4],statistics1[-moddee,4])/norm1,x=dists,pch=6, col = ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,6,0),cex= ifelse(c(statistics1[moddee,13],statistics1[-moddee,13])>0,c(statistics1[moddee,13],statistics1[-moddee,13])/norm+1,0)) points(y=c(statistics1[moddee,4],statistics1[-moddee,4])/norm1,x=dists,pch=1, col = ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,1,0),cex= ifelse(c(statistics1[moddee,14],statistics1[-moddee,14])>0,c(statistics1[moddee,14],statistics1[-moddee,14])/norm+1,0)) points(y= c(zyx[moddee],zyx[-moddee]),x=dists,pch=20, col = 2,cex= 2) dev.off() })), abort = function(){onerr<-TRUE})}) } if(crit$mlik) { if(printable.opt)print(paste("drawing ",workdir,template,"_mds-Pr(MID).jpg",sep = "")) if(printable.opt)print("Calculating distance matrix, may take a significant amount of time, may also produce errors if your machine does not have enough memory") capture.output({withRestarts(tryCatch(capture.output({ # further address subset of the set of the best solution of cardinality 1024 if(lldd>mds_size) { lldd<-mds_size quant<-(sort(statistics1[,1],decreasing = TRUE)[lldd+1]) indmds<-which(statistics1[,1]>quant) length(indmds) }else{ quant<- -Inf indmds<-1:(lldd) } vec<-dectobit.alt(moddee-1) varcur<-c(array(0,dim = (Nvars -length(vec))),vec) df = data.frame(varcur) for(i in 1:(lldd-1)) { if(i==moddee) { next }else { vec<-dectobit.alt(indmds[i]-1) varcur<-c(array(0,dim = (Nvars -length(vec))),vec) df<-cbind(df,varcur) #colnames(x = df)[i] <- paste("solution ",i) } } df<-t(df) x<-dist(x = df,method = "binary") dists<-c(0,x[1:lldd-1]) fit.mds <- cmdscale(d = x,eig=FALSE, k=2) # k is the number of dim #fit.mds # view results x.mds <- fit.mds[,1] y.mds <- fit.mds[,2] jpeg(file=paste(workdir,template,"_mds_map_posteriors.jpg",sep = "")) plot(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=2,pch = 10,cex= c(zyx[moddee],zyx[setdiff(indmds, moddee)])*50,0) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=2,pch = 10,cex= c(zyx[moddee],zyx[setdiff(indmds, moddee)])*50,0) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=5,pch = 8,cex= ifelse(c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])>0,c(statistics1[moddee,4],statistics1[setdiff(indmds, moddee),4])/norm1*50,0)) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=7,pch = 19,cex= 0.4) points(x.mds[], y.mds[], xlab="Coordinate 1", ylab="Coordinate 2", main="Metric MDS", type="p",col=1,pch = 19,cex= 0.01) dev.off() })), abort = function(){onerr<-TRUE})}) } } }, #calculates posterior probabilities based on a current search post_proceed_results = function(statistics1) { xyz<-which(!is.na(statistics1[,1])) g.results[4,2] <- length(xyz) xyz<-intersect(xyz,which(statistics1[,1]!=-10000)) moddee<-which(statistics1[,1]==max(statistics1[,1],na.rm = TRUE))[1] zyx<-array(data = NA,dim = length(statistics1[,1])) nconsum<-sum(exp(-statistics1[moddee,1]+statistics1[xyz,1]),na.rm = TRUE) if( nconsum > 0) { zyx[xyz]<-exp(statistics1[xyz,1]-statistics1[moddee,1])/nconsum }else{ nnnorm<-sum(statistics1[xyz,4],na.rm = T) if(nnnorm==0) nnnorm <- 1 zyx[xyz]<-statistics1[xyz,4]/nnnorm } statistics1[,15]<-zyx lldd<-2^(Nvars)+1 p.post<-array(data = 0,dim = Nvars) for(i in xyz) { vec<-dectobit(i-1) varcur<-c(array(0,dim = (Nvars -length(vec))),vec) p.post <- (p.post + varcur*statistics1[i,15]) } if(!exists("p.post")||is.null(p.post)||sum(p.post,na.rm = T)==0 || sum(p.post,na.rm = T)>Nvars) { p.post <- array(data = 0.5,dim = Nvars) } return(list(p.post = p.post, m.post = zyx, s.mass = sum(exp(statistics1[xyz,1]),na.rm = TRUE))) }, post_proceed_results_hash = function(hashStat) { if(save.beta) { if(allow_offsprings==0||Nvars>Nvars.max) { if(fparam[1]=="Const") { linx<-Nvars + 3 }else { linx<-Nvars + 1 + 3 } }else { if(fparam[1]=="Const") { linx<-Nvars.max + 3 }else { linx<-Nvars.max + 1 + 3 } } }else { linx <- 3 } lHash<-length(hashStat) mliks <- values(hashStat)[which((1:(lHash * linx)) %% linx == 1)] xyz<-which(mliks!=-10000) g.results[4,2] <- lHash moddee<-which( mliks ==max( mliks ,na.rm = TRUE))[1] zyx<-array(data = NA,dim = lHash) nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) if( nconsum > 0) { zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum }else{ diff<-0-mliks[moddee] mliks<-mliks+diff nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum } keysarr <- as.array(keys(hashStat)) p.post<-array(data = 0,dim = Nvars) for(i in 1:lHash) { if(is.na(zyx[i])) { del(x = keysarr[i],hash = hashStat) next } #vec<-dectobit(strtoi(keysarr[i], base = 0L)-1) # we will have to write some function that would handle laaaaargeee integers! varcur<- as.integer(strsplit(keysarr[i],split = "")[[1]]) if(length(which(is.na(varcur)))>0) { del(x = keysarr[i],hash = hashStat) next } if(length(varcur)>Nvars) varcur<-varcur[1:Nvars] p.post <- (p.post + varcur*zyx[i]) } if(!exists("p.post") || is.null(p.post) || sum(p.post,na.rm = T)==0 || sum(p.post,na.rm = T)>Nvars) { p.post <- array(data = 0.5,dim = Nvars) } #values(hashStat)[which((1:(lHash * linx)) %%linx == 4)]<-zyx return(list(p.post = p.post, m.post = zyx, s.mass = sum(exp(mliks),na.rm = TRUE))) }, calculate_quality_measures = function(vect,n,truth) { rmse.pi <- array(data = 0,Nvars) bias.pi <- array(data = 0,Nvars) for(i in 1:n) { bias.pi <- (bias.pi + (vect[,i]-truth)) rmse.pi <- (rmse.pi + (vect[,i]^2+truth^2 - 2*vect[,i]*truth)) } bias.pi <- bias.pi/n rmse.pi <- rmse.pi/n return(list(bias.pi = bias.pi, rmse.pi = rmse.pi)) }, forecast=function(covariates,nvars,link.g) { ids<-which(!is.na(statistics1[,15])) res<-0 for(i in ids) { res<-res + statistics1[i,15]*link.g(sum(statistics1[i,16:nvars]*covariates,na.rm = T)) } return(list(forecast=res)) }, forecast.matrix=function(covariates,link.g) { res<-NULL ncases <- dim(covariates)[1] formula.cur<-as.formula(paste(fparam[1],"/2 ~",paste0(fparam,collapse = "+"))) nvars <- Nvars if(save.beta) { if(exists("statistics1")) { ids<-which(!is.na(statistics1[,15])) lids<-length(ids) statistics1[ids,(17:(nvars+16))][which(is.na(statistics1[ids,(17:(nvars+16))]))]<-0 res<-t(statistics1[ids,15])%*%g(statistics1[ids,(16:(nvars+16))]%*%t(model.matrix(object = formula.cur,data = covariates))) }else if(exists("hashStat")) { if(allow_offsprings==0) { if(fparam[1]=="Const") { linx<-Nvars + 3 }else { linx<-Nvars + 1 + 3 } }else { if(fparam[1]=="Const") { linx<-Nvars.max + 3 }else { linx<-Nvars.max + 1 + 3 } } lHash<-length(hashStat) mliks <- values(hashStat)[which((1:(lHash * linx)) %% linx == 1)] betas <- values(hashStat)[which((1:(lHash * linx)) %% linx == 4)] for(i in 1:(Nvars-1)) { betas<-cbind(betas,values(hashStat)[which((1:(lHash * linx)) %% linx == (4+i))]) } betas<-cbind(betas,values(hashStat)[which((1:(lHash * linx)) %% linx == (0))]) betas[which(is.na(betas))]<-0 xyz<-which(mliks!=-10000) g.results[4,2] <- lHash moddee<-which( mliks ==max( mliks ,na.rm = TRUE))[1] zyx<-array(data = NA,dim = lHash) nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) if( nconsum > 0) { zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum }else{ diff<-0-mliks[moddee] mliks<-mliks+diff nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum } res<-t(zyx)%*%g(betas%*%t(model.matrix(object = formula.cur,data = covariates))) } }else { warning("No betas were saved. Prediction is impossible. Please change the search parameters and run the search again.") } return(list(forecast=res)) }, forecast.matrix.na=function(covariates,link.g,betas,mliks.in) { formula2<-as.formula(paste(fparam[1],"/2 ~ -1+",paste0(fparam,collapse = "+"))) current.na.action <- options('na.action') options(na.action='na.pass') covariates<- as.data.frame(model.matrix(object = formula2,data = covariates,na.action=na.include)) options('na.action'= current.na.action$na.action) names(covariates)<-stri_replace_all(str = names(covariates),fixed = "I",replacement = "Z") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "(",replacement = "o") names(covariates)<-stri_replace_all(str = names(covariates),fixed = ")",replacement = "c") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "+",replacement = "p") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "-",replacement = "m") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "*",replacement = "M") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "|",replacement = "a") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "&",replacement = "A") names(covariates)<-stri_replace_all(str = names(covariates),fixed = ",",replacement = "k") names(covariates)<-stri_replace_all(str = names(covariates),fixed = " ",replacement = "") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "\n",replacement = "") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "cFALSE",replacement = "c") names(covariates)<-stri_replace_all(str = names(covariates),fixed = "cTRUE",replacement = "c") fparam.tmp<-stri_replace_all(str = fparam,fixed = "I",replacement = "Z") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = "(",replacement = "o") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = ")",replacement = "c") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = "+",replacement = "p") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = "-",replacement = "m") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = ",",replacement = "k") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = "*",replacement = "M") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = "|",replacement = "a") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = "&",replacement = "A") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = " ",replacement = "") fparam.tmp<-stri_replace_all(str = fparam.tmp,fixed = "\n",replacement = "") formula.cur<-as.formula(paste(fparam.tmp[1],"/2 ~",paste0(fparam.tmp,collapse = "+"))) na.ids<-matrix(data = 0,nrow = dim(covariates)[1],ncol = dim(covariates)[2]) na.ids[which(is.na(covariates))]<-1 na.bc<-colSums(na.ids) ids.betas<-NULL res.na<-array(NA, dim(covariates)[1]) for(i in which(na.bc>0)) { if(((names(covariates)[i] %in% fparam.tmp)||(paste0("Zo",names(covariates)[i],"c",collapse = "")%in%fparam.tmp))) { ids.betas<-c(ids.betas,which(fparam.tmp==names(covariates)[i] | fparam.tmp == paste0("Zo",names(covariates)[i],"c",collapse = ""))) next } else { na.ids[which(is.na(covariates[,i])),i]<-0 na.bc[i]<-0 } } na.br<-rowSums(na.ids) lv.br<-sort(unique(na.br)) if(lv.br[1]==0) { ids<-which(na.br==0) mliks<-mliks.in xyz<-which(mliks!=-10000) moddee<-which( mliks ==max( mliks ,na.rm = TRUE))[1] zyx<-array(data = NA,dim = length(mliks)) nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) betas1<-betas betas1[which(is.na(betas1))]<-0 if( nconsum > 0) { zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum }else{ diff<-0-mliks[moddee] mliks<-mliks+diff nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum } covariates1<- covariates[ids,] res<-t(zyx)%*%g(betas1%*%t(model.matrix(object = as.formula(formula.cur),data = covariates1))) res.na[ids]<-res rm(mliks) rm(res) rm(zyx) rm(xyz) rm(covariates1) rm(betas1) } k.b<-0 for(i in which(na.bc>0)) { k.b<-k.b+1 w.ids<-which(!is.na(betas[,(ids.betas[k.b]+1)])) for(j in lv.br) { if(j==0) next ids<-((which(na.ids[,i]==1 & na.br==j & is.na(res.na)))) if(length(ids)==0) next var.del <- NULL for(iii in which(na.bc>0)) { if(iii==i) next if(sum(na.ids[ids,iii])>0) { var.del<-(which(fparam.tmp==names(covariates)[iii] | fparam.tmp == paste0("I(",names(covariates)[iii],")",collapse = ""))) if(length(var.del>0)) { w.ids<-union(w.ids,which(!is.na(betas[,(var.del+1)]))) } } } mliks<-mliks.in[-w.ids] if(length(mliks)==0) { warning("not enough models for bagging in prediction. please train the model longer!") return(-1) } xyz<-which(mliks!=-10000) moddee<-which( mliks ==max( mliks ,na.rm = TRUE))[1] zyx<-array(data = NA,dim = length(mliks)) nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) betas1<-betas[-w.ids,] betas1[which(is.na(betas1))]<-0 if( nconsum > 0) { zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum }else{ diff<-0-mliks[moddee] mliks<-mliks+diff nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum } covariates1<- as.matrix(covariates[ids,]) covariates1[which(is.na(covariates1))]<-0 covariates1<-as.data.frame(covariates1) res<-t(zyx)%*%g(betas1%*%t(model.matrix(object = formula.cur,data = covariates1))) res.na[ids]<-res rm(mliks) rm(zyx) rm(xyz) rm(res) rm(covariates1) } } return(list(forecast = res.na)) }, forecast.matrix.na.fast=function(covariates,link.g,betas,mliks.in) { formula.cur<-as.formula(paste(fparam[1],"/2 ~",paste0(fparam,collapse = "+"))) na.ids<-matrix(data = 0,nrow = dim(covariates)[1],ncol = dim(covariates)[2]) na.ids[which(is.na(covariates))]<-1 na.bc<-colSums(na.ids) ids.betas<-NULL res.na<-array(NA, dim(covariates)[1]) for(i in which(na.bc>0)) { if(((names(covariates)[i] %in% fparam)||(paste0("I(",names(covariates)[i],")",collapse = "")%in%fparam))) { ids.betas<-c(ids.betas,which(fparam==names(covariates)[i] | fparam == paste0("I(",names(covariates)[i],")",collapse = ""))) next } else { na.ids[which(is.na(covariates[,i])),i]<-0 na.bc[i]<-0 } } na.br<-rowSums(na.ids) lv.br<-sort(unique(na.br)) if(lv.br[1]==0) { ids<-which(na.br==0) mliks<-mliks.in xyz<-which(mliks!=-10000) moddee<-which( mliks ==max( mliks ,na.rm = TRUE))[1] zyx<-array(data = NA,dim = lHash) nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) betas1<-betas betas1[which(is.na(betas1))]<-0 if( nconsum > 0) { zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum }else{ diff<-0-mliks[moddee] mliks<-mliks+diff nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum } covariates1<- covariates[ids,] res<-t(zyx)%*%g(betas1%*%t(model.matrix(object = formula.cur,data = covariates1))) res.na[ids]<-res rm(mliks) rm(res) rm(zyx) rm(xyz) rm(covariates1) rm(betas1) } ids<-(which(is.na(res.na))) for(iii in which(na.bc>0)) { if(sum(na.ids[ids,iii])>0) { var.del<-(which(fparam==names(covariates)[iii] | fparam == paste0("I(",names(covariates)[iii],")",collapse = ""))) if(length(var.del>0)) { w.ids<-union(w.ids,which(!is.na(betas[,(var.del+1)]))) } } } mliks<-mliks.in[-w.ids] if(length(mliks)==0) { warning("not enough models for bagging in prediction. please train the model longer!") return(-1) } xyz<-which(mliks!=-10000) moddee<-which( mliks ==max( mliks ,na.rm = TRUE))[1] zyx<-array(data = NA,dim = length(mliks)) nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) betas1<-betas[-w.ids,] betas1[which(is.na(betas1))]<-0 if( nconsum > 0) { zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum }else{ diff<-0-mliks[moddee] mliks<-mliks+diff nconsum<-sum(exp(- mliks[moddee]+ mliks[xyz]),na.rm = TRUE) zyx[xyz]<-exp(mliks[xyz]- mliks[moddee])/nconsum } covariates1<- as.matrix(covariates[ids,]) covariates1[which(is.na(covariates1))]<-0 covariates1<-as.data.frame(covariates1) res<-t(zyx)%*%g(betas1%*%t(model.matrix(object = formula.cur,data = covariates1))) res.na[ids]<-res rm(mliks) rm(zyx) rm(xyz) rm(res) rm(covariates1) return(list(forecast = res.na)) } ) ) options(bigmemory.typecast.warning=FALSE)