###############################################################################
### Table1 function 
###############################################################################

myTable1=function(dat,contvar=NULL,catvar=NULL,splitvar,mydec=1,pdec=3,
                docaption=F,my.docaption=NULL,
                tsdec=2,rowPCT=F,prmsd=NULL,
                my.loc="./tab1.tex", mysize="scriptsize",
                myinsert.bottom=NULL, Trace=F, splitlabel=NULL, showtable=F,
                mysim.p=F, myB=200, mylongtable=F, mylandscape=F,
                myeps=0.001,bracket=F,mylevels=NULL,contTest=NULL,
                catTest=NULL, Test=T, chi.correct=F,
                exceloutput=F, exceloutputName=NULL,
                latexoutput=F, ... ){   
  #This function computes summary of variables as need for Table 1 with test and P 
  #for continuous and categorical values:updated Dec 24, 2016
  #Takes a longer time to work for large data set    
  
  #if(is.null(catTest)){catTest=rep("chisq.t",length(catvar))} 
  #if(is.null(contTest)){contTest=rep("aov.t",length(contvar))}
  if(is.null(prmsd)){prmsd=c(rep("mean",length(contvar)))}
  
  mycaption="Summary of patients' variables across "  

  if(is.null(contTest)&!is.null(contvar))contTest=rep("aov.t",length(contvar))
  if(is.null(catTest)&!is.null(catvar))catTest=rep("chisq.t",length(contvar))
  
  
  tagnam=data.frame(aov.t="ANOVA test",fisher.t="Fisher exact test",
                    chisq.t="Chi-squared test",t.test="T-test",
                    kruskal.t="Kruskal-Wallis test"
                    ,wilcox.t="Wilcoxon ranked sum test")
  
  mysel=unique(c(contTest, catTest))
  tagnam1=tagnam%>%select(one_of(mysel))
  ttgg=as.matrix(tagnam1)
  bottom=NULL
  n.ttgg=ncol(ttgg)
  bb.prmsd=NULL
  
  prmsd.med="{\\scriptsize $a$\\ }{\\bf $b$\\ }{\\scriptsize $c$\\ } represent the lower quartile $a$, the median $b$, and 
the upper quartile $c$\\ for continuous variables."
  prmsd.mean="~~$x\\pm s$ represents $\\bar{X}\\pm 1$ SD."
  if(bracket==T)prmsd.mean="~~$x(s)$ represents $\\bar{X}(1SD)$."
  prmsd.cat=" Numbers after percents are counts." 
 test.used="\\indent Tests used:"
 if("median"%in%prmsd){
   bb.prmsd=paste(bb.prmsd,prmsd.med,sep="")
 }
 if("mean"%in%prmsd){
   bb.prmsd=paste(bb.prmsd,prmsd.mean,sep="")
 }
 
 if(!is.null(catvar)){
   bb.prmsd=paste(bb.prmsd,prmsd.cat,sep="")
 }
 
 
  
  for( i in 1:n.ttgg){
    if(i==n.ttgg){point="."}else{point=","}
    bottom=c(paste(bottom," \\textsuperscript{\\normalfont ", i,"} ",
                   ttgg[i],point , " ",sep="") )
  }

 if(Test==T){
 bottom=paste("\\noindent ",bb.prmsd," ", test.used ,bottom,sep="") }else{
 bottom=paste("\\noindent ",bb.prmsd,sep="")
 }
 #; \\textsuperscript{\\normalfont 2} Pearson test; 
        
#\\textsuperscript{\\normalfont 3}Proportional odds likelihood ratio test"
  
  
  #Make data compatible with dplyr data frames to use colwise
  #dat=dat%>%tbl_df
  
  
  #my mean function
  catnamhold=NULL
  mathold=NULL
  cont.row.names=NULL
  rownull=NULL
 
  
  #Function for mean
  mymean=function(x,mydec=mydec,...){
    m=round(mean(x,na.rm=T),digits=mydec)
    s=round(sd(x,na.rm=T),digits=mydec)
    n=sum(!is.na(x))
    rr=paste(m,"(",s,")",sep="")
    pm=paste(m,"$\\pm$",s,sep="")
    if(bracket==F)rr=pm
    
    return(rr)
  }
  # Function for mean ends 
  
  
  #Function for median   
  mymedian=function(x,mydec=mydec,...){
    m=round(quantile(x,na.rm=T),digits=mydec)
    lq=m[[2]]
    mq=m[[3]]
    uq=m[[4]]
    n=sum(!is.na(x))
    rr=paste("{\\scriptsize",lq,"~}", "{\\bf",mq,"}~","{\\scriptsize",uq,"}",sep="")
    return(rr)
  }
  #Make all catvars factors
  for(k in catvar){
    tmp.lbl=label(dat[[k]])
    dat[[k]]<-factor(dat[[k]]) # check how NA's under factors behave
    label(dat[[k]])<-tmp.lbl
  }
  

  if(is.null(mylevels)){
    dat[[splitvar]]=factor(dat[[splitvar]]) # changing split var to factors
  }else{
    dat[[splitvar]]=factor(dat[[splitvar]],levels=mylevels)                                                       
  }
  #Test to exclude contvars if there are only categorical variables 
  if(is.null(contvar)){ tabmean1<-NULL      }else{
    for( jt in contvar){
      myrow=tapply(dat[[jt]], dat[[splitvar]],function(x){mymean(x,mydec=mydec)})
      myrowmedian=tapply(dat[[jt]], dat[[splitvar]],function(x){mymedian(x,mydec=mydec)})
      if(Trace==T)cat("Variable ",jt," is done","\n")
      pos.n=which(contvar==jt)
      #Need to test for spellings of prmsd var in function and give warning later project
      if(prmsd[pos.n]=="median"){myrow<-myrowmedian}
      rownull= rbind(rownull,myrow)
      k11<-jt
      #if( !(is.na(label(dat[[jt]])))) label(dat[[jt]])->k11
      if( (label(dat[[jt]])!="")) {
        if(Trace==T) cat(label(dat[[jt]]),"\n")
        k11=label(dat[[jt]])
        if(Trace==T)cat(k11," should be same as above","\n")
      }
      k11=paste("\\bf{",k11,"}",sep="")
      #         cat(k11,"\n")
      cont.row.names=c(cont.row.names,k11)
    }
    
    
    
  if(Trace==T)cat("Now adding overall","\n") 
    mymean1=function(x)mymean(x,mydec=mydec)
    mymedian1=function(x)mymedian(x,mydec=mydec)                               
    myN=function(x)(sum(!is.na(x)))
    Combined=unname(unlist( colwise(mymean1)(dat[,contvar])   ))
    Combmed=unname(unlist( colwise(mymedian1)(dat[,contvar])   ))
    mean.n=sum(prmsd%in%"mean")
    median.n=sum(prmsd%in%"median") 
    #if((mean.n==0 & median.n !=0)){Combined=Combmed
    #}else if((median.n==0 & mean.n !=0)){Combined=Combined
    #}else{                                 
      
    #  Combined=c(Combined[1:mean.n],Combmed[-c(1:mean.n)])
      
    #} 
    
    
    for(i in 1:length(prmsd)){
      if(prmsd[i]=="mean"){
        Combined=Combined
      }else{ 
        Combined[i]=Combmed[i]
      }
      }
    #Need to test for spellings of prmsd var in function and give warning later project
    if(Trace==T)cat("Step 1 is done","\n")#steps 1/10 
    
    N=unname(unlist(colwise(myN)(dat[,contvar])))
    tabmean= cbind(N,rownull,Combined)
    
    rownames(tabmean)<-NULL
    tabmean1=cbind(cont.row.names,tabmean)   
    
    if(Trace==T)cat("Step 2 is done","\n")#steps 2/10 
  }#end of test for if(is.null(contvar))
  
  # Categorical summaries start here
  if(is.null(mylevels)){
    if(Trace==T)cat("Step 3 is done","\n")#steps 3/10     
    dat[[splitvar]]=factor(dat[[splitvar]])
    if(Trace==T)cat("Step 4 is done","\n")#steps 4/10 
  }else{
    dat[[splitvar]]=factor(dat[[splitvar]],levels=mylevels)
    
    if(Trace==T)cat("Step 5 is done","\n")#steps 5/10 
    
  }   # This is redundant think of taking out
  
  
  #Take categorical summaries out if there are only cont vars invlove in table by setting tabcat to NULL
  if(is.null(catvar)){mathold<-NULL}else{
    
    for(k in catvar){
      
      catab=t(table(dat[[splitvar]],dat[[k]]))
      if(Trace==T)cat("Step 6 is done","\n")#steps 6/10 
      coltot=apply(catab, 2,sum)
      rowtot=apply(catab,1,sum)
      
      allN=sum(rowtot)
      combtotalp=round((rowtot/allN)*100,0)
      n.v=nlevels(dat[[k]])
      if(Trace==T)cat("Step 7 is done","\n")#steps 7/10 
      if(rowPCT==T)combtotalp=c(rep(100,n.v))    
      
      #if(rowPCT==T)combtotalp=c(100,100)    
      colpct=round(sweep(catab,2,coltot,"/")*100,0) # column percentages with sweep matric and vector division BCareful
      rowpct=round(sweep(catab,1,rowtot,"/")*100,0) #row pct with sweep
      #colpct=round((catab/coltot)*100,2)
      
      #paste col totals and pct
      
      n.col=nlevels(dat[[splitvar]])
      
      n.row=nlevels(dat[[k]])
      if(Trace==T)cat("Step 8:levels shd be greater than 2 nlevel=",n.row,"\n")#steps 8/10 
      r.names=rownames(catab)
      mat1=matrix(rep("",(n.row+1)*(n.col+1) ),nrow=n.row+1)
      
      #changing row totals to columns to cbind to catab
      r.tot=as.vector(rowtot)
      rp.tot=as.vector(combtotalp)
      
      
      catab1=cbind(catab,r.tot)
      if(rowPCT==T) colpct=rowpct
      colpct1=cbind(colpct,rp.tot)
      #how do we get row percentages
      
      for(i in 2:(n.row+1)){
        for(j in 1:(n.col+1)){ 
          mat1[i,j]<-paste(colpct1[i-1,j],"\\%~","(",catab1[i-1,j],")",sep="")   
        }}
      
      if(Trace==T)cat("Step 9 is done...","\n")#steps 9/10 
      rnm=paste("~~~~",r.names,sep="")
      k1<-k
      

      
if( Hmisc::label(dat[[k]])!="" ){  #This how labels names are swap with actual rownames
        if(Trace==T)cat("Label of ",k," is =",label(dat[[k]]),"\n")
        k1=label(dat[[k]])
        if(Trace==T)cat( k1," should be same as previous","\n")
      }
      
      k1=paste("\\bf{",k1,"}",sep="")
      r.names1=c(k1,rnm)       # You may bold this cat names in future
      
      catnamhold=c(catnamhold, r.names1)
      N2=rep("",n.row)
      N3=sum(!is.na(dat[[k]]))
      N4=c(N3,N2)
      mat2=cbind(r.names1,N4,mat1)
      mathold=rbind(mathold,mat2)
    }
    
    colnames(mathold)<-NULL
  } #end of test for excluding catvar summaries 
  
  alltabb= rbind(tabmean1,mathold)
  
  #computing test statistics
  #pt.test, 1) t.test, sign.rank, 2) rank.sum, 3) kruskal.wallis, 4) anova, 5) chisq.test, 6) chisq4trend
  ## myesp=.001    
  ## contvar=c("age.at.start","last.score","L.F.score","time.L.F"  )
  ## splitvar="Fitness.Group"
  ## mydec=1
  
  TShold.anova<-NULL # aov.t
  TShold.kruskal<-NULL # kruskal.t
  TShold.ranksum<-NULL #wilcox.t
  TShold.ttest<-NULL# t.test
  TShold.chisq<-NULL # chisq.t
  TShold.fisher<-NULL #fisher.t
  
  #Test to set TScont to NULL if there are no contvars
  
  if(is.null(contvar)){TScont<-NULL}else{    
    
    lll=nlevels(dat[[splitvar]])
    if(lll < 3) {
      
      #Do the t.test
      if( any(contTest%in%"t.test")){    #Test to stop running t test if not needed                                       
        for(t.nn in contvar){
          
          vartest=with(dat,var.test(as.formula(paste(t.nn,splitvar,sep="~")))) # Variance test to check which variance to use pooled or sattarwaite 
          var.p=vartest[["p.value"]]
          myvar.equal=T
          if(var.p < .05)myvar.equal<-F
          
          tmd=with(dat,t.test(as.formula(paste(t.nn,splitvar,sep="~")),var.equal=myvar.equal))
          
          ts.t=round(tmd[["statistic"]],digits=tsdec)[[1]]
          p.t= format.pval(round(tmd[['p.value']],digits=pdec) ,eps=myeps)  
          df.t=round(tmd[["parameter"]])[[1]]
          if(round(tmd[["p.value"]],digits=pdec) < myeps){P="~P"}else{P="~P="}
          grep("t.test",mysel)->t.jn
          if(!(any(t.jn)))t.jn<-NULL 
          espo=paste("^{",t.jn,"}",sep="")
          t.ts=paste("$t(",df.t,")=",ts.t,",",P,p.t,espo, "$",sep="")
          TShold.ttest=c(TShold.ttest,t.ts)
        }}
      
      
      #Do Wilcox.test    
      if( any(contTest%in%"wilcox.t")){    #Test to stop running wilcox test if not needed                                       
        for(w.nn in contvar){
          wmd=with(dat,wilcox.test(as.formula(paste(w.nn,splitvar,sep="~"))))
          
          ts.w=round(wmd[["statistic"]], digits=tsdec)[[1]]
          p.w= format.pval(round(wmd[['p.value']],digits=pdec),eps=myeps)  
          # df.w=round(wmd[["parameter"]])[[1]]
          if(round(wmd[["p.value"]],digits=pdec) < myeps){P="~P"}else{P="~P="}
          
          grep("wilcox.t",mysel)->w.jn
          if(!(any(w.jn)))w.jn<-NULL 
          espo=paste("^{",w.jn,"}",sep="")
          w.ts=paste("$W=",ts.w,",",P,p.w,espo,"$",sep="")
          TShold.ranksum=c(TShold.ranksum, w.ts)
        }}
      
    } #end of two level split
    
    #Do kruskal wallis
    if( any(contTest%in%"kruskal.t")){    #Test to stop running kruskal test if not needed                                                                         
      for(k.nn in contvar){
        kmd=with(dat,kruskal.test(as.formula(paste(k.nn,splitvar,sep="~"))))
        ts.k=round(kmd[["statistic"]],digits=tsdec)
        p.k= format.pval(round(kmd[['p.value']], digits=pdec),eps=myeps)  
        df.k=round(kmd[["parameter"]])[[1]]
        
        if(round(kmd[["p.value"]],digits=pdec) < myeps){P=",~P"}else{P=",~P="}
        grep("kruskal.t",mysel)->k.jn
        if(!(any(k.jn)))k.jn<-NULL 
        espo=paste("^{",k.jn,"}",sep="")
        k.ts=paste("$\\chi^2_{",df.k,"}=",ts.k,P,p.k,espo,"$",sep="")
        TShold.kruskal=c(TShold.kruskal,k.ts)
      }}
    
    #And Anova  
    
    if( any(contTest%in%"aov.t")){    #Test to stop running anova test if not needed                                                                           
      for(nn in contvar){
        #md=summary(with(dat,aov(age.at.start~Fitness.Group)))
        md=summary(with(dat,aov(as.formula(paste(nn,splitvar,sep="~")))))    
        md=unlist(md)
        df1=round(md["Df1"],0)
        df2=round(md["Df2"],0)
        P.a=format.pval(round(md["Pr(>F)1"],digits=pdec),eps=myeps)
        if(round(md["Pr(>F)1"],pdec)< myeps){P=",~P"}else{P=",~P="}
        ts=round(md["F value1"],digits=tsdec)[[1]]
        grep("aov.t",mysel)->a.jn
        if(!(any(a.jn)))a.jn<-NULL 
        espo=paste("^{",a.jn,"}",sep="")
        TS=paste("$F_{",df1,", ",df2,"}=",ts,P,P.a,espo,"$", sep="" )
        TShold.anova=c(TShold.anova,TS)
        
      }}
    
    
    TScont<-NULL
    #contTest=c("aov.t","aov.t","aov.t","kruskal.t")
    for(ii in 1:length(contvar)){
      {
        if(contTest[ii]=="t.test"){tscont=TShold.ttest[ii]
        }else{
          if(contTest[ii]=="aov.t"){tscont=TShold.anova[ii]                           
          }else{
            if(contTest[ii]=="wilcox.t"){tscont=TShold.ranksum[ii]
            }else{
              if(contTest[ii]=="kruskal.t"){tscont=TShold.kruskal[ii]
              }
            }}}}
      if(Trace==T)cat(paste("Done for ",contvar[ii], "\n",sep=" "))
      TScont=c(TScont,tscont)
    }
    
    
  } #end of test for TScont equal NULL if there are no cont var 
  
  ######################## Need to dom for categorical variables########################
  #catvar=c("sex","group.percent.days")
  #fisher test
  
  if(is.null(catvar)){TScat<-NULL}else{
    
    if( any(catTest%in%"fisher.t")){    #Test to stop running fihser test if not needed
      for(f.nn in catvar){
        fmd=fisher.test(table(dat[[f.nn]],dat[[splitvar]]),simulate.p.value=mysim.p,B=myB)
        options(digits=7)
        p.f= format.pval(round(fmd[['p.value']],digits=pdec),eps=myeps)  
        
        if(round(fmd[["p.value"]],digits=pdec) < myeps){P="~~~~~~P"}else{P="~~~~~~P="}
        grep("fisher.t",mysel)->f.jn
        if(!(any(f.jn)))f.jn<-NULL 
        espo=paste("^{",f.jn,"}",sep="")
        f.ts=paste("$",P,p.f,espo,"$",sep="")
        TShold.fisher=c(TShold.fisher,f.ts)
        
      }
    }
    
    
    ###########chi-square
    if( any(catTest%in%"chisq.t")){    #Test to stop running chisquare test if not needed                                       
      for(chi.nn in catvar){
        options(digits=7)
        chimd=chisq.test(table(dat[[chi.nn]], dat[[splitvar]]  ),correct=chi.correct)
        
        tryCatch( chisq.test(table(dat[[chi.nn]], dat[[splitvar]]  ), correct=chi.correct),error=function(e)e,warning=function(w)w)->error_warn
        if(is(error_warn,"warning")){
          if(Trace==T)cat("Warning: Use fihser exact test for \"", chi.nn, "\" variable","\n")
        }
        ts.chi=round(chimd[["statistic"]],digits=tsdec)[[1]]
        p.chi=format.pval(round(chimd[['p.value']],digits=pdec),eps=myeps)  
        df.chi=round(chimd[["parameter"]])[[1]]
        
        if(round(chimd[["p.value"]],digits=pdec) < myeps){P=",~P"}else{P=",~P="}
        grep("chisq.t",mysel)->c.jn
        if(!(any(c.jn)))c.jn<-NULL 
        espo=paste("^{",c.jn,"}",sep="")
        chi.ts=paste("$\\chi^2_{",df.chi,"}=",ts.chi,P,p.chi,espo,"$",sep="")
        TShold.chisq=c(TShold.chisq,chi.ts)
      }
    }
    
    ### looping throu to get TS for catvars
    
    TScat<-NULL
    #catTest=c("chisq.t","fisher.t")
    for(iii in 1:length(catvar)){
      { 
        if(catTest[iii]=="chisq.t"){tscat=TShold.chisq[iii]
        }else{
          if(catTest[iii]=="fisher.t"){tscat=TShold.fisher[iii] }                          
        }
        if(Trace==T)cat(paste("Done for ",catvar[iii], "\n",sep=" "))
        TScat=c(TScat,tscat)
        nlevels(dat[[catvar[iii]]])->catn
        TScat=c(TScat,rep("",catn))
        
      }      
    }
    
  }
  #end of setting TScat to NULL if there are no catvars
  
  #combine TStats for cont and cat vars
  
  Test.Statistics=c(TScont,TScat)
  alltabbb=data.frame(alltabb,Test.Statistics)
  
  ch=colheads=table(dat[[splitvar]])
  hnam=names(colheads)
  colheads=c("Variables","N", hnam,"Combined","Test Statistic")
  extracolheads=c("","",paste("N=",as.vector(ch),sep=""),paste("N=",sum(as.vector(ch)),sep=""))
  insert.bottoml="\\scriptsize{   Data is presented as : 
Mean$\\pm$SD for continuous variables, row percentages (frequency) for categorical variables~~\\indent test Test used: \\textsuperscript{\\normalfont 1} T-test ~~~~~, \\textsuperscript{\\normalfont 2} Pearson chi-square test } "
  
  myres=list(alltabb,alltabbb,colheads,extracolheads,insert.bottoml)
  names(myres)<-list("mytab","mytab1","colheads","extracolheads","insert.bottom")
  
  
  #writing to an excel file
  #cleann=function(x)str_remove_all( x,"[\\bf\\{~\\\\}$]")
  #cleann=function(x)str_replace_all( x,"[\\bf\\{~\\\\}$]"," ")
  cleann=function(x){
    y=gsub("\\}","",gsub("\\{\\\\bf","",gsub("\\\\bf\\{","",x)))
    str_replace_all( y,"[\\{~\\\\}$]"," ")
  }
  
  library(stringr)
  xcelldat=apply(myres[["mytab1"]],2,FUN=cleann)
  nn.col=ncol(xcelldat)
  colnames(xcelldat)[1]<-"Variables"
  
  extraH=t(as.matrix( c(extracolheads,"") ))
  
  hhhh=t(as.matrix(colnames(xcelldat)))
   
  xlabel=splitvar
  if(!is.null(splitlabel))xlabel=splitlabel
  ssplit=t(as.matrix(c(" ","",xlabel,rep("",(nn.col-3)))))
  xcelldat=rbind(ssplit,hhhh,extraH,xcelldat)
  xcelldat=apply(xcelldat,2,FUN=function(x)gsub(mysize," ",x))
  
  if(Test==F){xcelldat=xcelldat[,-nn.col]}
  if(exceloutput==T){ 
    if(is.null(exceloutputName))exceloutputName=splitvar
    colnames(xcelldat)<-NULL
    write.csv(xcelldat,file=paste("./",exceloutputName,".csv",sep=""),row.names=F)
    }
  
  
  #Making LaTeX Tables  
  mytab=if(Test==T)myres[["mytab1"]] else myres[["mytab"]] # i change here today
  Variable=as.vector(mytab[,1])
  tab=mytab[,-1]
  l.splitvar=splitvar
  if(  !(is.null(splitlabel)))l.splitvar<-splitlabel
  n.sp=nlevels(dat[[splitvar]])
  if( !is.null(splitlabel)) splitvar<-splitlabel

  if(latexoutput==T){
  tab1=Hmisc::latex(tab,rowname=Variable,rowlabel="Variables",
             caption=(if(docaption==T)paste(mycaption,splitvar,sep="")else my.docaption),
             file=my.loc,where="!htbp",landscape=mylandscape,longtable=mylongtable,
             
             cgroup=(if(Test==T)c("",l.splitvar,"","") else c("",l.splitvar,"")),
             n.cgroup=(if(Test==T)c(1,n.sp,1,1) else c(1,n.sp,1)),
             extracolheads=(if(Test==T)c(myres[["extracolheads"]][-1],"" ) else c(myres[["extracolheads"]][-1])),
             colheads=(if(Test==T)c(myres[["colheads"]][-1]) else c(myres[["colheads"]][c(-1,-nn.col)]) ),
             
             size=mysize,
             insert.bottom=bottom,... )
    }else{
    tab1=NULL
    showtable=F
    }
  
  if(showtable==T)return(tab1) else return(myres)    
  #   return(myres)
  
  
}