## ----setup, include=TRUE--------------------------------------------------------
knitr::opts_chunk$set(echo = TRUE)

# wrangling
library(data.table)
library(stringr)
library(forcats)

# analysis
library(nlme)
library(lmerTest)
library(emmeans)
library(MASS)

# plot function
library(ggplot2)
library(ggpubr)
library(ggforce)
library(insight)
library(cowplot)
library(lazyWeave) #pretty p-values
library(rstatix) # even prettier p-values

pal_okabe_ito <- c(
  "#E69F00",
  "#56B4E9",
  "#009E73",
  "#F0E442",
  "#0072B2",
  "#D55E00",
  "#CC79A7"
)
pal_okabe_ito_blue <- pal_okabe_ito[c(5,6,1,2,3,7,4)] 
pal_okabe_ito_red <- pal_okabe_ito[c(6,5,3,1,2,7,4)] 
pal_okabe_ito_2 <- pal_okabe_ito[c(5,6)]
pal_okabe_ito_3 <- pal_okabe_ito[c(5,6,7)]
pal_okabe_ito_3_light <- pal_okabe_ito[c(1,2,7)]
pal_okabe_ito_4 <- pal_okabe_ito[c(5,6,7,2)]


## ----twoway-fmt_p_value_2, echo=FALSE, warning=FALSE, message=FALSE-------------
fmt_p_value_2 <- function(p, digits = 0.0001){
  p_char <- ifelse(p < 0.06 | is.na(p),
                   scales::pvalue(p,
                                  accuracy = ifelse(
                                    (10^-(ceiling(-log10(p)) + 1)) < digits,
                                    digits,
                                    (10^-(ceiling(-log10(p)) + 1)))),
                   scales::pvalue(p, accuracy = 0.01))
}
fmt_p_value_rmd <- function(x, digits = 0.0001){
  return(sapply(x, fmt_p_value_2, digits))
}


## -------------------------------------------------------------------------------
fmt_table <- function(df, digits = 0.0001){
  
}


## ----odd-even-------------------------------------------------------------------
odd <- function(x) x%%2 != 0
even <- function(x) x%%2 == 0


## ----not_in---------------------------------------------------------------------
'%not_in%' <- Negate('%in%')


## -------------------------------------------------------------------------------
factor_wrap <- function(gg, sep = " "){
  x_labels <- layer_scales(gg)$x$get_limits()
  label_x <- str_replace(x_labels, sep, "\n")
  gg <- gg +
    scale_x_discrete(labels = label_x)
  return(gg)
}


## -------------------------------------------------------------------------------
factor_wrap_1 <- function(gg, sep = " "){
  x_labels <- layer_scales(gg)$x$get_limits()
  label_x <- str_replace(x_labels, sep, "\n")
  return(label_x)
}


## ----remove-parentheses---------------------------------------------------------
remove_parentheses <- function(x){
  if(substr(x, 1, 1) == "("){
    x <- substr(x, 2, nchar(x))
  }
  if(substr(x, nchar(x), nchar(x)) == ")"){
    x <- substr(x, 1, nchar(x)-1)
  }
  return(x)
}


## ----ggcheck_the_qq, warning = FALSE--------------------------------------------
ggcheck_the_qq = function(m1,
                   line = "robust",
                   n_boot = 200){
  n <- nobs(m1)
  m1_res <- residuals(m1)
  #sigma_m1_res <- sigma(m1)

  normal_qq <- ppoints(n) %>%
    qnorm()
  sample_qq <- m1_res[order(m1_res)]
  
  # mean + sd
  parametric_slope <- sd(sample_qq)
  parametric_intercept <- mean(sample_qq)
  
  # quartiles
  m1_quartiles <- quantile(m1_res, c(.25, .75))
  qnorm_quartiles <- qnorm( c(.25, .75))
  m1_diff <- m1_quartiles[2] - m1_quartiles[1]
  qnorm_diff <- qnorm_quartiles[2] - qnorm_quartiles[1] # = 1.349
  quartile_slope <- m1_diff/qnorm_diff
  quartile_intercept <- median(m1_quartiles) # median of quartiles not quantiles
  
  # robust uses MASS:rlm (default arguments?)
  qq_observed <- data.table(normal_qq = normal_qq,
                            sample_qq = sample_qq)
  m2 <- rlm(sample_qq ~ normal_qq, data = qq_observed)
  robust_intercept <- coef(m2)[1]
  robust_slope <- coef(m2)[2]
  
  # re-sample ribbon
  set.seed(1)
  resample_qq_model <- numeric(n_boot*n)
  Y <- simulate(m1, n_boot)
  fd <- model.frame(m1) %>%
    data.table
  inc <- 1:n
  for(sim_i in 1:n_boot){
    # parametric bound
    fd[, (1) := Y[,sim_i]]
    m1_class <- class(m1)[1]
    if(m1_class == "lm"){
      ff <- lm(formula(m1), data = fd) 
    }
    if(m1_class == "lmerModLmerTest" | m1_class == "lmerMod"){
      ff <- lmer(formula(m1), data = fd)
    }
    y_res <- residuals(ff)
    resample_qq <- y_res[order(y_res)]
    resample_qq_model[inc] <- resample_qq
    inc <- inc + n
    
    # robust bound
    qq_resampled <- data.table(normal_qq = normal_qq,
                              resample_qq = resample_qq)
    m2_resample <- rlm(resample_qq ~ normal_qq, data = qq_resampled)
    
  }

  qq_sim <- data.table(normal_qq = normal_qq,
                       resample_qq_model = resample_qq_model)
  
  qq_ci_model <- qq_sim[, .(median = median(resample_qq_model),
                      lower = quantile(resample_qq_model, 0.025),
                      upper = quantile(resample_qq_model, 0.975)),
                  by = normal_qq]
  m2_boot <- rlm(median ~ normal_qq, data = qq_ci_model)
  robust_intercept_boot <- coef(m2_boot)[1]
  robust_slope_boot <- coef(m2_boot)[2]
 
  ggplot(data = qq_observed,
         aes(x = normal_qq, y = sample_qq)) +
    
    # ribbon
    geom_ribbon(data = qq_ci_model,
                aes(ymin = lower,
                    ymax = upper,
                    y = median,
                    fill = "band"),
                fill = "gray",
                alpha = 0.6) +
    # draw points
    geom_point() +
    
   # robust
    geom_abline(aes(intercept = robust_intercept,
                    slope = robust_slope,
                    color = "robust"),
                show.legend = FALSE,
                size = 0.75) +
    # robust_boot
    # geom_abline(aes(intercept = robust_intercept_boot,
    #                 slope = robust_slope_boot,
    #                 color = "robust boot"),
    #             show.legend = TRUE,
    #             size = 0.75) +
    xlab("Normal Quantiles") +
    ylab("Sample Quantiles") +
    
    scale_color_manual(values = pal_okabe_ito[c(1:2,5:6)]) +
    theme_minimal_grid() +
    NULL
  
}


## ----ggcheck_the_spreadlevel----------------------------------------------------
ggcheck_the_spreadlevel <- function(m1,
                   n_boot = 200){
  n <- nobs(m1)
  m1_res <- residuals(m1)
  m1_scaled <- m1_res/sd(m1_res)
  m1_root <- sqrt(abs(m1_scaled))
  m1_fitted <- fitted(m1)
  
  m2 <- lm(m1_root ~ m1_fitted)
  m2_intercept <- coef(m2)[1]
  m2_slope <- coef(m2)[2]

  plot_data <- data.table(
    m1_res = sqrt(abs(m1_scaled)),
    fitted = m1_fitted
  )
  
    ggplot(data = plot_data,
         aes(x = fitted, y = m1_res)) +
    
    # ribbon
    # geom_ribbon(data = qq_ci_model,
    #             aes(ymin = lower,
    #                 ymax = upper,
    #                 y = median,
    #                 fill = "band"),
    #             fill = "gray",
    #             alpha = 0.6) +
    # # draw points
      geom_point() +
    
      geom_smooth(method = lm) +
   # robust 
    # geom_abline(aes(intercept = robust_intercept,
    #                 slope = robust_slope,
    #                 color = "robust"),
    #             show.legend = TRUE,
    #             size = 0.75) +
    # robust_boot
    # geom_abline(aes(intercept = robust_intercept_boot,
    #                 slope = robust_slope_boot,
    #                 color = "robust boot"),
    #             show.legend = TRUE,
    #             size = 0.75) +
    xlab("Fitted") +
    ylab("root abs-scaled-residual") +
    
    
    scale_color_manual(values = pal_okabe_ito[c(1:2,5:6)]) +
    theme_minimal_grid() +
    NULL
}


## ----ggcheck_the_model----------------------------------------------------------
ggcheck_the_model <- function(m1){
  gg1 <- ggcheck_the_qq(m1)
  gg2 <- ggcheck_the_spreadlevel(m1)
  cowplot::plot_grid(gg1, gg2, nrow = 1)
}


## ----emm_table------------------------------------------------------------------
emm_table <- function(fit_emm){
  table_out <- data.table(summary(fit_emm))
  if("response" %in% colnames(table_out)){
    # if the lm with log(y) or glm with log link, use " / "
    setnames(table_out,
             old = c("response"),
             new = c("emmean"))
  }
  if("asymp.LCL" %in% colnames(table_out)){
    # if the lm with log(y) or glm with log link, use " / "
    setnames(table_out,
             old = c("asymp.LCL", "asymp.UCL"),
             new = c("lower.CL", "upper.CL"))
  }
  return(table_out)
}


## ----effects_table--------------------------------------------------------------
effects_table <- function(fit_pairs,
                          digits = 2,
                          accuracy = 1e-04,
                          add.p = FALSE){
  if(is.data.frame(fit_pairs) == TRUE){
    pairs_dt <- data.table(fit_pairs)
  }else{
    pairs_dt <- summary(fit_pairs) %>%
      data.table()
  }
 
  if("asymp.LCL" %in% colnames(pairs_dt)){
    # if the lm with log(y) or glm with log link, use " / "
    setnames(pairs_dt,
             old = c("asymp.LCL", "asymp.UCL"),
             new = c("lower.CL", "upper.CL"))
  }
  
  # for simple = each,
  if(which(colnames(pairs_dt)=="contrast") == 3){
    # replace "." with blank
    g_col <- colnames(fit_pairs)[1]
    x_col <- colnames(fit_pairs)[2]
    pairs_dt[get(g_col) == ".", 
             (1) := ""]
    pairs_dt[get(x_col) == ".", 
             (2) := ""]

    # beautify contrast column and group1, group2
    pairs_dt[, contrast := paste0(get(names(pairs_dt)[1]),
                                   get(names(pairs_dt)[2]),
                                   ": ",
                                   contrast)]
  }
  
  # create a column of nicely formatted p-values for display.
  # pairs_dt[, p := pvalString(p.value)]
  pairs_dt[, p := p_format(p.value,
                           digits = digits,
                           accuracy = accuracy,
                           add.p = add.p)]
  pairs_dt[, contrast := factor(contrast, contrast)]
  return(pairs_dt)
}


## ----pvalue_table---------------------------------------------------------------
pvalue_table <- function(fit_pairs){
  if(is.data.frame(fit_pairs) == TRUE){
    pairs_dt <- data.table(fit_pairs)
  }else{
    pairs_dt <- summary(fit_pairs) %>%
      data.table()
  }
  
  if("ratio" %in% colnames(pairs_dt)){
    # if the lm with log(y) or glm with log link, use " / "
    groups <- unlist(str_split(pairs_dt$contrast, " / "))
    setnames(pairs_dt, old = "ratio", new = "estimate")
  }else{
    # if lm use " - "
    groups <- unlist(str_split(pairs_dt$contrast, " - "))
  }

  # remove parentheses if they exist from group cols
  groups <- lapply(groups, remove_parentheses) %>%
    unlist
  
  # add the group1 and group 2 columns
  pairs_dt[, group1 := groups[odd(1:length(groups))]]
  pairs_dt[, group2 := groups[even(1:length(groups))]]
  
  # match for simple and remove parentheses if they exist
  if(names(pairs_dt)[1] != "contrast"){
    x_col <- names(pairs_dt)[2]
    g_col <- names(pairs_dt)[1]
    x_col_levels <- unique(pairs_dt[, get(x_col)])
    x_col_levels <- x_col_levels[x_col_levels != "."]
    g_col_levels <- unique(pairs_dt[, get(g_col)])
    g_col_levels <- g_col_levels[g_col_levels != "."]
    repair <- pairs_dt[, group1] %in% c(x_col_levels, g_col_levels)
    pairs_dt[, group1 := ifelse(repair,
                                group1,
                                gsub(")", "", group1, fixed = TRUE))]
    pairs_dt[, group1 := ifelse(repair,
                                group1,
                                gsub("(", "", group1, fixed = TRUE))]
    repair <- pairs_dt[, group2] %in% c(x_col_levels, g_col_levels)
    pairs_dt[, group2 := ifelse(repair,
                                group2,
                                gsub(")", "", group2, fixed = TRUE))]
    pairs_dt[, group2 := ifelse(repair,
                                group2,
                                gsub("(", "", group2, fixed = TRUE))]
  }
  
  
  if("asymp.LCL" %in% colnames(pairs_dt)){
    # if the lm with log(y) or glm with log link, use " / "
    setnames(pairs_dt,
             old = c("asymp.LCL", "asymp.UCL"),
             new = c("lower.CL", "upper.CL"))
  }
  
  # for simple = each,
  if(which(colnames(pairs_dt)=="contrast") == 3){
    # replace "." with blank
    pairs_dt[get(g_col) == ".", 
             (1) := ""]
    pairs_dt[get(x_col) == ".", 
             (2) := ""]

    # beautify contrast column and group1, group2
    pairs_dt[, contrast := paste0(get(names(pairs_dt)[1]),
                                   get(names(pairs_dt)[2]),
                                   ": ",
                                   contrast)]
    pairs_dt[get(g_col) == "",
             group1 := paste0(get(names(pairs_dt)[1]),
                              get(names(pairs_dt)[2]),
                              ",",
                              group1)]
    pairs_dt[get(g_col) == "",
             group2 := paste0(get(names(pairs_dt)[1]),
                              get(names(pairs_dt)[2]),
                              ",",
                              group2)]
    pairs_dt[get(x_col) == "",
             group1 := paste0(group1,
                              ",",
                              get(names(pairs_dt)[1]),
                              get(names(pairs_dt)[2]))]
    pairs_dt[get(x_col) == "",
             group2 := paste0(group2,
                              ",",
                              get(names(pairs_dt)[1]),
                              get(names(pairs_dt)[2]))]
  }
  
  # create a column of nicely formatted p-values for display.
  pairs_dt[, p := pvalString(p.value)]
  pairs_dt[, contrast := factor(contrast, contrast)]
  return(pairs_dt)
}


## ----response-plot--------------------------------------------------------------
response_plot <- function(
  fit, # model fit from lm, lmer, nlme, glmmTMB
  fit_emm, # data frame with means, error
  x_label = "none",
  y_label = "Response (units)",
  g_label = NULL,
  dots = "sina",
  dodge_width = 0.8,
  adjust = 0.5,
  palette = pal_okabe_ito,
  legend_position = "top",
  flip_horizontal = FALSE,
  group_lines = FALSE
){
   dt <- model.frame(fit) %>% # correctly subsets
    data.table()
  y_col <- insight::find_response(fit)
  
  fit_emm_dt <- emm_table(fit_emm)
  
  # get number of pre emmean/ratio/rate columns
  if("emmean" %in% names(fit_emm_dt)){response <- "emmean"}
  if("rate" %in% names(fit_emm_dt)){response <- "rate"}
  if("ratio" %in% names(fit_emm_dt)){response <- "ratio"}
  
  if(which(names(fit_emm_dt) == response) == 2){
    x_col <- names(fit_emm_dt)[1]
    g_col <- NA
  }else{
    x_col <- names(fit_emm_dt)[1]
    g_col <- names(fit_emm_dt)[2]
  }
    
  rand_int <- find_random(fit)
  
  if(is.na(g_col) & group_lines==TRUE){
    group_lines <- FALSE
  }
  if(is.factor(dt[, get(x_col)]) == FALSE){
    dt[, (x_col) := factor(get(x_col))]
  }
  if(!is.na(g_col)){
    if(is.factor(dt[, get(g_col)]) == FALSE){
      dt[, (g_col) := factor(get(g_col))]
    }
  } 
  
  if(is.na(g_col)){
    pd <- position_dodge(width = 0)
  }else{
    pd <- position_dodge(width = dodge_width)
  }
  
  if(is.na(g_col)){
    color_col <- x_col
  }else{
    color_col <- g_col
  }
  
  gg <- ggplot(data=dt, aes(x = get(x_col),
                            y = get(y_col),
                            color = get(color_col)))
  # plot points
  if(dots == "sina"){
    gg <- gg + geom_sina(alpha = 0.5,
                         position = pd,
                         adjust = adjust)
  }
  if(dots == "jitter"){
    gg <- gg + geom_point(alpha = 0.5,
                          position = position_jitter(seed = 1))
  }
  if(dots == "dotplot"){
    gg <- gg + geom_dotplot(binaxis='y',
                            stackdir='center',
                            alpha = 0.5,
                            position = pd)
  }
  
  # plot means and CI
  gg <- gg +
    geom_errorbar(data = fit_emm_dt, aes(y = get(response),
                                         ymin = lower.CL,
                                         ymax = upper.CL,
                                         color = get(color_col)),
                  width = 0,
                  position = pd
    ) +
    geom_point(data = fit_emm_dt, aes(y = get(response),
                                      color = get(color_col)),
               size = 3,
               position = pd
    ) +
    
    # aesthetics
    ylab(y_label) +
    scale_color_manual(values = palette,
                       name = color_col) +
    theme_pubr() +
    theme(legend.position = legend_position) +
    
    NULL
  
  # if(group_lines == TRUE | group_lines == g_col){
  #   gg <- gg +
  #     geom_line(data = fit_emm_dt,
  #               aes(y = emmean,
  #                   group = get(g_col)),
  #               position = pd)
  # }
  # if(group_lines == x_col){
  #   gg <- gg +
  #     geom_line(data = fit_emm_dt,
  #               aes(y = emmean,
  #                   group = get(x_col)))
  # }
  
  if(is.na(g_col)){
    gg <- gg + theme(legend.position="none")
  }
  
  if(is.null(g_label)){
    gg <- gg + guides(color = guide_legend(title=g_col))
  }else{
    if(g_label == "none"){
      gg <- gg + guides(color = guide_legend(title=NULL))
    }else{
      gg <- gg + guides(color = guide_legend(title=g_label))
    }
  }
  
  # remove x axis title
  if(x_label == "none"){
    gg <- gg + theme(axis.title.x = element_blank())
  }else{
    gg <- gg + xlab(x_label)}
  
  if(flip_horizontal == TRUE){
    gg <- gg + coord_flip() +
      theme(legend.position = legend_position)
    gg <- gg + theme(axis.title.x = element_text())
    if(x_label == "none"){
      gg <- gg + theme(axis.title.y = element_blank())
    }else{
      gg <- gg + ylab(x_label)
    }
  }
  
  # gg
  
  return(gg) 
}


## ----plot_pvalues---------------------------------------------------------------
plot_pvalues <- function(
      gg,
      fit_emm,
      fit_pairs,
      contrast_rows,
      y_pos = NULL
){
  fit_emm_dt <- emm_table(fit_emm)
  pvalue_table_dt <- pvalue_table(fit_pairs)

  if(which(names(fit_emm_dt) == "emmean") == 2){
    x_col <- names(fit_emm_dt)[1]
    g_col <- NA
  }else{
    x_col <- names(fit_emm_dt)[1]
    g_col <- names(fit_emm_dt)[2]
  }
    

  if(is.na(g_col)){
#    pd <- position_dodge(width = 0)
    pvalue_table_dt[, x_min_col := group1]
    pvalue_table_dt[, x_max_col := group2]
  }else{
#    pd <- position_dodge(width = dodge_width)
    pvalue_table_dt[, x_min_col := NA]
    pvalue_table_dt[, x_max_col := NA]
  }
  
  if(is.na(g_col)){
    color_col <- x_col
  }else{
    color_col <- g_col
  }
  
  if(contrast_rows[1] == "all"){
    contrast_rows <- 1:nrow(pvalue_table_dt)
  }
  
  if(sum(contrast_rows) > 0){ # show p-values
    # get x positions for p-values
    # [[3]] is 3rd layer which is means
    if(is.na(pvalue_table_dt[1, x_min_col])){
      gg_data <- cbind(fit_emm_dt,
                       ggplot_build(gg)$data[[3]])
      
      gg_data[, cell := paste(get(x_col), get(g_col), sep=",")]
      match_it_ref <- match(pvalue_table_dt$group1, gg_data$cell)
      
      pvalue_table_dt[, rowa := match(group1, gg_data$cell)]
      pvalue_table_dt[, rowb := match(group2, gg_data$cell)]
      pvalue_table_dt[, x_min_col := gg_data[rowa, x]]
      pvalue_table_dt[, x_max_col := gg_data[rowb, x]]
    }
    
#    if(is.null(p_pos)){
      p_pos <- 1:length(contrast_rows)
 #   }
    
    # get min/max y
    y_range <- ggplot_build(gg)$layout$panel_params[[1]]$y.range
    max_y <- y_range[2]
    min_y <- y_range[1]

    increment <- 0.08*(max_y - min_y)
    if(is.null(y_pos)){
      y_position <- max_y + increment*p_pos
    }else{
      y_position <- y_pos
    }
    
    
    gg <- gg + 
      stat_pvalue_manual(pvalue_table_dt[contrast_rows],
                         label = "p",
                         y.position = y_position,
                         xmin = "x_min_col",
                         xmax = "x_max_col",
                         size = 2.5,
                         tip.length = 0.01)
    
    # make sure ylim includes p-value
    y_hi <- max(y_position)
    y_lo <- min_y - 0.05*(max_y - min_y)
    gg <- gg + coord_cartesian(ylim = c(y_lo, y_hi))
  }
  
  
  # gg
  
  return(gg)
}


## ----ggplot_the_response--------------------------------------------------------
ggplot_the_response <- function(
 fit, # model fit from lm, lmer, nlme, glmmTMB
  fit_emm,
  fit_pairs,
  wrap_col = NULL,
  x_label = "none",
  y_label = "Response (units)",
  g_label = NULL,
  dots = "sina",
  dodge_width = 0.8,
  adjust = 0.5,
  contrast_rows = "all",
  y_pos = NULL,
  palette = pal_okabe_ito,
  legend_position = "top",
  flip_horizontal = FALSE,
  group_lines = FALSE  
){
  
  # base plot
  gg <- response_plot(
    fit = fit,
    fit_emm = fit_emm,
    x_label = x_label,
    y_label = y_label,
    g_label = g_label,
    dots = dots,
    dodge_width = dodge_width,
    adjust = adjust,
    palette = palette,
    legend_position = legend_position,
    flip_horizontal = flip_horizontal,
    group_lines = group_lines    
  )
  
  # add p-values
  if(contrast_rows[1] != "none" |
     contrast_rows[1] == 0 ){
    gg <- plot_pvalues(
      gg = gg,
      fit_emm = fit_emm,
      fit_pairs = fit_pairs,
      contrast_rows = contrast_rows,
      y_pos = y_pos
    )
  }
  
  gg
  return(gg)
}


## ----old_ggplot_the_response----------------------------------------------------
old_ggplot_the_response <- function(
  fit, # model fit from lm, lmer, nlme, glmmTMB
  fit_emm,
  fit_pairs,
  wrap_col=NULL,
  x_label = "none",
  y_label = "Response (units)",
  g_label = NULL,
  dots = "sina",
  dodge_width = 0.8,
  adjust = 0.5,
  contrast_rows = "all",
  p_pos = NULL,
  palette = pal_okabe_ito,
  legend_position = "top",
  flip_horizontal = FALSE,
  group_lines = FALSE){
  
  show_contrasts <- contrast_rows
  
  dt <- model.frame(fit) %>% # correctly subsets
    data.table()
  y_col <- insight::find_response(fit)
  
  fit_emm_dt <- emm_table(fit_emm)
  fit_pairs_dt <- effects_table(fit_pairs)
  
  if(which(names(fit_emm_dt) == "emmean") == 2){
    x_col <- names(fit_emm_dt)[1]
    g_col <- NA
  }else{
    x_col <- names(fit_emm_dt)[1]
    g_col <- names(fit_emm_dt)[2]
  }
    
  # if(is.null(x_col)){
  #   x_col <- find_predictors(fit)$conditional[1]
  # }
  # if(is.null(g_col)){
  #   g_col <- find_predictors(fit)$conditional[2]
  # }
  rand_int <- find_random(fit)
  
  if(is.na(g_col) & group_lines==TRUE){
    group_lines <- FALSE
  }
  if(is.factor(dt[, get(x_col)]) == FALSE){
    dt[, (x_col) := factor(get(x_col))]
  }
  if(!is.na(g_col)){
    if(is.factor(dt[, get(g_col)]) == FALSE){
      dt[, (g_col) := factor(get(g_col))]
    }
  } 
  
  if(is.na(g_col)){
    pd <- position_dodge(width = 0)
    fit_pairs_dt[, x_min_col := group1]
    fit_pairs_dt[, x_max_col := group2]
  }else{
    pd <- position_dodge(width = dodge_width)
    fit_pairs_dt[, x_min_col := NA]
    fit_pairs_dt[, x_max_col := NA]
  }
  
  if(is.na(g_col)){
    color_col <- x_col
  }else{
    color_col <- g_col
  }
  
  gg <- ggplot(data=dt, aes(x = get(x_col),
                            y = get(y_col),
                            color = get(color_col)))
  # plot points
  if(dots == "sina"){
    gg <- gg + geom_sina(alpha = 0.5,
                         position = pd,
                         adjust = adjust)
  }
  if(dots == "jitter"){
    gg <- gg + geom_point(alpha = 0.5,
                          position = "jitter")
  }
  if(dots == "dotplot"){
    gg <- gg + geom_dotplot(binaxis='y',
                            stackdir='center',
                            alpha = 0.5,
                            position = pd)
    
  }
  
  # plot means and CI
  gg <- gg +
    geom_errorbar(data = fit_emm_dt, aes(y = emmean,
                                         ymin = lower.CL,
                                         ymax = upper.CL,
                                         color = get(color_col)),
                  width = 0,
                  position = pd
    ) +
    geom_point(data = fit_emm_dt, aes(y = emmean,
                                      color = get(color_col)),
               size = 3,
               position = pd
    ) +
    
    # aesthetics
    ylab(y_label) +
    scale_color_manual(values = palette,
                       name = color_col) +
    theme_pubr() +
    theme(legend.position = legend_position) +
    
    NULL
  
  # if(group_lines == TRUE | group_lines == g_col){
  #   gg <- gg +
  #     geom_line(data = fit_emm_dt,
  #               aes(y = emmean,
  #                   group = get(g_col)),
  #               position = pd)
  # }
  # if(group_lines == x_col){
  #   gg <- gg +
  #     geom_line(data = fit_emm_dt,
  #               aes(y = emmean,
  #                   group = get(x_col)))
  # }
  
  if(is.na(g_col)){
    gg <- gg + theme(legend.position="none")
  }
  
  if(is.null(g_label)){
    gg <- gg + guides(color = guide_legend(title=g_col))
  }else{
    if(g_label == "none"){
      gg <- gg + guides(color = guide_legend(title=NULL))
    }else{
      gg <- gg + guides(color = guide_legend(title=g_label))
    }
  }
  
  if(is.numeric(show_contrasts)){
    contrast_rows <- show_contrasts
  }else{
    if(show_contrasts == "none"){
      contrast_rows <- 0
    }
    if(show_contrasts == "all"){
      contrast_rows <- 1:nrow(fit_pairs_dt)
    }
  }
  if(sum(contrast_rows) > 0){ # show p-values
    # get x positions for p-values
    # [[3]] is 3rd layer which is means
    if(is.na(fit_pairs_dt[1, x_min_col])){
      gg_data <- cbind(fit_emm_dt,
                       ggplot_build(gg)$data[[3]])
      
      gg_data[, cell := paste(get(x_col), get(g_col), sep=",")]
      match_it_ref <- match(fit_pairs_dt$group1, gg_data$cell)
      
      fit_pairs_dt[, rowa := match(group1, gg_data$cell)]
      fit_pairs_dt[, rowb := match(group2, gg_data$cell)]
      fit_pairs_dt[, x_min_col := gg_data[rowa, x]]
      fit_pairs_dt[, x_max_col := gg_data[rowb, x]]
    }
    
    if(is.null(p_pos)){
      p_pos <- 1:length(contrast_rows)
    }
    max_y <- max(dt[, get(y_col)], na.rm=TRUE)
    min_y <- min(dt[, get(y_col)], na.rm=TRUE)
    # make sure max includes CIs!
    max_y <- max(max_y, fit_emm_dt$upper.CL)
    min_y <- min(min_y, fit_emm_dt$lower.CL)
    
    increment <- 0.08*(max_y - min_y)
    y_position <- max_y + increment*p_pos
    gg <- gg + 
      stat_pvalue_manual(fit_pairs_dt[contrast_rows],
                         label = "p",
                         y.position = y_position,
                         xmin = "x_min_col",
                         xmax = "x_max_col",
                         size = 2.5,
                         tip.length = 0.01)
    
    # make sure ylim includes p-value
    y_hi <- max_y + 0.05*(max_y - min_y) +
      increment*max(p_pos)
    y_lo <- min_y - 0.05*(max_y - min_y)
    gg <- gg + coord_cartesian(ylim = c(y_lo, y_hi))
  }
  
  
  # remove x axis title
  if(x_label == "none"){
    gg <- gg + theme(axis.title.x = element_blank())
  }else{
    gg <- gg + xlab(x_label)}
  
  if(flip_horizontal == TRUE){
    gg <- gg + coord_flip() +
      theme(legend.position = legend_position)
    gg <- gg + theme(axis.title.x = element_text())
    if(x_label == "none"){
      gg <- gg + theme(axis.title.y = element_blank())
    }else{
      gg <- gg + ylab(x_label)
    }
  }
  
  # gg
  
  return(gg)
}


## ----ggplot_the_effects---------------------------------------------------------
ggplot_the_effects <- function(fit,
                       fit_pairs,
                       contrast_rows = "all",
                       show_p = TRUE,
                       p_position = "top", # "top", "right", "align"
                       nudge_y = 0.33,
                       effect_label = "Effect (units)",
                       effect_x_lim = "auto"){

  if(!is.data.frame(fit_pairs)){
    stop("contrast object (fit_pairs) not converted to a data frame. Convert using fit_pairs <- summary(infer = TRUE)")
  }

  if(contrast_rows[1] == "all"){
    contrast_rows <- 1:nrow(fit_pairs)
  }
  
  fit_pairs_dt <- effects_table(fit_pairs[contrast_rows,])
  fit_pairs_dt[, contrast := fct_rev(contrast)]
  
  if("lower.CL" %not_in% colnames(fit_pairs_dt)){
    stop("contrast object (fit_pairs) does not include confidence interval columns. Recompute using fit_pairs <- summary(infer = TRUE)")
  }
  
  if("ratio" %in% names(fit_pairs)){
    nill_null <- 1.0
    setnames(fit_pairs_dt, old = "ratio", new = "estimate")
    effect_type <- "ratio"
  }else{
    nill_null <- 0.0
  }
  
  neg_space <- 0.4
  p <- nrow(fit_pairs_dt)
  min_bound <- min(fit_pairs_dt[, lower.CL])
  max_bound <- max(fit_pairs_dt[, upper.CL])
  x_lo <- min_bound - 0.1*abs(min_bound)
  x_hi <- max_bound + 0.1*abs(max_bound)
  if(x_lo > -neg_space*x_hi){
    x_lo <- -neg_space*x_hi
  }
  if(x_hi < -neg_space*x_lo){
    x_hi <- -neg_space*x_lo
  }
  
  if(effect_x_lim[1] == "auto"){
    x_lims <- c(x_lo, x_hi)
  }else
  {
    x_lims <- effect_x_lim
  }
  
  gg <- ggplot(data = fit_pairs_dt,
               aes(y = contrast,
                   x = estimate)) +
    geom_errorbar(aes(xmin=lower.CL, 
                      xmax=upper.CL),
                  width=0, 
                  color="black") +
    geom_point(size = 3) +
    
    geom_vline(xintercept = nill_null, linetype = 2) +
    theme_pubr() +
    xlab(effect_label) +
    theme(axis.title.y = element_blank()) +
    scale_x_continuous(position="top") +
    NULL
  
    gg <- gg + coord_cartesian(xlim = x_lims)
  
  
  nudge_x <- 0
  hjust_i <- 0.5
  if(show_p == TRUE){
    if(p_position == "top"){
      xpos <- fit_pairs_dt[, estimate]
      hjust_i <- 0.5
   }
     if(p_position == "right"){
       xpos <- fit_pairs_dt[, upper.CL] + (x_hi - x_lo)/100
       nudge_y <- 0.0
       hjust_i <- 0
    }
   gg <- gg +
      # p-values
      annotate(geom = "text",
               label = fit_pairs_dt[, p],
               x = xpos + nudge_x,
               y = p:1 + nudge_y,
               size = 2.5,
               hjust = hjust_i)    
  }

  # gg
  
  return(gg)
}


## ----ggplot_the_model-----------------------------------------------------------
ggplot_the_model <- function(fit,
                           fit_emm,
                           fit_pairs,
                           x_label = "none",
                           y_label = "Response (units)",
                           g_label = NULL,
                           effect_label = "Effect (units)",
                           dots = "sina",
                           dodge_width = 0.8,
                           adjust = 0.5,
                           contrast_rows = "all",
                           show_p = TRUE,
                           p_position = "top",
                           nudge_y = 0.33,
                           y_pos = NULL,
                           palette = pal_okabe_ito,
                           legend_position = "bottom",
                           flip_horizontal = FALSE,
                           rel_heights = c(1,1),
                           group_lines = FALSE,
                           effect_x_lim = "auto"){
  
  gg1 <- response_plot(
    fit = fit,
    fit_emm = fit_emm,
    x_label = x_label,
    y_label = y_label,
    g_label = g_label,
    dots = dots,
    dodge_width = dodge_width,
    adjust = adjust,
    palette = palette,
    legend_position = legend_position,
    flip_horizontal = flip_horizontal,
    group_lines = group_lines
  )

  gg2 <- ggplot_the_effects(fit = fit,
                          fit_pairs = fit_pairs,
                          contrast_rows = contrast_rows,
                          show_p = show_p,
                          p_position = p_position,
                          nudge_y = nudge_y,
                          effect_label = effect_label,
                          effect_x_lim = effect_x_lim)
  plot_grid(gg2,
            gg1,
            nrow=2,
            align = "v",
            axis = "lr",
            rel_heights = rel_heights)
}


## ----plot_treatments------------------------------------------------------------
plot_treatments <- function(gg,
                            x_levels,
                            text_size = 5){
  
  x_pos <- ggplot_build(gg)$data[[2]][, "x"]
  # x_lim <- c(min(ggplot_build(gg)$data[[1]][, "xmin"]),
  #            max(ggplot_build(gg)$data[[1]][, "xmax"]))
  x_range <- ggplot_build(gg)$layout$panel_params[[1]]$x.range
  x_lim <- x_range
  
  y_pos <- 0
  p <- nrow(x_levels)
  y_sep <- 1
  y_breaks <- y_pos + y_sep*(p-1):0
  y_lim <- c(y_pos, y_pos + y_sep*(p-1)) +
    + c(-y_sep/2, y_sep/2)

  gg_levels <- ggplot(data = data.table(x_pos, y_pos),
               aes(x = x_pos,
                   y = y_pos))
  
  for(i in 1:nrow(x_levels)){
    gg_levels <- gg_levels +
      annotate(geom = "text",
               label = x_levels[i,],
               x = x_pos,
               y = y_breaks[i],
               size = text_size)
  }
  gg_levels <- gg_levels +
    coord_cartesian(ylim = y_lim,
                    xlim = x_lim) +
    theme(axis.title = element_blank(),
          axis.text.x = element_blank(),
          axis.ticks = element_blank(),
          panel.grid = element_blank(),
          axis.line = element_blank(),
          panel.background = element_blank(),
          plot.margin = unit(c(0, 0, 0, 0), "pt")) +
    scale_y_continuous(breaks = y_breaks,
                       labels = row.names(x_levels)) +
    scale_x_discrete(expand = expansion(mult = c(0, 0))) +
    NULL 
  
  return(gg_levels)
}


## ----ggplot_the_treatments------------------------------------------------------
ggplot_the_treatments <- function(
  gg,
  x_levels,
  text_size = 5,
  show_groups = FALSE,
  rel_heights = c(1, 0.1)
){
  gg_levels <- plot_treatments(gg,
                               x_levels,
                               text_size)
  
  if(show_groups == FALSE){
    gg <- gg +
      theme(axis.text.x= element_blank())
  }
  
  gg_out <- plot_grid(
    gg,
    gg_levels,
    nrow = 2,
    rel_heights = rel_heights,
    align = "v",
    axis = "lr"
  )
  return(gg_out)
}


## ----vertical-brackets-functions, echo=FALSE------------------------------------
bracketsGrob <- function(...){
l <- list(...)
e <- new.env()
e$l <- l
  grid:::recordGrob(  {
    do.call(grid.brackets, l)
  }, e)
}


geom_bracket <- function(x,
                         y,
                         yend,
                         tip.length = 0.01,
                         line.size = 0.4,
                         label = "",
                         text.size = 4,
                         text.nudge_x = 0.02,
                         text.hjust = 0.5,
                         text.vjust = 0.5,
                         text.angle = 0,
                         parse = FALSE,
                         ...){
  # x_range <- layer_scales(gg)$x$get_limits()
  # if(!is.numeric(x_range)){
  #   x_range <- ggplot_build(gg)$layout$panel_scales_x[[1]]$range_c$range
  # }
  # x_scale <- diff(x_range)
  x_scale <- 1
  
  list(geom_segment(x = x,
                    y = y,
                    xend = x,
                    yend = yend,
                    size = line.size,
                    ...),
       geom_segment(x = x,
                    y = y,
                    xend = x - tip.length*x_scale,
                    yend = y,
                    size = line.size,
                    ...),
       geom_segment(x = x,
                    y = yend,
                    xend = x - tip.length*x_scale,
                    yend = yend,
                    size = line.size,
                    ...),
       annotate(geom = "text",
                x = x + text.nudge_x*x_scale,
                y = y  + text.vjust*(yend - y),
                label = label,
                size = text.size,
                hjust = text.hjust,
                angle = text.angle,
                parse = parse,
                ...)
  )
}
geom_vbracket <- function(gg,
                          x,
                          y,
                          tip.length = 0.02,
                          width = 1,
                          color = "black"){
  x_range <- layer_scales(gg)$x$get_limits()
  if(!is.numeric(x_range)){
    x_range <- ggplot_build(gg)$layout$panel_scales_x[[1]]$range_c$range
  }
  x_scale <- diff(x_range)
  gg <- gg +
    geom_segment(x = x,
               y = y[1],
               xend = x,
               yend = y[2],
               size = 0.1*width,
               color = color) +
    geom_segment(x = x,
               y = y[1],
               xend = x - tip.length*x_scale,
               yend = y[1],
               size = 0.1*width,
               color = color) +
    geom_segment(x = x,
               y = y[2],
               xend = x - tip.length*x_scale,
               yend = y[2],
               size = 0.1*width,
               color = color)
  return(gg)
}

geom_pbracket <- function(gg,
                          x,
                          y,
                          line.tip = 0.02,
                          line.width = 1,
                          label = NULL,
                          text.size = 4,
                          text.hjust = 1,
                          text.vjust = 0.5,
                          parse = FALSE,
                          color = "black"){

  x_range <- layer_scales(gg)$x$get_limits()
  if(!is.numeric(x_range)){
    x_range <- ggplot_build(gg)$layout$panel_scales_x[[1]]$range_c$range
  }
  nudge_x <- diff(x_range)*0.01
  
  gg <- geom_vbracket(gg,x,y,line.tip,line.width,color) +
    
    annotate(geom = "text",
             x = x + nudge_x*text.hjust,
             y = y[1]  + text.vjust*diff(y),
             label = label,
             size = text.size,
             color = color,
             hjust = ifelse(text.hjust==1,0,1),
             parse = parse)
  return(gg)
}


geom_vbracket_1 <- function(gg,
                          x,
                          y,
                          xend,
                          yend,
                          type = 4,
                          tip.length = 0.02,
                          width = 1,
                          color = "black"){
  #  y_range <- ggplot_build(gg)$layout$panel_scales_y[[1]]$range$range
  #  y_range <- layer_scales(gg)$y$range$range
  y_range <- layer_scales(gg)$y$get_limits()
  y_diff <- diff(y_range)
  y_diff_expand <- y_diff*1.05
  y_limits <- y_range + c(- 0.5*(y_diff_expand - y_diff),
                          0.5*(y_diff_expand - y_diff))
  x_range <- layer_scales(gg)$x$get_limits()
  if(!is.numeric(x_range)){
    x_range <- ggplot_build(gg)$layout$panel_scales_x[[1]]$range_c$range
  }
  x1 <- (x - x_range[1])/diff(x_range)
  x2 <- (xend - x_range[1])/diff(x_range)
  y1 <- (y - y_limits[1])/y_diff_expand
  y2 <- (yend - y_limits[1])/y_diff_expand
  
  b1 <- bracketsGrob(x1, y1, x2, y2,
                     type = type,
                     h = tip.length,
                     lwd = width,
                     col = color)
  return(b1)
}


## -------------------------------------------------------------------------------
testy <- function(fit_emm){
  exp1c_m1_emm_dt <- fit_emm %>%
    summary()
  b <- exp1c_m1_emm_dt[2, "emmean"]
  gg <- ggplot(data = exp1c_m1_emm_dt,
         aes(x = hac,
             y = emmean)) +
    geom_point() +
    geom_hline(yintercept = b) +
    geom_hline(yintercept = exp1c_m1_emm_dt[1, "emmean"]) +
    geom_segment(x = 1,
                 y = exp1c_m1_emm_dt[1, "emmean"],
                 xend = 2,
                 yend = exp1c_m1_emm_dt[4, "emmean"]) +
    annotate(geom = "text",
             x = 1.5,
             y = 30,
             label = "test") +
    theme_pubr()
  return(gg)
}


## ----output-as-R-file-----------------------------------------------------------
# highlight and run to put update into R folder
# knitr::purl("ggplot_the_model.Rmd")