# BASIC STANDARDISED CONTROL CHART
#   Plot 24 random normal data points connected with lines.
#   Add three horizontal lines at -3, 0, 3.
#   Make points outside control limits stand out.
#   Re-run the code many times.
#   How often do you get data points outside the limits?
#   What affects the chance of (false) signals?
#   How big a shift in data is necessary to make almost every chart signal?
#   How can you increase the chart's sensitivity to (minor) shifts in data?
#
# ADDING RUNS ANALYSIS
#   Add check for unusually long runs or unusually few crossings.
#   How often do you get false signals?
#   How big a shift is necessary to make almost every chart signal?
#
# EMPIRICAL CENTRE LINE AND LIMITS (I CHART, RUN CHART)
#   What parts of the script do you need to change to add empirical limits?
#   Make an I chart.
#   Make a median based run chart.
#
# ONE FUNCTION TO RULE THEM ALL
#   Wrap everything in a function, spc().
#   Add optional x argument.
#   Add C, U, and P charts.
#   Fix LCL < 0 for P, C, and U charts.
#   Fix UCL > 1 for P charts.
#
# WRAPPING UP
#   Allow data to be in a data frame.
#   Add a multiplication argument (P and U charts).
#   How do you add title and labels to the plot?
#   Add a target argument.
#   How do you decide when the target has been reached?
#
#   The final function interface should look something like this:
#   spc(
#     x,                                       # x axis values
#     y        = NULL,                         # measure or numerator
#     n        = NULL,                         # denominator for u and p charts
#     data     = NULL,                         # data frame with x, y, and n
#     chart    = c('run', 'i', 'c', 'u', 'p'), # type of SPC chart
#     target   = NA,                           # target value to be plotted
#     multiply = 1,                            # multiplicator value
#     ...
#   )
#
#  WHAT NEXT
#   What about error handling - what can go wrong?
#   What else do you want from an SPC function?

spc <- function(x,
                y        = NULL,
                n        = NULL,
                data     = NULL,
                chart    = c('run', 'i', 'c', 'u', 'p'),
                target   = NA,
                multiply = 1,
                ...) {
  x <- eval(substitute(x), data, parent.frame())
  y <- eval(substitute(y), data, parent.frame())
  n <- eval(substitute(n), data, parent.frame())

  if (is.null(y)) {
    y <- x
    x <- seq_along(y)
  }

  n.obs <- length(y)

  chart <- match.arg(chart)

  if (chart == 'run') {               # Run chart
    m <- median(y)
    s <- NA
  } else if (chart == 'i') {          # I chart
    m <- mean(y)
    s <- mean(abs(diff(y))) / 1.128
  } else if (chart == 'c') {          # C chart
    m <- mean(y)
    s <- sqrt(m)
  } else if (chart == 'u') {          # U chart
    m <- sum(y) / sum(n)
    s <- sqrt(m / n)
    y <- y / n
  } else if (chart == 'p') {          # P chart
    m <- sum(y) / sum(n)
    s <- sqrt(m * (1 - m) / n)
    y <- y / n
  }

  cl  <- rep(m, n.obs) * multiply
  lcl <- cl - 3 * s * multiply
  ucl <- cl + 3 * s * multiply
  y   <- y * multiply

  if (chart %in% c('c', 'u', 'p')) {
    lcl[lcl < 0] <- 0
  }

  if (chart == 'p') {
    ucl[ucl > 1 * multiply] <- 1 * multiply
  }

  # sigma signal
  signals                 <- y < lcl | y > ucl
  signals[is.na(signals)] <- F

  # runs analysis
  runs            <- sign(y - cl)
  runs            <- runs[runs != 0]
  run.lengths     <- rle(runs)$lengths
  n.useful        <- sum(run.lengths)
  longest.run     <- max(run.lengths)
  n.crossings     <- length(run.lengths) - 1
  longest.run.max <- round(log2(n.useful)) + 3
  n.crossings.min <- qbinom(0.05, n.useful - 1, 0.5)
  runs.signal     <- longest.run > longest.run.max |
                     n.crossings < n.crossings.min

  # plot
  plot(x, y,
       type = 'l',
       bty  = 'l',
       las  = 1,
       ylim = range(lcl, ucl, target, y, na.rm = T),
       ...)
  lines(x, lcl)
  lines(x, cl,
        col = runs.signal + 1,
        lty = runs.signal + 1)
  lines(x, ucl)
  lines(x,
        rep(target, length(y)),
        col = 'darkgreen',
        lty = 3)
  points(x, y,
         pch = 19,
         col = signals + 1)
}

# Examples
y <- rnorm(24)
spc(y)
spc(y, chart = 'i')
spc(y, target = 3)

x <- Sys.Date() - rev(seq_along(y))
spc(x, y)

# Data frame examples
d <- readRDS(url('https://github.com/anhoej/r4spc/raw/main/data/r4spc_data.rds'))

spc(x, measure,
    data  = d,
    chart = 'i')
spc(month, events,
    data  = d,
    chart = 'c')
spc(month, events, days,
    data     = d,
    chart    = 'u',
    multiply = 100)
spc(month, cases, patients,
    data     = d,
    chart    = 'p',
    multiply = 100,
    main     = 'P chart',
    xlab     = 'Month',
    ylab     = '%',
    las      = 1)