## you may have to install some of these
library(aqp)
library(e1071)
library(tactile)
library(rms)
library(lattice)
library(visreg)
library(ormPlot)


# get example data
tf <- tempfile()
download.file('https://github.com/ncss-tech/stats_for_soil_survey/raw/master/data/CA792-MAST-STR-modeling.rds', destfile = tf)

# load from .RDS
x <- readRDS(tf)

# check
str(x)


# note that these data contain training data AND predictions


# investigate source data
bwplot(STR ~ seki_elev, data = x)

# fit proportional-odd logistic regression model
# suitable for ordinal (categories with order) data

# may have to subset extraneous columns
# adjust as needed
vars <- c('peiid', 'STR', 'seki_elev', 'weight')
x.sub <- x[, vars]

# annoying quirk of rms package functions
dd <- datadist(x.sub)
options(datadist = "dd")

# looks OK
orm.fit <- orm(STR ~ seki_elev, data = x.sub, x = TRUE, y = TRUE)
orm.fit


## useful starting point, some times confidence envelope is strange 
# output from ormplot provided plot-method
# https://cran.r-project.org/web/packages/ormPlot/vignettes/ormPlot.html
plot(orm.fit, 'seki_elev')


## investigate probability vs. elev for each STR
## note rms-specific strategy

# generate Pr(STR)
pp <- as.data.frame(predict(orm.fit, type='fitted.ind'))

# fix names
names(pp) <- paste0('Pr_', gsub('STR=', '', names(pp), fixed = TRUE))

# eval most likely STR
pp$pred_STR <- levels(x$STR)[apply(pp, 1, which.max)]

# init levels
pp$pred_STR <- factor(pp$pred_STR, levels = levels(x$STR))

# copy over to source data, row-order is the same
g <- cbind(x.sub, pp)

# reshape a subset of columns
m <- reshape2::melt(g, id.var=c('seki_elev', 'STR'), measure.vars=c("Pr_cryic", "Pr_frigid", "Pr_mesic", "Pr_thermic"))

xyplot(value ~ seki_elev, groups=variable, data=m, auto.key=list(columns=4, lines=TRUE, points=FALSE), panel=panel.plsmo, method='supsmu', label.curves=FALSE, datadensity=TRUE, trim=0, type='l', par.settings=tactile.theme())

anova(orm.fit)


## evaluation of predictions, contained within 'x'

# Shannon entropy for STR predictions
# by row
x$H <- apply(
  x[, c('Pr_cryic', 'Pr_frigid', 'Pr_mesic', 'Pr_thermic')], 
  MARGIN = 1, 
  FUN = shannonEntropy
)


# compare with equal-class threshold (log_2) = 2
# shannonEntropy(rep(1, times = 4) / 4)

bwplot(
  STR ~ H, 
  data = x, 
  xlim = c(-0.1, 2.1),
  par.settings = tactile.theme(), 
  xlab = 'Shannon Entropy (base 2)', 
  panel = function(...) {
    panel.abline(v = 2, lty =2)
    panel.bwplot(...)
  })


head(x)


## confusion matrix
(tab <- table(observed = x$STR, predicted = x$pred_STR))

## PCC / Kappa
classAgreement(tab, match.names = TRUE)


## Tau

# equal prior probabilities / proportions
tauW(tab)$tau

# specified prior probabilities / proportions
priors <- apply(tab, 2, sum)/sum(tab)
tauW(tab, P = priors)$tau


# weight matrix...
weights <- matrix(data=c(1.00,0.05,0.05,0.15,0.05,0.15,
                         0.05,1.00,0.05,0.05,0.05,0.35,
                         0.05,0.05,1.00,0.20,0.15,0.15,
                         0.15,0.05,0.25,1.00,0.10,0.25,
                         0.05,0.10,0.15,0.10,1.00,0.15,
                         0.20,0.30,0.10,0.25,0.20,1.00),
                  nrow=6, byrow=TRUE)


## Brier Scores

# prepare input data
B <- data.frame(
  actual = x$STR,
  x[, c('Pr_cryic', 'Pr_frigid', 'Pr_mesic', 'Pr_thermic')]
)

names(B)[2:5] <- levels(x$STR)

# compute Brier scores, lower is better
brierScore(B, classLabels = levels(x$STR), actual = 'actual')


## raster prediction hints


# raster stack functions

# re-scale probabilities to 0-100 for data storage reduction
pred.fun <- function(model, data) {
  v <- predict(model, data, type='fitted.ind')
  return(round(v * 100))
}


# most likely layer, by pixel
# return index to ML layer
most.likely.layer <- function(i, ...) {
  
  # this will return an empty vector when all NA
  i.max <- which.max(i)
  
  # pad with NA
  if(length(i.max) < 1)
    i.max <- NA
  
  return(i.max)
}

## TODO: NODATA encoding problem NA / 0 ?
# note: probabilities have been rescaled to 0-100
entropy <- function(i, na.rm=TRUE) {
  # rescale to probabilities
  i <- i / 100
  # add a very small fudge factor to remove 0
  i <- i + 1e-15
  # shannon entropy
  return(-sum(i * log(i), na.rm=na.rm))
}


## predictions -- save for later in the class, project, etc. 

# `rs` is a raster stack containing rasters with the same names as predictors using in model fit

# # predicts probabilities for the 4 possible classes fit by model
# seki.str <- predict(rs, orm.fit, index=1:4, fun=pred.fun, progress='text', filename='results.tif', datatype='INT1U', overwrite=TRUE)
# 
# 
# 
# ## most-likely STR
# seki.str.ml <- stackApply(seki.str, indices=rep(1, times=nlayers(seki.str)), fun = most.likely.layer)
# 
# # convert to factor
# seki.str.ml <- ratify(seki.str.ml)
# 
# # init RAT
# rat <- levels(seki.str.ml)[[1]]
# rat$STR <- c('cryic', 'frigid', 'mesic', 'thermic')
# levels(seki.str.ml) <- rat
#