## ----------------------------------------------------------------------------
## Name: 05-glm.R
## Description: Script for Chapter 5 of "Notes for Predictive Modeling"
## Link: https://bookdown.org/egarpor/PM-UC3M/
## License: https://creativecommons.org/licenses/by-nc-nd/4.0/
## Author: Eduardo García-Portugués
## Version: 5.10.0
## ----------------------------------------------------------------------------

## ----challenger-load, eval = FALSE------------------------------------------------------
## # Read data
## challenger <- read.table(file = "challenger.txt", header = TRUE, sep = "\t")

## ----challengerfigs---------------------------------------------------------------------
# Figures 5.3a and 5.3b
car::scatterplot(nfails.field ~ temp, smooth = FALSE, boxplots = FALSE,
                 data = challenger, subset = nfails.field > 0)
car::scatterplot(nfails.field ~ temp, smooth = FALSE, boxplots = FALSE,
                 data = challenger)

## ----nasadiag---------------------------------------------------------------------------
# Fit linear model, and run linearity and normality diagnostics
mod <- lm(nfails.field ~ temp, data = challenger)
plot(mod, 1)
plot(mod, 2)

## ----logcurve---------------------------------------------------------------------------
# Logistic regression: computed with glm and family = "binomial"
nasa <- glm(fail.field ~ temp, family = "binomial", data = challenger)

# Plot data
plot(challenger$temp, challenger$fail.field, xlim = c(-1, 30),
     xlab = "Temperature", ylab = "Incident probability")

# Draw the fitted logistic curve
x <- seq(-1, 30, l = 200)
y <- exp(-(nasa$coefficients[1] + nasa$coefficients[2] * x))
y <- 1 / (1 + y)
lines(x, y, col = 2, lwd = 2)

# The Challenger
points(-0.6, 1, pch = 16)
text(-0.6, 1, labels = "Challenger", pos = 4)

## ----logsummary-------------------------------------------------------------------------
# Exponentiated coefficients ("odds ratios")
exp(coef(nasa))

## ----modform-1--------------------------------------------------------------------------
# Data
set.seed(34567)
x <- rnorm(50, sd = 1.5)
y1 <- -0.5 + 3 * x
y2 <- 0.5 - 2 * x
y3 <- -2 + 5 * x
y1 <- rbinom(50, size = 1, prob = 1 / (1 + exp(-y1)))
y2 <- rbinom(50, size = 1, prob = 1 / (1 + exp(-y2)))
y3 <- rbinom(50, size = 1, prob = 1 / (1 + exp(-y3)))

# Data
dataMle <- data.frame(x = x, y1 = y1, y2 = y2, y3 = y3)

## ----modform-2, fig.cap = '(ref:modform-2-title)'---------------------------------------
# Call glm
mod <- glm(y1 ~ x, family = "binomial", data = dataMle)
mod$coefficients

# -loglik(beta)
minusLogLik <- function(beta) {
  p <- 1 / (1 + exp(-(beta[1] + beta[2] * x)))
  -sum(y1 * log(p) + (1 - y1) * log(1 - p))
}

# Optimization using as starting values beta = c(0, 0)
opt <- optim(par = c(0, 0), fn = minusLogLik)
opt

# Visualization of the log-likelihood surface
beta0 <- seq(-3, 3, l = 50)
beta1 <- seq(-2, 8, l = 50)
L <- matrix(nrow = length(beta0), ncol = length(beta1))
for (i in seq_along(beta0)) {
  for (j in seq_along(beta1)) {
    L[i, j] <- minusLogLik(c(beta0[i], beta1[j]))
  }
}
filled.contour(beta0, beta1, -L, color.palette = viridis::viridis,
               xlab = expression(beta[0]), ylab = expression(beta[1]),
               plot.axes = {
                 axis(1); axis(2)
                 points(mod$coefficients[1], mod$coefficients[2],
                        col = 2, pch = 16)
                 points(opt$par[1], opt$par[2], col = 4)
               })
# The plot.axes argument is a hack to add graphical information within the
# coordinates of the main panel (behind filled.contour there is a layout()...)
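## ----extra-modform, eval = FALSE---------------------------------------------------------
## # Added sketch (not in the original script): cross-check that optim()'s
## # maximizer agrees with glm()'s coefficients -- both maximize the same
## # binomial log-likelihood, so they should essentially coincide
## rbind("glm" = mod$coefficients, "optim" = opt$par)
## # Small discrepancies come from Nelder--Mead's default tolerance; a
## # gradient-based method tightens them
## optim(par = c(0, 0), fn = minusLogLik, method = "BFGS")$par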
## ----pois-load, eval = FALSE------------------------------------------------------------
## # Read data
## species <- read.table("species.txt", header = TRUE)
## species$pH <- as.factor(species$pH)

## ----poiscurve--------------------------------------------------------------------------
# Plot data
plot(Species ~ Biomass, data = species, col = as.numeric(pH))
legend("topright", legend = c("High pH", "Medium pH", "Low pH"),
       col = c(1, 3, 2), lwd = 2) # colors according to as.numeric(pH)

# Fit Poisson regression
species1 <- glm(Species ~ ., data = species, family = poisson)
summary(species1) # Took 4 iterations of the IRLS

# Interpretation of the coefficients:
exp(species1$coefficients)
# - 46.9433 is the average number of species when Biomass = 0 and the pH is
#   high
# - For each one-unit increment in Biomass, the number of species decreases
#   by a factor of 0.88 (a 12% reduction)
# - If pH decreases to med (low), then the number of species decreases by a
#   factor of 0.6407 (0.3209)

# With interactions
species2 <- glm(Species ~ Biomass * pH, data = species, family = poisson)
summary(species2)
exp(species2$coefficients)
# - If pH decreases to med (low), then the effect of Biomass on the number
#   of species decreases by a factor of 0.9686 (0.8564). The higher the pH,
#   the stronger the effect of Biomass on Species

# Draw fits
plot(Species ~ Biomass, data = species, col = as.numeric(pH))
legend("topright", legend = c("High pH", "Medium pH", "Low pH"),
       col = c(1, 3, 2), lwd = 2) # colors according to as.numeric(pH)

# Without interactions
bio <- seq(0, 10, l = 100)
z <- species1$coefficients[1] + species1$coefficients[4] * bio
lines(bio, exp(z), col = 1)
lines(bio, exp(species1$coefficients[2] + z), col = 2)
lines(bio, exp(species1$coefficients[3] + z), col = 3)

# The model with interactions seems to provide a significant improvement
bio <- seq(0, 10, l = 100)
z <- species2$coefficients[1] + species2$coefficients[2] * bio
lines(bio, exp(z), col = 1, lty = 2)
lines(bio, exp(species2$coefficients[3] + species2$coefficients[5] * bio + z),
      col = 2, lty = 2)
lines(bio, exp(species2$coefficients[4] + species2$coefficients[6] * bio + z),
      col = 3, lty = 2)

## ----bino-load, eval = FALSE------------------------------------------------------------
## # Read data
## heart <- read.table("heart.txt", header = TRUE)
##
## # Sizes for each observation (Ni's)
## heart$Ni <- heart$ok + heart$ha
##
## # Proportions of patients with heart attacks
## heart$prop <- heart$ha / (heart$ha + heart$ok)

## ----binocurve--------------------------------------------------------------------------
# Plot of proportions versus ck: twelve observations, each requiring
# Ni patients to determine the proportion
plot(heart$ck, heart$prop, xlab = "Creatinine kinase level",
     ylab = "Proportion of heart attacks")

# Fit binomial regression: recall the cbind() to pass the number of successes
# and failures
heart1 <- glm(cbind(ha, ok) ~ ck, family = binomial, data = heart)
summary(heart1)

# Alternatively: put proportions as responses, but then it is required to
# inform about the binomial size of each observation
heart1 <- glm(prop ~ ck, family = binomial, data = heart, weights = Ni)
summary(heart1)

# Add fitted line
ck <- 0:500
newdata <- data.frame(ck = ck)
logistic <- function(eta) 1 / (1 + exp(-eta))
lines(ck, logistic(cbind(1, ck) %*% heart1$coefficients))
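## ----extra-binocurve, eval = FALSE-------------------------------------------------------
## # Added sketch (not in the original script): the manual matrix product used
## # to draw the fitted line is equivalent to predict() with type = "response"
## all.equal(drop(logistic(cbind(1, ck) %*% heart1$coefficients)),
##           unname(predict(heart1, newdata = newdata, type = "response")))
## # Expected TRUE (up to numerical tolerance)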
# It seems that a polynomial fit could better capture the "wiggly" pattern
# of the data
heart2 <- glm(prop ~ poly(ck, 2, raw = TRUE), family = binomial,
              data = heart, weights = Ni)
heart3 <- glm(prop ~ poly(ck, 3, raw = TRUE), family = binomial,
              data = heart, weights = Ni)
heart4 <- glm(prop ~ poly(ck, 4, raw = TRUE), family = binomial,
              data = heart, weights = Ni)

# Best fit given by heart3
BIC(heart1, heart2, heart3, heart4)
summary(heart3)

# All fits together
lines(ck, logistic(cbind(1, poly(ck, 2, raw = TRUE)) %*% heart2$coefficients),
      col = 2)
lines(ck, logistic(cbind(1, poly(ck, 3, raw = TRUE)) %*% heart3$coefficients),
      col = 3)
lines(ck, logistic(cbind(1, poly(ck, 4, raw = TRUE)) %*% heart4$coefficients),
      col = 4)
legend("bottomright", legend = c("Linear", "Quadratic", "Cubic", "Quartic"),
       col = 1:4, lwd = 2)

## ----nasa-case-1, message = FALSE-------------------------------------------------------
# Summary of the model
summary(nasa)

# Confidence intervals at 95%
confint.default(nasa)

# Confidence intervals at other levels
confint.default(nasa, level = 0.90)

# Confidence intervals for the factors affecting the odds
exp(confint.default(nasa))

## ----testcoef-1-------------------------------------------------------------------------
# Significances with asymptotic approximation for the standard errors
summary(nasa)

# CIs with asymptotic approximation -- coherent with summary
confint.default(nasa, level = 0.95)
confint.default(nasa, level = 0.99)

# CIs with profile likelihood -- incoherent with summary
confint(nasa, level = 0.95) # intercept still significant
confint(nasa, level = 0.99) # temp still significant

## ----pred-case-1------------------------------------------------------------------------
predict(nasa, newdata = data.frame(temp = -0.6), type = "response")

## ----pred-case-2------------------------------------------------------------------------
# Function for computing the predictions and CIs for the conditional
# probability
predictCIsLogistic <- function(object, newdata, level = 0.95) {

  # Compute predictions in the log-odds
  pred <- predict(object = object, newdata = newdata, se.fit = TRUE)

  # CI in the log-odds
  za <- qnorm(p = (1 - level) / 2)
  lwr <- pred$fit + za * pred$se.fit
  upr <- pred$fit - za * pred$se.fit

  # Transform to probabilities
  fit <- 1 / (1 + exp(-pred$fit))
  lwr <- 1 / (1 + exp(-lwr))
  upr <- 1 / (1 + exp(-upr))

  # Return a matrix with column names "fit", "lwr" and "upr"
  result <- cbind(fit, lwr, upr)
  colnames(result) <- c("fit", "lwr", "upr")
  return(result)

}

## ----pred-case-3------------------------------------------------------------------------
# Data for which we want a prediction
newdata <- data.frame(temp = -0.6)

# Prediction of the conditional log-odds, the default
predict(nasa, newdata = newdata, type = "link")

# Prediction of the conditional probability
predict(nasa, newdata = newdata, type = "response")

# Simple call
predictCIsLogistic(nasa, newdata = newdata)
# The CI is large because there is no data around temp = -0.6 and
# that makes the prediction more variable (and also because we only
# have 23 observations)

## ----pred-case-4------------------------------------------------------------------------
# Estimated probability for launching at 53 degrees Fahrenheit (11.67 Celsius)
predictCIsLogistic(nasa, newdata = data.frame(temp = 11.67))

## ----deviance-1-------------------------------------------------------------------------
# Summary of model
nasa <- glm(fail.field ~ temp, family = "binomial", data = challenger)
summaryLog <- summary(nasa)
summaryLog
# 'Residual deviance' is the deviance; 'Null deviance' is the null deviance

# Null model (only intercept)
null <- glm(fail.field ~ 1, family = "binomial", data = challenger)
summaryNull <- summary(null)
summaryNull
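## ----extra-deviance-0, eval = FALSE------------------------------------------------------
## # Added sketch (not in the original script): for ungrouped binary responses
## # the saturated model has zero log-likelihood, so the deviances shown in the
## # summaries can be recovered directly from logLik()
## -2 * as.numeric(logLik(nasa)) # Matches summaryLog$deviance
## -2 * as.numeric(logLik(null)) # Matches summaryNull$deviance (null deviance)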
# Compute the R^2 with a function -- useful for repetitive computations
r2glm <- function(model) {

  summaryLog <- summary(model)
  1 - summaryLog$deviance / summaryLog$null.deviance

}

# R^2
r2glm(nasa)
r2glm(null)

## ----deviance-2-------------------------------------------------------------------------
# Polynomial predictors
nasa0 <- glm(fail.field ~ 1, family = "binomial", data = challenger)
nasa1 <- glm(fail.field ~ temp, family = "binomial", data = challenger)
nasa2 <- glm(fail.field ~ poly(temp, degree = 2), family = "binomial",
             data = challenger)
nasa3 <- glm(fail.field ~ poly(temp, degree = 3), family = "binomial",
             data = challenger)

# Plot fits
temp <- seq(-1, 35, l = 200)
tt <- data.frame(temp = temp)
plot(fail.field ~ temp, data = challenger, pch = 16, xlim = c(-1, 30),
     xlab = "Temperature", ylab = "Incident probability")
lines(temp, predict(nasa0, newdata = tt, type = "response"), col = 1)
lines(temp, predict(nasa1, newdata = tt, type = "response"), col = 2)
lines(temp, predict(nasa2, newdata = tt, type = "response"), col = 3)
lines(temp, predict(nasa3, newdata = tt, type = "response"), col = 4)
legend("bottomleft", legend = c("Null model", "Linear", "Quadratic", "Cubic"),
       lwd = 2, col = 1:4)

# R^2's
r2glm(nasa0)
r2glm(nasa1)
r2glm(nasa2)
r2glm(nasa3)

# Chisq and F tests -- same results since phi is known
anova(nasa1, test = "Chisq")
anova(nasa1, test = "F")

# Incremental comparisons of nested models
anova(nasa1, nasa2, nasa3, test = "Chisq")
# Quadratic effects are not significant

# Cubic vs. linear
anova(nasa1, nasa3, test = "Chisq")

# Example in Poisson regression
species1 <- glm(Species ~ ., data = species, family = poisson)
species2 <- glm(Species ~ Biomass * pH, data = species, family = poisson)

# Comparison
anova(species1, species2, test = "Chisq")
r2glm(species1)
r2glm(species2)

## ----glmmodsel-1------------------------------------------------------------------------
# Models
nasa1 <- glm(fail.field ~ temp, family = "binomial", data = challenger)
nasa2 <- glm(fail.field ~ temp + pres.field, family = "binomial",
             data = challenger)

# Summaries
summary(nasa1)
summary(nasa2)

# AICs
AIC(nasa1) # Better
AIC(nasa2)

## ----glmmodsel-2------------------------------------------------------------------------
# Boston dataset
data(Boston, package = "MASS")

# Model whether a suburb has a median house value larger than $25,000
mod <- glm(I(medv > 25) ~ ., data = Boston, family = "binomial")
summary(mod)
r2glm(mod)

# With BIC -- ends up with only the significant variables and a similar R^2
modBIC <- MASS::stepAIC(mod, trace = 0, k = log(nrow(Boston)))
summary(modBIC)
r2glm(modBIC)

## ----glmmodsel-3------------------------------------------------------------------------
# Fitted probabilities for Y = 1
nasa$fitted.values

# Classified Y's
yHat <- nasa$fitted.values > 0.5

# Hit matrix:
# - 16 correctly classified as 0
# - 4 correctly classified as 1
# - 3 incorrectly classified as 0
tab <- table(challenger$fail.field, yHat)
tab

# Hit ratio (ratio of correct classification)
sum(diag(tab)) / sum(tab)
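## ----extra-hitratio, eval = FALSE--------------------------------------------------------
## # Added sketch (not in the original script): the 0.5 cutoff is a convention;
## # tracing the hit ratio along a grid of cutoffs shows how sensitive the
## # classification is to that choice
## cutoffs <- seq(0.1, 0.9, by = 0.1)
## sapply(cutoffs, function(c) {
##   yHatC <- factor(nasa$fitted.values > c, levels = c(FALSE, TRUE))
##   tabC <- table(challenger$fail.field, yHatC)
##   sum(diag(tabC)) / sum(tabC)
## })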
## ----moddiag-1--------------------------------------------------------------------------
# Create predictors with multicollinearity: x4 depends on the rest
set.seed(45678)
x1 <- rnorm(100)
x2 <- 0.5 * x1 + rnorm(100)
x3 <- 0.5 * x2 + rnorm(100)
x4 <- -x1 + x2 + rnorm(100, sd = 0.25)

# Response
z <- 1 + 0.5 * x1 + 2 * x2 - 3 * x3 - x4
y <- rbinom(n = 100, size = 1, prob = 1 / (1 + exp(-z)))
data <- data.frame(x1 = x1, x2 = x2, x3 = x3, x4 = x4, y = y)

# Correlations -- none seems suspicious
cor(data)

# Abnormal generalized variance inflation factors: largest for x4, we remove it
modMultiCo <- glm(y ~ x1 + x2 + x3 + x4, family = "binomial")
car::vif(modMultiCo)

# Without x4
modClean <- glm(y ~ x1 + x2 + x3, family = "binomial")

# Comparison
summary(modMultiCo)
summary(modClean)

# Generalized variance inflation factors are normal
car::vif(modClean)

## ----glmshrinkage-----------------------------------------------------------------------
# Load data
data(Hitters, package = "ISLR")

# Include only predictors related with the 1986 season and discard NA's
Hitters <- subset(Hitters, select = c(League, AtBat, Hits, HmRun, Runs, RBI,
                                      Walks, Division, PutOuts, Assists,
                                      Errors))
Hitters <- na.omit(Hitters)

# Response and predictors
y <- Hitters$League
x <- model.matrix(League ~ ., data = Hitters)[, -1]

## ----glmshr-1---------------------------------------------------------------------------
# Ridge and lasso regressions
library(glmnet)
ridgeMod <- glmnet(x = x, y = y, alpha = 0, family = "binomial")
lassoMod <- glmnet(x = x, y = y, alpha = 1, family = "binomial")

# Solution paths versus lambda
plot(ridgeMod, label = TRUE, xvar = "lambda")
plot(lassoMod, label = TRUE, xvar = "lambda")

# Versus the percentage of deviance explained
plot(ridgeMod, label = TRUE, xvar = "dev")
plot(lassoMod, label = TRUE, xvar = "dev")
# The percentage of deviance explained only goes up to 0.05. There are no
# clear patterns indicating player differences between the two leagues

# Let's select the predictors to be included with a 10-fold cross-validation
set.seed(12345)
kcvLasso <- cv.glmnet(x = x, y = y, alpha = 1, nfolds = 10,
                      family = "binomial")
plot(kcvLasso)

# The lambda that minimizes the CV error and the "one standard error rule"
# lambda
kcvLasso$lambda.min
kcvLasso$lambda.1se

# Leave-one-out cross-validation -- similar result
ncvLasso <- cv.glmnet(x = x, y = y, alpha = 1, nfolds = nrow(Hitters),
                      family = "binomial")
plot(ncvLasso)
ncvLasso$lambda.min
ncvLasso$lambda.1se

# Model selected
predict(ncvLasso, type = "coefficients", s = ncvLasso$lambda.1se)

## ----glmshr-2, fig.asp = 1/2------------------------------------------------------------
# Analyze the selected model
fit <- glm(League ~ HmRun, data = Hitters, family = "binomial")
summary(fit)
# HmRun is significant -- but it may be spurious due to the model selection
# procedure (see Appendix A.5)

# Let's split the dataset in two, do model selection in one part and then
# inference on the selected model in the other, to get an idea of the real
# significance of HmRun
set.seed(12345678)
train <- sample(c(FALSE, TRUE), size = nrow(Hitters), replace = TRUE)

# Model selection in the training part
ncvLasso <- cv.glmnet(x = x[train, ], y = y[train], alpha = 1,
                      nfolds = sum(train), family = "binomial")
predict(ncvLasso, type = "coefficients", s = ncvLasso$lambda.1se)

# Inference in the testing part
summary(glm(League ~ HmRun, data = Hitters[!train, ], family = "binomial"))
# HmRun is now not significant...

# We can repeat the analysis for different partitions of the data and we will
# obtain weak significances. Therefore, we can conclude that this is a
# spurious finding and that HmRun is not significant as a single predictor

# Prediction (obviously not trustworthy, but for illustration)
pred <- predict(ncvLasso, newx = x[!train, ], type = "response",
                s = ncvLasso$lambda.1se)

# Hit matrix and hit ratio
H <- table(pred > 0.5, y[!train] == "A") # ("A" was the reference level)
H
sum(diag(H)) / sum(H) # Almost like tossing a coin!
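## ----extra-splits, eval = FALSE----------------------------------------------------------
## # Added sketch (not in the original script): repeating the split several
## # times, as suggested above, and collecting HmRun's p-value on the held-out
## # part illustrates the weakness of its significance
## set.seed(42) # Arbitrary seed for the illustration
## pvalues <- replicate(25, {
##   train <- sample(c(FALSE, TRUE), size = nrow(Hitters), replace = TRUE)
##   fit <- glm(League ~ HmRun, data = Hitters[!train, ], family = "binomial")
##   summary(fit)$coefficients["HmRun", 4] # Pr(>|z|)
## })
## summary(pvalues) # Mostly large p-values -- a weak, unstable effect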
## ----bigglm-0, eval = FALSE--------------------------------------------------------------
## # To install specific versions of packages
## install.packages("versions")
## library(versions)
##
## # Install specific package versions. It may take a while to do so, be patient
## install.versions(pkgs = c("bit", "ff", "ffbase"),
##                  versions = c("1.1-15.2", "2.2-14.2", "0.12.8"))
## # After bit's version 1.1-15.2, something is off in the integration with
## # ffbase; see the issue at https://github.com/edwindj/ffbase/issues/61
##
## # Alternatively, if the binaries for your OS are not available (e.g., for
## # Apple M1's processors), then you will need to compile the packages from
## # source... and cross your fingers!
## urls <- c(
##   "https://cran.r-project.org/src/contrib/Archive/bit/bit_1.1-15.2.tar.gz",
##   "https://cran.r-project.org/src/contrib/Archive/ff/ff_2.2-14.2.tar.gz",
##   "https://cran.r-project.org/src/contrib/Archive/ffbase/ffbase_0.12.8.tar.gz"
## )
## install.packages(pkgs = urls, repos = NULL, type = "source")

## ----bigglm-1, eval = FALSE--------------------------------------------------------------
## # Not really "big data", but for the sake of illustration
## set.seed(12345)
## n <- 1e6
## p <- 10
## beta <- seq(-1, 1, length.out = p)^5
## x1 <- matrix(rnorm(n * p), nrow = n, ncol = p)
## x1[, p] <- 2 * x1[, 1] + rnorm(n, sd = 0.1) # Add some dependence to predictors
## x1[, p - 1] <- 2 - x1[, 2] + rnorm(n, sd = 0.5)
## y1 <- rbinom(n, size = 1, prob = 1 / (1 + exp(-(1 + x1 %*% beta))))
## x2 <- matrix(rnorm(100 * p), nrow = 100, ncol = p)
## y2 <- rbinom(100, size = 1, prob = 1 / (1 + exp(-(1 + x2 %*% beta))))
## bigData1 <- data.frame("resp" = y1, "pred" = x1)
## bigData2 <- data.frame("resp" = y2, "pred" = x2)
##
## # Save files to disk to emulate the situation with big data
## write.csv(x = bigData1, file = "bigData1.csv", row.names = FALSE)
## write.csv(x = bigData2, file = "bigData2.csv", row.names = FALSE)
##
## # Read files using ff
## library(ffbase) # Imports ff
## bigData1ff <- read.table.ffdf(file = "bigData1.csv", header = TRUE, sep = ",")
## bigData2ff <- read.table.ffdf(file = "bigData2.csv", header = TRUE, sep = ",")
##
## # Recall: bigData1.csv is not copied into RAM
## print(object.size(bigData1), units = "MB")
## print(object.size(bigData1ff), units = "KB")
##
## # Logistic regression
## # Same comments for the formula framework -- this is the hack for automatic
## # inclusion of all the predictors
## library(biglm)
## f <- formula(paste("resp ~", paste(names(bigData1)[-1], collapse = " + ")))
## bigglmMod <- bigglm.ffdf(formula = f, data = bigData1ff, family = binomial())
##
## # glm's call
## glmMod <- glm(formula = resp ~ ., data = bigData1, family = binomial())
##
## # Compare sizes
## print(object.size(bigglmMod), units = "KB")
## print(object.size(glmMod), units = "MB")
##
## # Summaries
## s1 <- summary(bigglmMod)
## s2 <- summary(glmMod)
## s1
## s2
##
## # Further information
## s1$mat # Coefficients and their inferences
## s1$rsq # R^2
## s1$nullrss # Null deviance
##
## # Extract coefficients
## coef(bigglmMod)
##
## # Prediction works as usual
## predict(bigglmMod, newdata = bigData2[1:5, ], type = "response")
## # predict(bigglmMod, newdata = bigData2[1:5, -1]) # Error
##
## # Update the model with more data
## update(bigglmMod, moredata = bigData2)
##
## # AIC and BIC
## AIC(bigglmMod, k = 2)
## AIC(bigglmMod, k = log(n))
##
## # Delete the files in disk
## file.remove(c("bigData1.csv", "bigData2.csv"))

# Note that this is also a perfectly valid approach for linear models; we just
# need to specify family = gaussian() in the call to bigglm.ffdf
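## ----extra-biglm, eval = FALSE-----------------------------------------------------------
## # Added sketch (not in the original script): the same pipeline for a linear
## # model, simply swapping the family in bigglm.ffdf
## bigLmMod <- bigglm.ffdf(formula = f, data = bigData1ff, family = gaussian())
## coef(bigLmMod)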
## ----bigglm-2, eval = FALSE--------------------------------------------------------------
## # Model selection adapted to big data generalized linear models
## reg <- leaps::regsubsets(bigglmMod, nvmax = p + 1, method = "exhaustive")
## # This takes the QR decomposition, which encodes the linear model associated
## # with the last iteration of the IRLS algorithm. However, the reported BICs
## # are *not* the true BICs of the generalized linear models, but a sufficient
## # approximation to obtain a list of candidate models in a fast way
##
## # Get the model with the lowest BIC
## plot(reg)
## subs <- summary(reg)
## subs$which
## subs$bic
## subs$which[which.min(subs$bic), ]
##
## # Let's compute the true BICs for the p models. This implies fitting p
## # bigglm's
## bestModels <- list()
## for (i in 1:nrow(subs$which)) {
##   f <- formula(paste("resp ~", paste(names(which(subs$which[i, -1])),
##                                      collapse = " + ")))
##   bestModels[[i]] <- bigglm.ffdf(formula = f, data = bigData1ff,
##                                  family = binomial(), maxit = 20)
##   # Did not converge with the default iteration limit, maxit = 8
## }
##
## # The approximate BICs and the true BICs are very similar (in this example)
## exactBICs <- sapply(bestModels, AIC, k = log(n))
## plot(subs$bic, exactBICs, type = "o", xlab = "Approximate", ylab = "Exact")
##
## # Pearson correlation
## cor(subs$bic, exactBICs, method = "pearson")
##
## # Order correlation
## cor(subs$bic, exactBICs, method = "spearman")
##
## # Both give the same model selection and same order
## subs$which[which.min(subs$bic), ] # Approximate
## subs$which[which.min(exactBICs), ] # Exact
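## ----extra-bigglm-3, eval = FALSE--------------------------------------------------------
## # Added sketch (not in the original script): since the data were simulated,
## # the coefficients of the BIC-selected model can be compared against the
## # true beta -- its near-zero entries are the ones expected to be dropped
## coef(bestModels[[which.min(exactBICs)]])
## round(beta, 4) # True coefficients used in the simulation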