## ----goofus--------------------------------------------------
# Copy-and-paste approach: one hand-written statement per column of `swiss`.
# Repeating code like this makes it easy to drop a `$` or a comma, to skip or
# repeat a column, or to reuse/misspell a variable name -- compare with the
# loop-based version in the next chunk.
mean1 <- mean(swiss$Fertility)
mean2 <- mean(swiss$Agriculture)
mean3 <- mean(swiss$Examination)
mean4 <- mean(swiss$Education)
mean5 <- mean(swiss$Catholic)
mean6 <- mean(swiss$Infant.Mortality)
c(mean1, mean2, mean3, mean4, mean5, mean6)


## ----gallant---------------------------------------------------------------
# One named slot per column of `swiss`, allocated up front and filled in a loop.
swiss_means <- numeric(ncol(swiss))
names(swiss_means) <- colnames(swiss)
for (i in seq_len(ncol(swiss))) {
    swiss_means[i] <- mean(swiss[[i]])
}
swiss_means

## ------------------------------------------------------------------------
for (i in seq_len(10)) {
    # values are not auto-printed inside a for loop, so call print() explicitly
    print(i^2)
}

## ------------------------------------------------------------------------
# Short loop: prints the squares of 1, 2 and 3.
for (i in seq_len(3)) {
    print(i^2)
}

## ------------------------------------------------------------------------
# The same three iterations written out by hand, one assignment per pass.
i <- 1; print(i^2)
i <- 2; print(i^2)
i <- 3; print(i^2)

## ------------------------------------------------------------------------
# A for loop can iterate directly over the elements of any vector.
some_letters <- letters[seq(from = 4, to = 6)]
for (i in some_letters) {
    print(i)
}
# the loop variable is not scoped to the loop in R: it keeps its last value
i

## ------------------------------------------------------------------------
# Loop over positions rather than values so both the index and the element
# are available inside the body.
for (a in seq_len(length(some_letters))) {
    print(sprintf("Letter %d: %s", a, some_letters[a]))
}
# like `i` above, `a` is still defined after the loop ends
a

## ------------------------------------------------------------------------
# preallocate numeric vector
# allocate the full result vector up front, then fill it one slot at a time
iters <- 10
output <- vector("numeric", length = iters)

for (i in seq_len(iters)) {
    output[i] <- (i - 2)^2 + (i - 1)^2
}
output

## ------------------------------------------------------------------------
names_to_use <- paste("iter", letters[1:5])
names_to_use
# long form: create the vector, then attach the names in a second step
a_vector <- numeric(5)
names(a_vector) <- names_to_use

# short form: setNames(values, names) does both in a single call
a_vector <- setNames(object = numeric(5), nm = names_to_use)
a_vector

## ---- cache=TRUE---------------------------------------------------------
set.seed(98195)
# simulate example data: y is a quadratic function of x plus unit-sd noise
n <- 300
x <- rnorm(n, mean = 5, sd = 4)
fake_data <- data.frame(x = x)
fake_data$y <- -0.5 * x + 0.05 * x^2 + rnorm(n, sd = 1)

## ---- fig.width = 10, fig.height = 3, dpi=300, out.width="1100px", out.height="330px"----
library(ggplot2)
# scatterplot of the simulated data
ggplot(fake_data, mapping = aes(x = x, y = y)) +
    geom_point() +
    labs(title = "Our fake data")

## ------------------------------------------------------------------------
# candidate regression formulas, stored as strings and named by model type
models <- c(
    `intercept only` = "y ~ 1",
    linear           = "y ~ x",
    quadratic        = "y ~ x + I(x^2)",
    cubic            = "y ~ x + I(x^2) + I(x^3)"
)

## ------------------------------------------------------------------------
# empty list with one named slot per model, to be filled with fitted lm objects
fitted_lms <- setNames(vector("list", length(models)), names(models))

## ------------------------------------------------------------------------
# fit every candidate formula to the full data set, storing each fit by name
for (mod in names(fitted_lms)) {
    fitted_lms[[mod]] <- lm(formula(models[mod]), data = fake_data)
}

## ------------------------------------------------------------------------
# initialize data frame to hold predictions
# start from the observed data and append one prediction column per model,
# each column named after the model that produced it
predicted_data <- fake_data
for (mod in names(fitted_lms)) {
    predicted_data[[mod]] <- predict(
        object = fitted_lms[[mod]],
        newdata = predicted_data
    )
}

## ---- warning=FALSE, message=FALSE---------------------------------------
library(tidyr)
library(dplyr)
# Reshape to long format: one row per (observation, model) pair, with the
# model name in `Model` and its prediction in `Prediction`.
# pivot_longer() replaces the superseded gather().
tidy_predicted_data <- predicted_data %>%
    pivot_longer(cols = -c(x, y),
                 names_to = "Model",
                 values_to = "Prediction") %>%
    # fix the factor level order so it follows the order of `models`
    mutate(Model = factor(Model, levels = names(models)))

## ---- echo=FALSE, fig.width = 10, fig.height = 4.5, dpi=300, out.width="1100px", out.height="500px"----
# observed points with each model's predicted trend drawn on top
ggplot(fake_data, mapping = aes(x = x, y = y)) +
    geom_point() +
    geom_line(
        data = tidy_predicted_data,
        mapping = aes(x = x, y = Prediction, group = Model, color = Model),
        alpha = 0.5,
        size = 2
    ) +
    labs(title = "Predicted trends from regression") +
    theme_bw()

## ------------------------------------------------------------------------
K <- 10
CV_predictions <- fake_data
# shuffle a balanced set of fold labels 1..K across the rows
CV_predictions$fold <- sample(rep_len(seq_len(K), nrow(CV_predictions)))
# placeholder columns, one per model, to receive the out-of-fold predictions
CV_predictions[, names(models)] <- NA
head(CV_predictions, n = 2)

## ------------------------------------------------------------------------
# K-fold cross-validation: for every model and every fold, fit on the rows
# outside the fold and predict the rows inside it.
for (mod in names(models)) {
    for (k in seq_len(K)) {
        # logical mask, TRUE for the rows held out in fold k
        fold_rows <- CV_predictions$fold == k
        # train on every row that is not in the fold
        temp_mod <- lm(formula(models[mod]),
                       data = CV_predictions[!fold_rows, ])
        # store predictions for the held-out rows in this model's column
        CV_predictions[fold_rows, mod] <- predict(
            temp_mod,
            newdata = CV_predictions[fold_rows, ]
        )
    }
}

## ------------------------------------------------------------------------
# mean squared out-of-fold prediction error, one entry per model
CV_MSE <- numeric(length(models))
names(CV_MSE) <- names(models)
for (mod in names(CV_MSE)) {
    pred_sq_error <- (CV_predictions[[mod]] - CV_predictions$y)^2
    CV_MSE[mod] <- mean(pred_sq_error)
}
CV_MSE

## -------------------------------------------------------------
# Classify 1..10 by divisibility: even numbers first, then odd multiples of 3,
# then everything else.
for (i in seq_len(10)) {
    if (i %% 2 != 0) {
        if (i %% 3 == 0) {
            print(paste0("The number ", i, " is not even but divisible by 3"))
        } else {
            print(paste0("The number ", i, " is not divisible by 2 or 3"))
        }
    } else {
        print(paste0("The number ", i, " is even"))
    }
}

## ------------------------------------------------------------------------
num_heads <- 0
num_flips <- 0
# keep flipping a fair coin until four heads have come up
while (num_heads < 4) {
    coin_flip <- rbinom(n = 1, size = 1, prob = 0.5)
    # coin_flip is 1 for heads and 0 for tails, so adding it counts the heads
    num_heads <- num_heads + coin_flip
    num_flips <- num_flips + 1
}
# the number of tails (num_flips - 4) follows a negative binomial distribution
num_flips

## ------------------------------------------------------------------------
# 100,000 standard-normal draws used by the timing comparison below
my_vector <- rnorm(n = 1e5)

## ------------------------------------------------------------------------
# Time an element-by-element for loop that adds 1 to every entry of my_vector.
for_start <- proc.time() # start the clock
# preallocate a numeric vector of the final length so it is neither grown nor
# converted from logical to numeric inside the loop
new_vector <- numeric(length(my_vector))
# seq_along() yields an empty sequence for zero-length input, whereas
# 1:length(x) would iterate over c(1, 0)
for (position in seq_along(my_vector)) {
    new_vector[position] <- my_vector[position] + 1
}
(for_time <- proc.time() - for_start) # time elapsed

## ------------------------------------------------------------------------
# Same computation as the timed for loop, done as a single vectorised addition.
vec_start <- proc.time() # start the clock
new_vector <- 1 + my_vector
(vec_time <- proc.time() - vec_start) # time elapsed
# ratio of loop time to vectorised time for each timing component
for_time / vec_time

## ------------------------------------------------------------------------
# 3 x 4 integer matrix, filled column by column
a_matrix <- matrix(seq_len(12), nrow = 3)
a_matrix
# row totals without writing a loop
rowSums(a_matrix)

## ------------------------------------------------------------------------
# running total of 1, 2, ..., 7
cumsum(seq_len(7))

## ------------------------------------------------------------------------
# element-wise ("parallel") maximum across the three vectors
pmax(c(0, 2, 4), rep(1, 3), rep(2, 3))