#====================================================================#
# Title: Day 4                                                       #
# Name:  Your name here                                              #
#====================================================================#

#### Code ------------------------------------------------------------
# Run through this code on your own to practice what we covered in
# the slides. Today I've put it all in one script, so keep track of 
# where you are! You can always restart R and rerun to start from 
# scratch if you've messed anything up.

# Before you do anything, make sure you're starting in a fresh session.
# Restart R by going to Session -> Restart R
# You can make it so that this also clears your workspace (highly recommended)
# by going to Tools -> Global Options -> General -> Basic -> Workspace
# and unclicking "restore .RData" and switching the other option to "never"

# Then we need to reload packages and data (same as last week)
library(tidyverse)
library(tableone)
nlsy <- read_csv("nlsy_cc.csv")
colnames(nlsy) <- c("glasses", "eyesight", "sleep_wkdy", "sleep_wknd",
                    "id", "nsibs", "samp", "race_eth", "sex", "region", 
                    "income", "res_1980", "res_2002", "age_bir")

#### functions -----------------------------------------------------

# Look inside a function
CreateTableOne
# Look inside another function you know!

# make a practice vector for our new_mean function
x <- c(1, 3, 5, 7, 9)

# try out our code
n <- length(x)
sum(x) / n

# insert into the function body
new_mean <- function(x) {
  n <- length(x)
  mean_val <- sum(x) / n
  return(mean_val)
}

# try it out!
new_mean(x = x)
new_mean(x = c(100, 200, 300))
# try it with another vector! How about nlsy$income?

## proportions ##

# new test vector
x <- c(0, 1, 1)

# two options:
percent <- 100
percent * sum(x) / length(x)
percent <- 1
percent * sum(x) / length(x)

# add percent as an argument, with a default of 1
prop <- function(x, percent = 1) {
  n <- length(x)
  mean_val <- percent * sum(x) / n
  return(mean_val)
}

prop(x = x)
prop(x = x, percent = 100)

#### Exercises 1 -----------------------------------------------------

# 1. You're tired of writing x^2 when you want to square x. Make a function to
# square a number. You can call it square().

square <- function(x) {
  squared <- x * x
  return(squared)
}
# Note that you could also do:
square <- function(x) {
  x * x
}
# since it's obvious from the context what will be returned

# 2. You don't just want to square numbers, you want to raise them to higher
# powers too. Make a function that uses two arguments, x for a number, and
# power for the power. Call it raise().

raise <- function(x, power) {
  raised <- x^power
  return(raised)
}

# 3. Change your raise() function to default to squaring x when the user doesn't
# enter a value for power.
  
raise <- function(x, power = 2) {
    raised <- x^power
    return(raised)
}

# 4. Use your function to square and cube 524 with raise(524) and raise(524, power
# = 3).

raise(524)
raise(524, power = 3)

#### functions 2 -----------------------------------------------------

# we want to be able to change the quantile we want:
q <- .5
nlsy %>% summarize(
  q_var = quantile(age_bir, probs = q)
)

# paste in the body of a function
age_bir_q <- function(q) {
  quant <- nlsy %>% 
    summarize(q_var = quantile(age_bir, probs = q))
  return(quant)
}

# test
age_bir_q(q = 0.5)
median(nlsy$age_bir)

## what if we want to change the variable? ##
# not going to work:
var_q <- function(q, var) {
  quant <- nlsy %>%
    summarize(q_var = quantile(var, probs = q))
  return(quant)
}
var_q(q = 0.5, var = income)
var_q(q = 0.5, var = "income")

# we can accept variable names as quotes with rename
# compare:
nlsy
nlsy %>% rename(eyeglasses = "glasses")

# if we rename the variable we pass as an argument,
# our function will work
var_q <- function(q, var) {
  quant <- nlsy %>%
    rename(new_var = var) %>%
    summarise(q_var = quantile(new_var, probs = q))
  return(quant)
}
var_q(q = 0.5, var = "income")
var_q(q = 0.25, var = "sleep_wkdy")
var_q(q = 0.95, var = "nsibs")

# what if we want to change the grouping variable?
# i.e., from sex to race_eth here:
nlsy %>% group_by(sex) %>% summarise(mean_inc = mean(income))
nlsy %>% group_by(race_eth) %>% summarise(mean_inc = mean(income))

#### Exercises 2 -----------------------------------------------------

# 1. Write a function to calculate the stratified mean income for grouping
# variable var. In other words, write a function such that mean_group_inc(var =
# "sex") produces the same results as the first line on the previous slide,
# mean_group_inc(var = "race_eth") the second.

mean_group_inc <- function(var) {
  nlsy %>%
    rename(new_var = var) %>%
    group_by(new_var) %>%
    summarise(mean(income))
}
mean_group_inc(var = "sex")
mean_group_inc(var = "race_eth")

# 2. Rewrite your function to accept two arguments: group_var to determine what
# the grouping variable is, and mean_var to determine what variable you want to
# take the mean of (e.g., mean_group(group_var = "sex", mean_var = "income")
# should give you the same results as above).

mean_group <- function(group_var, mean_var) {
  nlsy %>%
    rename(group_var = group_var,
           mean_var = mean_var) %>%
    group_by(group_var) %>%
    summarise(mean(mean_var))
}
mean_group(group_var = "sex", mean_var = "income")

#### for loops -----------------------------------------------------

# just print the values
for (i in 1:3) {
  print(i)
}

# have to use the print function to print to the console
# try taking out the print() here:
qs <- c(0.1, 0.5, 0.9)
for (i in qs) {
  print(var_q(q = i, var = "income"))
}

# create an empty object to store results
results <- rep(NA, 3)
results
for (i in 1:3) {
  results[[i]] <- i * 1.5
}
results

# revise our function: use pull() to get just one variable, not return a tibble
var_q_new <- function(q, var) {
  quant <- nlsy %>%
    rename(new_var = var) %>%
    summarise(q_var = quantile(new_var, probs = q)) %>%
    pull(q_var)
  return(quant)
}
var_q_new(q = 0.5, var = "income")

# set up the loop
qs <- seq(0.1, 0.9, by = 0.1)
qs
deciles <- rep(NA, length(qs))
deciles
seq_along(qs)

# run the loop
for (i in seq_along(qs)) {
  deciles[[i]] <- var_q_new(q = qs[[i]], 
                            var = "income")
}
deciles

#### Exercises 3 -----------------------------------------------------

# 1. Change the last for loop in the slides to loop over different variables
# instead of different quantiles. That is, calculate the 0.25 quantile for each
# of c("income", "age_bir", "nsibs") in a for loop.
qvars <- c("income", "age_bir", "nsibs")
res <- rep(NA, length(qvars))

for (i in seq_along(qvars)) {
  res[[i]] <- var_q_new(q = 0.25, 
                            var = qvars[[i]])
}
res

# 2. You can nest for loops inside each other, as long as you use different
# iteration variables. Write a nested for loop to iterate over variables (with
# i) and quantiles (with j). You'll need to start with an empty matrix instead
# of a vector, with rows indexed by i and columns by j. Calculate each of the
# deciles for each of the above variables.

qs <- seq(0.1, 0.9, by = 0.1)
qvars <- c("income", "age_bir", "nsibs")
res_mat <- matrix(NA, ncol = length(qvars), nrow = length(qs))
for (i in seq_along(qvars)) {
  for (j in seq_along(qs)) {
    res_mat[j,i] <- var_q_new(q = qs[[j]], 
                          var = qvars[[i]])
  }
}
res_mat

#### scoped functions -----------------------------------------------------

# just to explore
nlsy %>%
  summarise_at(vars(contains("sleep")), 
               list(med = median, sd = sd))

nlsy %>%
  summarise_if(is.numeric, 
               mean)

nlsy %>%
  mutate_at(vars(eyesight, race_eth, sex),
            factor) %>%
  select(eyesight, race_eth, sex)

nlsy %>%
  rename_all(toupper)

#### Challenge -----------------------------------------------------

# Create a function that calculates the stratified proportion of people with
# different levels of eyesight by any categorical variable. Then use any
# technique (besides copying and pasting) to calculate the proportions
# stratified by sex, race_eth, and region. You should end up with something
# like this:

## # A tibble: 45 x 5
##    var      var_level eyesight     n    prop
##    <chr>        <dbl>    <dbl> <int>   <dbl>
##  1 sex              1        1   228   0.455  
##  2 sex              1        2   162   0.323  
##  3 sex              1        3    85   0.170  
##  4 sex              1        4    21   0.0419 
##  5 sex              1        5     5   0.00998
##  6 sex              2        1   246   0.349  
##  7 sex              2        2   223   0.317  
##  8 sex              2        3   164   0.233  
##  9 sex              2        4    57   0.0810 
## 10 sex              2        5    14   0.0199 
## 11 race_eth         1        1    90   0.427  
## 12 race_eth         1        2    54   0.256  
## 13 race_eth         1        3    50   0.237  
## 14 race_eth         1        4    14   0.0664 
## 15 race_eth         1        5     3   0.0142 
## 16 race_eth         2        1   102   0.332  
## 17 race_eth         2        2    96   0.313  
## 18 race_eth         2        3    73   0.238  
## 19 race_eth         2        4    29   0.0945 
## 20 race_eth         2        5     7   0.0228 
## 21 race_eth         3        1   282   0.410  
## 22 race_eth         3        2   235   0.342  

# one of many ways to do this
eye_func <- function(var) {
  nlsy %>%
    rename(var_level = var) %>%
    group_by(var_level, eyesight) %>%
    summarize(n = n()) %>%
    mutate(prop = n / sum(n),
           var = var) %>%
    ungroup() %>%
    select(var, everything())
}
# use a function from the map() family in the purrr package
# to loop over those three variables, and combine them
# by rows into a dataframe
res <- map_dfr(c("sex", "race_eth", "region"), eye_func)
res