## ----------------------------------------------------------------------------
## Name: 07-appendix.R
## Description: Script for Chapter 7 of "Notes for Predictive Modeling"
## Link: https://bookdown.org/egarpor/PM-UC3M/
## License: https://creativecommons.org/licenses/by-nc-nd/4.0/
## Author: Eduardo García-Portugués
## Version: 5.10.0
## ----------------------------------------------------------------------------

## ----ht, fig.margin = FALSE, fig.fullwidth = TRUE, fig.asp = 1/2, fig.cap = '(ref:ht-title)'----
# Sample data from a N(0, 1)
set.seed(3245678)
n <- 50
x <- rnorm(n)

# Kolmogorov-Smirnov test for H_0 : F = N(0, 1). Does not reject.
ks.test(x, "pnorm")

# Simulation of p-values when H_0 is true
M <- 1e4
pValues_H0 <- sapply(1:M, function(i) {
  x <- rnorm(n) # N(0, 1)
  ks.test(x, "pnorm")$p.value
})

# Simulation of p-values when H_0 is false -- the data does not
# come from a N(0, 1) but from a N(0, 1.5)
pValues_H1 <- sapply(1:M, function(i) {
  x <- rnorm(n, mean = 0, sd = sqrt(1.5)) # N(0, 1.5)
  ks.test(x, "pnorm")$p.value
})

# Comparison of p-values
par(mfrow = 1:2)
hist(pValues_H0, breaks = seq(0, 1, l = 20), probability = TRUE,
     main = expression(H[0]), ylim = c(0, 2.5))
abline(h = 1, col = 2)
hist(pValues_H1, breaks = seq(0, 1, l = 20), probability = TRUE,
     main = expression(H[1]), ylim = c(0, 2.5))
abline(h = 1, col = 2)

## ----multn-1----------------------------------------------------------------------------
# Data from the voting intentions in the 1988 Chilean national plebiscite
data(Chile, package = "carData")
summary(Chile)
# vote is a factor with levels A (abstention), N (against Pinochet),
# U (undecided), Y (for Pinochet)

# Fit of the model done by multinom: Response ~ Predictors
# It is an iterative procedure (maxit sets the maximum number of iterations)
# Read the documentation in ?multinom for more information
mod1 <- nnet::multinom(vote ~ age + education + statusquo, data = Chile,
                       maxit = 1e3)

# Each row of coefficients gives the coefficients of the logistic
# regression of a level versus the reference level (A)
summary(mod1)

# Set a different level as the reference (N) for easier interpretations
Chile$vote <- relevel(Chile$vote, ref = "N")
mod2 <- nnet::multinom(vote ~ age + education + statusquo, data = Chile,
                       maxit = 1e3)
summary(mod2)
exp(coef(mod2))
# Some highlights:
# - intercepts do not have too much interpretation (correspond to age = 0).
#   A possible solution is to center age by its mean (so age = 0 would
#   represent the mean of the ages)
# - both age and statusquo increase the probability of voting Y, A or U
#   with respect to voting N -> conservatism increases with ages
# - both age and statusquo increase more the probability of voting Y and U
#   than A -> elderly and status quo supporters are more decided to participate
# - a PS level of education increases the probability of voting N. Same for
#   a S level of education, but more prone to A

# Prediction of votes -- three profile of voters
newdata <- data.frame(age = c(23, 40, 50),
                      education = c("PS", "S", "P"),
                      statusquo = c(-1, 0, 2))

# Probabilities of belonging to each class
predict(mod2, newdata = newdata, type = "probs")

# Predicted class
predict(mod2, newdata = newdata, type = "class")

## ----nas-1, error = TRUE----------------------------------------------------------------
# The airquality dataset contains NA's
data(airquality)
head(airquality)
summary(airquality)

# Let's add more NA's for the sake of illustration
set.seed(123456)
airquality$Solar.R[runif(nrow(airquality)) < 0.7] <- NA
airquality$Day[runif(nrow(airquality)) < 0.1] <- NA

# See what are the fully-observed cases
comp <- complete.cases(airquality)
mean(comp) # Only 15% of cases are fully observed

# Complete cases
head(airquality[comp, ])

# Linear model on all the variables
summary(lm(Ozone ~ ., data = airquality)) # 129 not included

# Caution! Even if the problematic variable is excluded, only
# the complete observations are employed
summary(lm(Ozone ~ . - Solar.R, data = airquality))

# Notice the difference with
summary(lm(Ozone ~ ., data = subset(airquality, select = -Solar.R)))

# Model selection can be problematic with missing data, since
# the number of complete cases changes with the addition or
# removing of predictors
mod <- lm(Ozone ~ ., data = airquality)

# stepAIC drops an error
modAIC <- MASS::stepAIC(mod)

# Also, this will be problematic (the number of complete
# cases changes with the predictors considered!)
modBIC <- MASS::stepAIC(mod, k = log(nrow(airquality)))

# Comparison of AICs or BICs is spurious: the scale of the
# likelihood changes with the sample size (the likelihood
# decreases with n), which increases AIC / BIC with n.
# Hence using BIC / AIC is not adequate for model selection
# with missing data.
AIC(lm(Ozone ~ ., data = airquality))
AIC(lm(Ozone ~ ., data = subset(airquality, select = -Solar.R)))

# Considers only complete cases including Solar.R
AIC(lm(Ozone ~ . - Solar.R, data = airquality))

## ----nas-2, fig.asp = 1/2---------------------------------------------------------------
# The complete cases approach is the default in R
summary(lm(Ozone ~ ., data = airquality))

# However, since the complete cases that R is going to consider
# depends on which predictors are included, it is safer to
# exclude NA's explicitly before the fitting of a model.
airqualityNoNA <- na.exclude(airquality)
summary(airqualityNoNA)

# The package VIM has a function to visualize where the missing
# data is present. It gives the percentage of NA's for each
# variable and for the most important combinations of NA's.
VIM::aggr(airquality)
VIM::aggr(airqualityNoNA)

# Stepwise regression without NA's -- no problem
modBIC1 <- MASS::stepAIC(lm(Ozone ~ ., data = airqualityNoNA),
                         k = log(nrow(airqualityNoNA)), trace = 0)
summary(modBIC1)

# But we only take into account 16% of the original data
nrow(airqualityNoNA) / nrow(airquality)

# Removing the predictor with many NA's, as we did before
# We also exclude NA's from other predictors
airqualityNoSolar.R <- na.exclude(subset(airquality, select = -Solar.R))
modBIC2 <- MASS::stepAIC(lm(Ozone ~ ., data = airqualityNoSolar.R),
                         k = log(nrow(airqualityNoSolar.R)), trace = 0)
summary(modBIC2)
# In this example the approach works well because most of
# the NA's are associated to the variable Solar.R

# Input data using the sample mean
library(mice)
airqualityMean <- complete(mice(data = airquality, m = 1, method = "mean"))
head(airqualityMean)
# Explanation of the syntax:
# - mice::complete() serves to retrieve the completed dataset from
#   the mids object.
# - m = 1 specifies that we only want a reconstruction of the
#   dataset because the imputation method is deterministic
#   (it could be random).
# - method = "mean" says that we want the sample mean to be
#   used to fill NA's in all the columns. This only works
#   properly for quantitative variables.

# Impute using linear regression for the response (first column)
# and mean for the predictors (remaining five columns)
airqualityLm <- complete(mice(data = airquality, m = 1, 
                              method = c("norm.predict", rep("mean", 5))))
head(airqualityLm)

# Imputed data -- some extrapolation problems may happen
airqualityLm$Ozone[is.na(airquality$Ozone)]

# Notice that the imputed data is the same (except for a small
# truncation that is introduced by mice) as
predict(lm(airquality$Ozone ~ ., data = airqualityMean),
        newdata = airqualityMean[is.na(airquality$Ozone), -1])

# Removing the truncation with ridge = 0
complete(mice(data = airquality, m = 1, 
              method = c("norm.predict", rep("mean", 5)), 
              ridge = 0))[is.na(airquality$Ozone), 1]

# The default mice's method (predictive mean matching) works
# better in this case (in the sense that it does not yield
# negative Ozone values)
# Notice that there is randomness in the imputation!
airqualityMice <- complete(mice(data = airquality, m = 1, seed = 123))
head(airqualityMice)

## ----notevarsel-------------------------------------------------------------------------
# Simulation setting
n <- 2e2
p <- 4
p0 <- p %/% 2
beta <- c(rep(1, p0), rep(0, p - p0))

# Generate two sets of independent data following the same linear model
# with coefficients beta and null intercept
x1 <- matrix(rnorm(n * p), nrow = n, ncol = p)
data1 <- data.frame("x" = x1)
xbeta1 <- x1 %*% beta
x2 <- matrix(rnorm(n * p), nrow = n, ncol = p)
data2 <- data.frame("x" = x2)
xbeta2 <- x2 %*% beta

# Objects for the simulation
M <- 1e4
pvalues1 <- pvalues2 <- pvalues3 <- matrix(NA, nrow = M, ncol = p)
set.seed(12345678)
data1$y <- xbeta1 + rnorm(n)
nam <- names(lm(y ~ 0 + ., data = data1)$coefficients)

# Simulation
# pb <- txtProgressBar(style = 3)
for (i in 1:M) {

  # Generate new data
  data1$y <- xbeta1 + rnorm(n)

  # Obtain the significances of the coefficients for the usual linear model
  mod1 <- lm(y ~ 0 + ., data = data1)
  s1 <- summary(mod1)
  pvalues1[i, ] <- s1$coefficients[, 4]

  # Obtain the significances of the coefficients for a data-driven selected
  # linear model (in this case, by stepwise regression using BIC)
  mod2 <- MASS::stepAIC(mod1, k = log(n), trace = 0)
  s2 <- summary(mod2)
  ind <- match(x = names(s2$coefficients[, 4]), table = nam)
  pvalues2[i, ind] <- s2$coefficients[, 4]

  # Generate independent data
  data2$y <- xbeta2 + rnorm(n)

  # Significances of the coefficients by the data-driven selected model
  s3 <- summary(lm(y ~ 0 + ., data = data2[, c(ind, p + 1)]))
  pvalues3[i, ind] <- s3$coefficients[, 4]

  # Progress
  # setTxtProgressBar(pb = pb, value = i / M)

}

# Percentage of NA's: NA = predictor excluded
apply(pvalues2, 2, function(x) mean(is.na(x)))

# Boxplots of significances
boxplot(pvalues1, names = expression(beta[1], beta[3], beta[3], beta[4]),
        main = "p-values in the full model", ylim = c(0, 1))
boxplot(pvalues2, names = expression(beta[1], beta[3], beta[3], beta[4]),
        main = "p-values in the stepwise model", ylim = c(0, 1))
boxplot(pvalues3, names = expression(beta[1], beta[3], beta[3], beta[4]),
        main = "p-values in the model with the predictors selected by
        stepwise regression, and fitted in an independent sample",
        ylim = c(0, 1))

# Test uniformity of the p-values associated to the coefficients that are 0
apply(pvalues1[, (p0 + 1):p], 2, function(x) ks.test(x, y = "punif")$p.value)
apply(pvalues2[, (p0 + 1):p], 2, function(x) ks.test(x, y = "punif")$p.value)
apply(pvalues3[, (p0 + 1):p], 2, function(x) ks.test(x, y = "punif")$p.value)

## ----r-1, echo = FALSE, cache = FALSE---------------------------------------------------
rm(list = ls())

## ----r-2, error = TRUE, cache = FALSE---------------------------------------------------
# The console can act as a simple calculator
1.0 + 1.1
2 * 2
3/2
2^3
1/0
0/0

# Use ";" for performing several operations in the same line
(1 + 3) * 2 - 1; 3 + 2

# Elemental mathematical functions
sqrt(2); 2^0.5
exp(1)
log(10) # Natural logarithm
log10(10); log2(10) # Logs in base 10 and 2
sin(pi); cos(0); asin(0)
tan(pi/3)
sqrt(-1)

# Remember to close the parenthesis -- errors below
1 +
(1 + 3

## ----r-3, error = TRUE------------------------------------------------------------------
# Any operation that you perform in R can be stored in a variable
# (or object) with the assignment operator "<-"
x <- 1

# To see the value of a variable, simply type it
x

# A variable can be overwritten
x <- 1 + 1

# Now the value of x is 2 and not 1, as before
x

# Capitalization matters
X <- 3
x; X

# See what are the variables in the workspace
ls()

# Remove variables
rm(X)
X

## ----r-4, error = TRUE------------------------------------------------------------------
# We combine numbers with the function "c"
c(1, 3)
c(1.5, 0, 5, -3.4)

# A handy way of creating integer sequences is the operator ":"
1:5

# Storing some vectors
myData <- c(1, 2)
myData2 <- c(-4.12, 0, 1.1, 1, 3, 4)
myData
myData2

# Entrywise operations
myData + 1
myData^2

# If you want to access a position of a vector, use [position]
myData[1]
myData2[6]

# You can also change elements
myData[1] <- 0
myData

# Think on what you want to access...
myData2[7]
myData2[0]

# If you want to access all the elements except a position,
# use [-position]
myData2[-1]
myData2[-2]

# Also with vectors as indexes
myData2[1:2]
myData2[myData]

# And also
myData2[-c(1, 2)]

# But do not mix positive and negative indexes!
myData2[c(-1, 2)]

# Remove the first element
myData2 <- myData2[-1]

## ----r-5--------------------------------------------------------------------------------
# Functions take arguments between parenthesis and transform them
# into an output
sum(myData)
prod(myData)

# Summary of an object
summary(myData)

# Length of the vector
length(myData)

# Mean, standard deviation, variance, covariance, correlation
mean(myData)
var(myData)
cov(myData, myData^2)
cor(myData, myData * 2)
quantile(myData)

# Maximum and minimum of vectors
min(myData)
which.min(myData)

# Usually the functions have several arguments, which are set
# by "argument = value" In the next case, the second argument is
# a logical flag to indicate the kind of sorting
sort(myData) # If nothing is specified, decreasing = FALSE is
# assumed
sort(myData, decreasing = TRUE)

# Do not know what are the arguments of a function? Use args
# and help!
args(mean)
?mean

## ----r-6, error = TRUE------------------------------------------------------------------
# A matrix is an array of vectors
A <- matrix(1:4, nrow = 2, ncol = 2)
A

# Another matrix
B <- matrix(1, nrow = 2, ncol = 2, byrow = TRUE)
B

# Matrix is a vector with dimension attributes
dim(A)

# Binding by rows or columns
rbind(1:3, 4:6)
cbind(1:3, 4:6)

# Entrywise operations
A + 1
A * B

# Accessing elements
A[2, 1] # Element (2, 1)
A[1, ] # First row -- this is a vector
A[, 2] # First column -- this is a vector

# Obtain rows and columns as matrices (and not as vectors)
A[1, , drop = FALSE]
A[, 2, drop = FALSE]

# Matrix transpose
t(A)

# Matrix multiplication
A %*% B
A %*% B[, 1]
A %*% B[1, ]

# Care is needed
A %*% B[1, , drop = FALSE] # Incompatible product

# Compute inverses with "solve"
solve(A) %*% A

# A data frame is a matrix with column names
# Useful when you have multiple variables
myDf <- data.frame(var1 = 1:2, var2 = 3:4)
myDf

# You can change names
names(myDf) <- c("newname1", "newname2")
myDf

# You can access variables by its name with the "$" operator
myDf$newname1

# And create new variables also (they have to be of the same
# length as the rest of variables)
myDf$myNewVariable <- c(0, 1)
myDf

# A list is a collection of arbitrary variables
myList <- list(myData = myData, A = A, myDf = myDf)

# Access elements by names
myList$myData
myList$A
myList$myDf

# Reveal the structure of an object
str(myList)
str(myDf)

# A less lengthy output
names(myList)

## ----r-7--------------------------------------------------------------------------------
# The iris dataset is already imported in R
# (beware: locfit has also an iris dataset, with different names
# and shorter)

# The beginning of the data
head(iris)

# "names" gives you the variables in the data frame
names(iris)

# So we can access variables by "$" or as in a matrix
iris$Sepal.Length[1:10]
iris[1:10, 1]
iris[3, 1]

# Information on the dimension of the data frame
dim(iris)

# "str" gives the structure of any object in R
str(iris)

# Recall the species variable: it is a categorical variable
# (or factor), not a numeric variable
iris$Species[1:10]

# Factors can only take certain values
levels(iris$Species)

# If a file contains a variable with character strings as
# observations (either encapsulated by quotation marks or not),
# the variable will become a factor when imported into R

## ----r-8--------------------------------------------------------------------------------
# The function "seq" creates sequences of numbers equally separated
seq(0, 1, by = 0.1)
seq(0, 1, length.out = 5)

# You can short the latter argument
seq(0, 1, l = 5)

# Repeat number
rep(0, 5)

# Reverse a vector
myVec <- c(1:5, -1:3)
rev(myVec)

# Another way
myVec[length(myVec):1]

# Count repetitions in your data
table(iris$Species)

## ----r-9--------------------------------------------------------------------------------
# Relational operators: x < y, x > y, x <= y, x >= y, x == y, x!= y
# They return TRUE or FALSE

# Smaller than
0 < 1

# Greater than
1 > 1

# Greater or equal to
1 >= 1 # Remember: ">="" and not "=>"" !

# Smaller or equal to
2 <= 1 # Remember: "<="" and not "=<"" !

# Equal
1 == 1 # Tests equality. Remember: "=="" and not "="" !

# Unequal
1 != 0 # Tests inequality

# TRUE is encoded as 1 and FALSE as 0
TRUE + 1
FALSE + 1

# In a vector-like fashion
x <- 1:5
y <- c(0, 3, 1, 5, 2)
x < y
x == y
x != y

# Subsetting of vectors
x
x[x >= 2]
x[x < 3]

# Easy way of work with parts of the data
data <- data.frame(x = c(0, 1, 3, 3, 0), y = 1:5)
data

# Data such that x is zero
data0 <- data[data$x == 0, ]
data0

# Data such that x is larger than 2
data2 <- data[data$x > 2, ]
data2

# Problem -- what happened?
data[x > 2, ]

# AND operator "&"
TRUE & TRUE
TRUE & FALSE
FALSE & FALSE

# OR operator "|"
TRUE | TRUE
TRUE | FALSE
FALSE | FALSE

# Both operators are useful for checking for ranges of data
y
index1 <- (y <= 3) & (y > 0)
y[index1]
index2 <- (y < 2) | (y > 4)
y[index2]

## ----r-10, out.width = '70%'------------------------------------------------------------
# "plot" is the main function for plotting in R
# It has a different behavior depending on the kind of object
# that it receives

# How to plot some data
plot(iris$Sepal.Length, iris$Sepal.Width,
     main = "Sepal.Length vs. Sepal.Width")

# Change the axis limits
plot(iris$Sepal.Length, iris$Sepal.Width, xlim = c(0, 10),
     ylim = c(0, 10))

# How to plot a curve (a parabola)
x <- seq(-1, 1, l = 50)
y <- x^2
plot(x, y, main = "A red thick parabola", type = "l",
     col = "red", lwd = 3)

# Plotting a more complicated curve between -pi and pi
x <- seq(-pi, pi, l = 50)
y <- (2 + sin(10 * x)) * x^2
plot(x, y, type = "l") # Kind of rough...

# Remember that we are joining points for creating a curve!
# More detailed plot
x <- seq(-pi, pi, l = 500)
y <- (2 + sin(10 * x)) * x^2
plot(x, y, type = "l")

# For more options in the plot customization see
?plot
?par

# "plot" is a first level plotting function. That means that
# whenever is called, it creates a new plot. If we want to
# add information to an existing plot, we have to use a second
# level plotting function such as "points", "lines" or "abline"

plot(x, y) # Create a plot
lines(x, x^2, col = "red") # Add lines
points(x, y + 10, col = "blue") # Add points
abline(a = 5, b = 1, col = "orange", lwd = 2) # Add a straight
# line y = a + b * x

## ----r-11, out.width = '70%'------------------------------------------------------------
# R allows to sample [r], compute density/probability mass
# functions [d], compute distribution function [p], and compute
# quantiles [q] for several continuous and discrete distributions.
# The format employed is [rdpq]name, where name stands for:
# - norm -> Normal
# - unif -> Uniform
# - exp -> Exponential
# - t -> Student's t
# - f -> Snedecor's F
# - chisq -> Chi squared
# - pois -> Poisson
# - binom -> Binomial
# More distributions:
?Distributions

# Sampling from a normal -- 5 random points from a N(0, 1)
rnorm(n = 5, mean = 0, sd = 1)

# If you want to have always the same result, set the seed of
# the random number generator
set.seed(45678)
rnorm(n = 5, mean = 0, sd = 1)

# Plotting the density of a N(0, 1) -- the Gaussian bell
x <- seq(-4, 4, l = 100)
y <- dnorm(x = x, mean = 0, sd = 1)
plot(x, y, type = "l")

# Plotting the distribution function of a N(0, 1)
x <- seq(-4, 4, l = 100)
y <- pnorm(q = x, mean = 0, sd = 1)
plot(x, y, type = "l")

# Computing the 95% quantile for a N(0, 1)
qnorm(p = 0.95, mean = 0, sd = 1)

# All distributions have the same syntax: rname(n,...),
# dname(x,...), dname(p,...) and qname(p,...), but the
# parameters in ... change. Look them in ?Distributions
# For example, here is the same for the uniform distribution

# Sampling from a U(0, 1)
set.seed(45678)
runif(n = 10, min = 0, max = 1)

# Plotting the density of a U(0, 1)
x <- seq(-2, 2, l = 100)
y <- dunif(x = x, min = 0, max = 1)
plot(x, y, type = "l")

# Computing the 95% quantile for a U(0, 1)
qunif(p = 0.95, min = 0, max = 1)

# Sampling from a Bi(10, 0.5)
set.seed(45678)
samp <- rbinom(n = 200, size = 10, prob = 0.5)
table(samp) / 200

# Plotting the probability mass of a Bi(10, 0.5)
x <- 0:10
y <- dbinom(x = x, size = 10, prob = 0.5)
plot(x, y, type = "h") # Vertical bars

# Plotting the distribution function of a Bi(10, 0.5)
x <- 0:10
y <- pbinom(q = x, size = 10, prob = 0.5)
plot(x, y, type = "h")

## ----r-12, out.width = '70%', error = TRUE----------------------------------------------
# A function is a way of encapsulating a block of code so it can
# be reused easily. They are useful for simplifying repetitive
# tasks and organize analyses

# This is a silly function that takes x and y and returns its sum
# Note the use of "return" to indicate what should be returned
add <- function(x, y) {
  z <- x + y
  return(z)
}

# Calling add -- you need to run the definition of the function
# first!
add(x = 1, y = 2)
add(1, 1) # Arguments names can be omitted

# A more complex function: computes a linear model and its
# posterior summary. Saves us a few keystrokes when computing a
# lm and a summary
lmSummary <- function(formula, data) {
  model <- lm(formula = formula, data = data)
  summary(model)
}
# If no return(), the function returns the value of the last
# expression

# Usage
lmSummary(Sepal.Length ~ Petal.Width, iris)

# Recall: there is no variable called model in the workspace.
# The function works on its own workspace!
model

# Add a line to a plot
addLine <- function(x, beta0, beta1) {
  lines(x, beta0 + beta1 * x, lwd = 2, col = 2)
}

# Usage
plot(x, y)
addLine(x, beta0 = 0.1, beta1 = 0)

# The function "sapply" allows to sequentially apply a function
sapply(1:10, sqrt)
sqrt(1:10) # The same

# The advantage of "sapply" is that you can use with any function
myFun <- function(x) c(x, x^2)
sapply(1:10, myFun) # Returns a 2 x 10 matrix

# "sapply" is useful for plotting non-vectorized functions
sumSeries <- function(n) sum(1:n)
plot(1:10, sapply(1:10, sumSeries), type = "l")

# "apply" applies iteratively a function to rows (1) or columns
# (2) of a matrix or data frame
A <- matrix(1:10, nrow = 5, ncol = 2)
A
apply(A, 1, sum) # Applies the function by rows
apply(A, 2, sum) # By columns

# With other functions
apply(A, 1, sqrt)
apply(A, 2, function(x) x^2)

## ----r-13-------------------------------------------------------------------------------
# The "for" statement allows to create loops that run along a
# given vector
# Print 3 times a message (i varies in 1:3)
for (i in 1:3) {
  print(i)
}

# Another example
a <- 0
for (i in 1:3) {
  a <- i + a
}
a

# Nested loops are possible
A <- matrix(0, nrow = 2, ncol = 3)
for (i in 1:2) {
  for (j in 1:3) {
    A[i, j] <- i + j
  }
}

# The "if" statement allows to create conditional structures of
# the forms:
# if (condition) {
#  # Something
# } else {
#  # Something else
# }
# These structures are thought to be inside functions

# Is the number positive?
isPositive <- function(x) {
  if (x > 0) {
    print("Positive")
  } else {
    print("Not positive")
  }
}
isPositive(1)
isPositive(-1)

# A loop can be interrupted with the "break" statement
# Stop when x is above 100
x <- 1
for (i in 1:1000) {
  x <- (x + 0.01) * x
  print(x)
  if (x > 100) {
    break
  }
}