####################################################### ### Statistical Computing with R 2e ### ### Maria L. Rizzo ### ### Chapman & Hall/CRC The R Series ### ### ISBN 9781466553323 - CAT# K15269 ### ### March 6, 2019 ### ### ### ### R code for Chapter 15 ### ### Programming Topics ### ####################################################### # packages to install: # energy, ggplot2, profvis, pryr, microbenchmark, mvtnorm, rbenchmark # for the last section: dplyr, Lahman, Rcpp ### Example 15.1 (Benchmarking methods to generate a sequence) s1 <- 1:10 s2 <- seq(1, 10, 1) s3 <- seq.int(1, 10, 1) df <- data.frame(s1=s1, s2=s2, s3=s3) str(df) library(microbenchmark) library(ggplot2) n <- 1000 mb <- microbenchmark( seq(1, n, 1), seq.int(1, n, 1), 1:n ) mb autoplot(mb) # display a violin plot ### Example 15.2 (Benchmarking methods to initialize a vector) n <- 100 mb2 <- microbenchmark( numeric = numeric(n) + 1, rep = rep(1, n), seq = seq(from=1, to=1, length=n), ones = matrix(1, nrow=n, ncol=1), as.ones = as.matrix(rep(1, n)) ) mb2 autoplot(mb2) # display a violin plot ### Example 15.3 (Timings of two multivariate normal generators) library(rbenchmark) library(MASS) library(mvtnorm) n <- 100 #sample size d <- 30 #dimension N <- 2000 #iterations mu <- numeric(d) benchmark( columns = c("test", "replications", "elapsed", "relative"), replications = 2000, cov = {S <- cov(matrix(rnorm(n*d), n, d))}, mvrnorm = mvrnorm(n, mu, S), rmvnorm = rmvnorm(n, mu, S) ) ### Example 15.4 (Profiling with Rprof) x <- rnorm(1000) y <- rnorm(1000) Rprof("pr.out", line.profiling = TRUE) energy::dcor(x, y) Rprof(NULL) summaryRprof("pr.out") ### Example 15.5 (profvis interactive visualization) library(profvis) profvis(energy::dcor(x, y)) ### Example 15.6 (Object size) x <- matrix(rnorm(5000), 1000, 5) #1000 obs in R^5 DF <- as.data.frame(x) object.size(x) object.size(DF) pryr::object_size(x) pryr::object_size(DF) pryr::compare_size(x) listTwo <- list(x, x) pryr::compare_size(listTwo) ### Example 15.7 (Comparing objects and attributes) str(x) str(DF) all.equal(x, DF) names(attributes(x)) names(attributes(DF)) all.equal(x, DF, check.attributes = FALSE) ### Example 15.8 (Comparing objects for equality) try(ifelse(all.equal(x, DF), "T", "F")) # error ifelse(isTRUE(all.equal(x, DF)), "T", "F") # correct x <- 1 - 10e-4 y <- x + 2 x == (y - 2) # equal mathematically but isTRUE(all.equal(x, y - 2)) #gives expected result ## does not necessarily evaluate to TRUE or FALSE try(ifelse(all.equal(x, y), "T", "F")) ## returns TRUE or FALSE ifelse(isTRUE(all.equal(x, y)), "T", "F") ### Example 15.9 (Display R function code) nclass.scott ### Example 15.10 (RSiteSearch) RSiteSearch("ggcorr") ### Example 15.11 (UseMethod) body(density) args(density.default) body(density.default) ### Example 15.12 (Show methods) methods(t.test) getAnywhere(t.test.formula) body(stats:::t.test.formula) ### Example 15.13 (Object not found or not an exported object) perc.ci boot::perc.ci getAnywhere(perc.ci) args(boot:::perc.ci) body(boot:::perc.ci) boot:::perc.ci getFromNamespace("perc.ci", "boot") ### Example 15.14 (getS3method) library(microbenchmark) library(ggplot2) getAnywhere(autoplot) getS3method("autoplot", class = "microbenchmark") getAnywhere(autoplot.microbenchmark) ### Example 15.15 (.Primitive or .Internal) pryr::show_c_source(.Primitive(cumsum(x))) ### Example 15.16 (.Call, .External, .C or .Fortran) dist #is implemented by a .Call to C_Cdist ### Example 15.17 (A first Rcpp experiment) # In RStudio use the menu # File > New File > C++ file to display this code ### Example 15.18 (cppFunction) library(Rcpp) set.seed(1) x <- matrix(rnorm(20), nrow = 5, ncol = 4) cppFunction('double vecnorm(NumericVector v) { // compute the Euclidean norm of vector v int d = v.size(); double s = 0.0; for (int i = 0; i < d; i++) s += v(i) * v(i); return sqrt(s); }') print(vecnorm(x[, 1])) print(apply(x, MARGIN = 2, FUN = "vecnorm")) ### Example 15.19 (sourceCpp) # Create the C++ source file "printme.cpp" by editing the template # in RStudio menu: File > New File > C++ x <- sample(1:5) print_me(x) ### Example 15.20 (Lahman baseball data) library(Lahman) str(Batting) # method 1 S <- subset(Batting, Batting$yearID == 1999, select = c("playerID", "AB", "H")) # method 2 (dplyr) library(dplyr) Batting %>% filter(yearID == 1999) -> b # method 1 AB <- as.vector(by(S$AB, S$playerID, FUN = sum)) H <- as.vector(by(S$H, S$playerID, FUN = sum)) S <- data.frame(playerID = unique(S$playerID), AB = AB, H = H, AVG = round(H / AB, 3), stringsAsFactors = FALSE) S400 <- S[S$AB >= 400, ] # method 2 (dplyr) b %>% group_by(playerID) %>% summarize(AB = sum(AB), H = sum(H)) -> S S %>% mutate(AVG = round(H / AB, 3)) -> S S %>% filter(AB >= 400) -> S400 # method 1 o <- order(S400$AVG, decreasing = TRUE) S400 <- S400[o, ] top <- S400[1:10, ] # method 2 (dplyr) S400 %>% arrange(desc(AVG)) -> S400 slice(S400, 1:10) -> top top Master %>% select(playerID, nameFirst, nameLast) -> m top %>% inner_join(m) %>% select(nameFirst, nameLast, AVG) ### Example 15.21 (Comparison with microbenchmark) # The source for this example is in R Markdown format # Open "Lahman.Rmd" in RStudio and knit to see report