# The parse_*() helpers (readr) turn a character vector into a typed vector,
# so each example supplies its values as strings. The string "NA" is read as
# a missing value.
parse_logical(c("TRUE", "TRUE", "FALSE", "TRUE", "NA"))
## [1] TRUE TRUE FALSE TRUE NA
parse_integer(c("1", "5", "3", "4", "12423"))
## [1] 1 5 3 4 12423
parse_double(c("4.2", "4", "6", "53.2"))
## [1] 4.2 4.0 6.0 53.2
parse_character(c("Goodnight Moon", "Runaway Bunny", "Big Red Barn"))
## [1] "Goodnight Moon" "Runaway Bunny" "Big Red Barn"
# Draw the integers 1 to 10 in random order
x <- sample(10)
x
## [1] 2 6 5 8 9 4 1 7 10 3
# Adding a vector of ten 100s ...
x + rep(100, times = 10)
## [1] 102 106 105 108 109 104 101 107 110 103
# ... gives the same result as adding the single value 100, because the
# shorter vector is recycled to the length of the longer one
x + 100
## [1] 102 106 105 108 109 104 101 107 110 103
# Two integer sequences of different lengths: 1 to 2 and 1 to 10
x1 <- 1:2
x1
## [1] 1 2
x2 <- 1:10
x2
## [1] 1 2 3 4 5 6 7 8 9 10
# Adding them recycles x1 five times along x2
x1 + x2
## [1] 2 4 4 6 6 8 8 10 10 12
x <- c("one", "two", "three", "four", "five")
# With positive integers: keep the elements at those positions, in that order
x[c(3, 2, 5)]
## [1] "three" "two" "five"
# With negative integers: drop the elements at those positions
x[c(-1, -3, -5)]
## [1] "two" "four"
# Don't mix positive and negative: R rejects the subscript with an error.
# try() reports that error without stopping the rest of the script.
try(x[c(-1, 1)])
## Error in x[c(-1, 1)]: only 0's may be mixed with negative subscripts
# A numeric vector containing two missing values
x <- c(10, 3, NA, 5, 8, 1, NA)
x
## [1] 10 3 NA 5 8 1 NA
# All non-missing values of x: build the logical mask first, then apply it
observed <- !is.na(x)
observed
## [1] TRUE TRUE FALSE TRUE TRUE TRUE FALSE
x[observed]
## [1] 10 3 5 8 1
# All even (or missing!) values of x: an NA in the mask yields an NA element
is_even <- x %% 2 == 0
x[is_even]
## [1] 10 NA 8 NA
# A list of three length-one numeric vectors
x <- list(1, 2, 3)
x
## [[1]]
## [1] 1
##
## [[2]]
## [1] 2
##
## [[3]]
## [1] 3
# str()
# Compact, one-line-per-element display of the same list
str(x)
## List of 3
## $ : num 1
## $ : num 2
## $ : num 3
# Element names are shown next to `$` in the str() display
x_named <- list(
  a = 1,
  b = 2,
  c = 3
)
str(x_named)
## List of 3
## $ a: num 1
## $ b: num 2
## $ c: num 3
# One list holding a character, an integer, a double, and a logical element
y <- list(
  "a",
  1L,
  1.5,
  TRUE
)
str(y)
## List of 4
## $ : chr "a"
## $ : int 1
## $ : num 1.5
## $ : logi TRUE
# A list whose two elements are themselves lists
z <- list(
  list(1, 2),
  list(3, 4)
)
str(z)
## List of 2
## $ :List of 2
## ..$ : num 1
## ..$ : num 2
## $ :List of 2
## ..$ : num 3
## ..$ : num 4
# Show the structure of `diamonds`. The object is not created in this
# section; presumably it is the ggplot2 example dataset (TODO confirm).
str(diamonds)
## Classes 'tbl_df', 'tbl' and 'data.frame': 53940 obs. of 10 variables:
## $ carat : num 0.23 0.21 0.23 0.29 0.31 0.24 0.24 0.26 0.22 0.23 ...
## $ cut : Ord.factor w/ 5 levels "Fair"<"Good"<..: 5 4 2 4 2 3 3 3 1 3 ...
## $ color : Ord.factor w/ 7 levels "D"<"E"<"F"<"G"<..: 2 2 2 6 7 7 6 5 2 5 ...
## $ clarity: Ord.factor w/ 8 levels "I1"<"SI2"<"SI1"<..: 2 3 5 4 2 6 7 3 4 5 ...
## $ depth : num 61.5 59.8 56.9 62.4 63.3 62.8 62.3 61.9 65.1 59.4 ...
## $ table : num 55 61 65 58 58 57 57 55 61 61 ...
## $ price : int 326 326 327 334 335 336 336 337 337 338 ...
## $ x : num 3.95 3.89 4.05 4.2 4.34 3.94 3.95 4.07 3.87 4 ...
## $ y : num 3.98 3.84 4.07 4.23 4.35 3.96 3.98 4.11 3.78 4.05 ...
## $ z : num 2.43 2.31 2.31 2.63 2.75 2.48 2.47 2.53 2.49 2.39 ...
# Four columns, each holding ten random normal draws
df <- tibble(a = rnorm(10), b = rnorm(10), c = rnorm(10), d = rnorm(10))
# Median of each column, written out one column at a time
median(df[["a"]])
## [1] -0.5555419
median(df[["b"]])
## [1] -0.4656169
median(df[["c"]])
## [1] -0.605349
median(df[["d"]])
## [1] -0.9248524
# for loop
# Store the median of every column of df in `output`.
# Output: allocate one double per column before the loop starts
output <- vector(mode = "double", length = ncol(df))
# Sequence: seq_along(df) supplies one index per column
for (i in seq_along(df)) {
  # Body: write the median of column i into slot i
  output[[i]] <- median(df[[i]])
}
output
## [1] -0.5555419 -0.4656169 -0.6053490 -0.9248524
# Allocate the result vector: one double slot per column of df
output <- vector("double", ncol(df))
# vector() fills the new vector with a placeholder that depends on the mode
vector("double", ncol(df))
## [1] 0 0 0 0
vector("logical", ncol(df))
## [1] FALSE FALSE FALSE FALSE
vector("character", ncol(df))
## [1] "" "" "" ""
vector("list", ncol(df))
## [[1]]
## NULL
##
## [[2]]
## NULL
##
## [[3]]
## NULL
##
## [[4]]
## NULL
# Sequence: the clause that appears inside the loop header. It is only valid
# within for (...), so it is kept here as a comment rather than as code:
#   i in seq_along(df)
seq_along(df)
## [1] 1 2 3 4
# Body: the statement the loop runs for each value of i. Run on its own it
# relies on `i` already existing.
output[[i]] <- median(df[[i]])
# 1000 draws from a normal distribution with mean 0 and standard deviation 1
# (the defaults of rnorm())
x <- rnorm(n = 1000)
str(x)
## num [1:1000] -0.969 -1.107 -1.252 -0.524 -0.497 ...
# load microbenchmark library to time code
library(microbenchmark)
# Time two ways of building a vector that holds x + 1
loop_timings <- microbenchmark(
  # don't preallocate: start empty and append one element per iteration
  `No preallocation` = {
    output <- vector("numeric", 0)
    for (i in seq_along(x)) {
      output <- c(output, x[[i]] + 1)
    }
  },
  # preallocate: create the full-length vector first, then fill each slot
  `Preallocation` = {
    output <- vector("numeric", length(x))
    for (i in seq_along(x)) {
      output[[i]] <- x[[i]] + 1
    }
  }
)
# Plot the timings with a log10 time axis
autoplot(loop_timings) +
  scale_y_log10(breaks = c(2, 4, 8, 16, 32)) +
  labs(y = "Time [milliseconds]")
# for loops
# for loops are good
# map() functions may be better
# map() functions
# map() makes a list
# map_lgl() makes a logical vector
# map_int() makes an integer vector
# map_dbl() makes a double vector
# map_chr() makes a character vector
map_dbl(df, mean)
## a b c d
## -0.32976859 -0.09851033 -0.50612789 -0.71983177
# Median of every column, returned as a named double vector
map_dbl(.x = df, .f = median)
## a b c d
## -0.5555419 -0.4656169 -0.6053490 -0.9248524
# Standard deviation of every column
map_dbl(.x = df, .f = sd)
## a b c d
## 0.6377362 0.9825674 0.8589300 0.9474181
# Arguments after the function (here na.rm) are handed on to that function
map_dbl(.x = df, .f = mean, na.rm = TRUE)
## a b c d
## -0.32976859 -0.09851033 -0.50612789 -0.71983177
# The same call written with the pipe
df %>%
  map_dbl(.f = mean, na.rm = TRUE)
## a b c d
## -0.32976859 -0.09851033 -0.50612789 -0.71983177
# map() functions
# Mean of a single column
mtcars %>%
  summarize(mpg = mean(mpg))
## mpg
## 1 20.09062
# Mean of every column, spelled out one column at a time
mtcars %>%
  summarize(
    mpg = mean(mpg),
    cyl = mean(cyl),
    disp = mean(disp),
    hp = mean(hp),
    drat = mean(drat),
    wt = mean(wt),
    qsec = mean(qsec),
    vs = mean(vs),
    am = mean(am),
    gear = mean(gear),
    carb = mean(carb)
  )
## mpg cyl disp hp drat wt qsec vs
## 1 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375
## am gear carb
## 1 0.40625 3.6875 2.8125
# _if allows you to pick variables based on a predicate function like is.numeric() or is.character()
# _at allows you to pick variables using the same syntax as select()
# _all operates on all variables
# summarize_all()
# Apply one function to every column
summarize_all(mtcars, mean)
## mpg cyl disp hp drat wt qsec vs
## 1 20.09062 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375
## am gear carb
## 1 0.40625 3.6875 2.8125
# Apply several functions to every column; the list names become the
# `_min` / `_max` suffixes of the result columns. A named list is used
# because funs() has been deprecated since dplyr 0.8.0.
summarize_all(mtcars, list(min = min, max = max))
## mpg_min cyl_min disp_min hp_min drat_min wt_min qsec_min vs_min am_min
## 1 10.4 4 71.1 52 2.76 1.513 14.5 0 0
## gear_min carb_min mpg_max cyl_max disp_max hp_max drat_max wt_max
## 1 3 1 33.9 8 472 335 4.93 5.424
## qsec_max vs_max am_max gear_max carb_max
## 1 22.9 1 1 5 8
# Apply the function to every non-grouping column within each gear group
mtcars %>%
  group_by(gear) %>%
  summarize_all(mean)
## # A tibble: 3 x 11
## gear mpg cyl disp hp drat wt qsec vs am carb
## <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 3. 16.1 7.47 326. 176. 3.13 3.89 17.7 0.200 0. 2.67
## 2 4. 24.5 4.67 123. 89.5 4.04 2.62 19.0 0.833 0.667 2.33
## 3 5. 21.4 6.00 202. 196. 3.92 2.63 15.6 0.200 1.00 4.40
# summarize_at()
# Mean of every column except mpg
summarize_at(mtcars, vars(-mpg), mean)
## cyl disp hp drat wt qsec vs am gear
## 1 6.1875 230.7219 146.6875 3.596563 3.21725 17.84875 0.4375 0.40625 3.6875
## carb
## 1 2.8125
# Several functions on one column: the result columns take the list names.
# A named list is used because funs() has been deprecated since dplyr 0.8.0.
summarize_at(mtcars, vars(mpg), list(min = min, max = max))
## min max
## 1 10.4 33.9
# One function on two columns: the result columns keep the column names
summarize_at(mtcars, vars(mpg, wt), min)
## mpg wt
## 1 10.4 1.513
# Several functions on several columns: names are <column>_<function>
summarize_at(mtcars, vars(-mpg), list(min = min, max = max))
## cyl_min disp_min hp_min drat_min wt_min qsec_min vs_min am_min gear_min
## 1 4 71.1 52 2.76 1.513 14.5 0 0 3
## carb_min cyl_max disp_max hp_max drat_max wt_max qsec_max vs_max am_max
## 1 1 8 472 335 4.93 5.424 22.9 1 1
## gear_max carb_max
## 1 5 8
# summarize_if()
# Print the starwars tibble to show its mix of column types
starwars
## # A tibble: 87 x 13
## name height mass hair_color skin_color eye_color birth_year gender
## <chr> <int> <dbl> <chr> <chr> <chr> <dbl> <chr>
## 1 Luke Sk… 172 77. blond fair blue 19.0 male
## 2 C-3PO 167 75. <NA> gold yellow 112. <NA>
## 3 R2-D2 96 32. <NA> white, bl… red 33.0 <NA>
## 4 Darth V… 202 136. none white yellow 41.9 male
## 5 Leia Or… 150 49. brown light brown 19.0 female
## 6 Owen La… 178 120. brown, gr… light blue 52.0 male
## 7 Beru Wh… 165 75. brown light blue 47.0 female
## 8 R5-D4 97 32. <NA> white, red red NA <NA>
## 9 Biggs D… 183 84. black light brown 24.0 male
## 10 Obi-Wan… 182 77. auburn, w… fair blue-gray 57.0 male
## # ... with 77 more rows, and 5 more variables: homeworld <chr>,
## # species <chr>, films <list>, vehicles <list>, starships <list>
# Mean of every numeric column within each species, dropping missing values
summarize_if(
  group_by(starwars, species),
  is.numeric,
  mean,
  na.rm = TRUE
)
## # A tibble: 38 x 4
## species height mass birth_year
## <chr> <dbl> <dbl> <dbl>
## 1 Aleena 79.0 15.0 NaN
## 2 Besalisk 198. 102. NaN
## 3 Cerean 198. 82.0 92.0
## 4 Chagrian 196. NaN NaN
## 5 Clawdite 168. 55.0 NaN
## 6 Droid 140. 69.8 53.3
## 7 Dug 112. 40.0 NaN
## 8 Ewok 88.0 20.0 8.00
## 9 Geonosian 183. 80.0 NaN
## 10 Gungan 209. 74.0 52.0
## # ... with 28 more rows
# Replace every column of mtcars with its base-10 logarithm
# (zeros, as in the vs and am columns, become -Inf)
mtcars %>%
  mutate_all(log10)
## mpg cyl disp hp drat wt qsec vs
## 1 1.322219 0.7781513 2.204120 2.041393 0.5910646 0.4183013 1.216430 -Inf
## 2 1.322219 0.7781513 2.204120 2.041393 0.5910646 0.4586378 1.230960 -Inf
## 3 1.357935 0.6020600 2.033424 1.968483 0.5854607 0.3654880 1.269746 0
## 4 1.330414 0.7781513 2.411620 2.041393 0.4885507 0.5071810 1.288696 0
## 5 1.271842 0.9030900 2.556303 2.243038 0.4983106 0.5365584 1.230960 -Inf
## 6 1.257679 0.7781513 2.352183 2.021189 0.4409091 0.5390761 1.305781 0
## 7 1.155336 0.9030900 2.556303 2.389166 0.5065050 0.5526682 1.199755 -Inf
## 8 1.387390 0.6020600 2.166430 1.792392 0.5670264 0.5037907 1.301030 0
## 9 1.357935 0.6020600 2.148603 1.977724 0.5932861 0.4983106 1.359835 0
## 10 1.283301 0.7781513 2.224274 2.089905 0.5932861 0.5365584 1.262451 0
## 11 1.250420 0.7781513 2.224274 2.089905 0.5932861 0.5365584 1.276462 0
## 12 1.214844 0.9030900 2.440594 2.255273 0.4871384 0.6095944 1.240549 -Inf
## 13 1.238046 0.9030900 2.440594 2.255273 0.4871384 0.5717088 1.245513 -Inf
## 14 1.181844 0.9030900 2.440594 2.255273 0.4871384 0.5774918 1.255273 -Inf
## 15 1.017033 0.9030900 2.673942 2.311754 0.4668676 0.7201593 1.254790 -Inf
## 16 1.017033 0.9030900 2.662758 2.332438 0.4771213 0.7343197 1.250908 -Inf
## 17 1.167317 0.9030900 2.643453 2.361728 0.5092025 0.7279477 1.241048 -Inf
## 18 1.510545 0.6020600 1.895975 1.819544 0.6106602 0.3424227 1.289366 0
## 19 1.482874 0.6020600 1.879096 1.716003 0.6928469 0.2081725 1.267641 0
## 20 1.530200 0.6020600 1.851870 1.812913 0.6253125 0.2636361 1.298853 0
## 21 1.332438 0.6020600 2.079543 1.986772 0.5682017 0.3918169 1.301247 0
## 22 1.190332 0.9030900 2.502427 2.176091 0.4409091 0.5465427 1.227115 -Inf
## 23 1.181844 0.9030900 2.482874 2.176091 0.4983106 0.5359267 1.238046 -Inf
## 24 1.123852 0.9030900 2.544068 2.389166 0.5717088 0.5843312 1.187803 -Inf
## 25 1.283301 0.9030900 2.602060 2.243038 0.4885507 0.5848963 1.231724 -Inf
## 26 1.436163 0.6020600 1.897627 1.819544 0.6106602 0.2866810 1.276462 0
## 27 1.414973 0.6020600 2.080266 1.959041 0.6464037 0.3304138 1.222716 -Inf
## 28 1.482874 0.6020600 1.978181 2.053078 0.5763414 0.1798389 1.227887 0
## 29 1.198657 0.9030900 2.545307 2.421604 0.6253125 0.5010593 1.161368 -Inf
## 30 1.294466 0.7781513 2.161368 2.243038 0.5587086 0.4424798 1.190332 -Inf
## 31 1.176091 0.9030900 2.478566 2.525045 0.5490033 0.5526682 1.164353 -Inf
## 32 1.330414 0.6020600 2.082785 2.037426 0.6138418 0.4440448 1.269513 0
## am gear carb
## 1 0 0.6020600 0.6020600
## 2 0 0.6020600 0.6020600
## 3 0 0.6020600 0.0000000
## 4 -Inf 0.4771213 0.0000000
## 5 -Inf 0.4771213 0.3010300
## 6 -Inf 0.4771213 0.0000000
## 7 -Inf 0.4771213 0.6020600
## 8 -Inf 0.6020600 0.3010300
## 9 -Inf 0.6020600 0.3010300
## 10 -Inf 0.6020600 0.6020600
## 11 -Inf 0.6020600 0.6020600
## 12 -Inf 0.4771213 0.4771213
## 13 -Inf 0.4771213 0.4771213
## 14 -Inf 0.4771213 0.4771213
## 15 -Inf 0.4771213 0.6020600
## 16 -Inf 0.4771213 0.6020600
## 17 -Inf 0.4771213 0.6020600
## 18 0 0.6020600 0.0000000
## 19 0 0.6020600 0.3010300
## 20 0 0.6020600 0.0000000
## 21 -Inf 0.4771213 0.0000000
## 22 -Inf 0.4771213 0.3010300
## 23 -Inf 0.4771213 0.3010300
## 24 -Inf 0.4771213 0.6020600
## 25 -Inf 0.4771213 0.3010300
## 26 0 0.6020600 0.0000000
## 27 0 0.6989700 0.3010300
## 28 0 0.6989700 0.3010300
## 29 0 0.6989700 0.6020600
## 30 0 0.6989700 0.7781513
## 31 0 0.6989700 0.9030900
## 32 0 0.6020600 0.3010300
library(nycflights13)
# Rows where any value is missing: keep a row when is.na() holds for at
# least one of its columns
filter_all(.tbl = weather, .vars_predicate = any_vars(is.na(.)))
## # A tibble: 3,109 x 15
## origin year month day hour temp dewp humid wind_dir wind_speed
## <chr> <dbl> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 EWR 2013. 1. 1 17 39.2 28.4 64.9 270. 16.1
## 2 EWR 2013. 1. 1 18 39.2 28.4 64.9 330. 15.0
## 3 EWR 2013. 1. 3 16 30.9 14.0 49.0 NA 4.60
## 4 EWR 2013. 1. 6 10 33.8 30.2 86.5 210. 4.60
## 5 EWR 2013. 1. 6 12 33.8 32.0 93.0 220. 9.21
## 6 EWR 2013. 1. 6 13 35.6 32.0 86.6 240. 8.06
## 7 EWR 2013. 1. 6 14 35.6 32.0 86.6 230. 8.06
## 8 EWR 2013. 1. 6 15 37.4 30.2 75.0 250. 11.5
## 9 EWR 2013. 1. 11 17 45.0 36.0 70.5 NA 4.60
## 10 EWR 2013. 1. 11 21 46.4 39.2 75.8 90. 4.60
## # ... with 3,099 more rows, and 5 more variables: wind_gust <dbl>,
## # precip <dbl>, pressure <dbl>, visib <dbl>, time_hour <dttm>
# Rows where all wind variables are missing: the predicate must hold for
# every column whose name starts with "wind"
filter_at(
  .tbl = weather,
  .vars = vars(starts_with("wind")),
  .vars_predicate = all_vars(is.na(.))
)
## # A tibble: 3 x 15
## origin year month day hour temp dewp humid wind_dir wind_speed
## <chr> <dbl> <dbl> <int> <int> <dbl> <dbl> <dbl> <dbl> <dbl>
## 1 EWR 2013. 3. 27 21 52.0 19.0 27.0 NA NA
## 2 JFK 2013. 7. 4 10 73.0 71.1 93.5 NA NA
## 3 JFK 2013. 7. 20 10 81.0 71.1 71.9 NA NA
## # ... with 5 more variables: wind_gust <dbl>, precip <dbl>,
## # pressure <dbl>, visib <dbl>, time_hour <dttm>