Select helper — Select_helper • metan

These functions allow you to select variables based on operations with prefixes and suffixes, and on the length of variable names.

difference_var(): Select variables that start with a prefix AND do NOT end with a suffix.
intersect_var(): Select variables that start with a prefix AND end with a suffix.
union_var(): Select variables that start with a prefix OR end with a suffix.
width_of(): Select variables with width of n.
width_greater_than(): Select variables with width greater than n.
width_less_than(): Select variables with width less than n.
lower_case_only(): Select variables whose names contain lower case only (e.g., "env").
upper_case_only(): Select variables whose names contain upper case only (e.g., "ENV").
title_case_only(): Select variables whose names contain upper case in the first character only (e.g., "Env").

Usage

difference_var(prefix, suffix)

intersect_var(prefix, suffix)

union_var(prefix, suffix)

width_of(n, vars = peek_vars(fn = "width_of"))

width_greater_than(n, vars = peek_vars(fn = "width_greater_than"))

width_less_than(n, vars = peek_vars(fn = "width_less_than"))

lower_case_only(vars = peek_vars(fn = "lower_case_only"))

upper_case_only(vars = peek_vars(fn = "upper_case_only"))

title_case_only(vars = peek_vars(fn = "title_case_only"))

Arguments

prefix: A prefix that starts the variable name.
suffix: A suffix that ends the variable name.
n: The length of variable names to select. For width_of() the selected variable names contain exactly n characters. For width_greater_than() and width_less_than() the selected variable names contain more and fewer characters than n, respectively.
vars: A character vector of variable names. When called from inside selecting functions like select_cols() these are automatically set to the names of the table.

Examples

# \donttest{
library(metan)


# Select variables that start with "C" and do not end with "D".
data_ge2 %>%
select_cols(difference_var("C", "D"))
#> # A tibble: 156 × 2
#>       CL    CW
#>    <dbl> <dbl>
#>  1  28.1  25.1
#>  2  27.6  21.4
#>  3  28.4  24.0
#>  4  31.7  26.2
#>  5  32.0  20.7
#>  6  30.4  26.8
#>  7  30.6  26.2
#>  8  28.7  24.1
#>  9  27.6  20.5
#> 10  28.2  20.1
#> # … with 146 more rows

# Select variables that start with "C" and end with "D".
data_ge2 %>%
select_cols(intersect_var("C", "D"))
#> # A tibble: 156 × 2
#>       CD  CDED
#>    <dbl> <dbl>
#>  1  16.3 0.538
#>  2  14.5 0.551
#>  3  16.4 0.561
#>  4  17.4 0.586
#>  5  15.5 0.607
#>  6  17.5 0.577
#>  7  18.0 0.594
#>  8  17.2 0.608
#>  9  16.4 0.576
#> 10  15.5 0.597
#> # … with 146 more rows

# Select variables that start with "C" or end with "D".
data_ge2 %>%
select_cols(union_var("C", "D"))
#> # A tibble: 156 × 5
#>       CL    CD    CW  CDED    ED
#>    <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  28.1  16.3  25.1 0.538  52.2
#>  2  27.6  14.5  21.4 0.551  50.3
#>  3  28.4  16.4  24.0 0.561  50.7
#>  4  31.7  17.4  26.2 0.586  54.1
#>  5  32.0  15.5  20.7 0.607  52.7
#>  6  30.4  17.5  26.8 0.577  52.7
#>  7  30.6  18.0  26.2 0.594  51.7
#>  8  28.7  17.2  24.1 0.608  47.2
#>  9  27.6  16.4  20.5 0.576  47.9
#> 10  28.2  15.5  20.1 0.597  47.5
#> # … with 146 more rows

# Select variables with a name width of 4
data_ge2 %>%
select_cols(width_of(4))
#> # A tibble: 156 × 2
#>     CDED  PERK
#>    <dbl> <dbl>
#>  1 0.538  89.6
#>  2 0.551  89.5
#>  3 0.561  89.7
#>  4 0.586  87.9
#>  5 0.607  89.7
#>  6 0.577  88.5
#>  7 0.594  89.1
#>  8 0.608  88.3
#>  9 0.576  89.0
#> 10 0.597  88.7
#> # … with 146 more rows

# Select variables with a name width greater than 2
data_ge2 %>%
select_cols(width_greater_than(2))
#> # A tibble: 156 × 8
#>    ENV   GEN   REP     NKR  CDED  PERK   TKW   NKE
#>    <fct> <fct> <fct> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1 A1    H1    1      36.6 0.538  89.6  418.  521.
#>  2 A1    H1    2      31.4 0.551  89.5  361.  494.
#>  3 A1    H1    3      31.8 0.561  89.7  367.  565.
#>  4 A1    H10   1      32.8 0.586  87.9  374.  519.
#>  5 A1    H10   2      28   0.607  89.7  347.  502.
#>  6 A1    H10   3      32.8 0.577  88.5  394.  525.
#>  7 A1    H11   1      34.6 0.594  89.1  377.  575 
#>  8 A1    H11   2      34.4 0.608  88.3  361.  501.
#>  9 A1    H11   3      34.8 0.576  89.0  322.  513.
#> 10 A1    H12   1      31.6 0.597  88.7  345.  480.
#> # … with 146 more rows

# Select variables with a name width less than 3
data_ge2 %>%
select_cols(width_less_than(3))
#> # A tibble: 156 × 10
#>       PH    EH    EP    EL    ED    CL    CD    CW    KW    NR
#>    <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl> <dbl>
#>  1  2.61  1.71 0.658  16.1  52.2  28.1  16.3  25.1  217.  15.6
#>  2  2.87  1.76 0.628  14.2  50.3  27.6  14.5  21.4  184.  16  
#>  3  2.68  1.58 0.591  16.0  50.7  28.4  16.4  24.0  208.  17.2
#>  4  2.83  1.64 0.581  16.7  54.1  31.7  17.4  26.2  194.  15.6
#>  5  2.79  1.71 0.616  14.9  52.7  32.0  15.5  20.7  176.  17.6
#>  6  2.72  1.51 0.554  16.7  52.7  30.4  17.5  26.8  207.  16.8
#>  7  2.75  1.51 0.549  17.4  51.7  30.6  18.0  26.2  217.  16.8
#>  8  2.72  1.56 0.573  16.7  47.2  28.7  17.2  24.1  181.  13.6
#>  9  2.77  1.67 0.600  15.8  47.9  27.6  16.4  20.5  166.  15.2
#> 10  2.73  1.54 0.563  14.9  47.5  28.2  15.5  20.1  161.  14.8
#> # … with 146 more rows

# Creating data with messy column names
df <- head(data_ge, 3)
colnames(df) <- c("Env", "gen", "Rep", "GY", "hm")
select_cols(df, lower_case_only())
#> # A tibble: 3 × 2
#>   gen      hm
#>   <fct> <dbl>
#> 1 G1     44.9
#> 2 G1     46.9
#> 3 G1     47.8
select_cols(df, upper_case_only())
#> # A tibble: 3 × 1
#>      GY
#>   <dbl>
#> 1  2.17
#> 2  2.50
#> 3  2.43
select_cols(df, title_case_only())
#> # A tibble: 3 × 2
#>   Env   Rep  
#>   <fct> <fct>
#> 1 E1    1    
#> 2 E1    2    
#> 3 E1    3    
# }