NAs and zeros can increase the noise in multi-environment trial analysis. This collection of functions will make it easier to deal with them.
- `fill_na()`: Fills `NA` in selected columns using the next or previous entry.
- `has_na()`, `has_zero()`: Check for `NA`s and `0`s in the data and return a logical value.
- `prop_na()`: Returns the proportion of `NA`s in each column of a data frame.
- `random_na()`: Generate random `NA` values in a two-way table based on a desired proportion.
- `remove_cols_na()`, `remove_rows_na()`: Remove columns and rows that contain at least one `NA` value.
- `remove_cols_all_na()`, `remove_rows_all_na()`: Remove columns and rows where all values are `NA`s.
- `remove_cols_zero()`, `remove_rows_zero()`: Remove columns and rows that contain at least one `0` value, respectively.
- `select_cols_na()`, `select_cols_zero()`: Select columns with `NA`s and `0`s, respectively.
- `select_rows_na()`, `select_rows_zero()`: Select rows with `NA`s and `0`s, respectively.
- `replace_na()`, `replace_zero()`: Replace `NA`s and `0`s, respectively, with a `replacement` value.
Usage
fill_na(.data, ..., direction = "down")
has_na(.data)
prop_na(.data, ...)
remove_rows_na(.data, verbose = TRUE)
remove_rows_all_na(.data, verbose = TRUE)
remove_cols_na(.data, verbose = TRUE)
remove_cols_all_na(.data, verbose = TRUE)
select_cols_na(.data, verbose = TRUE)
select_rows_na(.data, verbose = TRUE)
replace_na(.data, ..., replacement = 0)
random_na(.data, prop)
has_zero(.data)
remove_rows_zero(.data, verbose = TRUE)
remove_cols_zero(.data, verbose = TRUE)
select_cols_zero(.data, verbose = TRUE)
select_rows_zero(.data, verbose = TRUE)
replace_zero(.data, ..., replacement = NA)
Arguments
- .data
- A data frame. 
- ...
- Variables to fill `NA`s in `fill_na()`, replace `NA`s in `replace_na()`, or replace zeros in `replace_zero()`. If `...` is null then all variables in `.data` will be evaluated. It must be a single variable name or a comma-separated list of unquoted variable names. Select helpers are also allowed.
- direction
- Direction in which to fill missing values. Currently either "down" (the default), "up", "downup" (i.e. first down and then up) or "updown" (first up and then down). 
- verbose
- Logical argument. If `TRUE` (default), shows in the console the rows or columns deleted.
- replacement
- The value used for replacement. Defaults to `0`. Other possible values are `"colmean"`, `"colmin"`, and `"colmax"`, which replace missing values with the column mean, minimum, and maximum values, respectively.
- prop
- The proportion (percentage) of `NA` values to generate in `.data`.
Author
Tiago Olivoto tiagoolivoto@gmail.com
Examples
# \donttest{
library(metan)
data_naz <- iris %>%
              group_by(Species) %>%
              doo(~head(., n = 3)) %>%
              as_character(Species)
data_naz
#> # A tibble: 9 × 5
#>   Species    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>             <dbl>       <dbl>        <dbl>       <dbl>
#> 1 setosa              5.1         3.5          1.4         0.2
#> 2 setosa              4.9         3            1.4         0.2
#> 3 setosa              4.7         3.2          1.3         0.2
#> 4 versicolor          7           3.2          4.7         1.4
#> 5 versicolor          6.4         3.2          4.5         1.5
#> 6 versicolor          6.9         3.1          4.9         1.5
#> 7 virginica           6.3         3.3          6           2.5
#> 8 virginica           5.8         2.7          5.1         1.9
#> 9 virginica           7.1         3            5.9         2.1
data_naz[c(2:3, 6, 8), c(1:2, 4, 5)] <- NA
data_naz[c(2, 7, 9), c(2, 3, 4)] <- 0
has_na(data_naz)
#> [1] TRUE
has_zero(data_naz)
#> [1] TRUE
# Fill NA values of column Species
fill_na(data_naz, Species)
#> # A tibble: 9 × 5
#>   Species    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>             <dbl>       <dbl>        <dbl>       <dbl>
#> 1 setosa              5.1         3.5          1.4         0.2
#> 2 setosa              0           0            0          NA  
#> 3 setosa             NA           3.2         NA          NA  
#> 4 versicolor          7           3.2          4.7         1.4
#> 5 versicolor          6.4         3.2          4.5         1.5
#> 6 versicolor         NA           3.1         NA          NA  
#> 7 virginica           0           0            0           2.5
#> 8 virginica          NA           2.7         NA          NA  
#> 9 virginica           0           0            0           2.1
# Remove columns
remove_cols_na(data_naz)
#> Warning: Column(s) Species, Sepal.Length, Petal.Length, Petal.Width with NA values deleted.
#> # A tibble: 9 × 1
#>   Sepal.Width
#>         <dbl>
#> 1         3.5
#> 2         0  
#> 3         3.2
#> 4         3.2
#> 5         3.2
#> 6         3.1
#> 7         0  
#> 8         2.7
#> 9         0  
remove_cols_zero(data_naz)
#> Warning: Column(s) Sepal.Length, Sepal.Width, Petal.Length with 0s deleted.
#> # A tibble: 9 × 2
#>   Species    Petal.Width
#>   <chr>            <dbl>
#> 1 setosa             0.2
#> 2 NA                NA  
#> 3 NA                NA  
#> 4 versicolor         1.4
#> 5 versicolor         1.5
#> 6 NA                NA  
#> 7 virginica          2.5
#> 8 NA                NA  
#> 9 virginica          2.1
remove_rows_na(data_naz)
#> Warning: Row(s) 2, 3, 6, 8 with NA values deleted.
#> # A tibble: 5 × 5
#>   Species    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>             <dbl>       <dbl>        <dbl>       <dbl>
#> 1 setosa              5.1         3.5          1.4         0.2
#> 2 versicolor          7           3.2          4.7         1.4
#> 3 versicolor          6.4         3.2          4.5         1.5
#> 4 virginica           0           0            0           2.5
#> 5 virginica           0           0            0           2.1
remove_rows_zero(data_naz)
#> Warning: Row(s) 2, 7, 9 with 0s deleted.
#> # A tibble: 6 × 5
#>   Species    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>             <dbl>       <dbl>        <dbl>       <dbl>
#> 1 setosa              5.1         3.5          1.4         0.2
#> 2 NA                 NA           3.2         NA          NA  
#> 3 versicolor          7           3.2          4.7         1.4
#> 4 versicolor          6.4         3.2          4.5         1.5
#> 5 NA                 NA           3.1         NA          NA  
#> 6 NA                 NA           2.7         NA          NA  
# Select columns
select_cols_na(data_naz)
#> Warning: Column(s) with NAs: Species, Sepal.Length, Petal.Length, Petal.Width
#> # A tibble: 9 × 4
#>   Species    Sepal.Length Petal.Length Petal.Width
#>   <chr>             <dbl>        <dbl>       <dbl>
#> 1 setosa              5.1          1.4         0.2
#> 2 NA                  0            0          NA  
#> 3 NA                 NA           NA          NA  
#> 4 versicolor          7            4.7         1.4
#> 5 versicolor          6.4          4.5         1.5
#> 6 NA                 NA           NA          NA  
#> 7 virginica           0            0           2.5
#> 8 NA                 NA           NA          NA  
#> 9 virginica           0            0           2.1
select_cols_zero(data_naz)
#> Warning: Column(s) with 0s: Sepal.Length, Sepal.Width, Petal.Length
#> # A tibble: 9 × 3
#>   Sepal.Length Sepal.Width Petal.Length
#>          <dbl>       <dbl>        <dbl>
#> 1          5.1         3.5          1.4
#> 2          0           0            0  
#> 3         NA           3.2         NA  
#> 4          7           3.2          4.7
#> 5          6.4         3.2          4.5
#> 6         NA           3.1         NA  
#> 7          0           0            0  
#> 8         NA           2.7         NA  
#> 9          0           0            0  
select_rows_na(data_naz)
#> Warning: Rows(s) with NAs: 2, 3, 6, 8
#> # A tibble: 4 × 5
#>   Species Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>          <dbl>       <dbl>        <dbl>       <dbl>
#> 1 NA                 0         0              0          NA
#> 2 NA                NA         3.2           NA          NA
#> 3 NA                NA         3.1           NA          NA
#> 4 NA                NA         2.7           NA          NA
select_rows_zero(data_naz)
#> Warning: Rows(s) with 0s: 1, 2, 3
#> # A tibble: 3 × 5
#>   Species   Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>            <dbl>       <dbl>        <dbl>       <dbl>
#> 1 NA                   0           0            0        NA  
#> 2 virginica            0           0            0         2.5
#> 3 virginica            0           0            0         2.1
# Replace values
replace_na(data_naz)
#> # A tibble: 9 × 5
#>   Species    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>             <dbl>       <dbl>        <dbl>       <dbl>
#> 1 setosa              5.1         3.5          1.4         0.2
#> 2 0                   0           0            0           0  
#> 3 0                   0           3.2          0           0  
#> 4 versicolor          7           3.2          4.7         1.4
#> 5 versicolor          6.4         3.2          4.5         1.5
#> 6 0                   0           3.1          0           0  
#> 7 virginica           0           0            0           2.5
#> 8 0                   0           2.7          0           0  
#> 9 virginica           0           0            0           2.1
replace_zero(data_naz)
#> # A tibble: 9 × 5
#>   Species    Sepal.Length Sepal.Width Petal.Length Petal.Width
#>   <chr>             <dbl>       <dbl>        <dbl>       <dbl>
#> 1 setosa              5.1         3.5          1.4         0.2
#> 2 NA                 NA          NA           NA          NA  
#> 3 NA                 NA           3.2         NA          NA  
#> 4 versicolor          7           3.2          4.7         1.4
#> 5 versicolor          6.4         3.2          4.5         1.5
#> 6 NA                 NA           3.1         NA          NA  
#> 7 virginica          NA          NA           NA           2.5
#> 8 NA                 NA           2.7         NA          NA  
#> 9 virginica          NA          NA           NA           2.1
# }
