## ----style, echo = FALSE, results = 'asis'--------------------------------------------------------
# Call BiocStyle's markdown() helper; the chunk header sets results = 'asis',
# so whatever it emits is passed through to the document verbatim.
BiocStyle::markdown()
# Console output width of 100 characters; printing stops after 1000 entries.
options(width=100, max.print=1000)
# Chunk defaults: `eval` and `cache` are read from the KNITR_EVAL and
# KNITR_CACHE environment variables (each falls back to "TRUE" when unset);
# warnings and messages are switched off for every chunk.
knitr::opts_chunk$set(
    eval=as.logical(Sys.getenv("KNITR_EVAL", "TRUE")),
    cache=as.logical(Sys.getenv("KNITR_CACHE", "TRUE")),
    warning=FALSE, message=FALSE)


## ----setup, echo=FALSE, message=FALSE, warning=FALSE----------------------------------------------
# Attach limma and ggplot2 quietly (their startup banners are suppressed).
suppressPackageStartupMessages({
    library(limma)
    library(ggplot2)
})


## ----tidyverse_install, eval=FALSE----------------------------------------------------------------
## install.packages("tidyverse")


## ----data_frame_tbl1, eval=TRUE-------------------------------------------------------------------
library(tidyverse) # Attach the tidyverse packages used below
iris %>% as_tibble() # Convert the built-in iris data.frame to a tibble


## ----tabular_sample, eval=TRUE--------------------------------------------------------------------
# Write the built-in iris data to "iris.txt" as a tab-separated file; the
# import examples that follow read this file back.
write_tsv(iris, "iris.txt") # Creates sample file


## ----tabular_import1, eval=TRUE-------------------------------------------------------------------
# Read the tab-separated sample file back into a tibble and print it.
iris_df <- read_tsv("iris.txt") # Import with read_tsv from readr package
iris_df


## ----tabular_import2, eval=TRUE-------------------------------------------------------------------
library(data.table)
# Read the same file with data.table's fread(), then convert the result to a tibble.
iris_df <- fread("iris.txt") %>% as_tibble()
iris_df


## ----tabular_import_ignore, eval=FALSE------------------------------------------------------------
## fread(cmd = "grep -v '^#' iris.txt")


## ----tabular_export_readr, eval=FALSE-------------------------------------------------------------
## write_tsv(iris_df, "iris.txt")


## ----dplyr_bind, eval=TRUE------------------------------------------------------------------------
iris_df %>% bind_cols(iris_df) # Place the columns of both tables side by side
iris_df %>% bind_rows(iris_df) # Stack the rows of both tables


## ----plyr_get_cols, eval=TRUE---------------------------------------------------------------------
iris_df[[5]][seq_len(12)] # Column extracted by position, first 12 values
iris_df[["Species"]][seq_len(12)] # Column extracted by name, first 12 values


## ----plyr_filter, eval=TRUE-----------------------------------------------------------------------
# Keep rows that satisfy both conditions (comma-separated conditions are ANDed).
iris_df %>% filter(Sepal.Length > 7.5, Species == "virginica")


## ----plyr_filter_base, eval=TRUE------------------------------------------------------------------
# Base R row filter on the same two conditions. The columns are extracted as
# plain vectors with `$` so that the row index is a logical vector: on a
# tibble, iris_df[, "col"] stays a one-column tibble and comparing it yields a
# logical matrix, which tibble (>= 3.0) no longer accepts as a row index
# without a deprecation warning.
iris_df[iris_df$Sepal.Length > 7.5 & iris_df$Species == "virginica", ]


## ----plyr_filter_boolean, eval=TRUE---------------------------------------------------------------
# `|` ORs the two Sepal.Length tests; the comma then ANDs that with the Species test.
iris_df %>% filter(Sepal.Length > 7.5 | Sepal.Length < 5.5, Species == "virginica")


## ----plyr_subset, eval=TRUE-----------------------------------------------------------------------
iris_df %>% slice(1:2) # Rows 1 and 2, selected by position


## ----plyr_subset_base, eval=TRUE------------------------------------------------------------------
iris_df[seq_len(2), ] # Base R: rows 1 and 2, all columns


## ----plyr_sample_set2, eval=TRUE------------------------------------------------------------------
# Sample tibble: an ID column (g1..g10) followed by four integer columns
# CA1..CA4 taken from a 10 x 4 matrix. tibble() is used for the ID column
# because data_frame() is deprecated.
df1 <- bind_cols(
    tibble(ids1 = paste0("g", 1:10)),
    as_tibble(matrix(1:40, 10, 4, dimnames = list(1:10, paste0("CA", 1:4))))
)
df1


## ----plyr_subset_names, eval=TRUE-----------------------------------------------------------------
# match() gives the row positions of the requested IDs, in the requested order.
df1 %>% slice(match(c("g10", "g4", "g4"), ids1))


## ----plyr_subset_names_base, eval=TRUE------------------------------------------------------------
df1_old <- as.data.frame(df1) # Plain data.frame copy of df1
rownames(df1_old) <- df1_old[[1]] # Use the first column's values as row names
df1_old[c("g10", "g4", "g4"), ] # Select rows by name; a name may be repeated


## ----plyr_order1, eval=TRUE-----------------------------------------------------------------------
iris_df %>% arrange(Species, Sepal.Length, Sepal.Width) # Sort by three columns, ascending


## ----plyr_order2, eval=TRUE-----------------------------------------------------------------------
# Species in descending order; ties are broken by the two sepal columns, ascending.
iris_df %>% arrange(desc(Species), Sepal.Length, Sepal.Width)


## ----plyr_order_base, eval=TRUE-------------------------------------------------------------------
# Base R: order() returns the row permutation, which is then used as the row index.
iris_df[with(iris_df, order(Species, Sepal.Length, Sepal.Width)), ]
iris_df[order(iris_df[["Species"]], decreasing = TRUE), ] # Species descending


## ----plyr_col_select1, eval=TRUE------------------------------------------------------------------
iris_df %>% select(Species, Petal.Length, Sepal.Length) # Pick columns in the given order


## ----plyr_col_select2, eval=TRUE------------------------------------------------------------------
iris_df %>% select(Sepal.Length:Petal.Width) # Contiguous range of columns


## ----plyr_col_drop, eval=TRUE---------------------------------------------------------------------
iris_df %>% select(-(Sepal.Length:Petal.Width)) # Drop a contiguous range of columns


## ----plyr_col_rename, eval=TRUE-------------------------------------------------------------------
iris_df %>% rename(new_col_name = Species) # Syntax: new name = old name


## ----baser_col_rename, eval=FALSE-----------------------------------------------------------------
## colnames(iris_df)[colnames(iris_df)=="Species"] <- "new_col_name"


## ----plyr_unique, eval=TRUE-----------------------------------------------------------------------
# One row per distinct Species value; .keep_all retains the remaining columns.
iris_df %>% distinct(Species, .keep_all = TRUE)


## ----baser_unique, eval=TRUE----------------------------------------------------------------------
iris_df[!duplicated(iris_df[["Species"]]), ] # Base R: keep the first row of each Species


## ----plyr_mutate, eval=TRUE-----------------------------------------------------------------------
# Append two computed columns to the existing ones.
iris_df %>%
    mutate(Ratio = Sepal.Length / Sepal.Width,
           Sum = Sepal.Length + Sepal.Width)


## ----plyr_transmute, eval=TRUE--------------------------------------------------------------------
# Same computations, but only the two new columns are returned.
iris_df %>%
    transmute(Ratio = Sepal.Length / Sepal.Width,
              Sum = Sepal.Length + Sepal.Width)


## ----plyr_bind_cols, eval=TRUE--------------------------------------------------------------------
iris_df %>% bind_cols(iris_df) # Place the columns of both tables side by side


## ----plyr_summarize1, eval=TRUE-------------------------------------------------------------------
iris_df %>% summarize(mean(Petal.Length)) # Collapse to a single summary row


## ----plyr_summarize2, eval=TRUE-------------------------------------------------------------------
iris_df[, 1:4] %>% summarize_all(mean) # Mean of each of the first four columns


## ----plyr_summarize, eval=TRUE--------------------------------------------------------------------
# Group first, then summarize: one mean per Species.
iris_df %>%
    group_by(Species) %>%
    summarize(mean(Petal.Length))


## ----plyr_summarize3, eval=TRUE-------------------------------------------------------------------
# Per-Species mean of every non-grouping column.
iris_df %>%
    group_by(Species) %>%
    summarize_all(mean)


## ----plyr_join_sample, eval=TRUE------------------------------------------------------------------
# Two sample tibbles for the join examples. tibble() is used for the ID
# columns because data_frame() is deprecated.
# df1: IDs g1..g10 plus integer columns CA1..CA4.
df1 <- bind_cols(
    tibble(ids1 = paste0("g", 1:10)),
    as_tibble(matrix(1:40, 10, 4, dimnames = list(1:10, paste0("CA", 1:4))))
)
df1
# df2: IDs g2, g5, g11, g12 plus integer columns CB1..CB4.
df2 <- bind_cols(
    tibble(ids2 = paste0("g", c(2, 5, 11, 12))),
    as_tibble(matrix(1:16, 4, 4, dimnames = list(1:4, paste0("CB", 1:4))))
)
df2


## ----plyr_inner_join, eval=TRUE-------------------------------------------------------------------
# Rows whose ID occurs in both tables (df1$ids1 matched against df2$ids2).
df1 %>% inner_join(df2, by = c("ids1" = "ids2"))


## ----plyr_left_join, eval=TRUE--------------------------------------------------------------------
# Every row of df1, with df2's columns filled in where the ID matches.
df1 %>% left_join(df2, by = c("ids1" = "ids2"))


## ----plyr_right_join, eval=TRUE-------------------------------------------------------------------
# Every row of df2, with df1's columns filled in where the ID matches.
df1 %>% right_join(df2, by = c("ids1" = "ids2"))


## ----plyr_full_join, eval=TRUE--------------------------------------------------------------------
# All rows from both tables, matched on ID where possible.
df1 %>% full_join(df2, by = c("ids1" = "ids2"))


## ----plyr_anti_join, eval=TRUE--------------------------------------------------------------------
# Rows of df1 whose ID has no match in df2.
df1 %>% anti_join(df2, by = c("ids1" = "ids2"))


## ----plyr_chaining1, eval=TRUE--------------------------------------------------------------------
"iris.txt" %>%
    read_tsv() %>% # Import with read_tsv() from the readr package
    as_tibble() %>% # Make sure the data is a tibble
    select(Sepal.Length:Species) %>% # Select a range of columns
    filter(Species == "setosa") %>% # Keep only the rows for one species
    arrange(Sepal.Length) %>% # Sort ascending by one column
    mutate(Subtract = Petal.Length - Petal.Width) # Compute and append a new column
    # write_tsv("iris.txt") # Export step, left out here so that the result is printed


## ----plyr_chaining2, eval=TRUE--------------------------------------------------------------------
# Summary statistics of Sepal.Length, one row per species.
iris_df %>%
    group_by(Species) %>%
    summarise(
        Mean_Sepal.Length = mean(Sepal.Length),
        Max_Sepal.Length = max(Sepal.Length),
        Min_Sepal.Length = min(Sepal.Length),
        SD_Sepal.Length = sd(Sepal.Length),
        Total = n() # Number of rows in the group
    )


## ----plyr_chaining3, eval=TRUE--------------------------------------------------------------------
# Per-species column means, reshaped to long format and drawn as grouped bars.
iris_df %>%
    group_by(Species) %>%
    summarize_all(mean) %>%
    reshape2::melt(
        id.vars = "Species",
        variable.name = "Samples",
        value.name = "Values"
    ) %>%
    ggplot(aes(x = Samples, y = Values, fill = Species)) +
    geom_bar(stat = "identity", position = "dodge") # Bars side by side, heights taken as-is


## ----load_sqlite, eval=TRUE-----------------------------------------------------------------------
library(RSQLite)
unlink("test.db") # Remove a test.db left over from an earlier run
mydb <- dbConnect(SQLite(), "test.db") # Open a connection; this creates test.db
mydf1 <- data.frame(ids = paste0("id", seq_len(nrow(iris))), iris) # iris plus an ID column
mydf2 <- mydf1[sample(seq_len(nrow(mydf1)), 10), ] # 10 randomly chosen rows of mydf1
# Store both data frames as tables in the database.
dbWriteTable(mydb, "mydf1", mydf1)
dbWriteTable(mydb, "mydf2", mydf2)


## ----list_tables, eval=TRUE-----------------------------------------------------------------------
dbListTables(conn = mydb) # Names of the tables stored in the database


## ----import_sqlite_tables, eval=TRUE--------------------------------------------------------------
dbGetQuery(mydb, "SELECT * FROM mydf2") # Fetch the whole table as a data.frame


## ----query_sqlite_tables, eval=TRUE---------------------------------------------------------------
# Row filter done in SQL; the column name is double-quoted inside the statement.
dbGetQuery(
    conn = mydb,
    statement = 'SELECT * FROM mydf1 WHERE "Sepal.Length" < 4.6'
)


## ----join_sqlite_tables, eval=TRUE----------------------------------------------------------------
# Join the two tables on their ids columns.
dbGetQuery(mydb, 'SELECT * FROM mydf1, mydf2 WHERE mydf1.ids = mydf2.ids')
# No later statement in this script uses the connection, so close it here;
# otherwise the SQLite handle stays open until it is garbage collected.
dbDisconnect(mydb)


## ----sessionInfo----------------------------------------------------------------------------------
utils::sessionInfo() # Report the R version and the packages in use for this run