---
title: "R Notebook"
output:
html_document:
df_print: paged
editor_options:
chunk_output_type: console
---
# Data managements with dplyr
## filter
```{r}
require(readxl)
diab <-read_excel("diabetes.xls")
head(diab, n = 4)
summary(diab)
diab <- diab %>% mutate_if(is.character, as.factor)
summary(diabetes)
require(tidyverse)
#require(dplyr)
diab_filt <- filter(diab, tabac=="No fumador", edat>=50) # and & - or |
head(diab_filt, n = 4)
summary(diab_filt)
#diab$tabac <- as.factor(diab$tabac) ##**Warning!** by default alphabetic order is used when creating factor levels.
rm(diabetes) #Drop dataset from env
```
## select
```{r}
diab_small <- select(diab, mort, edat, tabac, sbp)
head(diab_small, n = 4)
diab_small_consec <- select(diab, mort:bmi) # Select consecutive variables
diab_small_noconsec <- select(diab, !(mort:bmi)) # Non-select consecutive variables
```
## sort
```{r}
diab_sort <-arrange(diab,desc(mort), edat)
head(diab_sort, n = 4)
```
## mutate
```{r}
diab_mut <- mutate(diab,edatcat =cut(edat, breaks =c(0,40,50,60,70,120)),highbmi = bmi> mean(bmi))
tail(diab_mut, n = 4)
summary(diab_mut)
table(diab_mut$edatcat, diab_mut$highbmi)
diab_mut_2 <- mutate(diab,edatcat =cut(edat, breaks =c(0,40,50,60,70,120),
include.lowest=TRUE,
labels=c("0-40","41-50","51-60", "61-70", "71-120")))
```
## E1
Find all individual that:
1. Had a sbp higher than 160 (filter())
```{r}
diab_sbp <- filter(diab, sbp>160)
summary(diab_sbp)
```
2. Had a sbp higher than 160 or tabac was 'Fumador'
```{r}
diab_sbp_tabac <- filter(diab, sbp>160 | tabac=='Fumador')
summary(diab_sbp_tabac)
```
- What happens if you include the name of a variable multiple times in a select() call?
```{r}
diab_mselect <- select(diab, sbp, sbp, dbp)
summary(diab_mselect);names(diab_mselect)
```
3. Sort individual to find the most 'tempsviu'. (arrange() )
```{r}
diab_ltempsviu <- arrange(diab, tempsviu)
head(diab_ltempsviu, 4)
```
# The pipe operator
```{r}
diab40 <-filter(diab, mort=="Vivo"&edat<40)
diab40_small <-select(diab40, edat, dbp)
head(diab40_small,n = 4)
```
```{r}
diab40_small <- diab %>%
filter(mort=="Vivo"&edat<40) %>%
select(edat, dbp)
head(diab40_small,n = 4)
```
## E2
Replicate the last exercice using 'pipes':
```{r}
df <- filter(diab,sbp > 160 | tabac == "Fumador")
dfs <- select(df, tempsviu ,bmi,sbp,sbp)
dfsa <- arrange(dfs, desc(tempsviu))
```
```{r}
diab_pipes <- diab %>% filter(sbp > 160 | tabac == "Fumador") %>%
select(tempsviu ,bmi,sbp,sbp) %>%
arrange(desc(tempsviu))
```
# Merging datasets
```{r}
band_members
band_instruments
```
```{r}
bind_rows(band_members, band_instruments)
## PROBLEMES!
bind_cols(band_members, band_instruments)
```
## mutating
```{r}
band_members %>% inner_join(band_instruments, by = "name")
```
```{r}
band_members %>% left_join(band_instruments)
```
##E3
What happens if you run these lines?
```{r}
band_members %>% right_join(band_instruments)
band_members %>% full_join(band_instruments)
```
#More
```{r}
diab %>% group_by(mort) %>% summarise(mean(sbp))
```