# An Introduction to R # # Devan Allen McGranahan (devan.mcgranahan@gmail.com) # # Course website: https://www.introranger.org # YouTube lectures: https://www.youtube.com/playlist?list=PLKXOvaXmjIGcSHFMe2Wpsaw4yzvWR0AgQ # github repo: https://github.com/devanmcg/IntroRangeR # # Lesson 6: More data manipulation with tidyverse and friends # if (!require("pacman")) install.packages("pacman") pacman::p_load(tidyverse, readxl) # setwd("../R") # Remember not to use setwd() in a .Rmd file, use full file paths # # Load data # # Note: Data are online at https://github.com/devanmcg/IntroRangeR/blob/master/data/VareExample.xlsx # # New tricks! # # Why export and save each sheet in your Excel file as single .csv files # when you can just go in and get them from the .xlsx file with readxl? # Often convenient to set the file path as an object xl_file = "./data/VareExample.xlsx" # Use full path (no .) in .Rmd file # Make tibbles from specific worksheets in the .xlsx file spp_tbl <- readxl::read_excel(xl_file, "SpeciesData") # Note brevity of path argument man_tbl <- read_excel(xl_file, "Management") # Compare data structure spp_tbl # tibble = no need for str() or head() man_tbl # same # Add a unique sample ID column to spp_d spp_tbl <- unite(data=spp_tbl, # Identify the data col="SampleID", # Name the new column c("Pasture", "Treatment", "Point"), # original columns to combine sep=".") # what's between the labels in the merged column spp_tbl # check out the reverse: separate(spp_tbl, # data set SampleID, # existing column to split up c("Pasture","Treatment","Point")) # new columns to create # Data formats: Wide vs long # spp_tbl in wide format--column for each species values. # gather into long format: spp_long <-pivot_longer(spp_tbl, names_to = 'species', values_to = 'abundance', -SampleID) spp_long # spread them back out: pivot_wider(spp_long, names_from = 'species', values_from = 'abundance') # Uh oh: man_tbl # what's with BareSoil?? # Break out multiple entries w/ two new tidyverse functions: # stringr::str_split # tidyr::unnest man_tbl <- man_tbl %>% mutate(BareSoil = str_split(BareSoil, ",")) %>% unnest(BareSoil) man_tbl man_tbl <- mutate(man_tbl, BareSoil = as.numeric(BareSoil)) man_tbl # Reduce to single variable man_tbl <- man_tbl %>% group_by(SampleID, PastureName, BurnSeason) %>% summarise(BareSoil = mean(BareSoil)) %>% ungroup man_tbl # Associate species and management info & plot full_join(man_tbl, spp_tbl, by="SampleID") full_join(man_tbl, spp_tbl, "SampleID") %>% filter(BurnSeason != "Fall") %>% ggplot(aes(x=(100-BareSoil), y=Empenigr)) + # Note X transformation theme_bw(16) + geom_smooth(method="lm", se=F) + geom_point(size=3) + facet_wrap(~BurnSeason, scales = "free_x") + # Note scales argument labs(x = "Soil coverage (%)", y = "Empetrum nigrum", title = "E. nigrum abundance by ground cover & burn season")