# ggplot2 R Script ==============================

# EXERCISE: MAKE A barplot
# of the mean body_mass_g
# BY sex and year
# with error bars ======

library(palmerpenguins)
library(ggplot2)
library(dplyr)

data("penguins")

# see example at
# http://www.cookbook-r.com/Graphs/Plotting_means_and_error_bars_(ggplot2)/

# components needed
# means of body mass by sex
# and value to use for +/- for
# min, max of each error bar
# could be sd, se, or ci

# a helpful function, summarySE ======================
# summarySE provides the standard deviation, standard error of the mean,
# and a (default 95%) confidence interval

## Gives count, mean, standard deviation, standard error of the mean,
## and confidence interval (default 95%).
##   data: a data frame.
##   measurevar: the name of a column that contains the variable to be
##     summarized
##   groupvars: a vector containing names of columns that contain grouping
##     variables
##   na.rm: a boolean that indicates whether to ignore NA's
##   conf.interval: the percent range of the confidence interval
##     (default is 95%)
##   .drop: forwarded unchanged to the .drop argument of plyr::ddply()
## Returns a data frame with one row per group holding the grouping
## columns, N (count), a column named after `measurevar` (the group mean),
## sd, se (sd / sqrt(N)) and ci (se times the t multiplier with N - 1
## degrees of freedom).
summarySE <- function(data = NULL, measurevar, groupvars = NULL,
                      na.rm = FALSE, conf.interval = .95, .drop = TRUE) {
  # plyr is used through its namespace (plyr::) rather than library(plyr):
  # attaching plyr after dplyr would mask dplyr verbs such as summarise(),
  # mutate() and rename() for the rest of the session.

  # New version of length which can handle NA's:
  # if na.rm is TRUE, don't count them
  length2 <- function(x, na.rm = FALSE) {
    if (na.rm) {
      sum(!is.na(x))
    } else {
      length(x)
    }
  }

  # This does the summary. For each group's data frame,
  # return a vector with
  # N, mean, and sd
  datac <- plyr::ddply(
    data,
    groupvars,
    .drop = .drop,
    .fun = function(xx, col) {
      c(
        N = length2(xx[[col]], na.rm = na.rm),
        mean = mean(xx[[col]], na.rm = na.rm),
        sd = sd(xx[[col]], na.rm = na.rm)
      )
    },
    measurevar # passed through ddply's ... as the `col` argument of .fun
  )

  # Rename the "mean" column
  datac <- plyr::rename(datac, c("mean" = measurevar))

  # Calculate standard error of the mean
  datac$se <- datac$sd / sqrt(datac$N)

  # Confidence interval multiplier for standard error
  # Calculate t-statistic for confidence interval:
  # e.g., if conf.interval is .95, use .975 (above/below), and use df=N-1
  ciMult <- qt(conf.interval / 2 + .5, datac$N - 1)
  datac$ci <- datac$se * ciMult

  datac
}

# look at the global environment
# see functions

# get the summary stats from penguins dataset
penguins_bm <- summarySE(
  penguins,
  measurevar = "body_mass_g",
  groupvars = c("sex", "year")
)

# Use 95% confidence intervals
ggplot(penguins_bm, aes(x = year, y = body_mass_g, fill = sex)) +
  geom_bar(position = position_dodge(), stat = "identity") +
  geom_errorbar(
    aes(ymin = body_mass_g - ci, ymax = body_mass_g + ci),
    width = .2, # Width of the error bars
    position = position_dodge(.9)
  )

# filter out NAs and redo plot
penguins_bmc <- penguins_bm %>%
  filter(complete.cases(.))

# Use 95% confidence intervals
ggplot(penguins_bmc, aes(x = year, y = body_mass_g, fill = sex)) +
  geom_bar(position = position_dodge(), stat = "identity") +
  geom_errorbar(
    aes(ymin = body_mass_g - ci, ymax = body_mass_g + ci),
    width = .2, # Width of the error bars
    position = position_dodge(.9)
  )

# EXERCISE - YOUR TURN ==================================
# Make clustered bar plot
# for flipper_length_mm
# by year and species
# use sd (standard deviation) for error bars

# Stacked Proportional Bar Chart
# see examples at https://r-graphics.org/recipe-bar-graph-proportional-stacked-bar

# Make proportional bar chart
# of sex by island

# We need "counts" of each first
penguins_tab1 <- table(penguins$sex, penguins$island)

# make it a data.frame
penguins_tab1.df <- data.frame(penguins_tab1)

# change names
names(penguins_tab1.df) <- c("sex", "island", "count")

ggplot(penguins_tab1.df, aes(x = sex, y = count, fill = island)) +
  geom_col(position = "fill")

# make a little better
ggplot(penguins_tab1.df, aes(x = sex, y = count, fill = island)) +
  geom_col(colour = "black", position = "fill") +
  scale_y_continuous(labels = scales::percent) +
  scale_fill_brewer(palette = "Pastel1") +
  labs(
    x = "Biological Sex",
    y = "Relative Percent",
    title = "Percentage of Island by Biological Sex",
    fill = "Island Surveyed"
  )

# EXERCISE - YOUR TURN ==================================
# Make proportional bar chart
# of species by island