# # OCRUG Hackathon 2021-04 # Saturday Evening Data Challenge Event # 2021-04-10 # # Answer Key # library(tidyverse) dat <- read_csv("penguins.csv") # Q1: # Which island(s) have at least 2 penguin species? dat %>% group_by(island) %>% summarize(n_species = n_distinct(species), .groups = "drop") %>% filter(n_species >= 2) %>% pull(island) # Q2: # Which species has the shortest mean bill length and what is its bill length value? # Remove any missing values from the calculation. dat %>% group_by(species) %>% summarize(mean_bill_length = mean(bill_length_mm, na.rm = TRUE), .groups = "drop") %>% filter(mean_bill_length == min(mean_bill_length)) %>% select(species, mean_bill_length) # Q3: # Create a scatter plot of bill length (x-axis) and bill depth (y-axis) colored by species. ggplot(dat, aes(bill_length_mm, bill_depth_mm, color = species)) + geom_point() # Q4: # Create a scatter plot of body mass vs. flipper length for Gentoo penguins and add a linear trend line. # Put the value of the trend line slope in the title of the plot. dat_gentoo <- dat %>% filter(species == "Gentoo") trend_lm <- lm(body_mass_g ~ flipper_length_mm, data = dat_gentoo) trend_slope <- coef(trend_lm)["flipper_length_mm"] ggplot(dat_gentoo, aes(flipper_length_mm, body_mass_g)) + geom_point() + geom_smooth(method = "lm") + labs( title = paste0("Gentoo Body Mass vs. Flipper Length, Trend Slope = ", trend_slope) ) # "body mass vs. flipper length" means body mass goes on the y-axis # but most teams reversed the axes. Here's the solution with flipped x and y-axes dat_gentoo <- dat %>% filter(species == "Gentoo") trend_lm <- lm(flipper_length_mm ~ body_mass_g, data = dat_gentoo) trend_slope <- coef(trend_lm)["body_mass_g"] ggplot(dat_gentoo, aes(body_mass_g, flipper_length_mm)) + geom_point() + geom_smooth(method = "lm") + labs( title = paste0("Gentoo Flipper Length vs. Body Mass, Trend Slope = ", trend_slope) )