---
title: Annotating a plot
author: Abhijit Dasgupta, PhD
---

```{r setup, include=FALSE, child = here::here('slides/templates/setup.Rmd')}
```

```{r setup1, include=FALSE}
# Evaluate all later chunks from the lectures directory. knitr restores the
# working directory after every chunk, so calling setwd() here would only
# affect this chunk; the root.dir option is the persistent equivalent.
knitr::opts_knit$set(root.dir = here::here('slides/lectures'))

library(tidyverse)

# Default ggplot2 theme for the deck: theme_classic() with explicit font sizes
# for axis, legend, title, subtitle and caption text.
theme_set(
  theme_classic() +
    theme(
      axis.text = element_text(size = 14),
      axis.title = element_text(size = 16),
      legend.text = element_text(size = 14),
      legend.title = element_text(size = 16),
      plot.title = element_text(size = 18),
      plot.subtitle = element_text(size = 16),
      plot.caption = element_text(size = 12)
    )
)
```

layout: true
BIOF 339: Practical R
---
class:middle, center, inverse

# Annotations

---

## Stand-alone stories

- You would like a data visualization to stand on its own
- Relevant information should be placed on the graph
- However, you need to balance the information content with real estate
- Don't clutter the graph to the point that it becomes unreadable

---

## An example
![](img/economist1.gif)

We will recreate this plot in a tutorial

---

## Titles, subtitles and captions

<!-- Pattern used on every slide below: the named chunk on the left is shown
     but not evaluated (eval = FALSE, echo = TRUE); the unnamed chunk on the
     right re-uses its code through ref.label and only renders the output. -->

.pull-left[
```{r p1, eval = FALSE, echo = TRUE}
library(palmerpenguins)
(plt <- ggplot(penguins,
               aes(bill_length_mm, body_mass_g,
                   color = species)) +
   geom_point())
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = "p1"}
```
]

---

## Titles, subtitles and captions

.pull-left[
```{r p2, eval = FALSE, echo = TRUE}
library(palmerpenguins)
plt <- ggplot(penguins,
              aes(bill_length_mm, body_mass_g,
                  color = species)) +
  geom_point()
plt +
  labs(x = 'Bill length (mm)',
       y = 'Body mass (g)')
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = "p2"}
```
]

---

## Titles, subtitles and captions

.pull-left[
```{r p3, eval = FALSE, echo = TRUE}
library(palmerpenguins)
plt <- ggplot(penguins,
              aes(bill_length_mm, body_mass_g,
                  color = species)) +
  geom_point()
plt +
  labs(x = 'Bill length (mm)',
       y = 'Body mass (g)',
       color = 'Species') #<<
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = "p3"}
```
]

---

## Titles, subtitles and captions

.pull-left[
```{r p4, eval = FALSE, echo = TRUE}
library(palmerpenguins)
plt <- ggplot(penguins,
              aes(bill_length_mm, body_mass_g,
                  color = species)) +
  geom_point()
plt +
  labs(x = 'Bill length (mm)',
       y = 'Body mass (g)',
       color = 'Species',
       title = "Palmer penguins",#<<
       subtitle = "Bill length vs Body mass") #<<
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = "p4"}
```
]

---

## Titles, subtitles and captions

.pull-left[
```{r p5, eval = FALSE, echo = TRUE}
library(palmerpenguins)
plt <- ggplot(penguins,
              aes(bill_length_mm, body_mass_g,
                  color = species)) +
  geom_point()
plt +
  labs(x = 'Bill length (mm)',
       y = 'Body mass (g)',
       color = 'Species',
       title = "Palmer penguins",
       subtitle = "Bill length vs Body mass",
       caption = "Palmer Station LTER")
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = "p5"}
```
]

---
class:middle,center

# Adding derived statistics to a plot

---

## Adding group means

.pull-left[
```{r b1, eval = FALSE, echo = TRUE}
ggplot(penguins,
       aes(x = bill_length_mm,
           y = body_mass_g,
           color = species)) +
  geom_point()
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = "b1"}
```
]

---

## Adding group means

<!-- The summary keeps the column names bill_length_mm, body_mass_g and
     species, so the second geom_point() layer can inherit the plot's
     aesthetics while drawing from the `means` data. -->

.pull-left[
```{r 05-Lecture5-11, eval = TRUE, echo = TRUE}
means <- penguins %>%
  group_by(species) %>%
  summarize(across(c(bill_length_mm, body_mass_g),
                   ~ mean(.x, na.rm = TRUE)))
means
```

```{r b2, eval = FALSE, echo = TRUE}
ggplot(penguins,
       aes(x = bill_length_mm,
           y = body_mass_g,
           color = species)) +
  geom_point() +
  geom_point(data = means, #<<
             size=8) #<<
```

Adding data from a different dataset
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = 'b2'}
```
]

---

## Adding regression metrics

.pull-left[
Regress highway mileage on city mileage (data: mpg)

```{r c1, eval = FALSE, echo = TRUE}
mod1 <- lm(hwy ~ cty, data = mpg)
r2 <- broom::glance(mod1) %>%
  pull(r.squared)
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  theme_bw()
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = 'c1'}
```
]

---

## Adding regression metrics

<!-- The label is built as the plotmath string "R^2 == value" and drawn with
     parse = TRUE so that it is rendered as an expression. -->

.pull-left[
Regress highway mileage on city mileage (data: mpg)

```{r c2, eval = FALSE, echo = TRUE}
mod1 <- lm(hwy ~ cty, data = mpg)
r2 <- broom::glance(mod1) %>%
  pull(r.squared) %>%
  round(2)
ggplot(mpg, aes(x = cty, y = hwy)) +
  geom_point() +
  geom_smooth(method = 'lm', se = FALSE) +
  annotate(geom = 'text',
           x = 15, y = 40,
           label = glue::glue("R^2 == {r}", r = r2),
           size = 12, parse = TRUE) +
  theme_bw()
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = 'c2'}
```
]

---

## Highlighting regions

.pull-left[
```{r d1, eval = FALSE, echo = TRUE}
mpg %>%
  mutate(cyl = as.factor(cyl)) %>%
  ggplot(aes(x = cyl, y = hwy)) +
  geom_boxplot() +
  theme_bw()
```
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = 'd1'}
```
]

---

## Highlighting regions

<!-- The rectangle and text are positioned with numeric x values because the
     factor levels of cyl sit at integer positions on the axis (see the note
     on the slide). theme_bw() is applied once; a second call at the end of
     the chain had no additional effect. -->

.pull-left[
```{r d2, eval = FALSE, echo = TRUE}
mpg %>%
  mutate(cyl = as.factor(cyl)) %>%
  ggplot(aes(x = cyl, y = hwy)) +
  geom_boxplot() +
  theme_bw() +
  annotate(geom = 'rect',
           xmin = 3.75, xmax = 4.25,
           ymin = 22, ymax = 28,
           fill = 'red', alpha = 0.2) +
  annotate('text', x = 4.5, y = 25,
           label = 'Outliers?', hjust = 0) +
  coord_cartesian(xlim = c(0, 5))
```

Note: If you have a factor on the x-axis, its levels are plotted at 1, 2, 3, ...
]

.pull-right[
```{r, eval = TRUE, echo = FALSE, ref.label = 'd2'}
```
]