---
title: "Activity_4"
output:
  html_document:
    toc: true
    toc_float:
      toc_collapsed: true
---
```{r setup, include=FALSE}
# Set the default chunk option so every chunk's source code is echoed in the
# rendered document (this setup chunk itself is hidden via include=FALSE).
knitr::opts_chunk$set(echo = TRUE)
```
# Set-up libraries and data
``` {r}
## Load libraries
# tidyverse supplies the pipe, the dplyr/tidyr verbs and ggplot2 used below;
# pheatmap supplies pheatmap() for the heatmap section.
library(tidyverse)
library(pheatmap)
## Load data
# Restores saved R objects from a relative path into the workspace.
# The later chunks use res_tableOE_tb, normalized_counts and mov10_meta, which
# are presumably provided by this file -- TODO confirm its contents.
load("data/Rmarkdown_data.Rdata")
```
# Top 20 significant genes
```{r}
## Get names of the top 20 genes, ranked by adjusted p-value
top20_sigOE_genes <- res_tableOE_tb %>%
  arrange(padj) %>% # Smallest padj first
  pull(gene) %>% # Extract the vector of ordered gene names
  head(n = 20)

## Normalized counts for the top 20 significant genes
top20_sigOE_norm <- normalized_counts %>%
  filter(gene %in% top20_sigOE_genes)

## Reshape to long format: one row per gene/sample pair.
## pivot_longer() replaces the superseded gather(). Columns 2:9 are the same
## columns the original selected; they are assumed to be the eight sample
## columns of normalized_counts -- TODO confirm if the table layout changes.
gathered_top20_sigOE <- top20_sigOE_norm %>%
  pivot_longer(
    cols = 2:9,
    names_to = "samplename",
    values_to = "normalized_counts"
  )

## Attach the sample metadata. The key is named explicitly so the join cannot
## silently pick up any other column the two tables happen to share.
gathered_top20_sigOE <- inner_join(
  mov10_meta,
  gathered_top20_sigOE,
  by = "samplename"
)

## Plot using ggplot2: one point per gene/sample, coloured by sample type,
## with the y axis on a log10 scale
ggplot(gathered_top20_sigOE) +
  geom_point(aes(x = gene, y = normalized_counts, color = sampletype)) +
  scale_y_log10() +
  xlab("Genes") +
  ylab("log10 Normalized Counts") +
  ggtitle("Top 20 Significant DE Genes") +
  theme_bw() +
  theme(
    axis.text.x = element_text(angle = 45, hjust = 1), # slanted gene labels
    plot.title = element_text(hjust = 0.5) # centred title
  )
```
Conclusion: The top 20 significantly differentially expressed genes were plotted for each of the three experimental groups, etc.

# Create a heatmap of the differentially expressed genes
```{r}
## Keep the genes that are significant at padj < 0.05
## (filter() also drops rows whose padj is NA)
res_tableOE_tb_sig <- res_tableOE_tb %>%
  filter(padj < 0.05)

## Return the normalized counts for the significant DE genes
norm_OEsig <- normalized_counts %>%
  filter(gene %in% res_tableOE_tb_sig$gene)

## Metadata as a plain data frame with the sample names as row names, which is
## the form pheatmap expects for column annotations
meta <- mov10_meta %>%
  column_to_rownames("samplename") %>%
  data.frame()

## Run pheatmap on columns 2:9 of the filtered counts, using the metadata data
## frame for the column annotation.
## - annotation_col replaces the deprecated `annotation` argument.
## - scale = "row" standardises each gene across samples before plotting.
## - NOTE(review): per the pheatmap docs, `height` only applies when a
##   `filename` is given, so it likely has no effect here -- confirm.
pheatmap(norm_OEsig[2:9],
  cluster_rows = TRUE,
  show_rownames = FALSE,
  annotation_col = meta,
  border_color = NA,
  fontsize = 10,
  scale = "row",
  fontsize_row = 10,
  height = 20
)
```
Conclusion: The samples clustered very well according to their experimental conditions, etc.