#cummeRbund - making visuals for your RNAseq data


#set working directory to the cuffdiff folder. all the cuffdiff outputfiles have to be there.  This is only necessary if you read inn cuffdata with default parameters. 

 
library("cummeRbund") #loading in cummerbund, " isn't always needed like in Rstudio 
 
library("ggplot2") #loading the plotting program ggplot2 




 
cuff <- readCufflinks() #reading in the cufflinks output
 
runInfo(cuff) #displays information about the analysis 
 
replicates(cuff) #displays information about the replicates and applied normalization 


#################################################
#	THE VM DOES NOT SUPPORT PNG DISPLAY	#
#################################################



#evaluate your overall data

#DISPERSION
 
#dispersion estimates on gene level

disp<-dispersionPlot(genes(cuff)) # genelevel disp
 
ggsave(disp, filename"dispersion.pdf)


#SQUARED COEFFICIENT OF VARIANCE 
 
genes.scv<-fpkmSCVPlot(genes(cuff))
ggsave(genes.scv, filename="concatenated_scv_genes.pdf") 
 
 
#DISTRIBUTION OF FPKM SCORES ACROSS SAMPLES
 
dens<-csDensity(genes(cuff)) #here you can also add replicates = true like this: densrep <- csDensity(genes(cuff), replicates=TRUE) 
ggsave(dens, filename="concatenated_dens_genes.pdf") 
 
 
#BOXPLOT FOR OUTLIER IDENTIFICATION
 
b<-csBoxplot(genes(cuff)) 
ggsave(b, filename="concatenated_boxplot_genes.pdf") 
 
 
brep<-csBoxplot(genes(cuff),replicates=T) # add replicates for better resolution
ggsave(brep, filename="concatenated_boxplot_genes_repl.pdf") 
 
 
#SCATTERPLOT COMPARING FPKM PER CONDITION
 
s<-csScatterMatrix(genes(cuff))
ggsave(s, filename="concatenated_scatterplot_genes.pdf") 
 



#DENDROGRAM ON THE JENSEN-SHANNON DIVERGENCE INDEX - does not use ggplot2 for saving 
 
dend<-csDendro(genes(cuff))  
pdf("concatenated_dendrogram_genes.pdf") 
dend<-csDendro(genes(cuff))  
dev.off() 
 

dend.rep<-csDendro(genes(cuff),replicates=T) #use replicates for better resolution
pdf("concatenated_dendrogram_genes_rep.pdf") 
dend.rep<-csDendro(genes(cuff),replicates=T) 
dev.off() 
 
 


 
# PCA/MDS plots (see here for explanation: www.rna-seqblog.com/statquest-pca-clearly-explained 
 
#MDS tends to fail on the VM if no replicates are specified

genes.MDS<-MDSplot(genes(cuff), replicates=TRUE) 
ggsave(genes.MDS,filename="concatenated_MDS.pdf") 
 
#or do the PCA 
 
genes.PCA <- PCAplot(genes(cuff),replicates=TRUE) 
ggsave(genes.PCA, filename="concatenated_PCA_repl.pdf") 





 
#VOLCANO EXPRESSION PLOTS
 
#remember that your conditions are names q1 and q2 (control and treated respectively) unless you have specified otherwise - check the replicates(cuff) command for details.  
 
# volcano variant 
 
v1 <- csVolcano(genes(cuff),"q1","q2", alpha=0.05, showSignificant=TRUE)   
v1 
 
ggsave(v1, file='volcano_MA_6hrs_replicates.pdf') 
 
 








#MAKING SIGNIFICANT GENE OBJECTS
 
# making gene set of significant features - this will be identical to the one below if only two conditions
 
mySigGeneIds<-getSig(cuff,alpha=0.05,level='genes') # this can be tweeked if multiple conditions, another p-value cutoff etc 
 
head(mySigGeneIds) 
 
length(mySigGeneIds) 
 


 
 
#individual test setup: if you have more that two conditions you can look at the significant genes between pairs of these conditions - cuffdiff has by default done an all-to-all analysis
 
mySigGeneIds1<-getSig(cuff,alpha=0.05,level='genes',x='q1',y='q2') 
length(mySigGeneIds1) # this will give you the number of significant genes for the comparison of q1 and q2 
 
mySigGeneIds3<-getSig(cuff,alpha=0.05,level='genes',x='q3',y='q4') 
length(mySigGeneIds3) 
 
 
mySigGenes1<-getGenes(cuff,mySigGeneIds1) 
siggene.featurenames1 <- featureNames(mySigGenes1) # you are storing the significant genes in new objects 
 
mySigGenes3<-getGenes(cuff,mySigGeneIds3) 
siggene.featurenames3 <- featureNames(mySigGenes3) 
  
 
table1 <- featureNames(mySigGenes1) 
 
table3 <- featureNames(mySigGenes3) 
 
 
write.table(table1, file="mySigGenes1.txt", quote=F) 
 
write.table(table3, file="mySigGenes3.txt", quote=F) 
 

 
gene1.diff<-diffData(mySigGenes1) 
head(gene1.diff) 
write.table(gene1.diff, file="mySigGenes1_DE.txt", quote=F) 
 
gene3.diff<-diffData(mySigGenes3) 
head(gene3.diff) 
write.table(gene3.diff, file="mySigGenes3_DE.txt", quote=F) 
 
 




#EXPRESSION HEAT MAPS ON GENE SETS - maybe not the most informative plots, but can come in handy
 
h1<-csHeatmap(mySigGenes1, fullnames=F, cluster='row')  
h1 
ggsave(file='heatmap_6hrsgenes_across_time.pdf') 
 
 
h3<-csHeatmap(mySigGenes3, fullnames=F, cluster='row')  
h3 
ggsave(file='heatmap_4daygenes_across_time.pdf') 
 
 
 
 
 
 
# PARTITIONING - PS:read about this in the cummerbund manual - this is not a high resolution partitioning strategy. AIM:looking at genes following the same expression pattern for further evaluation.
 
 
 
ic<-csCluster(mySigGenes,k=6) # you can choose the number of cluster - 4, 5 or 6 are usually the most informative.  
head(ic$cluster) 
icp<-csClusterPlot(ic) 
icp 
ggsave(icp, filename="concatenated_covariance_all_genes.pdf") 
 
 
ic1<-csCluster(mySigGenes1,k=6) # you can choose the number of cluster - 4 or 6 are usually the most informative.  
head(ic1$cluster) 
icp1<-csClusterPlot(ic1) 
icp1 
ggsave(icp1, filename="concatenated_covariance_6hrs_genes.pdf") 
 
 
ic3<-csCluster(mySigGenes3,k=6) # you can choose the number of cluster - 4 or 6 are usually the most informative.  
head(ic3$cluster) 
icp3<-csClusterPlot(ic3) 
icp3 
ggsave(icp3, filename="concatenated_covariance_4day_genes.pdf") 
 


#GETTING THE COUNT MATRIX - might be handy

gene.count.matrix <- countMatrix(genes(cuff))
head(gene.count.matrix)
write.table(gene.count.matrix, file="countmatrix.txt")

 
########################################################################################## 
 
#MAKING INDIVIDUAL GENE PLOTS - a example
#XLOC_017969
 
myGeneId_A <- 'XLOC_017969' 
myGeneId_A <- getGene(cuff,myGeneId_A) 
myGeneId_A 
head(fpkm(myGeneId_A)) 
A <- expressionPlot(myGeneId_A) 
A 
Arep <- expressionPlot(myGeneId_A,replicates=TRUE) 
Arep 
 
ggsave(A, filename="A.pdf")
ggsave(A, filename="A.pdf", width = 100, height = 100, units = c("mm")) # if you need to tweek the size of the plot