From bca12f497773cbc720e81c0256d6a553233e113a Mon Sep 17 00:00:00 2001 From: Philippe Bordron Date: Fri, 15 Mar 2019 16:16:50 +0100 Subject: [PATCH 1/2] use ggplot for vulcano plot --- SCRIPTS/DE/deAnalysis.R | 56 ++++++++++++++++++++++++++++------------- 1 file changed, 38 insertions(+), 18 deletions(-) diff --git a/SCRIPTS/DE/deAnalysis.R b/SCRIPTS/DE/deAnalysis.R index ae48cec..d0bed7f 100644 --- a/SCRIPTS/DE/deAnalysis.R +++ b/SCRIPTS/DE/deAnalysis.R @@ -20,7 +20,7 @@ lire<-function(x, character=FALSE){ } ecrire<-function(x,file="default.tsv",headRow="Name"){ - options(warn=-1) #Supress unecessary warning about append + options(warn=-1) #Supress unecessary warning about append write.table(x = paste0(headRow,"\t"),file = file,sep = "\t",eol="",quote=F,row.names=F,col.names=F) write.table(x=x,file=file,sep="\t", row.names = T, col.names = T, quote = FALSE,append=T) options(warn=0) @@ -112,15 +112,37 @@ ecrire(rbind(DE.sel$up,DE.sel$down),paste(paste(outputDir,comp,sep="/"),"DEseqRe #Volcano-plot +data <- cbind(DE, color = DE$DE) +levels(data$color)[levels(data$color) == "DOWN"] <- "green" +levels(data$color)[levels(data$color) == "NONE"] <- "grey75" +levels(data$color)[levels(data$color) == "UP"] <- "red" +data <- cbind(data, label = ifelse(data$DE != "NONE", rownames(data), "")) +data <- data[order(abs(data$pval)),] + +# limiting number of displayed genes +data[GenesInFig+1:nrow(data),]$label <- "" + + png(paste(paste(outputDir,comp,sep="/"),"Volcano-plot.png",sep="/"),width=10,height=10,units="cm",res=200) - -plot(DE$log2FoldChange,-log10(DE$padj),type="n",ylab="-log10(DEseq padj)",xlab="log2(Fold-Change)",col="black",main=paste0("Volcano plot of comparison ",cond1," (pos FC) VS ",cond2," (neg FC)\nBenjamini & Hochberg method (",nrow(DE.sel$isDE),"/",nrow(DE)," DE genes)"),cex.main=0.5,cex.lab=0.8) -abline(h=-log10(AdjPValthreshold)) -abline(v = -logFCthreshold) -abline(v = logFCthreshold) -if(nrow(DE.sel$up)>0) text(DE.sel$up$log2FoldChange,-log10(DE.sel$up$padj),labels = rownames(DE.sel$up),cex=.3,col="red") -if(nrow(DE.sel$down)>0) text(DE.sel$down$log2FoldChange,-log10(DE.sel$down$padj),labels = rownames(DE.sel$down),cex=.3,col="green") -points(DE.sel$notDE$log2FoldChange,-log10(DE.sel$notDE$padj),cex=.2,col="black",pch=16) + +ggplot(data=data, aes(x=log2FoldChange, y=-log10(padj))) + + geom_point(size = 0.5, color = data$color) + + # put labels + geom_text_repel( + data = subset(data, label != ""), + aes(label = label, fontface="bold.italic"), + size = 2, + point.padding = unit(0.1, "lines") + ) + + geom_hline(aes(yintercept=-log10(AdjPValthreshold))) + + geom_vline(aes(xintercept=logFCthreshold)) + + geom_vline(aes(xintercept=-logFCthreshold)) + + ylab("-log10(DEseq padj)") + + xlab("log2(Fold-Change)") + + ggtitle(paste0("Volcano plot of comparison\n", cond1," (pos FC) VS ",cond2," (neg FC)\nBenjamini & Hochberg method (",nrow(DE.sel$isDE),"/",nrow(DE)," DE genes)")) + + theme_bw() + + theme(plot.title = element_text(size=10), + axis.title = element_text(size=10)) dev.off() @@ -151,7 +173,7 @@ if(nrow(DE.sel$isDE)>=10){ DEgenes.names=rownames(DE.sel$isDE) colFun<-c(ggplotColours,rainbow) - + sampleHt<-colnames(exprDatT) haByComp<-ha exprDE=exprDatT[DEgenes.names,sampleHt,drop=FALSE] @@ -171,7 +193,7 @@ if(nrow(DE.sel$isDE)>=10){ #print(Heatmap(exprDE.scaled,top_annotation = haByComp, row_names_gp = autoGparFontSizeMatrix(nrow(exprDE.scaled)), # cluster_rows = hclustGeneDE,col = colHA, cluster_columns = hclustSampleDE,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDE.scaled)) )) #dev.off() - + pdf(paste(paste(outputDir,comp,sep="/"),"clustDEgene.pdf",sep="/"),width=10,height=10) print(Heatmap(exprDE.scaled,top_annotation = haByComp, row_names_gp = autoGparFontSizeMatrix(nrow(exprDE.scaled)), cluster_rows = hclustGeneDE,col = colHA, cluster_columns = hclustSampleDE,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDE.scaled)) )) @@ -179,7 +201,7 @@ if(nrow(DE.sel$isDE)>=10){ if(length(samples[sampleAnnot[,condCol]%in% c(cond1,cond2)])=10){ haByCompSelect<-HeatmapAnnotation(tempAnnot,col = colTopAnnotTemp) exprDESelect=exprDatT[DEgenes.names,sampleHtSelect,drop=FALSE] exprDESelect.scaled=rowScale(exprDESelect,center=T,scaled=T) - + hclustGeneDESelect<-unsupervisedClustering(exprDESelect.scaled,transpose = F,nboot=nboot,bootstrap = bootTemp) hclustSampleDESelect<-unsupervisedClustering(exprDESelect.scaled,transpose = T,nboot=nboot,bootstrap = bootTemp,method.dist = "euclidean") - + png(paste(paste(outputDir,comp,sep="/"),"clustDEgene.png",sep="/"),width=25,height=22,units="cm",res=600,pointsize=3) print(Heatmap(exprDESelect.scaled,top_annotation = haByCompSelect, row_names_gp = autoGparFontSizeMatrix(nrow(exprDESelect.scaled)), - cluster_rows = hclustGeneDESelect,col = colHA, cluster_columns = hclustSampleDESelect,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDESelect.scaled)) )) + cluster_rows = hclustGeneDESelect,col = colHA, cluster_columns = hclustSampleDESelect,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDESelect.scaled)) )) dev.off() print(Heatmap(exprDESelect.scaled,top_annotation = haByCompSelect, row_names_gp = autoGparFontSizeMatrix(nrow(exprDESelect.scaled)), @@ -209,12 +231,10 @@ if(nrow(DE.sel$isDE)>=10){ } dev.off() - + }else{ png(paste(paste(outputDir,comp,sep="/"),"clustDEgene.png",sep="/")) plot.new() text(0.5,0.5,"Less than 10 differential expressed gene for this comparison,\n clustering isn't relevant") dev.off() } - - -- GitLab From 1d12550fb64943e58b173aa5b14ace6b58be1bdf Mon Sep 17 00:00:00 2001 From: Philippe Bordron Date: Tue, 19 Mar 2019 09:45:20 +0100 Subject: [PATCH 2/2] fix ggvulcano & fix issue #9 --- SCRIPTS/DE/deAnalysis.R | 44 +++++++++++++++++++++++++---------------- 1 file changed, 27 insertions(+), 17 deletions(-) diff --git a/SCRIPTS/DE/deAnalysis.R b/SCRIPTS/DE/deAnalysis.R index d0bed7f..9904eac 100644 --- a/SCRIPTS/DE/deAnalysis.R +++ b/SCRIPTS/DE/deAnalysis.R @@ -117,23 +117,18 @@ levels(data$color)[levels(data$color) == "DOWN"] <- "green" levels(data$color)[levels(data$color) == "NONE"] <- "grey75" levels(data$color)[levels(data$color) == "UP"] <- "red" data <- cbind(data, label = ifelse(data$DE != "NONE", rownames(data), "")) -data <- data[order(abs(data$pval)),] +# removing NA for setting correct plot limits +data <- data[!is.na(data$padj),] # limiting number of displayed genes +data <- data[order(abs(data$padj)),] data[GenesInFig+1:nrow(data),]$label <- "" - +dataLabels <- subset(data, label != "") png(paste(paste(outputDir,comp,sep="/"),"Volcano-plot.png",sep="/"),width=10,height=10,units="cm",res=200) -ggplot(data=data, aes(x=log2FoldChange, y=-log10(padj))) + +p <- ggplot(data=data, aes(x=log2FoldChange, y=-log10(padj))) + geom_point(size = 0.5, color = data$color) + - # put labels - geom_text_repel( - data = subset(data, label != ""), - aes(label = label, fontface="bold.italic"), - size = 2, - point.padding = unit(0.1, "lines") - ) + geom_hline(aes(yintercept=-log10(AdjPValthreshold))) + geom_vline(aes(xintercept=logFCthreshold)) + geom_vline(aes(xintercept=-logFCthreshold)) + @@ -143,7 +138,16 @@ ggplot(data=data, aes(x=log2FoldChange, y=-log10(padj))) + theme_bw() + theme(plot.title = element_text(size=10), axis.title = element_text(size=10)) - +# put labels when existing +if (nrow(dataLabels)>0) { + p <- p + geom_text_repel( + data = dataLabels, + aes(label = label, fontface="bold.italic"), + size = 2, + point.padding = unit(0.1, "lines") + ) +} +print(p) dev.off() #MA-plot @@ -195,13 +199,14 @@ if(nrow(DE.sel$isDE)>=10){ #dev.off() pdf(paste(paste(outputDir,comp,sep="/"),"clustDEgene.pdf",sep="/"),width=10,height=10) - print(Heatmap(exprDE.scaled,top_annotation = haByComp, row_names_gp = autoGparFontSizeMatrix(nrow(exprDE.scaled)), - cluster_rows = hclustGeneDE,col = colHA, cluster_columns = hclustSampleDE,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDE.scaled)) )) + p <- Heatmap(exprDE.scaled,top_annotation = haByComp, row_names_gp = autoGparFontSizeMatrix(nrow(exprDE.scaled)), + cluster_rows = hclustGeneDE,col = colHA, cluster_columns = hclustSampleDE,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDE.scaled)) ) + print(p) if(length(samples[sampleAnnot[,condCol]%in% c(cond1,cond2)])=10){ haByCompSelect<-HeatmapAnnotation(tempAnnot,col = colTopAnnotTemp) exprDESelect=exprDatT[DEgenes.names,sampleHtSelect,drop=FALSE] exprDESelect.scaled=rowScale(exprDESelect,center=T,scaled=T) - + hclustGeneDESelect<-unsupervisedClustering(exprDESelect.scaled,transpose = F,nboot=nboot,bootstrap = bootTemp) hclustSampleDESelect<-unsupervisedClustering(exprDESelect.scaled,transpose = T,nboot=nboot,bootstrap = bootTemp,method.dist = "euclidean") - + png(paste(paste(outputDir,comp,sep="/"),"clustDEgene.png",sep="/"),width=25,height=22,units="cm",res=600,pointsize=3) print(Heatmap(exprDESelect.scaled,top_annotation = haByCompSelect, row_names_gp = autoGparFontSizeMatrix(nrow(exprDESelect.scaled)), - cluster_rows = hclustGeneDESelect,col = colHA, cluster_columns = hclustSampleDESelect,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDESelect.scaled)) )) + cluster_rows = hclustGeneDESelect,col = colHA, cluster_columns = hclustSampleDESelect,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDESelect.scaled)) )) dev.off() print(Heatmap(exprDESelect.scaled,top_annotation = haByCompSelect, row_names_gp = autoGparFontSizeMatrix(nrow(exprDESelect.scaled)), cluster_rows = hclustGeneDESelect,col = colHA, cluster_columns = hclustSampleDESelect,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDESelect.scaled)) )) } + else { + png(paste(paste(outputDir,comp,sep="/"),"clustDEgene.png",sep="/"),width=25,height=22,units="cm",res=600,pointsize=3) + print(p) + dev.off() + } dev.off() }else{ -- GitLab