deg.R 10.3 KB
Newer Older
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
suppressMessages(require("DESeq2"))
library(genefilter,quietly=TRUE)
library("RColorBrewer")
library("gplots")
library(fdrtool)
library(ggplot2)
library(ggrepel)
library(ComplexHeatmap)
library(pvclust)
library(circlize)

unsupervisedClustering<-function(x,transpose=TRUE,method.dist="pearson",method.hclust="ward.D2",bootstrap=FALSE,nboot=10){
        if(transpose) x<-t(x)
        if(bootstrap){
                require(pvclust)
                resClust<-pvclust(t(x),nboot=nboot,method.hclust = method.hclust,parallel = TRUE,method.dist = method.dist)$hclust
        }else{
                if(method.dist=="pearson"){
                        resDist<-corrDist(x)
                }else{
                        resDist<-dist(x, method = method.dist)
                }
                resClust<-stats::hclust(resDist,method = method.hclust)
        }
        return(resClust)
}

ggplotColours <- function(n = 6, h = c(0, 360) + 15){
    if ((diff(h) %% 360) < 1) h[2] <- h[2] - 360/n
    hcl(h = (seq(h[1], h[2], length = n)), c = 100, l = 65)
}

autoGparFontSizeMatrix<-function(n){ #Calcule automatiquement la taille de police selon le nombre de colonnes ou lignes (empirique)
        n=max(n,50)
        n=min(n,1000)
        return(gpar(fontsize=1/n*600))
}

rowScale<-function(data,center=TRUE,scaled=FALSE){
        data<-t(data)
        data<-t(scale(data,center=center,scale=scaled))
        return(data)
}

corrDist<-function(x) return(as.dist((1 - cor(Matrix::t(x)))/2))

write<-function(x,file="default.tsv",headRow="Name"){
        options(warn=-1) #Supress unecessary warning about append 
        write.table(x = paste0(headRow,"\t"),file = file,sep = "\t",eol="",quote=F,row.names=F,col.names=F)
        write.table(x=x,file=file,sep="\t", row.names = T, col.names = T, quote = FALSE,append=T)
        options(warn=0)
}

args=commandArgs(trailingOnly=TRUE)
ddsRdata=args[1]
cond1=args[2]
cond2=args[3]

condCol="condition"

logFCthreshold=0
AdjPValthreshold=0.05
GenesInFig=50
bootstrap=FALSE
nboot=30

load(ddsRdata)

comp<-paste(cond1,cond2,sep="__vs__")

sampleAnnot<-as.data.frame(sampleTable[,3])
rownames(sampleAnnot)=sampleTable[,2]
colnames(sampleAnnot)="condition"
samples<-rownames(sampleAnnot)

res=results(ddsHTSeq,contrast=c(condCol,cond1,cond2),independentFiltering=T)
res <- base::merge(res,geneNames,by.x="row.names",by.y="ensembl_gene_id",all.x=TRUE)
78
res$Row.names <- paste(res$Row.names, res$external_gene_name, sep = "|")
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112

rownames(res)=res$Row.names
res <- res[,-ncol(res)]
res <- res[,-1]

## Expression matrices : normalised and transformed
exprDat=matrix[,-1]
rownames(exprDat)=matrix[,1]
exprDatT=mrld[,-1]
rownames(exprDatT)=mrld[,1]

res$meanInComp<-rowMeans(exprDat[,sampleAnnot[,condCol]%in% c(cond1,cond2)])

DE=data.frame(res)

DE.sel=list()
DE.sel$up=DE[which(DE$padj<AdjPValthreshold & DE$log2FoldChange > logFCthreshold),]
DE.sel$down=DE[which(DE$padj<AdjPValthreshold & DE$log2FoldChange < -logFCthreshold),]

DE.sel$isDE=rbind(DE.sel$up,DE.sel$down)
DE.sel$notDE=DE[setdiff(rownames(DE),rownames(DE.sel$isDE)),]

DE$DE="NONE"
DE[rownames(DE.sel$up),"DE"]="UP"
DE[rownames(DE.sel$down),"DE"]="DOWN"
DE$DE=factor(DE$DE,levels=c("DOWN","NONE","UP"))

write(DE,paste(paste(OUTDIR,comp,comp,sep="/"),"DEseqRes.tsv",sep="_"))
write(rbind(DE.sel$up,DE.sel$down),paste(paste(OUTDIR,comp,comp,sep="/"),"All_DEG.tsv",sep="_"))
write(DE.sel$up,paste(paste(OUTDIR,comp,comp,sep="/"),"UP_DEG.tsv",sep="_"))
write(DE.sel$down,paste(paste(OUTDIR,comp,comp,sep="/"),"DOWN_DEG.tsv",sep="_"))

#Volcano-plot
print("Volcano Plot...")
113
114
115
116
117

data <- cbind(DE, color = DE$DE)
levels(data$color)[levels(data$color) == "DOWN"] <- "green"
levels(data$color)[levels(data$color) == "NONE"] <- "grey75"
levels(data$color)[levels(data$color) == "UP"] <- "red"
118
119
120
121
122
123
124
125

data$NAME<-rownames(data)

ensgNames=data$NAME
splitnames=strsplit(as.character(ensgNames),"[|]")
ensgSymb=do.call(rbind,splitnames)
data$SYMBNAME=ensgSymb[,2]
data <- cbind(data, label = ifelse(data$DE != "NONE", data$SYMBNAME, ""))
126
127
128
129
130
131
132
133

# removing NA for setting correct plot limits
data <- data[!is.na(data$padj),]
# limiting number of displayed genes
data <- data[order(abs(data$padj)),]
data[GenesInFig+1:nrow(data),]$label <- ""
dataLabels <- subset(data, label != "")

134
135
png(paste(paste(OUTDIR,comp,comp,sep="/"),"Volcano-plot.png",sep="_"),width=10,height=10,units="cm",res=200)

136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
p <- ggplot(data=data, aes(x=log2FoldChange, y=-log10(padj))) +
        geom_point(size = 0.5, color = data$color) +
        geom_hline(aes(yintercept=-log10(AdjPValthreshold))) +
        geom_vline(aes(xintercept=logFCthreshold)) +
        geom_vline(aes(xintercept=-logFCthreshold)) +
        ylab("-log10(DEseq padj)") +
        xlab("log2(Fold-Change)") +
        ggtitle(paste0("Volcano plot of comparison\n", cond1," (pos FC) VS ",cond2," (neg FC)\nBenjamini & Hochberg method (",nrow(DE.sel$isDE),"/",nrow(DE)," DE genes)")) +
        theme_bw() +
        theme(plot.title = element_text(size=10),
                axis.title = element_text(size=10))

# put labels when existing	
if (nrow(dataLabels)>0) {
        p <- p + geom_text_repel(
                data = dataLabels,
                aes(label = label, fontface="bold.italic"),
                size = 2,
                point.padding = unit(0.1, "lines")
        )
}
print(p)
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
dev.off()

#MA-plot
print("MA Plot...")

data<-data.frame(DE[res$meanInComp>0.5,])
data$NAME<-rownames(data)

ensgNames=data$NAME
splitnames=strsplit(as.character(ensgNames),"[|]")
ensgSymb=do.call(rbind,splitnames)
data$SYMBNAME=ensgSymb[,2]

data<-data[order(abs(data$pval)),]
dataDE<-data[1:GenesInFig,]
data$DE=factor(data$DE,levels=c("DOWN","NONE","UP"))
myColors <- c("green","grey75","red")
names(myColors) <- levels(data$DE)

png(paste(paste(OUTDIR,comp,comp,sep="/"),"MA-plot.png",sep="_"),width=10,height=10,units="cm",res=200)
print(ggplot(data=data,mapping = aes(x=meanInComp,y=log2FoldChange,colour=DE))+scale_colour_manual(name = "DE",values = myColors,guide=FALSE)+geom_point(size=0.2)+scale_x_log10()+
        geom_text_repel(data = dataDE,mapping = aes(x=meanInComp,fontface="bold.italic",
        y=log2FoldChange,label=SYMBNAME),inherit.aes = FALSE,max.iter = 8000,size=2)+
        theme(panel.background = element_rect(fill = NA,colour="black"),
        panel.grid.major = element_line(colour = NA),
        panel.grid.minor = element_line(colour = NA),plot.title = element_text(size=10),axis.title=element_text(size=10))+
        ggtitle(paste0("MA-plot for ",cond1," vs ",cond2))+
        xlab("Mean expression")+ylab("log2(Fold-Change)"))
dev.off()

#DE genes clustering
print("DEG Clustering...")
if(nrow(DE.sel$isDE)>=10){

    annot=sampleAnnot[condCol]  

    colTopAnnot<-vector("list", ncol(annot))
    names(colTopAnnot)<-colnames(annot)
    colFun<-c(ggplotColours,rainbow)
    i<-1
    for(col in colnames(annot)){
        colTopAnnot[[col]]<-colFun[[i]](nlevels(annot[,col]))
        names(colTopAnnot[[col]])<-levels(annot[,col])
        i<-i+1
        if(i==4) i<-1
    }

205
    ha<-HeatmapAnnotation(df = annot,col = colTopAnnot)
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224

    DEgenes.names=rownames(DE.sel$isDE)
    colFun<-c(ggplotColours,rainbow)

    sampleHt<-colnames(exprDatT)
    haByComp<-ha
    exprDE=exprDatT[DEgenes.names,sampleHt,drop=FALSE]
    exprDE.scaled=rowScale(exprDE,center=T,scaled=T)

    bootTemp=bootstrap
    if(nrow(exprDE.scaled>10)){bootTemp=FALSE}

    hclustGeneDE<-unsupervisedClustering(exprDE.scaled,transpose = F,nboot=nboot,bootstrap = bootTemp)
    hclustSampleDE<-unsupervisedClustering(exprDE.scaled,transpose = T,nboot=nboot,bootstrap = bootTemp,method.dist = "euclidean")

    rowScaledExpr<-rowScale(exprDatT,center=TRUE,scale=TRUE)
    quantile.expr<-quantile(unlist(rowScaledExpr),seq(0,1,.01),na.rm = T)
    colHA=colorRamp2(c(quantile.expr[2],0,quantile.expr[100]),c("blue","white","red"))

225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
    pdf(paste(paste(OUTDIR,comp,comp,sep="/"),"clustDEgene.pdf",sep="_"),width=10,height=10)
	p <- Heatmap(exprDE.scaled,top_annotation = haByComp, row_names_gp = autoGparFontSizeMatrix(nrow(exprDE.scaled)),
                cluster_rows = hclustGeneDE,col = colHA, cluster_columns = hclustSampleDE,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDE.scaled)) )
    print(p)

    if(length(samples[sampleAnnot[,condCol]%in% c(cond1,cond2)])<length(sampleHt)){
        sampleHtSelect=samples[sampleAnnot[,condCol]%in% c(cond1,cond2)]
        tempAnnot<-droplevels(sampleAnnot[sampleHtSelect,condCol,drop=FALSE])
        
        colTopAnnotTemp<-vector("list", ncol(tempAnnot))
        names(colTopAnnotTemp)<-colnames(tempAnnot)
        i<-1
        for(col in colnames(tempAnnot)){
                colTopAnnotTemp[[col]]<-colFun[[i]](nlevels(tempAnnot[,col]))
                names(colTopAnnotTemp[[col]])<-levels(tempAnnot[,col])
                i<-i+1
                if(i==4) i<-1
        }
243
        haByCompSelect<-HeatmapAnnotation(df = tempAnnot,col = colTopAnnotTemp)
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
        exprDESelect=exprDatT[DEgenes.names,sampleHtSelect,drop=FALSE]
        exprDESelect.scaled=rowScale(exprDESelect,center=T,scaled=T)
        exprDESelect.scaled<-na.omit(exprDESelect.scaled)

        hclustGeneDESelect<-unsupervisedClustering(exprDESelect.scaled,transpose = F,nboot=nboot,bootstrap = bootTemp)
        hclustSampleDESelect<-unsupervisedClustering(exprDESelect.scaled,transpose = T,nboot=nboot,bootstrap = bootTemp,method.dist = "euclidean")
        
        png(paste(paste(OUTDIR,comp,comp,sep="/"),"clustDEgene.png",sep="_"),width=25,height=22,units="cm",res=600,pointsize=3)
        print(Heatmap(exprDESelect.scaled,top_annotation = haByCompSelect, row_names_gp = autoGparFontSizeMatrix(nrow(exprDESelect.scaled)),
        cluster_rows = hclustGeneDESelect,col = colHA, cluster_columns = hclustSampleDESelect,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDESelect.scaled)) ))	
        dev.off()

        print(Heatmap(exprDESelect.scaled,top_annotation = haByCompSelect, row_names_gp = autoGparFontSizeMatrix(nrow(exprDESelect.scaled)),
        cluster_rows = hclustGeneDESelect,col = colHA, cluster_columns = hclustSampleDESelect,name=comp,column_names_gp = autoGparFontSizeMatrix(ncol(exprDESelect.scaled)) ))

    }else {	
        png(paste(paste(OUTDIR,comp,comp,sep="/"),"clustDEgene.png",sep="_"),width=25,height=22,units="cm",res=600,pointsize=3)
        print(p)
        dev.off()
    }
264
265
266
267
268
269
270
271
    dev.off()

}else{
    png(paste(paste(OUTDIR,comp,comp,sep="/"),"clustDEgene.png",sep="_"))
    plot.new()
    text(0.5,0.5,"Less than 10 differential expressed gene for this comparison,\n clustering isn't relevant")
    dev.off()
}