Commit 20ed0f19 authored by Eric CHARPENTIER's avatar Eric CHARPENTIER 🐍
Browse files

fixed targets and biomart access.

parent 964efc3c
......@@ -119,8 +119,8 @@ rule all:
#fastqc=expand(OUTPUTDIR+"/Samples/{sampleName}/FASTQC_cleaned/{sampleName}_{read}.concat_fastqc.html",sampleName=SAMPLESALL,read=READS)
index=expand(OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam.bai",sampleName=SAMPLESALL),
#htseq=expand(OUTPUTDIR+"/DESEQ2/counts/{sampleName}",sampleName=SAMPLESALL)
deg=expand(OUTPUTDIR+"/DESEQ2/results/{suffixe}",suffixe=["NormalizedCountMatrix.txt","NormalizedCountMatrixFiltered.txt","PCAplot.png","sampletosampledistance.jpeg"])
#rep=OUTPUTDIR+"/Report/report.html"
deg=expand(OUTPUTDIR+"/DESEQ2/results/{suffixe}",suffixe=["NormalizedCountMatrix.txt","NormalizedCountMatrixFiltered.txt","PCAplot.png","sampletosampledistance.jpeg"]),
rep=OUTPUTDIR+"/Report/report.html"
rule fastqc:
input: OUTPUTDIR+"/Samples/{sampleName}/FASTQC/{sampleName}_{read}.fastq.gz"
......@@ -213,7 +213,7 @@ rule genomeFiles:
output: GENOMEDIR+"/Genome"
params: cpu=config["align-cpu"]
shell: """
STAR --runThreadN {params.cpu} --runMode genomeGenerate --genomeDir {GENOMEDIR} --genomeFastaFiles {FASTA} --sjdbGTFfile {GTF} --sjdbOverhang {OVERHANG} --limitGenomeGenerateRAM 95543555797
STAR --runThreadN {params.cpu} --runMode genomeGenerate --genomeDir {GENOMEDIR} --genomeFastaFiles {FASTA} --sjdbGTFfile {GTF} --sjdbOverhang {OVERHANG} --limitGenomeGenerateRAM 168632718037
"""
rule star_PE:
......
......@@ -2,3 +2,4 @@ build go kegg
Ensembl_GRCh37 org.Hs.eg.db hsa
rn6 org.Rn.eg.db rno
mm10 org.Mm.eg.db mmu
Ensembl_GRCh38 org.Hs.eg.db hsa
......@@ -75,11 +75,7 @@ samples<-rownames(sampleAnnot)
res=results(ddsHTSeq,contrast=c(condCol,cond1,cond2),independentFiltering=T)
res <- base::merge(res,geneNames,by.x="row.names",by.y="ensembl_gene_id",all.x=TRUE)
if(errorId){
res$Row.names <- paste(res$Row.names, res$external_gene_name, sep = "|")
}else{
res$Row.names <- paste(res$Row.names, res$external_gene_id, sep = "|")
}
res$Row.names <- paste(res$Row.names, res$external_gene_name, sep = "|")
rownames(res)=res$Row.names
res <- res[,-ncol(res)]
......
......@@ -104,12 +104,12 @@ directory <- c(COUNTDIRECTORY)
conditions<-factor(sampleTable$condition)
uniq_conds <- unique(conditions)
pw_tests <- list()
for(i in 1:(length(uniq_conds)-1)) {
for(j in (i+1):length(uniq_conds)) {
pw_tests[[length(pw_tests)+1]] <- c(as.character(uniq_conds[j]), as.character(uniq_conds[i]))
}
}
#pw_tests <- list()
#for(i in 1:(length(uniq_conds)-1)) {
# for(j in (i+1):length(uniq_conds)) {
# pw_tests[[length(pw_tests)+1]] <- c(as.character(uniq_conds[j]), as.character(uniq_conds[i]))
# }
#}
##Récupération de la matrice normalisée
ddsHTSeq <- DESeqDataSetFromHTSeqCount(sampleTable = sampleTable, directory = directory, design= ~condition)
......@@ -121,11 +121,10 @@ ddsHTSeq <- DESeq(ddsHTSeq)
matrix <- counts(ddsHTSeq,normalized=TRUE)
ensemblGeneId <- rownames(matrix)
## Transforming raw counts
## Transforming raw counts
rld <- rlogTransformation(ddsHTSeq, blind=TRUE)
mrld=assay(rld)
## BIOMART
# ex line to parse: BIOMART="37,hsapiens_gene_ensembl"
splitBiomart = strsplit(BIOMART,",")
......@@ -135,52 +134,30 @@ version=splitBiomart[[1]][1]
dataset= splitBiomart[[1]][2]
if (version<=37){
ensembl = useEnsembl(biomart="ensembl", dataset=dataset,GRCh=version)
}
else{
ensembl = useEnsembl(biomart="ensembl", dataset=dataset,version=version)
ensembl = useEnsembl(biomart="ensembl", dataset=dataset,GRCh=version)
}else{
ensembl = useEnsembl(biomart="ensembl", dataset=dataset,version=version)
}
mart <- useDataset(dataset,ensembl)
errorId = FALSE
## Récupération dans la base de donnée BioMart du nom du gène ainsi que de sa description
## correspondant au geneId
## mart <- useDataset("mmusculus_gene_ensembl", useMart("ensembl"))
rs = tryCatch(
{
geneNames <-getBM(filters="ensembl_gene_id",
attributes=c("ensembl_gene_id","external_gene_id"),
values=ensemblGeneId,
mart=mart)
}, error = function(e) {
errorId = TRUE
print("Cette version n'utilise pas external_gene_id")
})
if (errorId)
{
geneNames <-getBM(filters="ensembl_gene_id",
attributes=c("ensembl_gene_id","external_gene_name"),
values=ensemblGeneId,
mart=mart)
}
geneNames <-getBM(filters="ensembl_gene_id",
attributes=c("ensembl_gene_id","external_gene_name"),
values=ensemblGeneId,
mart=mart)
## Fusion of the 2 tables to merge the 2 ID (Ensembl and gene name)
## L'option all.x=TRUE permet de conserver les genes id qui ne matchent pas un gene name
matrix <- merge(matrix,geneNames,by.x="row.names",by.y="ensembl_gene_id",all.x=TRUE)
mrld<-merge(mrld,geneNames,by.x="row.names",by.y="ensembl_gene_id",all.x=TRUE)
if(errorId)
{
matrix$Row.names <- paste(matrix$Row.names, matrix$external_gene_name, sep = "|")
mrld$Row.names <- paste(mrld$Row.names, mrld$external_gene_name, sep = "|")
}else
{
matrix$Row.names <- paste(matrix$Row.names, matrix$external_gene_id, sep = "|")
mrld$Row.names <- paste(mrld$Row.names, mrld$external_gene_id, sep = "|")
}
matrix$Row.names <- paste(matrix$Row.names, matrix$external_gene_name, sep = "|")
mrld$Row.names <- paste(mrld$Row.names, mrld$external_gene_name, sep = "|")
matrix <- matrix[,-ncol(matrix)]
mrld <- mrld[,-ncol(mrld)]
......@@ -206,4 +183,4 @@ dev.off()
# Save global dds object
save_data <- c(paste(RDATA,"dds.RData", sep="/"))
save(ddsHTSeq, sampleTable, matrix, mrld, geneNames, pw_tests, OUTDIR, errorId, file=save_data)
save(ddsHTSeq, sampleTable, matrix, mrld, geneNames, OUTDIR, file=save_data)
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment