Commit 964efc3c authored by Eric CHARPENTIER's avatar Eric CHARPENTIER 🐍
Browse files

modified references.json for bird cluster, modified the way biomart is accessed

parent 2c846eae
{
"Ensembl_GRCh37":
"Ensembl_GRCh38":
{
"name":"Ensembl_GRCh37",
"description":"Homo sapiens Ensembl GRCh37",
"STARindexDir":"CONFIG/genome",
"fasta":"CONFIG/genome/human_g1k_v37.chr22.fasta",
"gtf":"CONFIG/genome/chr22.gff",
"biomart":"feb2014.archive.ensembl.org,ENSEMBL_MART_ENSEMBL,hsapiens_gene_ensembl"
"name":"Ensembl_GRCh38",
"description":"Homo sapiens Ensembl GRCh38",
"STARindexDir":"/ceph-recherche/resources/species/human/ensembl/release-99/index-star-2.7.3a",
"fasta":"/ceph-recherche/resources/species/human/ensembl/release-99/index-star-2.7.3a/Homo_sapiens.GRCh38.dna.toplevel.fa",
"gtf":"/ceph-recherche/resources/species/human/ensembl/release-99/Homo_sapiens.GRCh38.99.gtf",
"biomart":"99,hsapiens_gene_ensembl"
},
"Ensembl_GRCm38":
{
"name":"Ensembl_GRCm38",
"description":"Mus musculus Ensembl GRCm38",
"STARindexDir":"/ceph-recherche/resources/species/mouse/ensembl/release-99/index-star-2.7.3a",
"fasta":"/ceph-recherche/resources/species/mouse/ensembl/release-99/index-star-2.7.3a/Mus_musculus.GRCm38.dna.toplevel.fa",
"gtf":"/ceph-recherche/resources/species/mouse/ensembl/release-99/Mus_musculus.GRCm38.99.gtf",
"biomart":"99,mmusculus_gene_ensembl"
}
}
\ No newline at end of file
}
......@@ -117,9 +117,10 @@ rule all:
input:
qc=OUTPUTDIR+"/Report/data/general/fastqc/multiqc_report.html",
#fastqc=expand(OUTPUTDIR+"/Samples/{sampleName}/FASTQC_cleaned/{sampleName}_{read}.concat_fastqc.html",sampleName=SAMPLESALL,read=READS)
index=expand(OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam.bai",sampleName=SAMPLESALL) ,
#deg=expand(OUTPUTDIR+"/DESEQ2/results/{suffixe}",suffixe=["NormalizedCountMatrix.txt","NormalizedCountMatrixFiltered.txt","PCAplot.png","sampletosampledistance.jpeg"])
rep=OUTPUTDIR+"/Report/report.html"
index=expand(OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam.bai",sampleName=SAMPLESALL),
#htseq=expand(OUTPUTDIR+"/DESEQ2/counts/{sampleName}",sampleName=SAMPLESALL)
deg=expand(OUTPUTDIR+"/DESEQ2/results/{suffixe}",suffixe=["NormalizedCountMatrix.txt","NormalizedCountMatrixFiltered.txt","PCAplot.png","sampletosampledistance.jpeg"])
#rep=OUTPUTDIR+"/Report/report.html"
rule fastqc:
input: OUTPUTDIR+"/Samples/{sampleName}/FASTQC/{sampleName}_{read}.fastq.gz"
......@@ -222,6 +223,7 @@ rule star_PE:
genome=GENOMEDIR+"/Genome"
output: OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam"
params: cpu=config["align-cpu"]
resources: parallel_star=1
shell: """
STAR --runThreadN {params.cpu} --genomeDir {GENOMEDIR} --readFilesCommand zcat --outFileNamePrefix {OUTPUTDIR}/Samples/{wildcards.sampleName}/STAR/{wildcards.sampleName} --readFilesIn {input.R1} {input.R2} --outSAMtype BAM SortedByCoordinate --outBAMsortingThreadN 6 --outSAMattrRGline ID:"{wildcards.sampleName}" SM:"{wildcards.sampleName}" LB:"{wildcards.sampleName}" CN:"PFBirdNantes"
"""
......
suppressMessages(require("DESeq2"))
library(biomaRt)
library(genefilter,quietly=TRUE)
library("RColorBrewer")
library("gplots")
......
......@@ -125,24 +125,22 @@ ensemblGeneId <- rownames(matrix)
# Regularized-log transform of the DESeq2 dataset, then extract the matrix.
rld <- rlogTransformation(ddsHTSeq, blind = TRUE)
mrld <- assay(rld)
## To look up archived versions of Ensembl (here release 67):
## listMarts(host="may2012.archive.ensembl.org")
## BIOMART
# BIOMART is a comma-separated "<version>,<dataset>" string,
# e.g. BIOMART="37,hsapiens_gene_ensembl" or BIOMART="99,mmusculus_gene_ensembl".
splitBiomart <- strsplit(BIOMART, ",")
# First field: version number. It is converted to an integer because
# comparing the raw string with `<=` is lexicographic ("100" <= "37" is TRUE),
# which would send releases >= 100 down the GRCh branch below.
version <- suppressWarnings(as.integer(splitBiomart[[1]][1]))
# Second field: organism dataset (hsapiens_gene_ensembl, mmusculus_gene_ensembl, ...)
dataset <- splitBiomart[[1]][2]
# Fail early with a readable message instead of the opaque
# "missing value where TRUE/FALSE needed" raised by `if (NA <= 37)`.
if (is.na(version) || is.na(dataset)) {
  stop(
    "BIOMART must look like \"<version>,<dataset>\", got: ", BIOMART,
    call. = FALSE
  )
}
# Values up to 37 are treated as a genome build and passed as `GRCh`;
# anything larger is treated as an Ensembl release and passed as `version`.
# `} else {` is kept on one line: an `else` starting its own line is a parse
# error when this code runs at the top level of a script.
if (version <= 37) {
  ensembl <- useEnsembl(biomart = "ensembl", dataset = dataset, GRCh = version)
} else {
  ensembl <- useEnsembl(biomart = "ensembl", dataset = dataset, version = version)
}
mart <- useDataset(dataset, ensembl)
errorId <- FALSE
......
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment