Commit 1772de5c authored by Damien VINTACHE's avatar Damien VINTACHE
Browse files

utilisation de conda dans les commandes snakemake

parent 25c08d62
......@@ -10,6 +10,9 @@ echo $PATH
# Set the locale variables to en_US UTF-8 for the commands that follow.
export LC_ALL=en_US.utf8
export LANG=en_US.utf-8
# Create a conda environment named "snakemake" containing the snakemake
# package. Only the conda-forge and bioconda channels are searched
# (--override-channels); -q/-y keep the run quiet and non-interactive.
echo "=== create a conda environment ==="
conda create -qy -n snakemake snakemake --override-channels -c conda-forge -c bioconda
# Activate the "snakemake" environment in the current shell.
echo "=== activate the conda environment ==="
source activate snakemake
......@@ -40,3 +43,5 @@ fi
# Deactivate the conda virtual environment.
echo "=== deactivate the conda environment ==="
source deactivate
# Delete the "snakemake" environment (quietly, without confirmation) so the
# machine is left without it once the script is done.
echo "=== remove conda environment ==="
conda env remove -qy -n snakemake
# Conda environment specification for the environment named "snakemake".
name: snakemake
# Channels declared for this environment, in the order listed.
channels:
- dougal
- r
- bird
- conda-forge
- bioconda
- defaults
# Packages pinned as name=version=build.
dependencies:
- filechunkio=1.6=py36_0
- ftputil=3.2=py36_0
- pyasn1-modules=0.0.5=py36_0
- pygraphviz=1.3.1=py36_0
- pysftp=0.2.9=py36_0
- rsa=3.1.4=py36_0
- snakemake=5.2.0=py36_0
- urllib3=1.12=py36_0
- aioeasywebdav=2.2.0=py36_0
- aiohttp=3.3.2=py36h14c3975_0
- appdirs=1.4.3=py_1
- asn1crypto=0.24.0=py_1
- async-timeout=3.0.0=py36_0
- attrs=18.1.0=py_1
- boto3=1.7.58=py_0
- botocore=1.10.58=py_0
- ca-certificates=2018.4.16=0
- cachetools=2.0.1=py_0
- cairo=1.14.12=he56eebe_1
- certifi=2018.4.16=py36_0
- cffi=1.11.5=py36_0
- chardet=3.0.4=py36_2
- configargparse=0.13.0=py_1
- cryptography=2.2.1=py36hdffb7b8_1
- decorator=4.3.0=py_0
- docutils=0.14=py36_0
- dropbox=7.3.1=py36_0
- expat=2.2.5=hfc679d8_1
- fontconfig=2.13.0=h074f895_2
- freetype=2.8.1=0
- gettext=0.19.8.1=0
- glib=2.55.0=0
- google-auth=1.2.1=py_0
- google-auth-httplib2=0.0.2=py36_0
- google-cloud-core=0.24.1=py36_0
- google-cloud-storage=1.1.1=py36_0
- google-resumable-media=0.0.2=py36_0
- googleapis-common-protos=1.5.3=py_1
- graphite2=1.3.11=hfc679d8_0
- graphviz=2.38.0=7
- harfbuzz=1.7.6=0
- httplib2=0.11.3=py36_0
- icu=58.2=hfc679d8_0
- idna=2.7=py36_2
- idna_ssl=1.0.0=0
- jinja2=2.10=py_1
- jmespath=0.9.3=py_1
- jpeg=9c=h470a237_0
- jsonschema=2.6.0=py36_1
- libffi=3.2.1=3
- libiconv=1.15=h470a237_1
- libpng=1.6.34=ha92aebf_1
- libprotobuf=3.5.2=hd28b015_1
- libtiff=4.0.9=he6b73bb_1
- libtool=2.4.6=0
- libuuid=1.0.3=1
- libxcb=1.13=h470a237_1
- libxml2=2.9.8=h422b904_2
- markupsafe=1.0=py36_0
- mkl_fft=1.0.2=py36_0
- mkl_random=1.0.1=py36_0
- multidict=4.3.1=py36h470a237_0
- ncurses=5.9=10
- networkx=2.1=py_1
- openssl=1.0.2o=0
- pandas=0.23.3=py36_0
- pango=1.40.14=hd50be51_1
- paramiko=2.4.1=py36_0
- pcre=8.41=1
- pip=9.0.3=py36_0
- pixman=0.34.0=2
- prettytable=0.7.2=py_2
- protobuf=3.5.2=py36_0
- psutil=5.4.6=py36_0
- pthread-stubs=0.4=h470a237_1
- pyasn1=0.4.3=py_0
- pycparser=2.18=py_1
- pynacl=1.1.2=py36_0
- python=3.6.5=1
- python-dateutil=2.7.3=py_0
- python-irodsclient=0.7.0=py_0
- pytz=2018.5=py_0
- pyyaml=3.12=py36_1
- ratelimiter=1.2.0=py36_0
- readline=7.0=0
- requests=2.13.0=py36_0
- s3transfer=0.1.13=py36_0
- setuptools=40.0.0=py36_0
- six=1.11.0=py36_1
- sqlite=3.20.1=2
- tk=8.6.7=0
- wheel=0.31.1=py36_0
- wrapt=1.10.11=py36_0
- xmlrunner=1.7.7=py_0
- xorg-kbproto=1.0.7=h470a237_2
- xorg-libice=1.0.9=h470a237_3
- xorg-libsm=1.2.2=h470a237_3
- xorg-libx11=1.6.5=h470a237_1
- xorg-libxau=1.0.8=h470a237_5
- xorg-libxdmcp=1.1.2=h470a237_6
- xorg-libxrender=0.9.10=h470a237_1
- xorg-renderproto=0.11.1=h470a237_2
- xorg-xproto=7.0.31=h470a237_7
- xz=5.2.3=0
- yaml=0.1.7=0
- yarl=1.2.6=py36h470a237_0
- zlib=1.2.11=h470a237_3
- bcrypt=3.1.4=py36h14c3975_0
- datrie=0.7.1=py36_0
- intel-openmp=2018.0.3=0
- libgcc-ng=7.2.0=hdf63c60_3
- libgfortran-ng=7.2.0=hdf63c60_3
- libstdcxx-ng=7.2.0=hdf63c60_3
- mkl=2018.0.3=1
- numpy=1.14.2=py36hdbf6ddf_1
# NOTE(review): absolute path inside one user's home directory; presumably
# written by an environment export and not portable to other machines --
# confirm whether this key can be dropped.
prefix: /home/vintache-d/miniconda3/envs/snakemake
......@@ -102,7 +102,7 @@ reads=["R1","R2"]
###############################################
# Target rule: requests the final outputs of the pipeline, i.e. the MultiQC
# report, one BAM index per sample, and the DESeq2 result files.
rule all:
    input:
        qc=OUTPUTDIR+"/Report/data/general/fastqc/multiqc_report.html",
        index=expand(OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam.bai",sampleName=sample_all),
        deg=expand(OUTPUTDIR+"/DESEQ2/results/{suffixe}",suffixe=["NormalizedCountMatrix.txt","NormalizedCountMatrixFiltered.txt","PCAplot.png","sampletosampledistance.jpeg"])
......@@ -115,23 +115,23 @@ rule all:
##
# Runs FastQC on one concatenated fastq.gz file (one sample, one read) and
# writes the report into the same FASTQC directory as the input.
rule fastqc:
    input: OUTPUTDIR+"/Samples/{sampleName}/FASTQC/{sampleName}_{read}.fastq.gz"
    output: OUTPUTDIR+"/Samples/{sampleName}/FASTQC/{sampleName}_{read}_fastqc.html"
    # The environment file path must be a quoted string.
    conda: "CONDA/rnaSeqQuantif.yml"
    shell: "fastqc -o " + OUTPUTDIR+"/Samples/{wildcards.sampleName}/FASTQC/ {input}"
# Concatenates the fastq chunks returned by getFastqs into a single temporary
# fastq.gz file per sample and read.
rule concat_fastq_chunks:
    input: getFastqs
    output: temp(OUTPUTDIR+"/Samples/{sampleName}/FASTQC/{sampleName}_{read}.fastq.gz")
    run:
        # Join all input paths into one space-separated argument list for cat.
        chunks = ' '.join(input)
        shell("cat {chunks} > {output}")
# Aggregates the per-sample FastQC reports into one MultiQC report. The
# general_stats, tophat and bowtie2 modules are excluded via -e.
rule multiQC:
    input: expand(OUTPUTDIR+"/Samples/{sampleName}/FASTQC/{sampleName}_{read}_fastqc.html", sampleName=sample_all, read=reads)
    output: OUTPUTDIR+"/Report/data/general/fastqc/multiqc_report.html"
    # The environment file path must be a quoted string.
    conda: "CONDA/rnaSeqQuantif.yml"
    shell: "multiqc -f -e general_stats -e tophat -e bowtie2 " + OUTPUTDIR+"/Samples -o "+ OUTPUTDIR +"/Report/data/general/fastqc"
rule dezipFastq:
input: getFastqPair
......@@ -146,7 +146,7 @@ rule prinseq:
log = OUTPUTDIR+"/Samples/{sampleName}/PRINSEQ/{sampleName}_{pairNumber}.log"
params: preGood = OUTPUTDIR+"/Samples/{sampleName}/PRINSEQ/{sampleName}_{pairNumber}_good",
preBad = OUTPUTDIR+"/Samples/{sampleName}/PRINSEQ/{sampleName}_{pairNumber}_bad"
conda: CONDA/rnaSeqQuantif.yml
conda: "CONDA/rnaSeqQuantif.yml"
shell: """
perl `which prinseq-lite.pl` -min_qual_mean 30 -no_qual_header -fastq {input[0]} -fastq2 {input[1]} -out_good {params.preGood} -out_bad {params.preBad} -log {output.log}
if(! test -f {params.preBad}_1.fastq);then touch {params.preBad}_1.fastq {params.preGood}_1_singletons.fastq;fi
......@@ -167,7 +167,7 @@ rule cutadaptR1:
input: OUTPUTDIR+"/Samples/{sampleName}/PRINSEQ/{sampleName}_{pairNumber}_R1.fastq.gz"
output: fastq = OUTPUTDIR+"/Samples/{sampleName}/CUTADAPT/{sampleName}_{pairNumber}_R1.fastq.gz",
report = OUTPUTDIR+"/Samples/{sampleName}/CUTADAPT/{sampleName}_{pairNumber}_R1.fastq.report"
conda: CONDA/rnaSeqQuantif.yml
conda: "CONDA/rnaSeqQuantif.yml"
shell: """
cutadapt -a {FWADAPT} 2> {output.report} {input} | java -jar {SCRIPTPATH}/pademptyfastq-fat.jar 2> /dev/null | gzip --best -c > {output.fastq}
"""
......@@ -176,13 +176,13 @@ rule cutadaptR2:
input: OUTPUTDIR+"/Samples/{sampleName}/PRINSEQ/{sampleName}_{pairNumber}_R2.fastq.gz"
output: fastq = OUTPUTDIR+"/Samples/{sampleName}/CUTADAPT/{sampleName}_{pairNumber}_R2.fastq.gz",
report = OUTPUTDIR+"/Samples/{sampleName}/CUTADAPT/{sampleName}_{pairNumber}_R2.fastq.report"
conda: CONDA/rnaSeqQuantif.yml
conda: "CONDA/rnaSeqQuantif.yml"
shell: """
cutadapt -a {RVADAPT} 2> {output.report} {input} | java -jar {SCRIPTPATH}/pademptyfastq-fat.jar 2> /dev/null | gzip --best -c > {output.fastq}
"""
rule concat_cleanedfastq_chunks:
input: getCleanedFastq
input: getCleanedFastq
output: temp(OUTPUTDIR+"/Samples/{sampleName}/CUTADAPT/{sampleName}_{read}.concat.fastq.gz")
run:
chunks = ' '.join(input)
......@@ -190,48 +190,51 @@ rule concat_cleanedfastq_chunks:
# Builds the STAR genome index in GENOMEDIR from the FASTA sequence and the
# GTF annotation. The "Genome" file inside GENOMEDIR is the tracked output.
rule genomeFiles:
    input: GTF,
           FASTA
    output: GENOMEDIR+"/Genome"
    # Number of STAR threads, read from the "align-cpu" config entry.
    params: cpu=config["align-cpu"]
    conda: "CONDA/rnaSeqQuantif.yml"
    shell: """
        STAR --runThreadN {params.cpu} --runMode genomeGenerate --genomeDir {GENOMEDIR} --genomeFastaFiles {FASTA} --sjdbGTFfile {GTF} --sjdbOverhang {OVERHANG}
    """
# Aligns the cleaned paired-end reads of one sample with STAR and writes a
# coordinate-sorted BAM file.
rule star_PE:
    input:
        R1=OUTPUTDIR+"/Samples/{sampleName}/CUTADAPT/{sampleName}_R1.concat.fastq.gz",
        R2=OUTPUTDIR+"/Samples/{sampleName}/CUTADAPT/{sampleName}_R2.concat.fastq.gz",
        # Not passed to the command; listed so the genome index exists first.
        genome=GENOMEDIR+"/Genome"
    output: OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam"
    # Number of STAR threads, read from the "align-cpu" config entry.
    params: cpu=config["align-cpu"]
    conda: "CONDA/rnaSeqQuantif.yml"
    shell: """
        STAR --runThreadN {params.cpu} --genomeDir {GENOMEDIR} --readFilesCommand zcat --outFileNamePrefix {OUTPUTDIR}/Samples/{wildcards.sampleName}/STAR/{wildcards.sampleName} --readFilesIn {input.R1} {input.R2} --outSAMtype BAM SortedByCoordinate --outBAMsortingThreadN 6
    """
# Writes the .bai index for the coordinate-sorted BAM produced by STAR.
rule createBamIndex:
    input: OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam"
    output: OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam.bai"
    conda: "CONDA/rnaSeqQuantif.yml"
    # Index the BAM file on disk directly. The input is already sorted by
    # coordinate, and an index built from a re-sorted, re-compressed stream on
    # stdin is not guaranteed to match the block offsets of the stored file.
    shell: "samtools index {input} {output}"
# Counts reads per feature for one sample with htseq-count. The -s
# (strandedness) value is derived from the "library-type" config entry.
rule htseq:
    input: OUTPUTDIR+"/Samples/{sampleName}/STAR/{sampleName}Aligned.sortedByCoord.out.bam"
    output: OUTPUTDIR+"/DESEQ2/counts/{sampleName}"
    params: library_type=config["library-type"]
    conda: "CONDA/rnaSeqQuantif.yml"
    # The config value reaches the shell through {params.library_type}; it is
    # not a shell variable. "strand" IS a shell variable, so it is written as
    # $strand: single braces would be treated as a Snakemake placeholder.
    # NOTE(review): fr-secondstrand -> reverse and fr-firststrand -> yes is kept
    # as in the original; confirm this mapping against the htseq-count
    # --stranded documentation for the library protocol in use.
    shell: """
        if [ "{params.library_type}" = "fr-secondstrand" ]; then
            strand="reverse"
        elif [ "{params.library_type}" = "fr-firststrand" ]; then
            strand="yes"
        else
            strand="no"
        fi
        htseq-count -s "$strand" -f bam {input} {GTF} > {output}
    """
rule deseq2_conditions:
output: tab = OUTPUTDIR+"/DESEQ2/DESEQ2_CONDITIONS.tab"
......@@ -248,7 +251,7 @@ rule deseq2:
conditions=OUTPUTDIR+"/DESEQ2/DESEQ2_CONDITIONS.tab",
counts=expand(OUTPUTDIR+"/DESEQ2/counts/{sampleName}",sampleName=sample_all)
output: expand(OUTPUTDIR+"/DESEQ2/results/{suffixe}",suffixe=["NormalizedCountMatrix.txt","NormalizedCountMatrixFiltered.txt","PCAplot.png","sampletosampledistance.jpeg"])
conda: CONDA/deseq2.yml
conda: "CONDA/deseq2.yml"
shell: """
cat {SCRIPTPATH}/run_deseq2.R | R --slave --args {input.conditions} {OUTPUTDIR}/DESEQ2/counts {OUTPUTDIR}/DESEQ2/results {BIOMART}
"""
Supports Markdown
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment