Commit ade78e3a authored by Eric CHARPENTIER's avatar Eric CHARPENTIER 🐍
Browse files
parents ffb634d9 87babb63
......@@ -15,7 +15,11 @@ option_list = list(
make_option(c("-a", "--sampleAbstract"), type="character", default=NULL,
help="sample abstract file path", metavar="character"),
make_option(c("-o", "--outdir"), type="character", default=".",
help="output directory [default= %default]", metavar="character")
help="output directory [default= %default]", metavar="character"),
make_option(c("--ignore-plate-effect"), dest="ignore_plate_effect", action="store_true", default=FALSE,
help="Ignore plate effect if multiplate design [default= %default]"),
make_option(c("--ignore-batch-effect"), dest="ignore_batch_effect", action="store_true", default=FALSE,
help="Ignore batch effect [default= %default]")
);
opt_parser = OptionParser(option_list=option_list);
......@@ -86,9 +90,9 @@ nboot=30 #if bootstrap, number of bootstrap replications
sampleAnnot<-lire(sampleTable)
# If a column named "batchEffect" exists and --ignore-batch-effect is not set, apply batch correction
batchEffect<-FALSE; if(batchEffectCol %in% colnames(sampleAnnot)) batchEffect<-TRUE
batchEffect<-FALSE; if((!opt$ignore_batch_effect) && (batchEffectCol %in% colnames(sampleAnnot))) batchEffect<-TRUE
# If a column named "plateBatch" exists and --ignore-plate-effect is not set, apply batch correction
plateBatch<-FALSE; if(plateBatchCol %in% colnames(sampleAnnot)) plateBatch<-TRUE
plateBatch<-FALSE; if((!opt$ignore_plate_effect) && (plateBatchCol %in% colnames(sampleAnnot))) plateBatch<-TRUE
if(batchEffect || plateBatch){
exprDatT=lire(paste(outputDir,"/exprTransformedAdjusted.tsv",sep=""))
......
......@@ -12,7 +12,11 @@ option_list = list(
make_option(c("-s", "--sampleTable"), type="character", default=NULL,
help="sample table file path", metavar="character"),
make_option(c("-o", "--outdir"), type="character", default=".",
help="output directory [default= %default]", metavar="character")
help="output directory [default= %default]", metavar="character"),
make_option(c("--ignore-plate-effect"), dest="ignore_plate_effect", action="store_true", default=FALSE,
help="Ignore plate effect if multiplate design [default= %default]"),
make_option(c("--ignore-batch-effect"), dest="ignore_batch_effect", action="store_true", default=FALSE,
help="Ignore batch effect [default= %default]")
);
opt_parser = OptionParser(option_list=option_list);
......@@ -47,9 +51,9 @@ exprDat<-lire(expressionData)
sampleAnnot<-lire(sampleTable)
# If a column named "batchEffect" exists and --ignore-batch-effect is not set, apply batch correction
batchEffect<-FALSE; if(batchEffectCol %in% colnames(sampleAnnot)) batchEffect<-TRUE
batchEffect<-FALSE; if((!opt$ignore_batch_effect) && (batchEffectCol %in% colnames(sampleAnnot))) batchEffect<-TRUE
# If a column named "plateBatch" exists and --ignore-plate-effect is not set, apply batch correction
plateBatch<-FALSE; if(plateBatchCol %in% colnames(sampleAnnot)) plateBatch<-TRUE
plateBatch<-FALSE; if((!opt$ignore_plate_effect) && (plateBatchCol %in% colnames(sampleAnnot))) plateBatch<-TRUE
sampleAnnot[,condCol]<-as.factor(as.character(sampleAnnot[,condCol]))
exprDat<-exprDat[,c(rownames(sampleAnnot))]
......
......@@ -12,7 +12,11 @@ option_list = list(
make_option(c("-s", "--sampleTable"), type="character", default=NULL,
help="sample table file path", metavar="character"),
make_option(c("-o", "--outdir"), type="character", default=".",
help="output directory [default= %default]", metavar="character")
help="output directory [default= %default]", metavar="character"),
make_option(c("--ignore-plate-effect"), dest="ignore_plate_effect", action="store_true", default=FALSE,
help="Ignore plate effect if multiplate design [default= %default]"),
make_option(c("--ignore-batch-effect"), dest="ignore_batch_effect", action="store_true", default=FALSE,
help="Ignore batch effect [default= %default]")
);
opt_parser = OptionParser(option_list=option_list);
......@@ -160,9 +164,9 @@ sampleAnnot<-lire(sampleTable)
# If a column named "batchEffect" exists and --ignore-batch-effect is not set, apply batch correction
batchEffect<-FALSE; if(batchEffectCol %in% colnames(sampleAnnot)) batchEffect<-TRUE
batchEffect<-FALSE; if((!opt$ignore_batch_effect) && (batchEffectCol %in% colnames(sampleAnnot))) batchEffect<-TRUE
# If a column named "plateBatch" exists and --ignore-plate-effect is not set, apply batch correction
plateBatch<-FALSE; if(plateBatchCol %in% colnames(sampleAnnot)) plateBatch<-TRUE
plateBatch<-FALSE; if((!opt$ignore_plate_effect) && (plateBatchCol %in% colnames(sampleAnnot))) plateBatch<-TRUE
acpT<-ACP(exprDatT)
compo=length(which(acpT$percentVar>0.05))+1
......
......@@ -8,14 +8,24 @@ rule copyConfig:
cp {input} {output}
"""
rule buildReport:
# Publish the default report part under the plain "report.html" name:
# the input is report-<DEFAULT_REPORT_PART>.html in the project folder and the
# shell command moves (not copies) it to report.html in the same folder.
rule renameMainReport:
input:
report = MAINDIR+"/{project}/report-" + DEFAULT_REPORT_PART + ".html"
output:
report = MAINDIR+"/{project}/report.html"
shell:
"""
mv {input.report} {output.report}
"""
rule buildReportPart:
input:
unpack(getAllFilesForReport)
output:
report = MAINDIR+"/{project}/report.html",
report = MAINDIR+"/{project}/report-{multipart}.html",
shell:
"""
python {wfbasedir}/SCRIPTS/make_html.py -c {config[conf]} -t {input.index} -p {wildcards.project} -o {output.report}
python {wfbasedir}/SCRIPTS/make_html.py -c {config[conf]} -t {input.index} -p {wildcards.project} -o {output.report} -m {wildcards.multipart}
"""
rule copyTemplateFolder:
......@@ -107,10 +117,11 @@ rule deClusterHeatmap:
MAINDIR+"/{project}/"+config["de_folder"]+"/HeatmapCorPearson.png",
MAINDIR+"/{project}/"+config["de_folder"]+"/heatmap.RData"
params:
outdir = MAINDIR+"/{project}/"+config["de_folder"]
outdir = MAINDIR+"/{project}/"+config["de_folder"],
additional_params = lambda wildcards: config["comparisons"][wildcards.project].get("normalizationOptions", [])
shell:
"""
Rscript {wfbasedir}/SCRIPTS/DE/clusterHeatmap.R -e {input.expressionTable} -s {input.sampleTable} -a {input.sampleAbstract} -o {params.outdir}
Rscript {wfbasedir}/SCRIPTS/DE/clusterHeatmap.R -e {input.expressionTable} -s {input.sampleTable} -a {input.sampleAbstract} -o {params.outdir} {params.additional_params}
"""
rule deQualityControls:
......@@ -120,10 +131,11 @@ rule deQualityControls:
output:
MAINDIR+"/{project}/"+config["de_folder"]+"/PCA.png"
params:
outdir = MAINDIR+"/{project}/"+config["de_folder"]
outdir = MAINDIR+"/{project}/"+config["de_folder"],
additional_params = lambda wildcards: config["comparisons"][wildcards.project].get("normalizationOptions", [])
shell:
"""
Rscript {wfbasedir}/SCRIPTS/DE/qualityControl.R -e {input.expressionTable} -s {input.sampleTable} -o {params.outdir}
Rscript {wfbasedir}/SCRIPTS/DE/qualityControl.R -e {input.expressionTable} -s {input.sampleTable} -o {params.outdir} {params.additional_params}
"""
rule deNormalization:
......@@ -135,10 +147,12 @@ rule deNormalization:
MAINDIR+"/{project}/"+config["de_folder"]+"/exprTransformed.tsv",
MAINDIR+"/{project}/"+config["de_folder"]+"/dds.RData"
params:
outdir = MAINDIR+"/{project}/"+config["de_folder"]
outdir = MAINDIR+"/{project}/"+config["de_folder"],
additional_params = lambda wildcards: config["comparisons"][wildcards.project].get("normalizationOptions", [])
shell:
"""
Rscript {wfbasedir}/SCRIPTS/DE/normalization.R -e {input.expressionTable} -s {input.sampleTable} -o {params.outdir}
echo "{params.additional_params}"
Rscript {wfbasedir}/SCRIPTS/DE/normalization.R -e {input.expressionTable} -s {input.sampleTable} -o {params.outdir} {params.additional_params}
"""
rule deGenerateSampleTable:
......
......@@ -50,6 +50,7 @@ argParser.add_argument("-c", "--configfile", required=True, help="Jsonfile's pat
argParser.add_argument("-t", "--templateFile", required=True, help="HTML Template's path", dest="templateFile")
argParser.add_argument("-p", "--projectName", required=True, help="Name of the project used to select samples in config file", dest="projectName")
argParser.add_argument("-o", "--output", required=True, help="Path of the output file", dest="output")
# Report part to render. The option is optional so that the "main" default can
# actually apply: a required option never falls back to its default, and callers
# that do not pass -m would otherwise abort at argument parsing.
argParser.add_argument("-m", "--multipart", required=False, default="main", help="Part of the multipart report", dest="multipart")
def getGenomeForProject(project):
for s in config["samples"]:
......@@ -186,7 +187,7 @@ corrFile = os.path.join(sys.path[0],"DE","corresIDorg.txt")
# Selection of versus conditions
conditionVS = list()
if(project in config["comparisons"]):
conditionVS.extend(config["comparisons"][project]["comps"])
conditionVS.extend([c for c in config["comparisons"][project]["comps"] if c.get("multipart", "main") == args.multipart])
templateVars["conditionVS"] = conditionVS
templateVars["deseqres"] = dict()
......
......@@ -179,10 +179,23 @@ if (args.comparisons is not None):
d["comparisons"][project]["performComps"] = None
d["comparisons"][project]["comps"] = list()
for cond1,cond2 in csv.reader(args.comparisons, delimiter='\t'):
if(cond1!=cond2):
for row in csv.reader(args.comparisons, delimiter='\t'):
multipart = None
if len(row)==2:
project_comp = None
cond1, cond2 = row
else:
project_comp, cond1, cond2 = row
v = project_comp.rsplit("::", 1)
project_comp = v[0]
multipart = v[1] if len(v) > 1 else None
if((project_comp or project_comp == project) and cond1!=cond2):
if(d["comparisons"][project]["performComps"]!=False):
d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)]))
if multipart:
d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2),("multipart",multipart)]))
else:
d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)]))
d["comparisons"][project]["performComps"] = True
else:
eprint("ERROR make_multiplates_config.py: ","Invalid comparisons file. Both comparisons and first part secondary analysis defined.\nPlease choose if you want to perform comparisons (different conditions specified) or if you only want to perform first part of secondary analysis (identical conditions specified)")
......
......@@ -177,6 +177,9 @@ for sample in d["samples"]:
d["comparisons"] = dict()
if (args.conditions is not None):
for project,cond1,cond2 in csv.reader(args.conditions, delimiter='\t'):
v = project.rsplit("::", 1)
project = v[0]
multipart = v[1] if len(v) > 1 else None
if(not project in d["comparisons"]):
d["comparisons"][project] = OrderedDict()
d["comparisons"][project]["species"] = getGenomeForProject(project)
......@@ -191,7 +194,10 @@ if (args.conditions is not None):
if(cond1!=cond2):
if(d["comparisons"][project]["performComps"]!=False):
d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)]))
if multipart:
d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2),("multipart",multipart)]))
else:
d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)]))
d["comparisons"][project]["performComps"] = True
else:
eprint("ERROR make_srp_config.py: ","Invalid condition file. In project: \""+project+"\" both comparisons and first part secondary analysis defined.\nPlease choose if you want to perform comparisons (different conditions specified) or if you only want to perform first part of secondary analysis (identical conditions specified)")
......
......@@ -15,6 +15,7 @@ SECONDARY_ANALYSIS_PROJECTS = {c for c in config["comparisons"] if config["compa
NO_COMPARISONS_PROJECTS = {c for c in config["comparisons"] if not config["comparisons"][c]["performComps"]}
DATE = str(datetime.date.today())
TIME = str(datetime.datetime.now().strftime("%H:%M:%S"))
DEFAULT_REPORT_PART="main"
# final file outputs suffixes for primary analysis
finalSuffixes = ["log.dat","refseq.total.dat","refseq.umi.dat","spike.total.dat","spike.umi.dat","unknown_list","well_summary.dat"]
......@@ -77,11 +78,12 @@ def getAllFilesForReport(wildcards):
files["deseqTables"] = list()
files["annotation"] = list()
for comp in config["comparisons"][wildcards.project]["comps"]:
files["deseqTables"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringDB-genes.txt"))
if comp.get("multipart", DEFAULT_REPORT_PART) == wildcards.multipart:
files["deseqTables"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv"))
files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringDB-genes.txt"))
files["index"] = os.path.join(wfbasedir,"TEMPLATE","index_SA.html")
return files
......@@ -97,7 +99,17 @@ def getTargetFiles():
for s in config["samples"]:
if(s["project"]==p):
targets.append(os.path.join(MAINDIR,p,config["align_folder"],s["name"]+".bai"))
targets.append(os.path.join(MAINDIR,p,"report.html"))
# Collect the report parts requested by this project's comparisons. Comparisons
# without a "multipart" key belong to DEFAULT_REPORT_PART.
multiparts = set()
if (p in config["comparisons"]) and config["comparisons"][p]["performComps"]:
    for comps in config["comparisons"][p]["comps"]:
        multiparts.add(comps.get("multipart", DEFAULT_REPORT_PART))
# The default part is published as report.html, every other part as
# report-<part>.html; projects without comparisons still get report.html.
if multiparts:
    targets.extend([os.path.join(MAINDIR, p, "report-{}.html".format(mp)) if mp != DEFAULT_REPORT_PART else os.path.join(MAINDIR, p, "report.html") for mp in multiparts])
else:
    targets.append(os.path.join(MAINDIR, p, "report.html"))
targets.append(os.path.join(MAINDIR,p,"INPUT_FILES","samplesheet_used.tsv"))
# Targets for projects with secondary analysis
......
......@@ -10,6 +10,7 @@ wfbasedir = workflow.basedir
MAINDIR = config["maindir"]
DATE = str(datetime.date.today())
TIME = str(datetime.datetime.now().strftime("%H:%M:%S"))
DEFAULT_REPORT_PART="main"
# final file outputs suffixes for primary analysis
finalSuffixes = ["log.dat","refseq.total.dat","refseq.umi.dat","spike.total.dat","spike.umi.dat","well_summary.dat"]
......@@ -53,10 +54,11 @@ def getAllFilesForReport(wildcards):
files["deseqTables"] = list()
files["annotation"] = list()
for comp in config["comparisons"][config["analysis_name"]]["comps"]:
files["deseqTables"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv"))
files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt"))
files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt"))
files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv"))
if comp.get("multipart", DEFAULT_REPORT_PART) == wildcards.multipart:
files["deseqTables"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv"))
files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt"))
files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt"))
files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv"))
files["index"] = os.path.join(wfbasedir,"TEMPLATE","index_SA.html")
return files
......@@ -69,9 +71,16 @@ def getTargetFiles():
targets.append(MAINDIR+"/"+config["analysis_name"]+"/"+config["multiqc_folder"]+"/multiqc_report.html")
targets.extend(expand(os.path.join(MAINDIR,config["analysis_name"],config["expression_folder"],config["analysis_name"]+".{exp}.well_summary.pdf"),exp=["unq","all"]))
targets.append(MAINDIR+"/config_used_in_analysis.json")
multiparts = set()
if(config["analysis_name"] in config["comparisons"]):
targets.append(MAINDIR+"/"+config["analysis_name"]+"/"+config["de_folder"]+"/exprDatUPM.tsv")
targets.append(MAINDIR+"/"+config["analysis_name"]+"/report.html")
for comps in config["comparisons"][config["analysis_name"]]["comps"]:
multiparts.add(comps.get("multipart", DEFAULT_REPORT_PART))
# The default part is published as report.html, every other part as
# report-<part>.html; with no parts collected, only report.html is requested.
if multiparts:
    targets.extend([os.path.join(MAINDIR, config["analysis_name"], "report-{}.html".format(mp)) if mp != DEFAULT_REPORT_PART else os.path.join(MAINDIR, config["analysis_name"], "report.html") for mp in multiparts])
else:
    # "report.html" has no placeholder, so the former .format() call was a no-op.
    targets.append(os.path.join(MAINDIR, config["analysis_name"], "report.html"))
return targets
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment