From d38d2b1e294236c092a3f383590308158226243a Mon Sep 17 00:00:00 2001 From: Philippe Bordron Date: Thu, 24 Sep 2020 16:52:27 +0200 Subject: [PATCH 1/4] Add rules for split report --- SCRIPTS/common.smk | 4 ++-- SCRIPTS/make_html.py | 3 ++- Snakefile | 24 ++++++++++++++++++------ multiplates.smk | 19 ++++++++++++++----- 4 files changed, 36 insertions(+), 14 deletions(-) diff --git a/SCRIPTS/common.smk b/SCRIPTS/common.smk index d8d65fa..0591427 100644 --- a/SCRIPTS/common.smk +++ b/SCRIPTS/common.smk @@ -12,10 +12,10 @@ rule buildReport: input: unpack(getAllFilesForReport) output: - report = MAINDIR+"/{project}/report.html", + report = MAINDIR+"/{project}/report-{multipart}.html", shell: """ - python {wfbasedir}/SCRIPTS/make_html.py -c {config[conf]} -t {input.index} -p {wildcards.project} -o {output.report} + python {wfbasedir}/SCRIPTS/make_html.py -c {config[conf]} -t {input.index} -p {wildcards.project} -o {output.report} -m {wildcards.multipart} """ rule copyTemplateFolder: diff --git a/SCRIPTS/make_html.py b/SCRIPTS/make_html.py index f75ce55..4fed4c6 100644 --- a/SCRIPTS/make_html.py +++ b/SCRIPTS/make_html.py @@ -50,6 +50,7 @@ argParser.add_argument("-c", "--configfile", required=True, help="Jsonfile's pat argParser.add_argument("-t", "--templateFile", required=True, help="HTML Template's path", dest="templateFile") argParser.add_argument("-p", "--projectName", required=True, help="Name of the project used to select samples in config file", dest="projectName") argParser.add_argument("-o", "--output", required=True, help="Path of the output file", dest="output") +argParser.add_argument("-m", "--multipart", required=True, default="main", help="Part of the multipart report", dest="multipart") def getGenomeForProject(project): for s in config["samples"]: @@ -186,7 +187,7 @@ corrFile = os.path.join(sys.path[0],"DE","corresIDorg.txt") # Selection of versus conditions conditionVS = list() if(project in config["comparisons"]): - 
conditionVS.extend(config["comparisons"][project]["comps"]) + conditionVS.extend([c for c in config["comparisons"][project]["comps"] if c.get("multipart", "main") == args.multipart]) templateVars["conditionVS"] = conditionVS templateVars["deseqres"] = dict() diff --git a/Snakefile b/Snakefile index 275fcf6..f2fc0f3 100644 --- a/Snakefile +++ b/Snakefile @@ -15,6 +15,7 @@ SECONDARY_ANALYSIS_PROJECTS = {c for c in config["comparisons"] if config["compa NO_COMPARISONS_PROJECTS = {c for c in config["comparisons"] if not config["comparisons"][c]["performComps"]} DATE = str(datetime.date.today()) TIME = str(datetime.datetime.now().strftime("%H:%M:%S")) +DEFAULT_REPORT_PART="main" # final file outputs suffixes for primary analysis finalSuffixes = ["log.dat","refseq.total.dat","refseq.umi.dat","spike.total.dat","spike.umi.dat","unknown_list","well_summary.dat"] @@ -77,11 +78,12 @@ def getAllFilesForReport(wildcards): files["deseqTables"] = list() files["annotation"] = list() for comp in config["comparisons"][wildcards.project]["comps"]: - files["deseqTables"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv")) - files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt")) - files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt")) - files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv")) - files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringDB-genes.txt")) + if comp.get("multipart", DEFAULT_REPORT_PART) == wildcards.multipart: + 
files["deseqTables"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv")) + files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt")) + files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt")) + files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv")) + files["annotation"].append(os.path.join(MAINDIR,wildcards.project,config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringDB-genes.txt")) files["index"] = os.path.join(wfbasedir,"TEMPLATE","index_SA.html") return files @@ -97,7 +99,17 @@ def getTargetFiles(): for s in config["samples"]: if(s["project"]==p): targets.append(os.path.join(MAINDIR,p,config["align_folder"],s["name"]+".bai")) - targets.append(os.path.join(MAINDIR,p,"report.html")) + + multiparts = set() + for c in config["comparisons"]: + if (p in config["comparisons"]) and config["comparisons"][p]["performComps"]: + for comps in config["comparisons"][p]["comps"]: + multiparts.add(comps.get("multipart", DEFAULT_REPORT_PART)) + if multiparts: + targets.extend([os.path.join(MAINDIR,p,"report-{}.html".format(mp)) for mp in multiparts]) + else: + targets.append(os.path.join(MAINDIR,p,"report-{}.html".format(DEFAULT_REPORT_PART))) + targets.append(os.path.join(MAINDIR,p,"INPUT_FILES","samplesheet_used.tsv")) # Targets for projects with secondary analysis diff --git a/multiplates.smk b/multiplates.smk index 33b1849..2e92880 100644 --- a/multiplates.smk +++ b/multiplates.smk @@ -10,6 +10,7 @@ wfbasedir = workflow.basedir MAINDIR = config["maindir"] DATE = str(datetime.date.today()) TIME = str(datetime.datetime.now().strftime("%H:%M:%S")) +DEFAULT_REPORT_PART="main" # final 
file outputs suffixes for primary analysis finalSuffixes = ["log.dat","refseq.total.dat","refseq.umi.dat","spike.total.dat","spike.umi.dat","well_summary.dat"] @@ -53,10 +54,11 @@ def getAllFilesForReport(wildcards): files["deseqTables"] = list() files["annotation"] = list() for comp in config["comparisons"][config["analysis_name"]]["comps"]: - files["deseqTables"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv")) - files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt")) - files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt")) - files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv")) + if comp.get("multipart", DEFAULT_REPORT_PART) == wildcards.multipart: + files["deseqTables"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"DEseqResFiltered.tsv")) + files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseGo.txt")) + files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"gseKegg.txt")) + files["annotation"].append(os.path.join(MAINDIR,config["analysis_name"],config["de_folder"],comp["condition1"]+"__vs__"+comp["condition2"],"stringFunctionalEnrichment.tsv")) files["index"] = os.path.join(wfbasedir,"TEMPLATE","index_SA.html") return files @@ -69,9 +71,16 @@ def getTargetFiles(): targets.append(MAINDIR+"/"+config["analysis_name"]+"/"+config["multiqc_folder"]+"/multiqc_report.html") 
targets.extend(expand(os.path.join(MAINDIR,config["analysis_name"],config["expression_folder"],config["analysis_name"]+".{exp}.well_summary.pdf"),exp=["unq","all"])) targets.append(MAINDIR+"/config_used_in_analysis.json") + + multiparts = set() if(config["analysis_name"] in config["comparisons"]): targets.append(MAINDIR+"/"+config["analysis_name"]+"/"+config["de_folder"]+"/exprDatUPM.tsv") - targets.append(MAINDIR+"/"+config["analysis_name"]+"/report.html") + for comps in config["comparisons"][config["analysis_name"]]["comps"]: + multiparts.add(comps.get("multipart", DEFAULT_REPORT_PART)) + if multiparts: + targets.extend([os.path.join(MAINDIR,config["analysis_name"],"report-{}.html".format(mp)) for mp in multiparts]) + else: + targets.append(os.path.join(MAINDIR,config["analysis_name"],"report-{}.html".format(DEFAULT_REPORT_PART))) return targets -- GitLab From 2d1cce24c286e1506cdcc20bd4f5386d222137d1 Mon Sep 17 00:00:00 2001 From: Philippe Bordron Date: Thu, 24 Sep 2020 17:17:18 +0200 Subject: [PATCH 2/4] Describe in which part of the report each comparison will be included --- SCRIPTS/make_multiplates_config.py | 16 ++++++++++++++-- SCRIPTS/make_srp_config.py | 8 +++++++- 2 files changed, 21 insertions(+), 3 deletions(-) diff --git a/SCRIPTS/make_multiplates_config.py b/SCRIPTS/make_multiplates_config.py index dadee7e..64965f8 100644 --- a/SCRIPTS/make_multiplates_config.py +++ b/SCRIPTS/make_multiplates_config.py @@ -179,10 +179,22 @@ if (args.comparisons is not None): d["comparisons"][project]["performComps"] = None d["comparisons"][project]["comps"] = list() - for cond1,cond2 in csv.reader(args.comparisons, delimiter='\t'): + + for row in csv.reader(args.comparisons, delimiter='\t'): + multipart = None + if len(row)==2: + cond1, cond2 = row + else: + project, cond1, cond2 = row + v = project.rsplit("::", 1) + project = v[0] + multipart = v[1] if len(v) > 1 else None if(cond1!=cond2): if(d["comparisons"][project]["performComps"]!=False): - 
d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)])) + if multipart: + d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2),("multipart",multipart)])) + else: + d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)])) d["comparisons"][project]["performComps"] = True else: eprint("ERROR make_multiplates_config.py: ","Invalid comparisons file. Both comparisons and first part secondary analysis defined.\nPlease choose if you want to perform comparisons (different conditions specified) or if you only want to perform first part of secondary analysis (identical conditions specified)") diff --git a/SCRIPTS/make_srp_config.py b/SCRIPTS/make_srp_config.py index 6b88c4d..a87e071 100644 --- a/SCRIPTS/make_srp_config.py +++ b/SCRIPTS/make_srp_config.py @@ -177,6 +177,9 @@ for sample in d["samples"]: d["comparisons"] = dict() if (args.conditions is not None): for project,cond1,cond2 in csv.reader(args.conditions, delimiter='\t'): + v = project.rsplit("::", 1) + project = v[0] + multipart = v[1] if len(v) > 1 else None if(not project in d["comparisons"]): d["comparisons"][project] = OrderedDict() d["comparisons"][project]["species"] = getGenomeForProject(project) @@ -191,7 +194,10 @@ if (args.conditions is not None): if(cond1!=cond2): if(d["comparisons"][project]["performComps"]!=False): - d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)])) + if multipart: + d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2),("multipart",multipart)])) + else: + d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2)])) d["comparisons"][project]["performComps"] = True else: eprint("ERROR make_srp_config.py: ","Invalid condition file. 
In project: \""+project+"\" both comparisons and first part secondary analysis defined.\nPlease choose if you want to perform comparisons (different conditions specified) or if you only want to perform first part of secondary analysis (identical conditions specified)") -- GitLab From 3ab721a0dc6b2131f42c0335b0212598be245f64 Mon Sep 17 00:00:00 2001 From: Philippe Bordron Date: Thu, 24 Sep 2020 17:52:45 +0200 Subject: [PATCH 3/4] Fix multiplate comparison --- SCRIPTS/make_multiplates_config.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/SCRIPTS/make_multiplates_config.py b/SCRIPTS/make_multiplates_config.py index 64965f8..2f2a51a 100644 --- a/SCRIPTS/make_multiplates_config.py +++ b/SCRIPTS/make_multiplates_config.py @@ -183,13 +183,14 @@ if (args.comparisons is not None): for row in csv.reader(args.comparisons, delimiter='\t'): multipart = None if len(row)==2: + project_comp = None cond1, cond2 = row else: - project, cond1, cond2 = row - v = project.rsplit("::", 1) - project = v[0] + project_comp, cond1, cond2 = row + v = project_comp.rsplit("::", 1) + project_comp = v[0] multipart = v[1] if len(v) > 1 else None - if(cond1!=cond2): + if((project_comp or project_comp == project) and cond1!=cond2): if(d["comparisons"][project]["performComps"]!=False): if multipart: d["comparisons"][project]["comps"].append(OrderedDict([("condition1",cond1),("condition2",cond2),("multipart",multipart)])) -- GitLab From dded833adf7b0a65fcb7160419aa84f7cef15f62 Mon Sep 17 00:00:00 2001 From: Philippe Bordron Date: Fri, 25 Sep 2020 11:19:59 +0200 Subject: [PATCH 4/4] Add rule to rename report-main.html to report.html --- SCRIPTS/common.smk | 12 +++++++++++- Snakefile | 4 ++-- multiplates.smk | 4 ++-- 3 files changed, 15 insertions(+), 5 deletions(-) diff --git a/SCRIPTS/common.smk b/SCRIPTS/common.smk index 0591427..e21a4b5 100644 --- a/SCRIPTS/common.smk +++ b/SCRIPTS/common.smk @@ -8,7 +8,17 @@ rule copyConfig: cp {input} {output} """ -rule buildReport: 
+rule renameMainReport: + input: + report = MAINDIR+"/{project}/report-" + DEFAULT_REPORT_PART + ".html" + output: + report = MAINDIR+"/{project}/report.html" + shell: + """ + mv {input.report} {output.report} + """ + +rule buildReportPart: input: unpack(getAllFilesForReport) output: diff --git a/Snakefile b/Snakefile index f2fc0f3..d278e3e 100644 --- a/Snakefile +++ b/Snakefile @@ -106,9 +106,9 @@ def getTargetFiles(): for comps in config["comparisons"][p]["comps"]: multiparts.add(comps.get("multipart", DEFAULT_REPORT_PART)) if multiparts: - targets.extend([os.path.join(MAINDIR,p,"report-{}.html".format(mp)) for mp in multiparts]) + targets.extend([os.path.join(MAINDIR, p, "report-{}.html".format(mp)) if mp != DEFAULT_REPORT_PART else os.path.join(MAINDIR, p, "report.html") for mp in multiparts]) else: - targets.append(os.path.join(MAINDIR,p,"report-{}.html".format(DEFAULT_REPORT_PART))) + targets.append(os.path.join(MAINDIR, p, "report.html")) targets.append(os.path.join(MAINDIR,p,"INPUT_FILES","samplesheet_used.tsv")) diff --git a/multiplates.smk b/multiplates.smk index 2e92880..13fa330 100644 --- a/multiplates.smk +++ b/multiplates.smk @@ -78,9 +78,9 @@ def getTargetFiles(): for comps in config["comparisons"][config["analysis_name"]]["comps"]: multiparts.add(comps.get("multipart", DEFAULT_REPORT_PART)) if multiparts: - targets.extend([os.path.join(MAINDIR,config["analysis_name"],"report-{}.html".format(mp)) for mp in multiparts]) + targets.extend([os.path.join(MAINDIR, config["analysis_name"], "report-{}.html".format(mp)) if mp != DEFAULT_REPORT_PART else os.path.join(MAINDIR, config["analysis_name"], "report.html") for mp in multiparts]) else: - targets.append(os.path.join(MAINDIR,config["analysis_name"],"report-{}.html".format(DEFAULT_REPORT_PART))) + targets.append(os.path.join(MAINDIR, config["analysis_name"], "report.html".format(DEFAULT_REPORT_PART))) return targets -- GitLab