Commit 74c7f5ec authored by Eric CHARPENTIER's avatar Eric CHARPENTIER 🐍
Browse files

added possibility to select samples and override conditions by specifying a...

added possibility to select samples and override conditions by specifying a samplesheet in the multiplates analysis
parent 2b828850
......@@ -44,6 +44,24 @@ example:
> - You can specify the same condition in columns 2 and 3 to perform only the first part of the secondary analysis (all but comparisons). If you do so, make sure that there is only one line in your file.
> - The first condition column is the test and the second is the control
### Select samples and override sample conditions
If you need to select samples or override some sample conditions, you can do so by specifying a new samplesheet with the `-s` option.
The samplesheet needs to be a tab-separated file with two columns:
- first column: samples that you want to select
- second column: new condition of the sample
| | |
| :--- | :--- |
| sample1 | conditionX |
| sample2 | conditionX |
| sample3 | conditionY |
| sample4 | conditionY |
> **Note:**
> - The conditions specified here must match the condition names used in the comparisons file described above.
## Running the pipeline
......@@ -41,6 +41,7 @@ def getGenomeForProject():
## Main
# Command-line interface. Options declared in this section:
#   -s/--samplesheet    optional; argparse opens the file in text mode (FileType('rt'))
#   -p/--projectfolders required; one or more folders (nargs='+')
#   -w/--workdir        defaults to the current directory ('.')
#   -c/--comparisons    declared with required=False; opened in text mode
# NOTE(review): the -c help text ends with "(REQUIRED)" although the option is
# declared with required=False -- confirm which of the two is intended.
parser = argparse.ArgumentParser()
parser.add_argument('-s', '--samplesheet', metavar='FILE', help='Tab separated file with column one being the name of the samples to keep and column two the new condition the sample belongs to', type=argparse.FileType('rt'), required=False, dest='samplesheet')
parser.add_argument('-p', '--projectfolders', metavar='DIR', help='List of sub-project folders generated by srp-pipeline. (REQUIRED)', required=True, nargs='+', dest='projectFolders')
parser.add_argument('-w', '--workdir', metavar='DIR', help='Analysis working directory. Default: current directory', default='.', dest='workdir')
parser.add_argument('-c', '--comparisons', metavar='FILE', help='Tab delimited file with no headers indicating which conditions to compare during differential expression analysis. Columns must be "condition1 condition2". (REQUIRED)', type=argparse.FileType('rt'), required=False, dest='comparisons')
......@@ -93,14 +94,28 @@ for f in folders:
eprint("ERROR Cannot read project folder: "+os.path.abspath(f))
# List samples to keep with their new condition if samplesheet supplied
# newSamples maps a sample name to {"name": ..., "condition": ...} as read from
# the samplesheet; it stays empty when no samplesheet was given.
newSamples = dict()
if (args.samplesheet):
# samplesheet line counter, starting at 1; used only in the error messages below
count = 1
# every row is unpacked into exactly two tab-separated fields: sample name, new condition
for name,condition in csv.reader(args.samplesheet, delimiter='\t'):
if(not name in newSamples):
newSamples[name] = {"name":name,"condition":condition}
# NOTE(review): the branch structure (else / try-except) around the two error
# messages is not visible in this view; presumably this one is reported when a
# sample name is already present in newSamples -- confirm against the full file.
eprint("ERROR Sample \""+name+"\" defined twice in samplesheet at line "+str(count))
count += 1
# NOTE(review): presumably reported when a row cannot be unpacked into the two
# expected fields -- confirm against the full file.
eprint("ERROR Invalid entry in samplesheet at line "+str(count))
# List samples and directories from the different project folders
# both result lists start empty
d["projects"] = list()
d["samples"] = list()
# counter used to build run identifiers of the form "run<N>"
runCount = 0
for f in folders:
# generate a run identifier
# NOTE(review): the same run-identifier statements appear again further down the
# loop body in this view; presumably one occurrence is the removed side of the
# diff and the other the added side -- confirm against the full file.
runCount += 1
runID = "run"+str(runCount)
# create project object
projectObject = OrderedDict()
# get project name and path
......@@ -118,11 +133,22 @@ for f in folders:
# sub-folder paths of the project, built from the folder names stored in config
projectObject["multiqcPath"] = os.path.join(f,config["multiqc_folder"])
projectObject["alignementPath"] = os.path.join(f,config["align_folder"])
projectObject["expressionPath"] = os.path.join(f,config["expression_folder"])
# NOTE(review): this assignment is repeated below, after the run identifier is
# generated; presumably one occurrence is the removed side of the diff and the
# other the added side -- confirm against the full file.
projectObject["runID"] = runID
# read samples from the config file for project
samples = [s for s in config["samples"] if s["project"]==projectObject["Name"]]
# check if some of the samples are to be kept according to new samplesheet
if (args.samplesheet):
# keep only the samples of this project whose name is listed in newSamples
samples = [s for s in config["samples"] if (s["project"]==projectObject["Name"] and s["name"] in newSamples)]
# nothing left for this project: move on to the next folder
if(not samples): continue
# override each kept sample's condition with the one from the samplesheet
for s in samples:
s["condition"] = newSamples[s["name"]]["condition"]
# generate a run identifier
# placed after the "continue" above, so a skipped project does not reach this increment
runCount += 1
runID = "run"+str(runCount)
projectObject["runID"] = runID
# Add runID for batch correction in samples
for s in samples:
s["run"] = runID
......@@ -179,7 +205,6 @@ if (args.comparisons is not None):
d["comparisons"][project]["performComps"] = None
d["comparisons"][project]["comps"] = list()
for row in csv.reader(args.comparisons, delimiter='\t'):
multipart = None
if len(row)==2:
......@@ -190,7 +215,7 @@ if (args.comparisons is not None):
v = project_comp.rsplit("::", 1)
project_comp = v[0]
multipart = v[1] if len(v) > 1 else None
if((project_comp or project_comp == project) and cond1!=cond2):
if((not project_comp or project_comp == project) and cond1!=cond2):
if multipart:
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment