Commit df6eca84 authored by Eric CHARPENTIER's avatar Eric CHARPENTIER 🐍
Browse files

limited the number of threads in split_fastq to avoid running out of memory and cores

parent 42d67d02
......@@ -88,7 +88,7 @@ def split_reads(fastq_pair, count):
write_fastq(os.path.join(main_path, sample["project"], config["fastq_folder"]), sample["well"], sample["name"], sample["index"], count, sample["buf"])
# Concatenate multiple fastq files corresponding to sample
def concatenate_fastq():
def concatenate_fastq(maxCores):
index2fastqs = dict()
PROJECTS = {s["project"] for s in config["samples"]}
for project in PROJECTS:
......@@ -111,15 +111,12 @@ def concatenate_fastq():
index2fastqs[os.path.join(main_path, filename)].append(os.path.join(main_path, f))
jobs = list()
poolConcat = multiprocessing.Pool(processes=maxCores)
for f in index2fastqs:
jobs.append(multiprocessing.Process(target=concat_and_remove, args=(index2fastqs[f], f)))
poolConcat.apply_async(concat_and_remove, args=(index2fastqs[f], f))
for j in jobs:
j.start()
for j in jobs:
j.join()
poolConcat.close()
poolConcat.join()
# Remove multiple fastq files of same sample after concatenation
def concat_and_remove(flist, fout):
......@@ -142,15 +139,21 @@ config = read_config_file(config_file)
main_path = config["maindir"]
count = 0
jobs = list()
maxCores = multiprocessing.cpu_count()
if(maxCores>20):
maxCores = 20
else:
maxCores = maxCores-1
print("INFO split_fastq.py: Using "+str(maxCores)+" threads to demultiplex data.")
pool = multiprocessing.Pool(processes=maxCores)
for fastq_pair in config["fastq_pairs"]:
count += 1
jobs.append(multiprocessing.Process(target=split_reads, args=(fastq_pair, count)))
for j in jobs:
j.start()
pool.apply_async(split_reads, args=(fastq_pair, count))
for j in jobs:
j.join()
pool.close()
pool.join()
concatenate_fastq()
concatenate_fastq(maxCores)
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment