Commit 662fcd32 authored by Eric CHARPENTIER 🐍
Browse files

added chrM for refseq ref when no chr file is available by downloading the...

Added chrM for the refseq reference when no chrM file is available, by downloading the full genome and extracting chrM from it.
parent ba6cd61e
......@@ -52,8 +52,7 @@ def getEnsemblFolderForSpecies(species):
"human":"homo_sapiens",
"rat": "rattus_norvegicus",
"mouse": "mus_musculus",
"cow":"bos_taurus"
"cow": "bos_taurus"
}
if(not species in switcher):
......@@ -121,6 +120,8 @@ def getFastaAnnotPath(ftp, provenance, species):
# Check if chrM is available
if(basedir+"/chromosomes" in ftp.nlst(basedir) and basedir+"/chromosomes/chrM.fa.gz" in ftp.nlst(basedir+"/chromosomes")):
fastaAnnot["chrM"] = basedir+"/chromosomes/chrM.fa.gz"
else:
fastaAnnot["chrM"] = basedir+"/bigZips/"+getLastBuildForSpecies(species)+".fa.gz"
elif(provenance == "gencode"):
dir_list = []
dir_list = ftp.nlst(basedir)
......@@ -163,13 +164,33 @@ def downloadAndProcessFiles(ftp, provenance, files2download, outFolder, refName)
annot = zippy.readlines()
chrM = None
if("chrM" in files2download):
if("chrM" in files2download and files2download["chrM"].endswith("chromosomes/chrM.fa.gz")):
print("INFO make_ref.py: DOWNLOADING mitochondrial chromosome fasta file: "+getServer(provenance)+"/"+files2download["chrM"])
sio = io.BytesIO()
resp = ftp.retrbinary('RETR ' + files2download["chrM"], callback=sio.write)
sio.seek(0) # Go back to the start
zippy = gzip.GzipFile(fileobj=sio)
chrM = zippy.readlines()
elif("chrM" in files2download and "bigZips/" in files2download["chrM"]):
#tempGenome = os.path.join(outFolder,refName+"_genomeTemp.fa")
print("INFO make_ref.py: No chrM file to download. DOWNLOADING full genome in order to extract chrM: "+getServer(provenance)+"/"+files2download["chrM"])
sio = io.BytesIO()
resp = ftp.retrbinary('RETR ' + files2download["chrM"], callback=sio.write)
sio.seek(0) # Go back to the start
zippy = gzip.GzipFile(fileobj=sio,mode='r')
# Get chrM sequence in genome
print("INFO make_ref.py: PROCESSING full genome to extract chrM")
chrM = list()
found = False
for line in zippy.readlines():
if (found and not line.startswith(b">")):
chrM.append(line)
if (found and line.startswith(b">")):
break
if (line.startswith(b">chrM")):
found = True
chrM.append(line)
else:
print("INFO make_ref.py: No mitochondrial chromosome available for reference")
......@@ -309,12 +330,12 @@ if(provenance == "gencode" and (not species in ["human","mouse"])):
try:
print("INFO make_ref.py: Connecting to FTP server "+getServer(provenance))
if(args.proxy):
print("Connecting to proxy")
print("INFO make_ref.py: Connecting to proxy")
ftp = ftplib.FTP("ftproxy.univ-nantes.fr")
print("Rebound to "+getServer(provenance))
print("INFO make_ref.py: Rebound to "+getServer(provenance))
ftp.login("anonymous@"+getServer(provenance))
else:
print("Go to "+getServer(provenance))
print("INFO make_ref.py: Go to "+getServer(provenance))
ftp = ftplib.FTP(getServer(provenance),"anonymous","nobody@nobody.fr")
print("INFO make_ref.py: Connected... searching for files.")
files2download = getFastaAnnotPath(ftp, provenance,species)
......
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment