Commit 714dec79 authored by E144069X

Started function to format marseille data

parent 830bf94c
......@@ -468,7 +468,30 @@ def formatCUB(pathToCubFolder):
if not os.path.exists("../data/CUB_200_2011_{}/{}/{}".format(set,label,imgName)):
copyfile(imgPath, "../data/CUB_200_2011_{}/{}/{}".format(set,label,imgName))
def formatMarseille(pathToZip):
    """Unpack the Marseille archive and start loading its annotation sheet.

    Steps performed, each skipped when its result is already on disk:

    1. Unzip ``pathToZip`` into ``../data/`` and move the folder named like
       the archive (path without its extension) to ``../data/marseille/``.
    2. Create ``../data/marseille/annotations/``.
    3. Convert the ``*.xls*`` files of the dataset folder to csv with
       LibreOffice.
    4. Read the annotation sheet, keep the well and label columns, add a
       ``Name`` column derived from ``PatientName`` and store the result in
       ``dfDict``.

    Args:
        pathToZip: path to the zip archive containing the dataset.

    Returns:
        None.

    NOTE(review): ``preproc``, ``labelDict``, ``dfDict`` and ``csvPath`` are
    not defined inside this function; they must exist at module level for the
    last part to run -- confirm before relying on it.
    """
    dataset = "marseille"
    dataRoot = "../data/{}/".format(dataset)
    annotDir = "../data/{}/annotations/".format(dataset)

    # Unzipping and renaming the folder
    if not os.path.exists(dataRoot):
        # Argument lists instead of a shell string: paths containing spaces
        # or shell metacharacters are passed through untouched.
        subprocess.call(["unzip", pathToZip, "-d", "../data/"])
        # NOTE(review): this assumes the extracted folder ends up next to the
        # archive (i.e. the archive itself sits in ../data/) -- confirm.
        subprocess.call(["mv", os.path.splitext(pathToZip)[0], dataRoot])

    if not os.path.exists(annotDir):
        # makedirs lives in ``os``; ``os.path.makedirs`` does not exist and
        # raised AttributeError.
        os.makedirs(annotDir)

    # Convert the xls files into csv files if it is not already done
    xlsPaths = sorted(glob.glob("../data/{}/*.xls*".format(dataset)))
    csvPaths = glob.glob("../data/{}/*.csv".format(dataset))
    # The "- 1" presumably discounts one csv file that is not the product of
    # a conversion -- TODO confirm which file that is.
    if xlsPaths and (len(csvPaths) - 1) < len(xlsPaths):
        subprocess.call(["libreoffice", "--headless", "--convert-to", "csv",
                         "--outdir", dataRoot] + xlsPaths)

    # NOTE(review): the file read here carries an .xlsx extension although it
    # is parsed as csv; the converted .csv file is probably what is meant --
    # confirm the intended file name.
    df = pd.read_csv("../data/{}/MRSGrossessesMirixlsx (1).xlsx".format(dataset),
                     dtype=str, encoding="ISO-8859-1", sep=",")

    idColName = "PatientName"
    # Computed before the column selection below, which drops PatientName.
    names = df[idColName].apply(preproc)

    df = df[["Well", "Well Description"] + list(labelDict.keys())]
    df["Name"] = names

    dfDict[csvPath] = df
def main(argv=None):
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment