Commit 2b2e616d authored by E144069X

Added function to format the marseille dataset

parent 1aee41ae
......@@ -14,6 +14,7 @@ import warnings
from shutil import copyfile
# Maps annotation label names to integer indices.
labelDict = {"tPB2":0,"tPNa":1,"tPNf":2,"t2":3,"t3":4,"t4":5,"t5":6,"t6":7,"t7":8,"t8":9,"t9+":10,"tM":11,"tSB":12,"tB":13,"tEB":14,"tHB":15}
# Label names for the Marseille dataset; formatMarseille selects the dataframe columns named by these keys.
# NOTE(review): "tM" and "tSB" both map to 12 here, whereas labelDict gives them 11 and 12 - confirm this is intended.
mars_labelDict = {"t2":3,"t3":4,"t4":5,"t5":6,"t6":7,"t7":8,"t8":9,"t10":10,"tSC":11,"tM":12,"tSB":12}
def getNoAnnotVideos():
    """Load ``../data/noAnnot.csv`` and return its contents as an array of strings.

    The file is parsed as comma-separated text; the array produced by
    ``np.genfromtxt`` is returned unchanged.
    """
    noAnnotPath = "../data/noAnnot.csv"
    return np.genfromtxt(noAnnotPath, dtype=str, delimiter=",")
......@@ -476,22 +477,67 @@ def formatMarseille(pathToZip):
if not os.path.exists("../data/{}/".format(dataset)):
subprocess.call("unzip {} -d ../data/".format(pathToZip),shell=True)
subprocess.call("mv {} {}".format(os.path.splitext(pathToZip)[0],"../data/{}/".format(dataset)),shell=True)
print("Extracted")
if not os.path.exists("../data/{}/annotations/".format(dataset)):
os.path.makedirs("../data/{}/annotations/".format(dataset))
os.makedirs("../data/{}/annotations/".format(dataset))
#Convert the xls files into csv files if it is not already done
if (len(glob.glob("../data/{}/*.csv".format(dataset))) - 1) < len(glob.glob("../data/{}/*.xls*".format(dataset))):
subprocess.call("libreoffice --headless --convert-to csv --outdir ../data/{}/ ../data/{}/*.xls*".format(dataset,dataset),shell=True)
df = pd.read_csv("../data/{}/MRSGrossessesMirixlsx (1).xlsx".format(dataset),dtype=str,encoding = "ISO-8859-1",sep=",")
idColName = "PatientName"
df = pd.read_csv("../data/{}/MRSGrossessesMirixlsx (1).csv".format(dataset),dtype=str,sep=",")
#names = df["PatientName"].apply(preproc)
#df = df[["Well","Well Description"]+list(labelDict.keys())]
#df["Name"] = names
def preprocStart(x):
if x.find("FIV") != -1:
x = "FIV"
else:
x = "ICSI"
return x
def preprocEnd(x):
return x[-2:]
df["VideoId"] = df["Patient"].apply(preprocStart)+"-MTL-STJO-"+df["Patient"].apply(preprocEnd)+".avi"
names = df[idColName].apply(preproc)
df = df[["Well","Well Description"]+list(labelDict.keys())]
df["Name"] = names
if os.path.exists("../data/marseille/images_per_hour.csv"):
images_per_hour_csv = np.genfromtxt("../data/marseille/images_per_hour.csv",delimiter=",",dtype=str)
images_per_hour = {images_per_hour_csv[i,0]:images_per_hour_csv[i,3] for i in range(len(images_per_hour_csv))}
total_img_nb = {images_per_hour_csv[i,0]:images_per_hour_csv[i,1] for i in range(len(images_per_hour_csv))}
df["Images per hour"] = df["VideoId"].apply(lambda x:images_per_hour[x.replace(".avi","")])
dfDict[csvPath] = df
videoNb = len(glob.glob("../data/{}/*avi".format(dataset)))
for i in range(videoNb):
row = df.iloc[i].to_frame().transpose()
imagesPerHour = float(df["Images per hour"].iloc[i])
vidName = df["VideoId"].iloc[i].replace(".avi","")
totalImgNb = int(total_img_nb[vidName])
row = row[list(mars_labelDict.keys())]
for col in list(row.columns):
row[col] = row[col].apply(lambda x:x.replace(",",".") if type(x) == str else x).astype(float)
#Removes label columns that do not appear in the video (i.e. those with NaN value)
colsToKeep = []
for col in row.columns:
if not np.isnan(row[col].values):
colsToKeep.append(col)
row = row[colsToKeep]
csv = ''
for j in range(1,len(row.columns)):
precedCol = row.columns[j-1]
col = row.columns[j]
csv += "{},{},{}\n".format(precedCol,int(row[precedCol]*imagesPerHour),int(row[col]*imagesPerHour)-1)
csv += "{},{},{}\n".format(col,int(row[col]*imagesPerHour),totalImgNb-1)
with open("../data/marseille/annotations/{}_phases.csv".format(vidName),"w") as text_file:
print(csv,file=text_file)
else:
raise ValueError("Compute the file images_per_hour.csv using the countImages.py script first.")
def main(argv=None):
......@@ -509,6 +555,7 @@ def main(argv=None):
argreader.parser.add_argument('--minimum_phase_nb',type=int,metavar="NB",help='The minimum number of phases a video should have annotated to be taken into account.',default=6)
argreader.parser.add_argument('--format_cub',type=str,metavar="PATH",help='To format the CUB_200_2011 dataset',default="")
argreader.parser.add_argument('--format_marseille',type=str,metavar="PATH",help='To format the Marseille dataset',default="")
#Reading the command-line arguments
argreader.getRemainingArgs()
......@@ -524,6 +571,7 @@ def main(argv=None):
formatDataBig(args.dataset,args.path_to_folder,args.img_for_crop_nb,args.minimum_phase_nb)
elif args.format_cub:
formatCUB(args.format_cub)
elif args.format_marseille:
formatMarseille(args.format_marseille)
if __name__ == "__main__":
main()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment