################################################################
# evallg.py
#
# Program that reads in two .lg (CSV) files, computes metrics,
# and returns the result as a (CSV) entry, along with a
# CSV entry (row) for each specific error on standard output.
#
# *If run in 'batch' mode, a CSV file for errors and a separate
# file containing all errors observed will be produced.
#
# Author: R. Zanibbi, June 2012
# Copyright (c) 2012-2014 Richard Zanibbi and Harold Mouchere
################################################################
import sys
import csv
from lg import *
from lgio import *
import SmGrConfMatrix

# for RIT web service :
#INKMLVIEWER = "inkml_viewer/index.xhtml?path=../testdata/&files="
#local :
# Viewer URL prefix passed as the last argument of the confusion matrices'
# toHTML() calls in runBatch().
INKMLVIEWER = "http://www.cs.rit.edu/~rlaz/inkml_viewer/index.xhtml?path=http://www.cs.rit.edu/~rlaz/testdata/&files="
# Occurrence threshold passed to toHTML() and quoted in the HTML report
# ("Only errors with at least N occurrences appear").
MINERRTOSHOW = 3
def runBatch(fileName, defaultFileOrder, confMat, confMatObj):
	"""Compile metrics for pairs of files provided in a CSV file.

	Every non-comment, non-empty row of the CSV file names two .lg files,
	optionally followed by a third column holding the name to show for
	that pair. For each pair, metrics are written to fileName + '.m' and
	differences to fileName + '.diff'. When confMat or confMatObj is set,
	an HTML report is written once to fileName + '.html'.

	Arguments:
	fileName         -- path of the CSV file listing the file pairs.
	defaultFileOrder -- if true, column 1 is the output and column 2 the
	                    ground truth; otherwise the columns are swapped.
	confMat          -- if true, report the substructure confusion matrix.
	confMatObj       -- if true, report the object-level confusion matrix.
	"""
	matrix = SmGrConfMatrix.ConfMatrix() if confMat else None
	# Created whenever requested (not only together with confMat), so that
	# the object-level report below never dereferences None.
	matrixObj = SmGrConfMatrix.ConfMatrixObject() if confMatObj else None

	# Context managers guarantee all three files are closed, even if a
	# comparison raises part-way through the batch.
	with open(fileName) as pairStream, \
			open(fileName + '.m', 'w') as metricStream, \
			open(fileName + '.diff', 'w') as diffStream:
		for row in csv.reader(pairStream):
			# Skip comments and empty lines (including rows whose first
			# cell contains only whitespace).
			if not row:
				continue
			firstCell = row[0].strip()
			if not firstCell or firstCell.startswith("#"):
				continue

			lgfile1 = firstCell
			lgfile2 = row[1].strip()
			if not defaultFileOrder:
				lgfile1, lgfile2 = lgfile2, lgfile1
			print ("Test: "+lgfile1+" vs. "+lgfile2);

			# Optional third column: name to display for this pair.
			toShow = lgfile1
			if len(row) > 2:
				toShow = row[2].strip()

			# Here lg1 is the output, and lg2 the ground truth.
			lg1 = Lg(lgfile1)
			lg2 = Lg(lgfile2)
			out = lg1.compare(lg2)

			metricStream.write('*M,' + lgfile1 + ',' + lgfile2 + '\n')
			writeMetrics(out, metricStream)
			diffStream.write('DIFF,' + lgfile1 + ',' + lgfile2 + '\n')
			writeDiff(out[1], out[3], out[2], diffStream)

			if confMat or confMatObj:
				# First element of each entry of out[1] / out[2]: the
				# nodes and edges reported as errors by compare().
				nodeClassErr = set(n for (n, _, _) in out[1])
				edgeErr = set(e for (e, _, _) in out[2])

				# NOTE(review): neither matrix nor matrixObj is updated
				# anywhere in this function and toShow is never used;
				# presumably a call recording (gt, er, toShow) in the
				# matrices was lost from these loops -- confirm against
				# SmGrConfMatrix before relying on the HTML report.
				for (gt, er) in lg1.compareSubStruct(lg2, [2, 3]):
					er.rednodes = set(er.nodes) & nodeClassErr
					er.rededges = set(er.edges) & edgeErr
				if confMatObj:
					for (obj, gt, er) in lg1.compareSegmentsStruct(lg2, [2]):
						er.rednodes = set(er.nodes) & nodeClassErr
						er.rededges = set(er.edges) & edgeErr

	# Write the HTML report once, after all pairs have been processed, and
	# only when a confusion matrix was requested.
	if confMat or confMatObj:
		with open(fileName + '.html', 'w') as htmlStream:
			htmlStream.write('<html xmlns="http://www.w3.org/1999/xhtml">')
			htmlStream.write('<h1> File :'+fileName+'</h1>')
			htmlStream.write('<p>Only errors with at least '+str(MINERRTOSHOW)+' occurrences appear</p>')
			if confMat:
				htmlStream.write('<h2> Substructure Confusion Matrix </h2>')
				matrix.toHTML(htmlStream,MINERRTOSHOW,INKMLVIEWER)
			if confMatObj:
				htmlStream.write('<h2> Substructure Confusion Matrix at Object level </h2>')
				matrixObj.toHTML(htmlStream,MINERRTOSHOW,INKMLVIEWER)
			htmlStream.write('</html>')
		
def main():
	"""Command-line entry point.

	Reads sys.argv and runs one of three modes:
	  * batch mode (first argument 'batch'): calls runBatch() on the
	    given file-pair list, honouring GT-FIRST, MAT and MATOBJ;
	  * MATRIX mode: compares two .lg files once per matrix label filter
	    and once ignoring those labels, writing each result to its own
	    '.m' file;
	  * pair mode: compares two .lg files and writes metrics and/or
	    differences to standard output.
	With fewer than two arguments, prints usage and exits with status 0.
	"""
	if len(sys.argv) < 3:
		print("Usage: [[python]] evallg.py <file1.lg> <file2.lg> [diff/*]  [INTER]")
		print("   OR  [[python]] evallg.py <file1.lg> <file2.lg> MATRIX fileout")
		print("   OR  [[python]] evallg.py [batch] <filepair_list> [GT-FIRST] [MAT] [MATOBJ] [INTER]")
		print("")
		print("    For the first usage, return error metrics and differences")
		print("    for  label graphs in file1.lg and file2.lg.")
		print("    A third argument will return just differences ('diff')")
		print("    or just metrics (any other string). ")
		print("    If MATRIX option is used, 4 evaluations are done with the ")
		print("    different matrix label filters and output in the fileout[ABCD].m]")
		print("")
		print("    For the second usage, a file is provided containing pairs of")
		print("    label graph files, one per line (e.g. 'file1, GTruth').")
		print("    A third optional column contains the file name which should be")
		print("    linked to the InkML viewer.")
		print("")
		print("    A CSV file containing metrics for all comparisons is written")
		print("    to \"filepair_list.m\", and differences are written to a file")
		print("    \"filepair_list.diff\". By default ground truth is listed")
		print("    second on each line of the batch file; GT-FIRST as third argument")
		print("    will result in the first element of each line being treated")
		print("    as ground truth - this does not affect metrics (.m), but does")
		print("    affect difference (.diff) output.")
		print("    The MAT or MATOBJ option will create a HTML file with confusion Matrix")
		print("    MAT will produce the subtructure at stroke level.")
		print("    MATOBJ will produce the subtructure at object level.")
		print("     (in both cases, the size of substructure is 2 or 3 nodes,")
		print("      in both cases, only errors with at least 3 occurrences appear)")
		sys.exit(0)

	# INTER switches both node and edge comparison to the intersection metric.
	if "INTER" in sys.argv:
		compareTools.cmpNodes = compareTools.intersectMetric
		compareTools.cmpEdges = compareTools.intersectMetric

	if sys.argv[1] == "batch":
		# Options follow the file-pair list; every flag defaults to off so
		# that runBatch() always receives defined values.
		options = sys.argv[3:]
		defaultFileOrder = True
		confMat = False
		confMatObj = False
		if "GT-FIRST" in options:
			print(">> Treating 1st column as ground truth.")
			defaultFileOrder = False
		if "MAT" in options:
			print(">> Compute the confusion matrix at primitive level.")
			confMat = True
		if "MATOBJ" in options:
			print(">> Compute the confusion matrix at object level.")
			confMatObj = True
		runBatch(sys.argv[2], defaultFileOrder, confMat, confMatObj)
		return

	# Running for a pair of files: require default order of arguments.
	fileName1 = sys.argv[1]
	fileName2 = sys.argv[2]

	if len(sys.argv) > 4 and sys.argv[3] == 'MATRIX':
		fileOut = sys.argv[4]
		todo = {'Mat':set(['*M']),'Col':set(['*C']),'Row':set(['*R']),'Cell':set(['*Cell'])}
		compareTools.cmpNodes = compareTools.filteredMetric
		compareTools.cmpEdges = compareTools.filteredMetric
		# One evaluation per matrix label filter, each to its own file.
		for (suffix, labels) in todo.items():
			compareTools.selectedLabelSet = labels
			n1 = Lg(fileName1)
			n2 = Lg(fileName2)
			out = n1.compare(n2)
			with open(fileOut+suffix+".m", 'w') as outStream:
				writeMetrics(out, outStream)
		# Final evaluation: no selected labels, matrix labels ignored.
		compareTools.selectedLabelSet = set([])
		compareTools.ignoredLabelSet = set(['*M','*C','*R','*Cell'])
		n1 = Lg(fileName1)
		n2 = Lg(fileName2)
		out = n1.compare(n2)
		with open(fileOut+"Symb.m", 'w') as outStream:
			writeMetrics(out, outStream)
		return

	# Plain pair mode: 'diff' suppresses metrics, 'm' suppresses errors.
	showErrors = True
	showMetrics = True
	if 'diff' in sys.argv:
		showMetrics = False
	elif 'm' in sys.argv:
		showErrors = False

	n1 = Lg(fileName1)
	n2 = Lg(fileName2)
	if "INTER" in sys.argv:
		n1.labelMissingEdges()
		n2.labelMissingEdges()

	out = n1.compare(n2)

	if showMetrics:
		writeMetrics(out, sys.stdout)
	if showErrors:
		writeDiff(out[1],out[3],out[2], sys.stdout)

# Run only when executed as a script, so that importing this module
# does not parse sys.argv or exit the interpreter.
if __name__ == "__main__":
	main()