Newer
Older
################################################################
# evallg.py
#
# Program that reads in two .lg (CSV) files, computes metrics,
# and returns the result as a (CSV) entry, along with a
# CSV entry (row) for each specific error on standard output.
#
# *If run in 'batch' mode, a CSV file of metrics (.m) and a separate
# file containing all differences observed (.diff) will be produced.
#
# Author: R. Zanibbi, June 2012
# Copyright (c) 2012-2014 Richard Zanibbi and Harold Mouchere
################################################################
import sys
import csv
from lg import *
from lgio import *
import SmGrConfMatrix
import compareTools
# for RIT web service :
#INKMLVIEWER = "inkml_viewer/index.xhtml?path=../testdata/&files="
#local :
# Viewer URL prefix; runBatch passes it to the confusion-matrix toHTML() writers.
INKMLVIEWER = "http://www.cs.rit.edu/~rlaz/inkml_viewer/index.xhtml?path=http://www.cs.rit.edu/~rlaz/testdata/&files="
# Occurrence threshold passed to toHTML(); the generated HTML page states that
# only errors with at least this many occurrences appear.
MINERRTOSHOW = 3
def runBatch(fileName, defaultFileOrder, confMat, confMatObj):
    """Compile metrics for pairs of files provided in a CSV
    file. Store metrics and errors in separate files.

    Arguments:
      fileName         -- path of the CSV file listing one pair of .lg files
                          per row; an optional third column gives the name
                          recorded with confusion-matrix entries.
      defaultFileOrder -- if False, the two file columns are swapped before
                          comparison.
      confMat          -- if true, accumulate a SmGrConfMatrix.ConfMatrix
                          and write it to the HTML report.
      confMatObj       -- if true, accumulate a
                          SmGrConfMatrix.ConfMatrixObject and write it to
                          the HTML report.

    Writes fileName + '.m' (metrics) and fileName + '.diff' (differences);
    fileName + '.html' is written only when confMat or confMatObj is set.
    Empty rows, rows whose first cell is blank, and rows whose first cell
    starts with '#' are skipped.
    """
    # Only build the matrices that were requested; both names are always
    # bound so later checks never hit an undefined variable.
    matrix = SmGrConfMatrix.ConfMatrix() if confMat else None
    matrixObj = SmGrConfMatrix.ConfMatrixObject() if confMatObj else None

    # Context managers guarantee the three streams are closed, even if a
    # comparison raises part-way through the batch.
    with open(fileName) as pairStream, \
            open(fileName + '.m', 'w') as metricStream, \
            open(fileName + '.diff', 'w') as diffStream:
        for row in csv.reader(pairStream):
            # Skip comments and empty lines. A blank first cell is skipped
            # too: indexing its first character would raise IndexError.
            if not row:
                continue
            firstCell = row[0].strip()
            if not firstCell or firstCell.startswith("#"):
                continue

            lgfile1 = firstCell
            lgfile2 = row[1].strip()
            if not defaultFileOrder:
                lgfile1, lgfile2 = lgfile2, lgfile1
            print("Test: " + lgfile1 + " vs. " + lgfile2)

            # Name recorded with matrix entries: third column if present.
            toShow = lgfile1
            if len(row) > 2:
                toShow = row[2].strip()

            # Here lg1 is the output, and lg2 the ground truth.
            lg1 = Lg(lgfile1)
            lg2 = Lg(lgfile2)
            out = lg1.compare(lg2)

            metricStream.write('*M,' + lgfile1 + ',' + lgfile2 + '\n')
            writeMetrics(out, metricStream)
            diffStream.write('DIFF,' + lgfile1 + ',' + lgfile2 + '\n')
            writeDiff(out[1], out[3], out[2], diffStream)

            # First elements of the entries in out[1] / out[2]; used below
            # to mark which nodes/edges of a substructure are in error.
            nodeClassErr = set()
            edgeErr = set()
            if confMat or confMatObj:
                nodeClassErr = set(n for (n, _, _) in out[1])
                edgeErr = set(e for (e, _, _) in out[2])

            # Guarded by confMat: matrix is None when only the object-level
            # matrix was requested.
            if confMat:
                for (gt, er) in lg1.compareSubStruct(lg2, [2, 3]):
                    er.rednodes = set(er.nodes) & nodeClassErr
                    er.rededges = set(er.edges) & edgeErr
                    matrix.incr(gt, er, toShow)
            if confMatObj:
                for (obj, gt, er) in lg1.compareSegmentsStruct(lg2, [2]):
                    er.rednodes = set(er.nodes) & nodeClassErr
                    er.rededges = set(er.edges) & edgeErr
                    matrixObj.incr(obj, gt, er, toShow)

    if confMat or confMatObj:
        with open(fileName + '.html', 'w') as htmlStream:
            htmlStream.write('<html xmlns="http://www.w3.org/1999/xhtml">')
            htmlStream.write('<h1> File :'+fileName+'</h1>')
            htmlStream.write('<p>Only errors with at least '+str(MINERRTOSHOW)+' occurrences appear</p>')
            if confMat:
                htmlStream.write('<h2> Substructure Confusion Matrix </h2>')
                matrix.toHTML(htmlStream, MINERRTOSHOW, INKMLVIEWER)
            if confMatObj:
                htmlStream.write('<h2> Substructure Confusion Matrix at Object level </h2>')
                matrixObj.toHTML(htmlStream, MINERRTOSHOW, INKMLVIEWER)
            htmlStream.write('</html>')
def main():
    """Command-line entry point; behaviour is selected from sys.argv.

    With fewer than two arguments, prints the usage text and exits with
    status 0. Otherwise one of three modes runs:

      batch      -- sys.argv[1] == "batch": sys.argv[2] is the file-pair
                    list handed to runBatch(). GT-FIRST swaps the column
                    order; MAT / MATOBJ request the confusion matrices.
      MATRIX     -- sys.argv[3] == 'MATRIX' with an output prefix in
                    sys.argv[4]: the two files are compared once per label
                    filter and metrics are written to <prefix>Mat.m,
                    <prefix>Col.m, <prefix>Row.m, <prefix>Cell.m, plus
                    <prefix>Symb.m with those four labels ignored.
      file pair  -- otherwise: compare sys.argv[1] with sys.argv[2] and
                    write metrics and/or differences to standard output
                    ('diff' suppresses metrics, 'm' suppresses differences).

    If "INTER" appears anywhere in sys.argv, compareTools.cmpNodes and
    compareTools.cmpEdges are set to compareTools.intersectMetric.
    """
    if len(sys.argv) < 3:
        print("Usage: [[python]] evallg.py <file1.lg> <file2.lg> [diff/*] [INTER]")
        print(" OR [[python]] evallg.py <file1.lg> <file2.lg> MATRIX fileout")
        print(" OR [[python]] evallg.py [batch] <filepair_list> [GT-FIRST] [MAT] [MATOBJ] [INTER]")
        print("")
        print(" For the first usage, return error metrics and differences")
        print(" for label graphs in file1.lg and file2.lg.")
        print(" A third argument will return just differences ('diff')")
        print(" or just metrics (any other string). ")
        print(" If MATRIX option is used, 4 evaluations are done with the ")
        print(" different matrix label filters and output in the fileout[ABCD].m]")
        print("")
        print(" For the second usage, a file is provided containing pairs of")
        print(" label graph files, one per line (e.g. 'file1, GTruth').")
        print(" A third optional column contains the file name which should be")
        print(" linked to the InkML viewer.")
        print("")
        print(" A CSV file containing metrics for all comparisons is written")
        print(" to \"filepair_list.m\", and differences are written to a file")
        print(" \"filepair_list.diff\". By default ground truth is listed")
        print(" second on each line of the batch file; GT-FIRST as third argument")
        print(" will result in the first element of each line being treated")
        print(" as ground truth - this does not affect metrics (.m), but does")
        print(" affect difference (.diff) output.")
        print("")
        print(" The MAT or MATOBJ option will create a HTML file with confusion Matrix")
        print(" between substructures.")
        print(" MAT will produce the subtructure at stroke level.")
        print(" MATOBJ will produce the subtructure at object level.")
        print(" (in both cases, the size of substructure is 2 or 3 nodes,")
        print(" in both cases, only errors with at least 3 occurrences appear)")
        sys.exit(0)

    showErrors = True
    showMetrics = True

    if "INTER" in sys.argv:
        compareTools.cmpNodes = compareTools.intersectMetric
        compareTools.cmpEdges = compareTools.intersectMetric

    if sys.argv[1] == "batch":
        # If requested, swap arguments.
        defaultFileOrder = True
        # Both flags need a default: confMat was previously left unbound,
        # which made every batch run fail with NameError.
        confMat = False
        confMatObj = False
        if len(sys.argv) > 3 and "GT-FIRST" in sys.argv:
            print(">> Treating 1st column as ground truth.")
            defaultFileOrder = False
        if len(sys.argv) > 3 and "MAT" in sys.argv:
            print(">> Compute the confusion matrix at primitive level.")
            confMat = True
        if len(sys.argv) > 3 and "MATOBJ" in sys.argv:
            print(">> Compute the confusion matrix at object level.")
            confMatObj = True
        runBatch(sys.argv[2], defaultFileOrder, confMat, confMatObj)
    else:
        # Running for a pair of files: require default order of arguments.
        fileName1 = sys.argv[1]
        fileName2 = sys.argv[2]
        if len(sys.argv) > 4 and sys.argv[3] == 'MATRIX':
            fileOut = sys.argv[4]
            # Output-file suffix -> label set selected for that evaluation.
            todo = {'Mat':set(['*M']),'Col':set(['*C']),'Row':set(['*R']),'Cell':set(['*Cell'])}
            compareTools.cmpNodes = compareTools.filteredMetric
            compareTools.cmpEdges = compareTools.filteredMetric
            for (n, s) in todo.items():
                compareTools.selectedLabelSet = s
                n1 = Lg(fileName1)
                n2 = Lg(fileName2)
                out = n1.compare(n2)
                with open(fileOut+n+".m", 'w') as outStream:
                    writeMetrics(out, outStream)
            # Final pass: clear the selection and ignore the four labels
            # that were evaluated individually above.
            compareTools.selectedLabelSet = set([])
            compareTools.ignoredLabelSet = set(['*M','*C','*R','*Cell'])
            n1 = Lg(fileName1)
            n2 = Lg(fileName2)
            out = n1.compare(n2)
            # 'with' closes the stream, which was previously left open.
            with open(fileOut+"Symb.m", 'w') as outStream:
                writeMetrics(out, outStream)
        else:
            if 'diff' in sys.argv:
                showMetrics = False
            elif 'm' in sys.argv:
                showErrors = False
            n1 = Lg(fileName1)
            n2 = Lg(fileName2)
            if "INTER" in sys.argv:
                n1.labelMissingEdges()
                n2.labelMissingEdges()
            out = n1.compare(n2)
            if showMetrics:
                writeMetrics(out, sys.stdout)
            if showErrors:
                writeDiff(out[1],out[3],out[2], sys.stdout)