################################################################
# sumDiff.py
#
# Program that reads in a .diff (CSV) file containing differences between
# two BGs, and outputs confusion matrices for symbols and spatial relations.
# Output is in CSV or HTML formats.
#
# Author: H. Mouchere, June 2012
# Copyright (c) 2012-2014, Richard Zanibbi and Harold Mouchere
#
################################################################
import collections
import csv
import html
import os
import sys
import time
def addOneError(confM, id1, id2):
    """Count one more confusion for the label pair (id1, id2).

    The count stored at confM[id1][id2] is incremented in place. No
    initialisation is done here: confM is expected to hand back a zero
    count for pairs not seen before (the matrices built in main() are
    nested defaultdicts, which do exactly that).
    """
    counts = confM[id1]
    counts[id2] = counts[id2] + 1
def affMat(output, allID, confM):
    """Write a confusion matrix to `output` as comma-separated text.

    output -- object with a write() method (e.g. sys.stdout).
    allID  -- iterable of labels; used, in sorted order, for both the
              rows and the columns.
    confM  -- matrix indexed as confM[rowLabel][columnLabel].

    The first line is a header starting with "Output:" followed by the
    single-quoted column labels. Each following line starts with a
    single-quoted row label; zero counts are left as empty fields.
    """
    labels = sorted(allID)

    # Header line.
    header = ["Output:"]
    for label in labels:
        header.append(",'" + str(label) + "'")
    header.append("\n")
    output.write("".join(header))

    # One line per row label.
    for rowLabel in labels:
        fields = ["'" + str(rowLabel) + "'"]
        for colLabel in labels:
            count = confM[rowLabel][colLabel]
            if count == 0:
                fields.append(",")
            else:
                fields.append("," + str(count))
        fields.append("\n")
        output.write("".join(fields))
def affMatHTML(output, allID, confM):
    """Write a confusion matrix to `output` as an HTML <table>.

    output -- object with a write() method (e.g. sys.stdout).
    allID  -- iterable of labels; used, in sorted order, for both the
              rows and the columns.
    confM  -- matrix indexed as confM[rowLabel][columnLabel].

    The table has a header row and a matching footer row of column
    labels, and each data row carries its row label at both ends. Cells
    are tagged class="col_<index>"; zero counts are rendered as empty
    cells.

    Labels are HTML-escaped before being written, so labels containing
    '<', '>' or '&' are displayed literally instead of corrupting the
    table markup.
    """
    labels = sorted(allID)
    # Escape once per label; the same text is reused for header, footer
    # and both ends of each row.
    shown = [html.escape(str(label), quote=False) for label in labels]
    columnHeads = "".join("<th>" + text + "</th>" for text in shown)

    output.write("<table>\n<tr><th><i>(Out:Rows)</i></th>")
    output.write(columnHeads)
    output.write("</tr>\n")

    for rowLabel, rowText in zip(labels, shown):
        output.write("<tr><th>" + rowText + "</th>")
        for i, colLabel in enumerate(labels):
            val = str(confM[rowLabel][colLabel])
            if val == "0":
                val = ""
            output.write('<td class="col_' + str(i) + '">' + val + "</td>")
        output.write("<th>" + rowText + "</th></tr>\n")

    # Footer row repeating the column labels.
    output.write("<tr><th></th>")
    output.write(columnHeads)
    output.write("</tr>\n")
    output.write("</table>\n")
def writeCSS(output, allID):
    """Write the <head> element holding the report's stylesheet.

    output -- object with a write() method (e.g. sys.stdout).
    allID  -- accepted but not used: no per-label (per-column) rules
              are generated, only fixed table/row/cell styling.
    """
    pieces = (
        '<head><style type="text/css">\n',
        'table { border-collapse:collapse;}\n',
        'p { line-height: 125%;}\n',
        'ul { line-height: 125%;}\n',
        'th{ text-align: right; padding: 4px;}\n',
        'td { text-align: right; border: 1px solid lightgray; padding: 4px; }\n',
        # Highlight the row under the pointer, then the single cell.
        'tr:hover{background-color:rgb(180,200,235);}\n ',
        'td:hover{background-color:yellow;} \n',
        '</style></head>\n',
    )
    for piece in pieces:
        output.write(piece)
def main():
    """Command-line entry point: summarise a .diff file as confusion matrices.

    Arguments are taken from sys.argv:
      argv[1] -- .diff (CSV) file. Rows are dispatched on their first
                 field: "*N" rows are node label errors (fields 2 and 5
                 hold the two labels), "*E" rows are edge label errors
                 (fields 3 and 6), "DIFF" and "*S" rows are skipped.
      argv[2] -- label file: node labels, then edge labels after an
                 'EDGE LABELS:' line.
      argv[3] -- optional; the presence of any fourth argument switches
                 the output from CSV to HTML.

    Three matrices are written to stdout: node label confusions, a
    "short" edge matrix in which every edge label that is not a pure
    relationship label is folded into '*', and the full edge matrix.

    Exits with status 0 after printing usage when fewer than two
    arguments are given, and with status 1 (message on stderr) when
    either input file cannot be opened.
    """
    if len(sys.argv) < 3:
        print("Usage : [[python]] sumDiff.py <file1.diff> <labelsGT.txt> [HTML]\n")
        print(" Merge results for each line in file1.diff into confusion Matrices.")
        print(" By default output is sent to stdout in CSV format.")
        print(" requires list of GT labels from labelsGT.txt.")
        print(" [HTML] option changes output format to HTML.")
        sys.exit(0)

    fileName = sys.argv[1]
    labelFile = sys.argv[2]
    withHTML = len(sys.argv) > 3

    # Open both inputs up front so a missing file is reported before
    # any processing starts.
    try:
        diffStream = open(fileName)
    except IOError:
        sys.stderr.write(' !! IO Error (cannot open): ' + fileName + '\n')
        sys.exit(1)
    try:
        labelStream = open(labelFile)
    except IOError:
        diffStream.close()
        # Report the label file here (not the .diff file) - it is the
        # one that failed to open.
        sys.stderr.write(' !! IO Error (cannot open): ' + labelFile + '\n')
        sys.exit(1)

    # Confusion matrices: dict -> dict -> int.
    labelM = collections.defaultdict(lambda: collections.defaultdict(int))
    spatRelM = collections.defaultdict(lambda: collections.defaultdict(int))

    gtNodeLabels = set()
    gtEdgeLabels = set()  # collected for completeness; not consulted below
    allLabel = set()
    allSR = set()
    # All confused symbol (node) labels; used to present relationship
    # and segmentation confusions separately.
    symbolLabels = set()
    nodeErrors = 0

    with diffStream, labelStream:
        # Read the node and edge label sets.
        readEdges = False
        for row in csv.reader(labelStream):
            if len(row) == 0:
                continue
            nextEntry = row[0].strip()
            if nextEntry == 'NODE LABELS:':
                continue
            if nextEntry == 'EDGE LABELS:':
                readEdges = True
            elif readEdges:
                gtEdgeLabels.add(nextEntry)
            else:
                gtNodeLabels.add(nextEntry)

        # Accumulate errors from the .diff file. rowCount counts every
        # row read, blank ones included.
        for rowCount, row in enumerate(csv.reader(diffStream)):
            # Skip blank lines.
            if len(row) == 0:
                continue

            entryType = row[0].strip()
            if entryType == "DIFF":
                # File name entry.
                continue
            elif entryType == "*N":
                # Node label error.
                outputLabel = row[2].strip()
                otherLabel = row[5].strip()
                symbolLabels.add(outputLabel)
                symbolLabels.add(otherLabel)
                addOneError(labelM, outputLabel, otherLabel)
                allLabel.add(outputLabel)
                allLabel.add(otherLabel)
                nodeErrors += 1
            elif entryType == "*E":
                # Edge (link) label error.
                # DEBUG
                if row[3].strip() == "1.0" or row[6].strip() == "1.0":
                    print("ERROR at row: " + str(rowCount) + " for file: " + fileName)
                    print(row)
                elif not len(row) == 8:
                    print("INVALID LENGTH at row: " + str(rowCount) + " for file: " + fileName)
                    print(row)
                outputLabel = row[3].strip()
                otherLabel = row[6].strip()
                addOneError(spatRelM, outputLabel, otherLabel)
                allSR.add(outputLabel)
                allSR.add(otherLabel)
            elif entryType == "*S":
                # Currently ignore segmentation errors (i.e. object-level errors)
                continue

    # Obtain the list of edge labels that do not appear on nodes.
    # DEBUG: need to consult all GT labels in general case (handling '*' input).
    mergeEdgeLabel = '*'
    relOnlyLabels = allSR.difference(symbolLabels).difference(gtNodeLabels)
    # Union with a one-element set: passing the bare string would add
    # its individual characters instead of the label itself.
    relMergeLabels = relOnlyLabels.union({mergeEdgeLabel})

    # Create a modified confusion histogram where all symbol/segmentation
    # edge confusions are treated as being of the same type.
    ShortEdgeMatrix = collections.defaultdict(lambda: collections.defaultdict(int))
    allSegErrors = 0
    allRelErrors = 0
    fposMerge = 0
    fnegMerge = 0
    for output, targetCounts in spatRelM.items():
        olabel = output if output in relOnlyLabels else mergeEdgeLabel
        for target, count in targetCounts.items():
            tlabel = target if target in relOnlyLabels else mergeEdgeLabel

            # Increment the entry for the appropriate matrix.
            ShortEdgeMatrix[olabel][tlabel] += count

            outputFolded = olabel != output
            targetFolded = tlabel != target
            if outputFolded or targetFolded:
                allSegErrors += count
                if outputFolded and not targetFolded:
                    fposMerge += count
                elif targetFolded and not outputFolded:
                    fnegMerge += count
                # Both folded: '*' vs '*'; counted in allSegErrors only.
            else:
                allRelErrors += count

    if withHTML:
        sys.stdout.write('<html>')
        writeCSS(sys.stdout, allLabel.union(allSR))
        print("<font face=\"helvetica,arial,sans-serif\">")
        print("<h2>LgEval Error Summary</h2>")
        print(time.strftime("%c"))
        print("<br>\n")
        print("<b>File:</b> " + os.path.splitext(os.path.split(fileName)[1])[0] + "<br>")
        print("<p>All confusion matrices show only errors. In each matrix, output labels appear in the left column, and target labels in the top row.</p>")
        # The second link is closed with </A> (the original emitted a
        # stray opening <A> there).
        print("<UL><LI><A href=\"#nodes\">Node Label Confusion Matrix</A> <LI> <A HREF=\"#ShortEdges\">Edge Label Confusion Matrix (short - ignoring object class confusions)</A> <LI> <A HREF=\"#Edges\">Edge Label Matrix (all labels)</A> </UL>")
        print("<hr>")
        print("<h2><A NAME=\"nodes\">Node Label Confusion Matrix</A></h2>")
        print("<p>" + str(len(allLabel)) + " unique node labels. " + str(nodeErrors) + " errors. ABSENT: a node missing in the output or target graph</p>")
        affMatHTML(sys.stdout, allLabel, labelM)
        print("<br><hr><br>")
        print("<h2><A NAME=\"ShortEdges\">Edge Label Confusion Matrix (Short)</A></h2>")
        print("<p>" + str(len(relOnlyLabels))
              + " unique relationship labels + * representing grouping two nodes into an object (any type). "
              + str(allSegErrors + allRelErrors) + " errors <UL><LI>"
              + str(allSegErrors)
              + " Directed segmentation and node pair classification errors (entries in '*'-labeled row and column) <UL><LI><b>"
              + str(allSegErrors - fposMerge - fnegMerge)
              + " edges between correctly grouped nodes, but with conflicting classification (* vs. *)</b> <LI>"
              + str(fposMerge) + " false positive merge edges (* vs. other)<LI>"
              + str(fnegMerge) + " false negative merge edges (other vs. *) </UL> <LI>"
              + str(allRelErrors)
              + " Directed relationship errors (remaining matrix entries) </UL></p>")
        affMatHTML(sys.stdout, relMergeLabels, ShortEdgeMatrix)
        print("<br><hr><br>")
        print("<h2><A NAME=\"Edges\">Edge Label Confusion Matrix (All Errors)</A></h2>")
        print("<p>" + str(len(allSR)) + " unique edge labels representing relationships and node groupings for specific symbol types. " + str(allSegErrors + allRelErrors) + " errors</p>")
        affMatHTML(sys.stdout, allSR, spatRelM)
        print("</font>")
        sys.stdout.write('</html>')
    else:
        print("LgEval Error Summary for: " + fileName)
        print(time.strftime("%c"))
        print("")
        print("NOTE: This file contains 3 confusion matrices.")
        print("")
        print("I. Node Label Confusion Matrix: " + str(len(allLabel)) + " unique labels. ABSENT: a node missing in the output or target graph")
        affMat(sys.stdout, allLabel, labelM)
        print("")
        print("")
        print("II. Edge Label Confusion Matrix (Short): " + str(len(relOnlyLabels)) + " unique relationship labels + * (merge)")
        affMat(sys.stdout, relMergeLabels, ShortEdgeMatrix)
        print("")
        print("")
        print("III. Edge Label Confusion Matrix (Full): " + str(len(allSR)) + " unique labels for relationships and node groupings for specific symbol types")
        affMat(sys.stdout, allSR, spatRelM)