-
Richard Zanibbi a rédigéRichard Zanibbi a rédigé
sumDiff.py 12,12 Kio
################################################################
# sumDiff.py
#
# Program that reads in a .diff (CSV) file containing differences between
# two BGs, and outputs confusion matrices for symbols and spatial relations.
# Output is in CSV or HTML formats.
#
# Author: H. Mouchere, June 2012
# Copyright (c) 2012-2014, Richard Zanibbi and Harold Mouchere
################################################################
import collections
import csv
import html
import os
import sys
import time
def addOneError(confM, id1, id2):
    """Count one more confusion between labels id1 and id2.

    confM is indexed as confM[id1][id2]. When it is a nested
    defaultdict(int), a missing row or cell is created on first access
    and starts from zero, so no explicit initialisation is needed here.
    """
    row = confM[id1]
    row[id2] = row[id2] + 1
def affMat(output, allID, confM):
    """Write confusion matrix confM to output as CSV text.

    The first line is a header holding every label of allID (sorted and
    wrapped in single quotes) after the "Output:" corner cell.  Each
    following line starts with a quoted row label and then has one cell
    per column label: the count confM[row][column], or an empty cell
    when that count is zero.
    """
    labels = sorted(allID)

    # Header line.
    header = "".join(",'" + str(label) + "'" for label in labels)
    output.write("Output:" + header + "\n")

    # One line per row label.
    for rowLabel in labels:
        cells = []
        for colLabel in labels:
            count = confM[rowLabel][colLabel]
            if count == 0:
                cells.append(",")
            else:
                cells.append("," + str(count))
        output.write("'" + str(rowLabel) + "'" + "".join(cells) + "\n")
def affMatHTML(output, allID, confM):
    """Write confusion matrix confM to output as an HTML <table>.

    Layout: a header row with every label of allID (sorted), then one
    row per label holding the counts confM[row][column] -- zero counts
    are rendered as empty cells -- with the row label repeated at both
    ends, and finally a footer row repeating the column labels.  Each
    data cell carries the class "col_<column index>".

    Label text is HTML-escaped, so labels such as '<', '>' or '&' are
    displayed literally instead of corrupting the table markup.
    """
    labels = sorted(allID)

    def headerCell(label):
        # quote=False: the text goes into element content, not an attribute.
        return "<th>" + html.escape(str(label), quote=False) + "</th>"

    # The same label cells are used for the header and the footer row.
    labelCells = "".join(headerCell(label) for label in labels)

    output.write("<table>\n<tr><th><i>(Out:Rows)</i></th>")
    output.write(labelCells)
    output.write("</tr>\n")

    for rowLabel in labels:
        rowHeader = headerCell(rowLabel)
        output.write("<tr>" + rowHeader)
        for column, colLabel in enumerate(labels):
            val = str(confM[rowLabel][colLabel])
            if val == "0":
                val = ""
            output.write('<td class="col_' + str(column) + '">' + val + "</td>")
        # Repeat the row label on the right-hand side of the row.
        output.write(rowHeader + "</tr>\n")

    output.write("<tr><th></th>" + labelCells + "</tr>\n")
    output.write("</table>\n")
def writeCSS(output, allID):
    """Write the <head> element with the report's inline style sheet.

    The rules style the table borders, cell alignment and padding, the
    line height of paragraphs and lists, and the hover highlighting of
    table rows and cells.  allID is accepted but not used by the rules
    currently emitted.
    """
    styleChunks = (
        '<head><style type="text/css">\n',
        'table { border-collapse:collapse;}\n',
        'p { line-height: 125%;}\n',
        'ul { line-height: 125%;}\n',
        'th{ text-align: right; padding: 4px;}\n',
        'td { text-align: right; border: 1px solid lightgray; padding: 4px; }\n',
        # Highlight the row under the mouse pointer ...
        'tr:hover{background-color:rgb(180,200,235);}\n ',
        # ... and the individual cell under the pointer.
        'td:hover{background-color:yellow;} \n',
        '</style></head>\n',
    )
    for chunk in styleChunks:
        output.write(chunk)
def _newConfusionMatrix():
    """Return an empty confusion matrix (dict -> dict -> int, default 0)."""
    return collections.defaultdict(lambda: collections.defaultdict(int))


def _readLabelSets(labelFile):
    """Read the label file and return (nodeLabels, edgeLabels) as sets.

    Entries are taken from the first column of each non-blank row.  They
    are collected as node labels until an 'EDGE LABELS:' row is seen and
    as edge labels afterwards; 'NODE LABELS:' rows are skipped.
    """
    nodeLabels = set()
    edgeLabels = set()
    current = nodeLabels
    with open(labelFile) as handle:
        for row in csv.reader(handle):
            if not row:
                continue
            entry = row[0].strip()
            if entry == 'NODE LABELS:':
                continue
            if entry == 'EDGE LABELS:':
                current = edgeLabels
                continue
            current.add(entry)
    return nodeLabels, edgeLabels


def _readDiffErrors(fileName):
    """Tally the node and edge label errors listed in a .diff file.

    '*N' rows contribute the label pair in columns 2 and 5 to the node
    matrix, '*E' rows the pair in columns 3 and 6 to the edge matrix.
    'DIFF' rows (file names), '*S' rows (segmentation, i.e. object-level
    errors, currently ignored) and any other rows are skipped.  Rows too
    short to hold their labels are reported on stderr and skipped.

    Returns (labelM, allLabel, nodeErrors, spatRelM, allSR).
    """
    labelM = _newConfusionMatrix()
    spatRelM = _newConfusionMatrix()
    allLabel = set()
    allSR = set()
    nodeErrors = 0

    def warn(message, rowCount, row):
        # Diagnostics go to stderr so they never end up inside the
        # CSV/HTML report that is written to stdout.
        sys.stderr.write(message + " at row: " + str(rowCount)
                         + " for file: " + fileName + "\n")
        sys.stderr.write(str(row) + "\n")

    with open(fileName) as handle:
        # rowCount is the 0-based index of the row, blank rows included.
        for rowCount, row in enumerate(csv.reader(handle)):
            if not row:
                continue
            entryType = row[0].strip()

            if entryType == "*N":
                if len(row) < 6:
                    warn("INVALID LENGTH", rowCount, row)
                    continue
                outputLabel = row[2].strip()
                otherLabel = row[5].strip()
                addOneError(labelM, outputLabel, otherLabel)
                allLabel.update((outputLabel, otherLabel))
                nodeErrors += 1

            elif entryType == "*E":
                if len(row) < 7:
                    warn("INVALID LENGTH", rowCount, row)
                    continue
                outputLabel = row[3].strip()
                otherLabel = row[6].strip()
                # DEBUG checks kept from the original implementation: a
                # label reading "1.0" or a row that does not have exactly
                # 8 fields is reported, but the row is still counted.
                if outputLabel == "1.0" or otherLabel == "1.0":
                    warn("ERROR", rowCount, row)
                elif len(row) != 8:
                    warn("INVALID LENGTH", rowCount, row)
                addOneError(spatRelM, outputLabel, otherLabel)
                allSR.update((outputLabel, otherLabel))

    return labelM, allLabel, nodeErrors, spatRelM, allSR


def _summarizeEdgeErrors(spatRelM, relOnlyLabels, mergeEdgeLabel):
    """Collapse all non-relationship edge labels into mergeEdgeLabel.

    Returns (shortEdgeMatrix, allSegErrors, allRelErrors, fposMerge,
    fnegMerge), where:
      - allSegErrors counts errors in which at least one of the two
        labels was replaced by mergeEdgeLabel;
      - fposMerge counts those where only the output label was replaced;
      - fnegMerge counts those where only the target label was replaced;
      - allRelErrors counts errors in which neither label was replaced.
    """
    shortEdgeMatrix = _newConfusionMatrix()
    allSegErrors = 0
    allRelErrors = 0
    fposMerge = 0
    fnegMerge = 0

    for output, targets in list(spatRelM.items()):
        outputKept = output in relOnlyLabels
        olabel = output if outputKept else mergeEdgeLabel
        for target, count in list(targets.items()):
            targetKept = target in relOnlyLabels
            tlabel = target if targetKept else mergeEdgeLabel

            shortEdgeMatrix[olabel][tlabel] += count

            if outputKept and targetKept:
                allRelErrors += count
                continue
            allSegErrors += count
            if targetKept:
                fposMerge += count
            elif outputKept:
                fnegMerge += count

    return shortEdgeMatrix, allSegErrors, allRelErrors, fposMerge, fnegMerge


def main():
    """Summarize a .diff file as confusion matrices on stdout.

    Command line: sumDiff.py <file1.diff> <labelsGT.txt> [HTML]

    Three matrices are written: node label confusions, a short edge
    label matrix in which every label that is not a pure relationship
    label is merged into '*', and the full edge label matrix.  Output is
    CSV unless a third argument is given, in which case it is HTML.

    Exits with status 0 after printing the usage text when fewer than
    two arguments are given, and with status 1 (message on stderr) when
    an input file cannot be read.
    """
    if len(sys.argv) < 3:
        print("Usage : [[python]] sumDiff.py <file1.diff> <labelsGT.txt> [HTML]\n")
        print(" Merge results for each line in file1.diff into confusion Matrices.")
        print(" By default output is sent to stdout in CSV format.")
        print(" requires list of GT labels from labelsGT.txt.")
        print(" [HTML] option changes output format to HTML.")
        sys.exit(0)

    fileName = sys.argv[1]
    labelFile = sys.argv[2]
    withHTML = len(sys.argv) > 3

    # Read error data from the .diff (CSV) file.
    try:
        labelM, allLabel, nodeErrors, spatRelM, allSR = _readDiffErrors(fileName)
    except OSError:
        sys.stderr.write(' !! IO Error (cannot open): ' + fileName + '\n')
        sys.exit(1)

    # Read the node and edge label sets of the ground truth.
    try:
        gtNodeLabels, _gtEdgeLabels = _readLabelSets(labelFile)
    except OSError:
        sys.stderr.write(' !! IO Error (cannot open): ' + labelFile + '\n')
        sys.exit(1)

    # Identify all confused symbol labels.  We use this to present
    # relationship and segmentation confusions separately; it is the
    # same set of labels that indexes the node confusion matrix.
    symbolLabels = set(allLabel)

    # Obtain the list of edge labels that do not appear on nodes.
    # DEBUG: need to consult all GT labels in general case (handling '*' input).
    mergeEdgeLabel = '*'
    relOnlyLabels = allSR.difference(symbolLabels).difference(gtNodeLabels)
    relMergeLabels = relOnlyLabels.union({mergeEdgeLabel})

    # Create a modified confusion histogram where all symbol/segmentation
    # edge confusions are treated as being of the same type.
    (ShortEdgeMatrix, allSegErrors, allRelErrors,
     fposMerge, fnegMerge) = _summarizeEdgeErrors(spatRelM, relOnlyLabels,
                                                  mergeEdgeLabel)
    allEdgeErrors = allSegErrors + allRelErrors

    if withHTML:
        sys.stdout.write('<html>')
        writeCSS(sys.stdout, allLabel.union(allSR))
        print("<font face=\"helvetica,arial,sans-serif\">")
        print("<h2>LgEval Error Summary</h2>")
        print(time.strftime("%c"))
        print("<br>\n")
        print("<b>File:</b> " + os.path.splitext(os.path.split(fileName)[1])[0] + "<br>")
        print("<p>All confusion matrices show only errors. In each matrix, output labels appear in the left column, and target labels in the top row.</p>")
        print("<UL><LI><A href=\"#nodes\">Node Label Confusion Matrix</A> "
              "<LI> <A HREF=\"#ShortEdges\">Edge Label Confusion Matrix (short - ignoring object class confusions)</A> "
              "<LI> <A HREF=\"#Edges\">Edge Label Matrix (all labels)</A> </UL>")
        print("<hr>")

        print("<h2><A NAME=\"nodes\">Node Label Confusion Matrix</A></h2>")
        print("<p>" + str(len(allLabel)) + " unique node labels. " + str(nodeErrors) + " errors. ABSENT: a node missing in the output or target graph</p>")
        affMatHTML(sys.stdout, allLabel, labelM)
        print("<br><hr><br>")

        print("<h2><A NAME=\"ShortEdges\">Edge Label Confusion Matrix (Short)</A></h2>")
        print("<p>" + str(len(relOnlyLabels))
              + " unique relationship labels + * representing grouping two nodes into an object (any type). "
              + str(allEdgeErrors) + " errors <UL><LI>"
              + str(allSegErrors)
              + " Directed segmentation and node pair classification errors (entries in '*'-labeled row and column) <UL><LI><b>"
              + str(allSegErrors - fposMerge - fnegMerge)
              + " edges between correctly grouped nodes, but with conflicting classification (* vs. *)</b> <LI>"
              + str(fposMerge) + " false positive merge edges (* vs. other)<LI>"
              + str(fnegMerge) + " false negative merge edges (other vs. *) </UL> <LI>"
              + str(allRelErrors)
              + " Directed relationship errors (remaining matrix entries) </UL></p>")
        affMatHTML(sys.stdout, relMergeLabels, ShortEdgeMatrix)
        print("<br><hr><br>")

        print("<h2><A NAME=\"Edges\">Edge Label Confusion Matrix (All Errors)</A></h2>")
        print("<p>" + str(len(allSR)) + " unique edge labels representing relationships and node groupings for specific symbol types. " + str(allEdgeErrors) + " errors</p>")
        affMatHTML(sys.stdout, allSR, spatRelM)
        print("</font>")
        sys.stdout.write('</html>')
    else:
        print("LgEval Error Summary for: " + fileName)
        print(time.strftime("%c"))
        print("")
        print("NOTE: This file contains 3 confusion matrices.")
        print("")
        print("I. Node Label Confusion Matrix: " + str(len(allLabel)) + " unique labels. ABSENT: a node missing in the output or target graph")
        affMat(sys.stdout, allLabel, labelM)
        print("")
        print("")
        print("II. Edge Label Confusion Matrix (Short): " + str(len(relOnlyLabels)) + " unique relationship labels + * (merge)")
        affMat(sys.stdout, relMergeLabels, ShortEdgeMatrix)
        print("")
        print("")
        print("III. Edge Label Confusion Matrix (Full): " + str(len(allSR)) + " unique labels for relationships and node groupings for specific symbol types")
        affMat(sys.stdout, allSR, spatRelM)
# Run the program only when executed as a script, so that importing this
# module (e.g. to reuse affMat / affMatHTML) has no side effects.
if __name__ == "__main__":
    main()