################################################################
# lg2txt.py
#
# Translate a label graph to a text file of different formats.
#
# NOTE: this program assumes that horizontal adjacency
# is indicated using 'HOR' or 'R' edge labels, superscripts
# and subscripts by 'SUP' and 'SUB.'
#
# Author: R. Zanibbi, June 2012
# Copyright (c) 2012-2014 Richard Zanibbi and Harold Mouchere
################################################################
import sys
import csv
from lg import *
def readMapFile(fileName):
    """Read symbol and structure mappings from a CSV map file.

    Each mapping row has the form ``pattern..., ->, replacement...``.
    Rows are stored in the symbol map until a ``STRUCTURE`` row is seen;
    a ``SYMBOLS`` row switches back to the symbol map. Empty rows, rows
    containing only blank cells, and rows whose first cell starts with
    ``#`` are ignored.

    Keys: a one-cell pattern is stored as the bare string; a longer
    pattern is stored as a tuple holding the first cell followed by the
    remaining cells in sorted order. Values: a one-cell replacement is
    stored as the bare string; a longer one as a tuple in file order.
    Cells are stored exactly as read (no whitespace stripping).

    Rows that lack a ``->`` separator, a pattern, or a replacement are
    reported on stderr and skipped.

    Returns:
        (symbolMap, structureMap), or None if the file cannot be opened
        (an error message is written to stderr in that case).
    """
    try:
        mapFile = open(fileName)
    except (IOError, OSError):
        sys.stderr.write(' !! IO Error (cannot open): ' + fileName + '\n')
        return

    symbolMap = {}
    structureMap = {}
    readingSymbols = True

    # The csv reader is lazy, so the file must stay open while iterating.
    with mapFile:
        for row in csv.reader(mapFile):
            # Skip blank lines (including whitespace-only cells) and comments.
            if not any(cell.strip() for cell in row):
                continue
            firstCell = row[0].strip()
            if firstCell.startswith('#'):
                continue
            if firstCell == 'SYMBOLS':
                readingSymbols = True
                continue
            if firstCell == 'STRUCTURE':
                readingSymbols = False
                continue

            # Locate the '->' cell separating pattern from replacement.
            arrowAt = None
            for (i, cell) in enumerate(row):
                if cell.strip() == '->':
                    arrowAt = i
                    break
            if arrowAt is None or arrowAt == 0 or arrowAt == len(row) - 1:
                sys.stderr.write(' !! Warning: skipping malformed map row: '
                        + str(row) + '\n')
                continue

            pattern = row[:arrowAt]
            replacement = row[arrowAt + 1:]

            # Relations after the leading label are sorted so that lookups
            # can be made with a canonical (sorted) relation order.
            if len(pattern) > 1:
                ptuple = tuple([pattern[0]] + sorted(pattern[1:]))
            else:
                ptuple = pattern[0]

            if len(replacement) > 1:
                rtuple = tuple(replacement)
            else:
                rtuple = replacement[0]

            if readingSymbols:
                symbolMap[ptuple] = rtuple
            else:
                structureMap[ptuple] = rtuple

    return (symbolMap, structureMap)
def _expandStructure(replacementTuple, nodeRelationPairs, primListString,
        nodeString, allowParent, lg, segPrimMap, edgeMap, symbolMap,
        structureMap):
    """Expand one structure replacement sequence into its output string."""
    quotedIds = '\"' + primListString + '\"'
    pieces = []
    for entry in replacementTuple:
        matched = False
        for (childId, relation) in nodeRelationPairs:
            if relation == entry:
                # Entry names a relation: emit the translated child subtree.
                pieces.append(translate(lg, childId, segPrimMap,
                        edgeMap, symbolMap, structureMap))
                matched = True
                break
            elif allowParent and entry == 'PARENT':
                pieces.append(nodeString)
                matched = True
                break
        # RZ, Jan 2013: allow other tags to be inserted (e.g. at end);
        # add primitive ids as identifier for symbols with multiple
        # subregions (e.g. fractions, roots)
        if not matched:
            pieces.append(entry.replace('_I_', quotedIds))
    return ''.join(pieces)


def translateStructure(lg, label, nodeRelationPairs, structureMap,
        segPrimMap, edgeMap, symbolMap, segId, nodeString):
    """Generate a string for a given structure.

    The lookup key is the tuple (label, relation, ...) with the relations
    of nodeRelationPairs in sorted order. If that key is not in
    structureMap, the same key with 'ANY' in place of the label is tried;
    only for an 'ANY' match may a 'PARENT' entry in the replacement be
    substituted by nodeString.

    Each replacement entry equal to the relation of some child is replaced
    by the translation of the first such child; any other entry is emitted
    literally, with '_I_' replaced by the quoted, ':'-terminated list of
    the sorted primitive ids of segId.

    Args:
        lg: label graph, passed through to translate().
        label: label of the parent object.
        nodeRelationPairs: list of (childSegId, relation) pairs.
        structureMap: structure mapping (pattern key -> replacement).
        segPrimMap: segment id -> tuple whose first item holds primitive ids.
        edgeMap, symbolMap: passed through to translate().
        segId: segment id of the parent object.
        nodeString: string already generated for the parent object.

    Returns:
        The generated string, or "" when no structure pattern matches.
    """
    primListString = ''.join(primitiveId + ':'
            for primitiveId in sorted(segPrimMap[segId][0]))

    # Relations are sorted so the key is independent of child order.
    relations = [relation for (_, relation)
            in sorted(nodeRelationPairs, key=lambda pair: pair[1])]
    key = tuple([label] + relations)
    anyKey = tuple(['ANY'] + relations)

    if key in structureMap:
        return _expandStructure(structureMap[key], nodeRelationPairs,
                primListString, nodeString, False, lg, segPrimMap,
                edgeMap, symbolMap, structureMap)
    if anyKey in structureMap:
        return _expandStructure(structureMap[anyKey], nodeRelationPairs,
                primListString, nodeString, True, lg, segPrimMap,
                edgeMap, symbolMap, structureMap)
    return ""
def translateRelation(lg, relation, nextChildId, structureMap,
        segPrimMap, edgeMap, symbolMap, nodeString):
    """Translate an individual spatial relation.

    The replacement sequence for `relation` is taken from structureMap.
    If the relation is not a key of structureMap, an error is written to
    stderr and the 'REL_DEFAULT' sequence is used instead, with '_L_' in
    each of its entries replaced by the relation label (a KeyError is
    raised if 'REL_DEFAULT' is missing too).

    In the sequence, a 'PARENT' entry is replaced by nodeString, a 'CHILD'
    entry by the translation of nextChildId, and every other entry is
    emitted literally.

    Args:
        lg: label graph, passed through to translate().
        relation: relation label on the edge to the child.
        nextChildId: segment id of the child object.
        structureMap: structure mapping (relation -> replacement sequence).
        segPrimMap, edgeMap, symbolMap: passed through to translate().
        nodeString: string generated so far for the parent object.

    Returns:
        The string combining parent and child for this relation.
    """
    if relation in structureMap:
        replacementTuple = structureMap[relation]
    else:
        sys.stderr.write(" !! Error: Unknown relationship label " + relation + "\n")
        sys.stderr.write(" !! Using relationship mapping: " + str(structureMap[ 'REL_DEFAULT' ]) + "\n")

        # Use default mapping if label is unknown.
        replacementTuple = tuple(entry.replace('_L_', relation)
                for entry in structureMap['REL_DEFAULT'])

    pieces = []
    for nextEntry in replacementTuple:
        if nextEntry == "PARENT":
            # Add current symbol at this location
            pieces.append(nodeString)
        elif nextEntry == "CHILD":
            pieces.append(translate(lg, nextChildId, segPrimMap, edgeMap,
                    symbolMap, structureMap))
        else:
            pieces.append(nextEntry)
    return ''.join(pieces)
def translate(lg, segId, segPrimMap, edgeMap, symbolMap, structureMap):
    """Recursively create output for an expression at the object level.

    The object's label is read from lg.nlabels for one primitive of the
    segment and mapped through symbolMap ('OBJ_DEFAULT' is used, with an
    error on stderr, when the label is unknown). In the mapped string,
    '_I_' is replaced by the quoted, ':'-terminated list of the segment's
    sorted primitive ids, and for the default mapping '_L_' by the label.

    If segId has children in edgeMap, they are combined in this order:
      1. a structure pattern covering all non-horizontal relations;
      2. otherwise a structure pattern covering the relations set aside
         in noSubSupPairs, followed by the remaining ones one at a time;
      3. otherwise every non-horizontal relation one at a time;
    and finally all horizontal ('HOR', 'R', 'Right') children.

    Args:
        lg: label graph providing nlabels and elabels.
        segId: segment id of the object to translate.
        segPrimMap: segment id -> tuple whose first item holds primitive ids.
        edgeMap: segment id -> list of child segment ids.
        symbolMap: symbol mapping (label -> replacement string).
        structureMap: structure/relation mapping.

    Returns:
        The output string for the object and everything below it.
    """
    byValue = lambda pair: pair[1]

    oneSegPrimitive = list(segPrimMap[ segId ][0])[0]
    # NOTE(review): the sort is ascending, so index 0 is the label with the
    # smallest value -- confirm this is the intended choice of label.
    labelValuePairs = sorted(lg.nlabels[ oneSegPrimitive ].items(), key=byValue)
    label = labelValuePairs[0][0]

    # Create label identifying primitives in the object.
    primListString = ''.join(primitiveId + ':'
            for primitiveId in sorted(segPrimMap[ segId ][0]))
    quotedIds = '\"' + primListString + '\"'

    if label in symbolMap:
        nodeString = symbolMap[ label ].replace('_I_', quotedIds)
    else:
        # Treat all unknowns uniformly.
        nodeString = symbolMap[ 'OBJ_DEFAULT' ].replace('_I_',
                quotedIds).replace('_L_', label)
        sys.stderr.write(" !! Error: Unknown object label " + label + "\n")

    if segId not in edgeMap:
        return nodeString

    # This node has children - lookup replacement based on sorted labels
    # for edges to child nodes.
    nodeRelationPairs = []
    horRelation = []
    noSubSupPairs = []
    subSupPairs = []
    for nextChildId in edgeMap[ segId ]:
        # Pick the edge label (same ascending-sort caveat as for the
        # object label above).
        childPrimitive = list(segPrimMap[ nextChildId ][0])[0]
        edgeLabels = lg.elabels[ (oneSegPrimitive, childPrimitive) ]
        relation = sorted(edgeLabels.items(), key=byValue)[0][0]

        # Horizontal adjacency is handled last, on its own.
        if relation in ('HOR', 'R', 'Right'):
            horRelation.append( (nextChildId, relation) )
            continue

        # Every non-horizontal relation is a candidate for a structure.
        nodeRelationPairs.append( (nextChildId, relation) )

        # Add missing "Sub" "Sup" labels for CROHME 2013.
        # Undefined labels (other than 'I'/'Inside') are grouped with the
        # sub/superscripts, which binds them before any horizontal
        # adjacency relationship.
        isSubSup = relation in ('SUB', 'SUP', 'Sub', 'Sup')
        isUndefined = (relation not in structureMap
                and relation != 'I' and relation != 'Inside')
        if isSubSup or isUndefined:
            subSupPairs.append( (nextChildId, relation) )
        else:
            noSubSupPairs.append( (nextChildId, relation) )

    # CASE 1: all relations other than HOR/R are in a structure.
    strString = translateStructure(lg, label, nodeRelationPairs, structureMap,
            segPrimMap, edgeMap, symbolMap, segId, nodeString)
    if not strString == "":
        nodeString = strString
    else:
        # CASE 2: only non-SUP/SUB relations are in a structure.
        strString = translateStructure(lg, label, noSubSupPairs, structureMap,
                segPrimMap, edgeMap, symbolMap, segId, nodeString)
        if not strString == "":
            nodeString = strString
            for (nextChildId, relation) in sorted(subSupPairs, key=byValue):
                nodeString = translateRelation(lg, relation, nextChildId,
                        structureMap, segPrimMap, edgeMap, symbolMap, nodeString)
        else:
            # DEFAULT: map relations independently.
            for (nextChildId, relation) in sorted(nodeRelationPairs, key=byValue):
                nodeString = translateRelation(lg, relation, nextChildId,
                        structureMap, segPrimMap, edgeMap, symbolMap, nodeString)

    # Lastly, generate string for adjacent symbols on the baseline.
    # **if there are multiple 'HOR' symbols all will be mapped.
    for (child, relation) in horRelation:
        nodeString = translateRelation(lg, relation, child,
                structureMap, segPrimMap, edgeMap, symbolMap, nodeString)

    return nodeString
def main():
    """Command-line entry point: print the translation of a label graph.

    Reads the label graph named by sys.argv[1] and, when given, the symbol
    and structure map file named by sys.argv[2]. One translated expression
    is printed per root node. With no arguments a usage message is printed.
    Exits with status 1 if the map file cannot be read or the graph has no
    root node.
    """
    if len(sys.argv) < 2:
        print("Usage: [[python]] lg2txt.py <infile.lg> [mapfile.csv]")
        print("")
        print(" Produces a text file for label graph file")
        print(" <infile.lg>. A symbol and structure map file (mapfile.csv)")
        print(" may be provided to override default (latex) mappings.")
        return

    lg = Lg(sys.argv[1])

    # Hide the unlabeled edges.
    lg.hideUnlabeledEdges()

    (segmentPrimitiveMap, primitiveSegmentMap, noparentSegments, segmentEdges) = \
            lg.segmentGraph()
    (rootNodes, treeEdges, otherEdges) = lg.separateTreeEdges()

    # Default symbol and structure mappings.
    # NOTE(review): the usage text mentions default (latex) mappings, but
    # none are defined here -- confirm whether entries are missing.
    symbolMap = { }
    structureMap = { }

    # Replace the default maps when a map file is given.
    if len(sys.argv) > 2:
        maps = readMapFile(sys.argv[2])
        if maps is None:
            # readMapFile has already reported the problem on stderr.
            sys.exit(1)
        (symbolMap, structureMap) = maps

    # Create a map from nodes to child nodes, in order to be able to
    # detect structures such as fractions, etc.
    treeEdgeMap = {}
    for (parent, child) in treeEdges:
        treeEdgeMap.setdefault(parent, []).append(child)

    # NOTE: currently this will print out more than one expression on
    # separate lines if a graph has multiple root nodes.
    # Exit if there is no root node, generate a list of TeX expressions if
    # there are multiple root nodes.
    if len(rootNodes) < 1:
        sys.stderr.write(" !! Error: graph contains no root node; cannot generate output.\n")
        sys.exit(1)
    elif len(rootNodes) > 1:
        sys.stderr.write(" !! Graph contains " + str(len(rootNodes)) + " root nodes.\n")

    for root in rootNodes:
        print(translate(lg, root, segmentPrimitiveMap, treeEdgeMap,
                symbolMap, structureMap))
# Run the command-line interface only when executed as a script, so that
# importing this module does not parse sys.argv or print anything.
if __name__ == "__main__":
    main()