Nantes Université

Skip to content
Extraits de code Groupes Projets
Valider 599aba32 rédigé par ayushkumarshah's avatar ayushkumarshah
Parcourir les fichiers

Fix lg to mathml conversion

Remove redundant parent <mrow> tags
Update confHist argument description
parent 04e49ab9
Aucune branche associée trouvée
Aucune étiquette associée trouvée
Aucune requête de fusion associée trouvée
......@@ -46,8 +46,8 @@ then
echo "It is assumed that every .lg file in output_dir exists in target_dir, and a file"
echo "output_dir_vs_target_dir is created as output."
echo ""
echo "Output is written to the file confHist_outputs/CH_<output_dir_vs_target_dir>.html"
echo "or confHist_outputs/CH_<fileList>.html, depending upon the arguments used."
echo "Output is written to the file confHist_outputs/CH_<output_dir_vs_target_dir__size_<graphSize>_min_<minCount>.html"
echo "or confHist_outputs/CH_<fileList__size_<graphSize>_min_<minCount>.html, depending upon the arguments used."
exit 0
fi
......
......@@ -13,6 +13,7 @@
import sys
import csv
from bs4 import BeautifulSoup
from lg import *
def readMapFile(fileName):
......@@ -271,30 +272,45 @@ def translate(lg, segId, segPrimMap, edgeMap, symbolMap, structureMap):
return nodeString
def _tag_name(markup):
    """Return the element name that starts *markup* (the text after ``<`` or ``</``)."""
    end = 0
    while end < len(markup) and markup[end] not in " \t/>":
        end += 1
    return markup[:end]


def postprocess(mml_out):
    """Pretty-print a MathML string with one tag per line and tab indentation.

    Line breaks are inserted between adjacent tags and before every closing
    tag, except that the closing tags of the token elements ``mi``, ``mo``
    and ``mn`` stay on the same line as their opening tag and content.
    Container elements indent their children by one tab per nesting level.

    Tag names are compared exactly, so elements whose names merely start
    with a token name (e.g. ``<mover>`` vs. ``<mo>``) are indented as
    containers, and the depth counter stays balanced for self-closing tags
    and for token closing tags that end up on their own line.

    Args:
        mml_out: MathML markup as a single string.

    Returns:
        The re-indented markup. Empty lines are removed, and lines that start
        with neither ``<m`` nor ``</`` are discarded.
    """
    # Token elements: kept on one line and never change the indentation depth.
    leaf_tags = ("mi", "mo", "mn")

    # Add line breaks between tags and before every closing tag ...
    mml_out = mml_out.replace("><", ">\n<")
    mml_out = mml_out.replace("</", "\n</")
    # ... then glue token closing tags back onto their content. The trailing
    # ">" keeps "</mover>" from being mistaken for "</mo>".
    for tag in leaf_tags:
        closing = "</" + tag + ">"
        mml_out = mml_out.replace("\n" + closing, closing)

    # Current nesting depth, counted in tabs.
    depth = 0
    out = []
    for line in mml_out.split("\n"):
        if not line:
            continue
        if line.startswith("<m"):
            out.append(depth * "\t" + line)
            is_leaf = _tag_name(line[1:]) in leaf_tags
            # "<mspace/>" style tags have no closing tag to undo an indent.
            is_self_closing = line.partition(">")[0].endswith("/")
            if not (is_leaf or is_self_closing):
                depth += 1
        elif line.startswith("</"):
            # A token closing tag can land on its own line (e.g. "<mi></mi>");
            # its opening tag never indented, so it must not dedent either.
            if _tag_name(line[2:]) not in leaf_tags:
                depth = max(depth - 1, 0)
            out.append(depth * "\t" + line)
    return "\n".join(out)
def cleanRows(mmlFile):
    """Remove redundant nested <mrow> wrappers and return prettified markup.

    The input is parsed with BeautifulSoup's 'html.parser' backend. Every
    <mrow> element whose direct parent is also an <mrow> is unwrapped, i.e.
    replaced by its own children. All other <mrow> elements are left in
    place; in particular, rows are not unwrapped merely for having fewer
    than two children (that condition was dropped to avoid unexpected cases).

    mmlFile: the MathML markup to clean (whatever BeautifulSoup accepts as
             its first argument; the visible caller passes a string).
    Returns the result of prettify() on the modified tree.
    """
    soup = BeautifulSoup(mmlFile, 'html.parser')
    for row in soup.find_all('mrow'):
        # Only a row sitting directly inside another row is redundant.
        if row.parent.name != "mrow":
            continue
        row.unwrap()
    return soup.prettify()
# def postprocess(mml_out):
# # Add linebreaks for tags except for mi, mo, mn
# mml_out = mml_out.replace("><", ">\n<")
# mml_out = mml_out.replace("</", "\n</")
# mml_out = mml_out.replace("\n</mi", "</mi")
# mml_out = mml_out.replace("\n</mo", "</mo")
# mml_out = mml_out.replace("\n</mn", "</mn")
# # tags with no indentation
# constant_tags = ["mi", "mo", "mn"]
# # Counts number of tab spaces
# tab = 0
# out = []
# for line in mml_out.split("\n"):
# if not line:
# continue
# # if re.match(r"<\w", line):
# if line.startswith("<m"):
# out.append(tab * "\t" + line)
# if not line[1:3] in constant_tags:
# tab += 1
# elif line.startswith("</"):
# tab -= 1
# out.append(tab * "\t" + line)
# return "\n".join(out)
def main(lg_file, mapFile):
......@@ -340,7 +356,8 @@ def main(lg_file, mapFile):
# symbolMap, structureMap))
mml_out_raw.append(translate(lg, root, segmentPrimitiveMap,
treeEdgeMap, symbolMap, structureMap))
mml_out = postprocess("\n".join(mml_out_raw))
# mml_out = postprocess("\n".join(mml_out_raw))
mml_out = cleanRows("\n".join(mml_out_raw))
return mml_out
if __name__ == '__main__':
......
0% Chargement en cours ou .
You are about to add 0 people to the discussion. Proceed with caution.
Terminez d'abord l'édition de ce message.
Veuillez vous inscrire ou vous connecter pour commenter