Commit d060e2b4 authored by rlaz

Cleaning up scripts evaluate + confHist

parent 7be31d33
@@ -7,11 +7,13 @@ usage()
echo -e "\t\t[-p|--dotpdfDir <directory>] [-h|--help]"
echo ""
echo "------- Required Arguments -------"
echo "Note: Use either the output and target directories, or the fileList"
echo ""
echo "output_dir Output lg files directory"
echo "target_dir Ground truth lg files directory"
echo "fileList File whose each line contains outputfile_path targetfile_path"
echo -e "\t\t\t\t\tis used for comparison."
echo "Note: Use either the 2 directories or the fileList"
echo ""
echo -e "-gs or --graphSize <value> \t\tThe number of objects/primitives in targets to analyze"
echo ""
echo "------- Optional Arguments -------"
@@ -32,22 +34,28 @@ usage()
if [ $# -eq 0 ]
then
echo "LgEval confHist: Structure Confusion Histogram Generator"
echo "Copyright (c) R. Zanibbi, H. Mouchere, 2013-2014"
echo "Copyright (c) R. Zanibbi, H. Mouchere, A.K. Shah 2013-2022"
echo ""
echo "Usage: confHist (output_dir target_dir) | fileList -gs|--graphSize <value>"
echo -e "\t\t[-m|--minCount <value>] [-s|--strokes] [-i|--lgimgDir <directory>]"
echo "Usage: confHist (output_dir target_dir) | fileList"
echo " -gs|--graphSize <value> -m|--minCount <value>] [-s|--strokes]"
echo " [-i|--lgimgDir <directory>]"
# echo -e "\t\t[-p|--dotpdfDir <directory>] [--split] [--filter] [-h|--help]"
echo -e "\t\t[-p|--dotpdfDir <directory>] [-h|--help]"
echo " [-p|--dotpdfDir <directory>] [-h|--help]"
echo ""
echo "For details on arguments usage: confHist -h or confHist --help"
echo ""
echo "Creates an .html file containing structure confusion histograms"
echo "at the object level. The histograms visualize errors by their"
echo "frequency when comparing files in output_dir vs. target_dir (target_dir is 'ground truth')."
echo "Creates an .html file containing structure confusion histograms at the object level."
echo "The histograms visualize errors by their frequency when comparing files in output_dir"
echo "vs. target_dir (target_dir is 'ground truth')."
echo ""
echo "It is assumed that every .lg file in output_dir exists in target_dir, and a file"
echo "output_dir_vs_target_dir is created as output."
echo ""
echo "Output is written to the file confHist_outputs/CH_<output_dir_vs_target_dir__size_<graphSize>_min_<minCount>.html"
echo "or confHist_outputs/CH_<fileList__size_<graphSize>_min_<minCount>.html, depending upon the arguments used."
echo "Output is written to the file:"
echo " * confHist_outputs/CH_<output_dir_vs_target_dir__size_<graphSize>_min_<minCount>.html *OR*"
echo " * confHist_outputs/CH_<fileList__size_<graphSize>_min_<minCount>.html"
echo ""
echo "depending upon the arguments used."
exit 0
fi
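
For illustration only (not part of this commit), a minimal sketch of the two invocation forms described by the usage text above; directory and file names are hypothetical:

# Hypothetical paths; both forms follow the usage line printed above.
confHist myOutputs groundTruth -gs 2 -m 5              # compare two directories of .lg files
confHist pairs.txt --graphSize 3 --strokes -i lgImgs   # compare the file pairs listed in pairs.txt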
@@ -134,7 +142,7 @@ then
ls $output_dir/*.lg > _f1
ls $target_dir/*.lg > _f2
L1=`wc -l _f1 | awk '{print $1}'`
L1=`wc -l _f1 | awk '{print $1}'`
L2=`wc -l _f2 | awk '{print $1}'`
if [ "$L1" != "$L2" ]
then
@@ -160,4 +168,3 @@ else
--dotpdfDir $DOTPDF_DIR --split $SPLIT --filter $FILTER
fi
exit 0
@@ -13,7 +13,7 @@
if [ $# -lt 1 ]
then
echo "LgEval evaluate: Label graph evaluation tool"
echo "Copyright (c) R. Zanibbi, H. Mouchere, 2012-2014"
echo "Copyright (c) R. Zanibbi, H. Mouchere, M. Mahdavi, A.K. Shah 2012-2022"
echo ""
echo "Usage: evaluate outputDir groundTruthDir [p/t/d/s/b] [png/pdf/both] OR"
echo " evaluate fileList [p/t/d/s/b] [png/pdf/both]"
@@ -51,17 +51,30 @@ BNAME=`basename $1`
MODE="Dir"
FORMAT="pdf"
TARGETS=""
TARGET_COUNT=0
OUTPUTS=""
################################################################
# Compile the list of output files and ground truth files.
#
# NOTE: Ground truth files define the evaluation set, extra
# output files are ignored.
################################################################
# Case 1: Passed a list of file pairs
if ! [ -d $1 ]
then
MODE="List"
LABEL_STRING="List File: $1"
echo "$LABEL_STRING"
MODE="List"
# Get the output and target file paths from the list file
OUTPUTS=`awk '{ print $1; }' $1`
OUTARR=($OUTPUTS)
TARGETS=`awk '{ print $2; }' $1`
# Grab additional flags
if [ $# -gt 1 ]
then
DOTARG=$2
@@ -70,35 +83,46 @@ then
then
FORMAT=$3
fi
# Case 2: Passed a pair of directories
else
# Peculiar '$<string>' styntax is to preserve the newline.
OUT_STRING="Output File Directory: $1"
GT_STRING="Ground Truth Directory: $2"
# Peculiar '$<string>' syntax is to preserve the newline.
LABEL_STRING=$(printf '%s\n%s' "$OUT_STRING" "$GT_STRING")
echo "$LABEL_STRING"
OUTPUTS=`ls $1/*.lg`
TARGETS=`ls $2/*.lg`
# Grab additional flags
if [ $# -gt 2 ]
then
DOTARG=$3
fi
# RZ: Debug -- output type ignored
if [ $# -gt 3 ]
then
FORMAT=$4
fi
fi
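
For clarity (not part of the commit), the awk calls in Case 1 imply a list file with two whitespace-separated columns per line, e.g. a hypothetical pairs.txt:

# pairs.txt (hypothetical): column 1 = output .lg file, column 2 = ground-truth .lg file
myOutputs/formula001.lg   groundTruth/formula001.lg
myOutputs/formula002.lg   groundTruth/formula002.lg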
echo ""
TARGET_COUNT=$((`echo $TARGETS | wc -w`))
################################################################
# Create output directory structure, compile class labels
################################################################
ResultsDir=Results_$BNAME
if ! [ -d $ResultsDir ]
then
mkdir $ResultsDir
mkdir $ResultsDir/Metrics
# Create directories for dot error visualizations
if [ "$DOTARG" != "" ]
then
if [ $# -gt 3 ]
then
FORMAT=$4
fi
# RZ Debug: deleting FORMAT assignment (done above)
mkdir $ResultsDir/errorGraphs
mkdir $ResultsDir/errorGraphs/dot
if [ "$FORMAT" == "pdf" ]; then
@@ -113,22 +137,28 @@ then
fi
# Compute all .csv metrics outputs (per-file), and .diff results (per-file).
echo "Evaluating files..."
# Compile labels from ground truth. This is needed for confusion matrices to
# be properly defined.
# be properly defined, and for sanity checking results.
echo "$TARGETS" > $ResultsDir/tfileTarget
python3 $LgEvalDir/src/compileLabels.py "$ResultsDir/tfileTarget" > "$ResultsDir/labelsGT.txt"
# Do the same for outputs. This can come in handy in many places.
echo "$OUTPUTS" > $ResultsDir/tfileTarget
python3 $LgEvalDir/src/compileLabels.py "$ResultsDir/tfileTarget" > "$ResultsDir/labelsOutput.txt"
rm $ResultsDir/tfileTarget
################################################################
# Evaluate files
################################################################
# Compute all .csv metrics outputs (per-file), and .diff results (per-file).
echo ""
echo "Evaluating..."
# Iterate over ground truth files
INDEX=0
for file in $TARGETS
do
FNAME=`basename $file .lg`
nextFile="_ERROR_"
if [ $MODE == "Dir" ]
@@ -143,13 +173,23 @@ do
then
# NOTE: the script convertCrohmeLg can be used to convert
# crohme .inkml files to .lg files.
echo " >> Comparing $FNAME.lg"
python3 $LgEvalDir/src/evallg.py $nextFile $file m INTER > $ResultsDir/Metrics/$FNAME.csv
DIFF=`python3 $LgEvalDir/src/evallg.py $nextFile $file diff INTER`
CORRECT="Correct"
#echo -ne " >> Comparing $FNAME.lg"
# RZ: Repairing to avoid running evaluation twice.
python3 $LgEvalDir/src/evallg.py $nextFile $file INTER > $ResultsDir/Metrics/$FNAME.csv
METRICS=`grep -v "\*" $ResultsDir/Metrics/$FNAME.csv`
# Extract diff records ('*' lines) before the .csv is overwritten with metrics only.
DIFF=`grep "\*" $ResultsDir/Metrics/$FNAME.csv`
echo $METRICS > $ResultsDir/Metrics/$FNAME.csv
#echo "$METRICS"
#read V
#echo "$DIFF"
#read V
# If differences reported, record them
if [ -n "$DIFF" ]
then
CORRECT="Incorrect"
echo "$DIFF" > $ResultsDir/Metrics/$FNAME.diff
# If a third argument is provided, generate a .pdf file to visualize
@@ -172,8 +212,6 @@ do
mv $FNAME.png $ResultsDir/errorGraphs/png
fi
fi
CORRECT="Incorrect"
else
rm -f $ResultsDir/Metrics/$FNAME.diff
fi
@@ -185,10 +223,16 @@ do
fi
INDEX=$((INDEX+1))
PERCENT=`echo "scale=1; 100 * $INDEX / $TARGET_COUNT" | bc`
echo -ne " $PERCENT% complete ($INDEX of $TARGET_COUNT)\r"
done
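
A worked example (not from the commit) of the progress line computed at the end of the loop: with INDEX=37 and TARGET_COUNT=120, bc truncates 100 * 37 / 120 to one decimal place:

PERCENT=`echo "scale=1; 100 * 37 / 120" | bc`   # -> 30.8
echo -ne " $PERCENT% complete (37 of 120)\r"    # prints "30.8% complete (37 of 120)"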
# Compile all metrics/diffs,
# and then compute metric summaries and confusion matrices.
################################################################
# Compile metrics
# Including summaries and confusion matrices
################################################################
cat $ResultsDir/Metrics/*.csv > $ResultsDir/$BNAME.csv
ALLDIFFS=`ls $ResultsDir/Metrics | grep .diff`
if [ -n "$ALLDIFFS" ]
@@ -203,6 +247,10 @@ python3 $LgEvalDir/src/sumMetric.py "$LABEL_STRING" $ResultsDir/$BNAME.csv > $Re
python3 $LgEvalDir/src/sumDiff.py $ResultsDir/$BNAME.diff $ResultsDir/labelsGT.txt html > $ResultsDir/ConfusionMatrices.html
python3 $LgEvalDir/src/sumDiff.py $ResultsDir/$BNAME.diff $ResultsDir/labelsGT.txt > $ResultsDir/ConfusionMatrices.csv
################################################################
# Create spreadsheet
################################################################
# RZ Oct. 2014: Create spreadsheet pairing file names with metrics.
# Clean up raw metric data to make the file smaller and simpler.
# Use awk and head to select every odd (headers) and even (data) columns,
@@ -213,7 +261,6 @@ awk -F',' '{ for (i=1;i<=NF;i+=2) printf ("%s%c", $i, i + 2 <= NF ? "," : "\n")}
head -n 1 $ResultsDir/Headers.csv > $ResultsDir/HeaderRow.csv
HEAD=`cat $ResultsDir/HeaderRow.csv`
echo "File,Result,$HEAD" > $ResultsDir/HeaderRow.csv
awk -F',' '{ for (i=2;i<=NF;i+=2) printf ("%s%c", $i, i + 2 <= NF ? "," : "\n")}' $ResultsDir/$BNAME.csv > $ResultsDir/Data.csv
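
To illustrate (not part of the commit) the odd/even column selection used above, applied to a hypothetical one-line metrics row:

# Odd columns are header names, even columns are values (row contents are hypothetical).
echo "Objects,10,Relations,4,Files,1" | awk -F',' '{ for (i=1;i<=NF;i+=2) printf ("%s%c", $i, i + 2 <= NF ? "," : "\n")}'   # -> Objects,Relations,Files
echo "Objects,10,Relations,4,Files,1" | awk -F',' '{ for (i=2;i<=NF;i+=2) printf ("%s%c", $i, i + 2 <= NF ? "," : "\n")}'   # -> 10,4,1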
# Combine file names with raw data metrics, then add header labels.
@@ -223,10 +270,8 @@ cat $ResultsDir/HeaderRow.csv $ResultsDir/DataNew.csv > $ResultsDir/FileMetrics.
# Clean up
rm -f $ResultsDir/Headers.csv $ResultsDir/HeaderRow.csv $ResultsDir/Data.csv
rm -f $ResultsDir/DataNew.csv $ResultsDir/FileResults.csv
# Remove the compiled metrics and differences, but leave the individual metric/diff
# files in Metrics to support debugging for malformed or missing files, etc.
rm -f $ResultsDir/$BNAME.csv $ResultsDir/$BNAME.diff
echo ""
echo "done."
@@ -37,6 +37,7 @@ def main(
pdf_count = 0
if os.path.exists(dotpdf_dir):
pdf_count = len(glob(os.path.join(dotpdf_dir, "*.pdf")))
if pdf_count == 0:
dotpdf_dir = "confHist_outputs/dotpdfs"
print(
@@ -46,6 +47,7 @@ def main(
)
if not os.path.exists(dotpdf_dir):
os.makedirs(dotpdf_dir)
for row in fileReader:
# Skip comments and empty lines.
if not row == [] and not row[0].strip()[0] == "#":
@@ -128,7 +130,7 @@ def main(
'<script src="https://cdnjs.cloudflare.com/ajax/libs/FileSaver.js/1.0.0/FileSaver.min.js"></script>\n'
)
# (Excuse the messs..) create callbacks for checkbox events, save button
# (Excuse the mess..) create callbacks for checkbox events, save button
# which saves the unique list of selected files in sorted order.
# This was a slow, painful way to do this - perhaps an 'include' would be better.
JS_DIR = os.path.join(fileList_head, "js")
@@ -362,10 +364,7 @@ def parse_args():
if __name__ == "__main__":
args = parse_args()
# print(args)
# img_dir = '../../../../data/test2019_inkml2img' # 3branch...
# img_dir = '../../../../data/infty/IMG' # infty_contour...
# img_dir = '../../../Data/Expressions/IMG' # lpga_rf...
img_dir = os.path.join("../..", args.lgimgDir)
main(
args.fileList,
......