diff --git a/bin/evaluate b/bin/evaluate index 1765e42c73734554331b9d574ab18f67932ca91e..b2f20d01dc6f3375019de3451b1d88d9174961df 100755 --- a/bin/evaluate +++ b/bin/evaluate @@ -35,7 +35,6 @@ then echo " labelsGT.txt: list of node and edge labels in ground truth" echo " labelsOutput.txt: list of node and edge labels in output files" echo "" - echo " Metrics/: directory with .csv (metric) and .diff (difference) files" echo " graphErrors/: if dot output requested, visualizations for files with" echo -e "\t\t errors are stored here (.dot and .pdf[default] or .png or both as specified)." echo "" @@ -53,6 +52,10 @@ FORMAT="pdf" TARGETS="" TARGET_COUNT=0 OUTPUTS="" +NL=$'\n' + +OUTCOME_LIST="" +ResultsDir=Results_$BNAME ################################################################ # Compile the list of output files and ground truth files. @@ -61,6 +64,8 @@ OUTPUTS="" # output files are ignored. ################################################################ +echo "[ LgEval evaluate ]" + # Case 1: Passed a list of file pairs if ! [ -d $1 ] then @@ -107,13 +112,13 @@ else FORMAT=$4 fi fi +echo "* LgEval Results Directory: $ResultsDir" TARGET_COUNT=$((`echo $TARGETS | wc -w`)) ################################################################ # Create output directory structure, compile class labels ################################################################ -ResultsDir=Results_$BNAME if ! [ -d $ResultsDir ] then mkdir $ResultsDir @@ -136,14 +141,13 @@ then fi fi - # Compile labels from ground truth. This is needed for confusion matrices to # be properly defined, and for sanity checking results. 
-echo "$TARGETS" > $ResultsDir/tfileTarget -python3 $LgEvalDir/src/compileLabels.py "$ResultsDir/tfileTarget" > "$ResultsDir/labelsGT.txt" -echo "$OUTPUTS" > $ResultsDir/tfileTarget -python3 $LgEvalDir/src/compileLabels.py "$ResultsDir/tfileTarget" > "$ResultsDir/labelsOutput.txt" -rm $ResultsDir/tfileTarget +echo "$TARGETS" > $ResultsDir/temp_file_list +python3 $LgEvalDir/src/compileLabels.py "$ResultsDir/temp_file_list" > "$ResultsDir/labelsGT.txt" +echo "$OUTPUTS" > $ResultsDir/temp_file_list +python3 $LgEvalDir/src/compileLabels.py "$ResultsDir/temp_file_list" > "$ResultsDir/labelsOutput.txt" +rm $ResultsDir/temp_file_list ################################################################ @@ -174,23 +178,20 @@ do # NOTE: the script convertCrohmeLg can be used to convert # crohme .inkml files to .lg files. CORRECT="Correct" - #echo -ne " >> Comparing $FNAME.lg" - - # RZ: Repairing to avoid running evaluation twice. - python3 $LgEvalDir/src/evallg.py $nextFile $file INTER > $ResultsDir/Metrics/$FNAME.csv - METRICS=`grep -v "\*" $ResultsDir/Metrics/$FNAME.csv` - echo $METRICS > $ResultsDir/Metrics/$FNAME.csv - DIFF=`grep "\*" $ResultsDir/Metrics/$FNAME.csv` - #echo "$METRICS" - #read V - #echo "$DIFF" - #read V - - # If differences reported, record them - if [ -n "$DIFF" ] + + # RZ: Run evaluation once vs. twice + OUT=`python3 $LgEvalDir/src/evallg.py $nextFile $file INTER` + + # Select lines containing an asterisk (pattern is not anchored) as differences/errors + # WARNING: Double quotes are important to preserve newlines! + DIFF=`echo "$OUT" | grep "\*"` + echo "$DIFF" > $ResultsDir/Metrics/$FNAME.diff + echo "$OUT" | grep -v "\*" > $ResultsDir/Metrics/$FNAME.csv + + # If differences reported, record files with errors, generate visualizations + if [ "$DIFF" != "" ] then CORRECT="Incorrect" - echo "$DIFF" > $ResultsDir/Metrics/$FNAME.diff # If a third argument is provided, generate a .pdf file to visualize # differences between graphs. 
@@ -202,6 +203,7 @@ do else lg2dot $nextFile $file --graph_type "$DOTARG" --format $FORMAT fi + mv $FNAME.dot $ResultsDir/errorGraphs/dot if [ "$FORMAT" == "pdf" ]; then mv $FNAME.pdf $ResultsDir/errorGraphs/pdf @@ -212,14 +214,12 @@ do mv $FNAME.png $ResultsDir/errorGraphs/png fi fi - else - rm -f $ResultsDir/Metrics/$FNAME.diff fi # Add record of evaluating the file. - echo "$nextFile, $CORRECT" >> $ResultsDir/FileResults.csv + OUTCOME_LIST="${OUTCOME_LIST:+$OUTCOME_LIST$NL}$nextFile, $CORRECT" else - echo " Already processed: $file" + echo " * Already processed: $file" fi INDEX=$((INDEX+1)) @@ -231,9 +231,12 @@ done ################################################################ # Compile metrics # Including summaries and confusion matrices +# +# Stored as individual files to prevent re-computation for user ################################################################ - +echo "$OUTCOME_LIST" >> $ResultsDir/FileResults.csv cat $ResultsDir/Metrics/*.csv > $ResultsDir/$BNAME.csv + ALLDIFFS=`ls $ResultsDir/Metrics | grep .diff` if [ -n "$ALLDIFFS" ] then @@ -243,6 +246,8 @@ else touch $ResultsDir/$BNAME.diff # empty - no errors. fi + +# Compute summaries python3 $LgEvalDir/src/sumMetric.py "$LABEL_STRING" $ResultsDir/$BNAME.csv > $ResultsDir/Summary.txt python3 $LgEvalDir/src/sumDiff.py $ResultsDir/$BNAME.diff $ResultsDir/labelsGT.txt html > $ResultsDir/ConfusionMatrices.html python3 $LgEvalDir/src/sumDiff.py $ResultsDir/$BNAME.diff $ResultsDir/labelsGT.txt > $ResultsDir/ConfusionMatrices.csv @@ -267,11 +272,23 @@ awk -F',' '{ for (i=2;i<=NF;i+=2) printf ("%s%c", $i, i + 2 <= NF ? 
"," : "\n")} paste -d , $ResultsDir/FileResults.csv $ResultsDir/Data.csv > $ResultsDir/DataNew.csv cat $ResultsDir/HeaderRow.csv $ResultsDir/DataNew.csv > $ResultsDir/FileMetrics.csv -# Clean up +################################## +# Clean up +################################## rm -f $ResultsDir/Headers.csv $ResultsDir/HeaderRow.csv $ResultsDir/Data.csv rm -f $ResultsDir/DataNew.csv $ResultsDir/FileResults.csv rm -f $ResultsDir/$BNAME.csv $ResultsDir/$BNAME.diff echo "" echo "done." +echo "" +echo "$ResultsDir/ contents:" +echo " Summary.txt -- Evaluation metrics summary" +echo " FileMetrics.csv -- Raw metrics file" +echo " labelsOutput.txt -- Node & edge labels in output files" +echo " labelsGT.txt -- Node & edge labels in ground truth files" +echo " ConfusionMatrices.html -- Readable web page with confusion matrices (HTML)" +echo " ConfusionMatrices.csv -- Confusion matrix (CSV format)" +echo " Metrics/ -- Individual file metrics (.csv) & differences (.diff)" +echo ""