Nantes Université

Skip to content
Extraits de code Groupes Projets
evaluate 9,42 ko
Newer Older
Richard Zanibbi's avatar
Richard Zanibbi a validé
#!/bin/bash

# Make sure that CROHMELibDir and LgEvalDir are defined in
# your shell environment, e.g. by including:
#	
#	export LgEvalDir=<path_to_LgEval>
#	export CROHMELibDir=<path_to_CROHMELib>       		
#	export PATH=$PATH:$CROHMELibDir/bin:$LgEvalDir/bin
# 
# in your .bashrc file (the initialization file for bash shell). The PATH
# alteration will add the tools to your search path. 

Richard Zanibbi's avatar
Richard Zanibbi a validé
then
	# Usage / help text. Non-shell residue lines that had been interleaved with
	# the echo statements are removed, and the parenthesis opened by "(i.e." is
	# now closed.
	echo "LgEval evaluate: Label graph evaluation tool"
	echo "Copyright (c) R. Zanibbi, H. Mouchere, M. Mahdavi, A.K. Shah 2012-2022"
	echo ""
	echo "Usage: evaluate outputDir groundTruthDir [p/t/d/s/b] [png/pdf/both] OR"
	echo "       evaluate fileList [p/t/d/s/b] [png/pdf/both]"
	echo ""
	echo "Evaluates label graph (.lg) files in outputDir against the same files"
	echo "in groundTruthDir. groundTruthDir defines the list of files to be compared"
	echo "(i.e. if a file is not in the ground truth directory, it is ignored)."
	echo ""
	echo "If a list of file pairs is provided ('output target' provided on each line)"
	echo "then these file pairs are used for evaluation."
	echo ""
	echo "The final optional arguments define the graph type to use in visualizing"
	echo "errors, and their output format. Run 'lg2dot' for more on graph types."
Richard Zanibbi's avatar
Richard Zanibbi a validé
	exit 0
fi

Richard Zanibbi's avatar
Richard Zanibbi a validé
# Script-wide state.
#   BNAME           - last path component of the first argument (list file or
#                     output directory); names the results directory.
#   TARGET_COUNT    - number of ground truth files (filled in later).
#   NL              - a literal newline character.
#   OUTCOME_LIST    - accumulated "<file>, <result>" lines for Correct.csv.
#   ResultsDir      - directory that receives every output of this run.
#   MULTI_PASS_WARN - set to 1 once the "already processed" notice was shown.
# "$1" is quoted (and preceded by --) so that paths containing spaces or a
# leading dash are handled as a single operand.
BNAME=$(basename -- "$1")
TARGET_COUNT=0
NL=$'\n'

OUTCOME_LIST=""
ResultsDir=Results_$BNAME
MULTI_PASS_WARN=0

################################################################
# Compile the list of output files and ground truth files.
#
# NOTE: Ground truth files define the evaluation set, extra 
# output files are ignored.
################################################################

rlaz's avatar
rlaz a validé
# Start-of-run banner, surrounded by blank lines (stray non-shell residue
# lines between the echo statements removed).
echo ""
echo "[ LgEval evaluate ]"
echo ""
rlaz's avatar
rlaz a validé

# Case 1: Passed a list of file pairs
	# List mode: $1 names a text file with one "output target" pair per line.
	MODE="List"

	LABEL_STRING="List File: $1"
	echo "$LABEL_STRING"

	# Column 1 of each line is an output file, column 2 its target (ground
	# truth) file. The list is fed through a quoted redirection so a path with
	# spaces works, and an empty or missing path fails immediately instead of
	# leaving awk waiting on standard input.
	OUTPUTS=$(awk '{ print $1; }' < "$1")
	# Word splitting is intentional here: one array element per output file.
	OUTARR=($OUTPUTS)
	TARGETS=$(awk '{ print $2; }' < "$1")

	# Grab additional flags: in list mode the output format is argument 3.
	if [ $# -gt 2 ]
	then
		FORMAT=$3
	fi

# Case 2: Passed a pair of directories
	# Two-line run label: output directory ($1) first, ground truth
	# directory ($2) second.
	OUT_STRING="Output File Directory:  ${1}"
	GT_STRING="Ground Truth Directory: ${2}"
	# The command substitution keeps the newline embedded between both lines.
	LABEL_STRING=$(
		printf '%s\n' "${OUT_STRING}"
		printf '%s' "${GT_STRING}"
	)
	echo "${LABEL_STRING}"
Richard Zanibbi's avatar
Richard Zanibbi a validé

	# Grab additional flags

	# RZ: Debug -- output type ignored
	# In directory mode the output format is the fourth argument, when given.
	if (( $# > 3 )); then
		FORMAT=$4
	fi
rlaz's avatar
rlaz a validé
echo "* LgEval Results Directory: $ResultsDir"
# Number of whitespace-separated entries in TARGETS; the arithmetic expansion
# normalises whatever padding wc puts around the number.
TARGET_COUNT=$(( $(echo $TARGETS | wc -w) ))

rlaz's avatar
rlaz a validé

################################################################
# Create output directory structure, compile class labels
################################################################
if ! [ -d $ResultsDir ]
then
	mkdir $ResultsDir
	mkdir $ResultsDir/Metrics
	# Create directories for dot error visualizations
		# RZ Debug: deleting FORMAT assignment (done above)
		mkdir $ResultsDir/errorGraphs
		mkdir $ResultsDir/errorGraphs/dot
		# Create a sub-directory only for the requested rendering format(s);
		# any other FORMAT value creates nothing.
		case "$FORMAT" in
			pdf)
				mkdir $ResultsDir/errorGraphs/pdf
				;;
			png)
				mkdir $ResultsDir/errorGraphs/png
				;;
			both)
				mkdir $ResultsDir/errorGraphs/pdf
				mkdir $ResultsDir/errorGraphs/png
				;;
		esac
# Compile labels from ground truth. This is needed for confusion matrices to
# be properly defined, and for sanity checking results.
rlaz's avatar
rlaz a validé
# compileLabels.py takes a file that lists .lg files; the same scratch list
# file is used first for the ground truth files, then for the output files.
# All path expansions are quoted so a results directory or LgEval install path
# containing spaces or glob characters is passed as one argument.
printf '%s\n' "$TARGETS" > "$ResultsDir/temp_file_list"
python3 "$LgEvalDir/src/compileLabels.py" "$ResultsDir/temp_file_list" > "$ResultsDir/labelsGT.txt"
printf '%s\n' "$OUTPUTS" > "$ResultsDir/temp_file_list"
python3 "$LgEvalDir/src/compileLabels.py" "$ResultsDir/temp_file_list" > "$ResultsDir/labelsOutput.txt"
rm "$ResultsDir/temp_file_list"

################################################################
# Evaluate files
################################################################

# Compute all .csv metrics outputs (per-file), and .diff results (per-file).
echo ""
echo "Evaluating..."

# Iterate over ground truth files
Richard Zanibbi's avatar
Richard Zanibbi a validé
do
	# FNAME: ground truth file name without directory and without the .lg
	# suffix; it names every per-file result written for this file.
	FNAME=$(basename -- "$file" .lg)
	nextFile="_ERROR_"
	# MODE is quoted so an empty value compares as false instead of making
	# the test itself fail.
	if [ "$MODE" == "Dir" ]
	then
		# Output file of the same name inside the output directory ($1).
		# Each "//" is collapsed to "/" (e.g. when $1 ends with a slash);
		# done with parameter expansion, so no external process is needed.
		nextFile="$1/$FNAME.lg"
		nextFile=${nextFile//\/\///}
	else
		# Index to the next input file.
		nextFile=${OUTARR[INDEX]}
	fi

	if  [[ ! -e $ResultsDir/Metrics/$FNAME.csv ]]
Richard Zanibbi's avatar
Richard Zanibbi a validé
	then
		# NOTE: the script convertCrohmeLg can be used to convert
		#       crohme .inkml files to .lg files.
rlaz's avatar
rlaz a validé

		# RZ: Run evaluation once vs. twice
		# evallg.py is run a single time; its captured output is then split
		# into difference lines and metric lines.
		OUT=$(python3 $LgEvalDir/src/evallg.py $nextFile $file INTER)

		# Lines containing an asterisk are the differences/errors (the
		# pattern is not anchored to the start of the line); all other lines
		# are metrics.
		# WARNING: Double quotes are important to preserve newlines!
		# The .diff file is written before the .csv file, and the .csv file
		# is what marks a file as already processed.
		DIFF=$(echo "$OUT" | grep "\*")
		echo "$DIFF" > $ResultsDir/Metrics/$FNAME.diff
		echo "$OUT" | grep -v "\*" > $ResultsDir/Metrics/$FNAME.csv

		# If differences reported, record files with errors, generate visualizations
		if [ "$DIFF" != "" ]
Richard Zanibbi's avatar
Richard Zanibbi a validé
		then
			CORRECT="Incorrect"

			# If a third argument is provided, generate a .pdf file to visualize
			# differences between graphs.
				if [ "$DOTARG" == "d" ]
					lg2dot $nextFile $file --format $FORMAT
					lg2dot $nextFile $file --graph_type "$DOTARG" --format $FORMAT
rlaz's avatar
rlaz a validé

				# File the generated graph: the .dot source always, plus the
				# rendering(s) selected by FORMAT.
				mv $FNAME.dot $ResultsDir/errorGraphs/dot
				case "$FORMAT" in
					pdf)
						mv $FNAME.pdf $ResultsDir/errorGraphs/pdf
						;;
					png)
						mv $FNAME.png $ResultsDir/errorGraphs/png
						;;
					both)
						mv $FNAME.pdf $ResultsDir/errorGraphs/pdf
						mv $FNAME.png $ResultsDir/errorGraphs/png
						;;
				esac
Richard Zanibbi's avatar
Richard Zanibbi a validé
		fi
rlaz's avatar
rlaz a validé
		# Record whether file was correct or not, one "<file>, <result>" line
		# per evaluated file.
		# The first entry is detected by OUTCOME_LIST still being empty, not
		# by INDEX being 0: when the files at the start of the list were
		# already processed, the first recorded entry has a non-zero index
		# and would otherwise be preceded by an empty line.
		if [ -z "$OUTCOME_LIST" ]
		then
			OUTCOME_LIST="$nextFile, $CORRECT"
		else
			OUTCOME_LIST=$(printf "%s\n%s" "$OUTCOME_LIST" "$nextFile, $CORRECT")
		fi
	else
rlaz's avatar
rlaz a validé
		# Report a previously evaluated file once; MULTI_PASS_WARN then
		# silences the notice for all remaining files.
		if (( MULTI_PASS_WARN == 0 ))
		then
			MULTI_PASS_WARN=1
			echo "  * Already processed: $file"
			echo "    (message suppressed for other files)"
		fi
Richard Zanibbi's avatar
Richard Zanibbi a validé
	fi
	PERCENT=`echo "scale=1; 100 * $INDEX / $TARGET_COUNT" | bc`
	
rlaz's avatar
rlaz a validé
	# Redraw the in-place progress line (the trailing \r returns the cursor to
	# the start of the line). The modulus acts as the refresh interval; with a
	# value of 1 the condition holds for every file.
	if (( INDEX % 1 == 0 ))
	then
		echo -ne "  $PERCENT% complete ($INDEX of $TARGET_COUNT)\r"
	fi
Richard Zanibbi's avatar
Richard Zanibbi a validé
done
rlaz's avatar
rlaz a validé
echo -ne "  $PERCENT% complete ($INDEX of $TARGET_COUNT)\r"

Richard Zanibbi's avatar
Richard Zanibbi a validé

################################################################
rlaz's avatar
rlaz a validé
# Compile metrics 
# Including summaries and confusion matrices
rlaz's avatar
rlaz a validé
#
# Stored as individual files to prevent re-computation for user
################################################################
rlaz's avatar
rlaz a validé

# Append the outcomes of the files evaluated in this run to Correct.csv and
# keep that file sorted; then concatenate all per-file metric files into one.
# Stray non-shell residue lines are removed and every path is quoted.
if [ -n "$OUTCOME_LIST" ]
then
	# Need to avoid adding empty entries in Correct.csv, and sort by filename
	echo "$OUTCOME_LIST" >> "$ResultsDir/Correct.csv"
	sort -o "$ResultsDir/Correct.csv" "$ResultsDir/Correct.csv"
fi
cat "$ResultsDir"/Metrics/*.csv > "$ResultsDir/$BNAME.csv"
rlaz's avatar
rlaz a validé

Richard Zanibbi's avatar
Richard Zanibbi a validé
# Merge the per-file difference files into one, or flag an error-free run.
# A .diff file is written for every evaluated file and contains only an empty
# line when no differences were found, so the test looks for real content
# instead of for the mere presence of .diff files (which also avoids parsing
# the output of ls). stderr is silenced for the case of no .diff file at all.
if grep -q '[^[:space:]]' "$ResultsDir"/Metrics/*.diff 2>/dev/null
then
	cat "$ResultsDir"/Metrics/*.diff > "$ResultsDir/$BNAME.diff"
else
	touch "$ResultsDir/00_NoErrors"
	: > "$ResultsDir/$BNAME.diff"  # empty - no errors.
fi

rlaz's avatar
rlaz a validé
# Compute summaries 
rlaz's avatar
rlaz a validé
# Summary.txt is produced by sumMetric.py from the run label and the combined
# metrics; sumDiff.py is run twice on the combined differences and the ground
# truth labels, once with the "html" argument and once without it.
# All paths are quoted so directories with spaces or glob characters work.
python3 "$LgEvalDir/src/sumMetric.py" "$LABEL_STRING" "$ResultsDir/$BNAME.csv" \
	> "$ResultsDir/Summary.txt"
python3 "$LgEvalDir/src/sumDiff.py" "$ResultsDir/$BNAME.diff" "$ResultsDir/labelsGT.txt" html \
	> "$ResultsDir/ConfusionMatrices.html"
python3 "$LgEvalDir/src/sumDiff.py" "$ResultsDir/$BNAME.diff" "$ResultsDir/labelsGT.txt" \
	> "$ResultsDir/ConfusionMatrices.csv"

################################################################
rlaz's avatar
rlaz a validé
# Create FileMetrics.csv and summary spreadsheet
################################################################
# Use awk and head to select every odd (headers) and even (data) columns,
# Concatenate one header row with data contents.
# The combined metrics file alternates label,value,label,value,... on each
# row: the odd columns go to Headers.csv and the even columns to Data.csv.
awk -F',' '{ for (i=1;i<=NF;i+=2) printf ("%s%c", $i, i + 2 <= NF ? "," : "\n")}' "$ResultsDir/$BNAME.csv" > "$ResultsDir/Headers.csv"
awk -F',' '{ for (i=2;i<=NF;i+=2) printf ("%s%c", $i, i + 2 <= NF ? "," : "\n")}' "$ResultsDir/$BNAME.csv" > "$ResultsDir/Data.csv"

# The first header row provides the data labels; "File" and "Result" are
# prepended for the two columns that come from Correct.csv.
HEAD=$(head -n 1 "$ResultsDir/Headers.csv")
echo "File,Result,$HEAD" > "$ResultsDir/HeaderRow.csv"

# Combine file names with raw data metrics, then add header labels.
paste -d , "$ResultsDir/Correct.csv" "$ResultsDir/Data.csv" > "$ResultsDir/DataNew.csv"
cat "$ResultsDir/HeaderRow.csv" "$ResultsDir/DataNew.csv" > "$ResultsDir/FileMetrics.csv"
rlaz's avatar
rlaz a validé

rlaz's avatar
rlaz a validé
##################################
# Clean up 
##################################
# Remove the intermediate files used to assemble FileMetrics.csv.
rm -f -- "$ResultsDir/Headers.csv" "$ResultsDir/HeaderRow.csv" "$ResultsDir/Data.csv"
rm -f -- "$ResultsDir/DataNew.csv"
# RZ: not deleting Correct.csv, to ensure that all files are present.
#rm -f $ResultsDir/Correct.csv
# The combined metric and difference files are rebuilt on every run.
rm -f -- "$ResultsDir/$BNAME.csv" "$ResultsDir/$BNAME.diff"
Richard Zanibbi's avatar
Richard Zanibbi a validé

rlaz's avatar
rlaz a validé

##################################
# Remind user of outputs
##################################
# List what the results directory contains. The names printed here match what
# the script actually creates: the graph directory is errorGraphs/ and the
# output label file is labelsOutput.txt.
echo ""
echo "done."
echo ""
echo "$ResultsDir/ contents:"
echo "   Summary.txt     --   Readable metrics summary"
echo "   Correct.csv     --   Records which files are correct/incorrect"
echo "   errorGraphs/    --   Error graph visualizations (if requested)"
echo "   labelsOutput.txt --  Node & edge labels in output files"
echo "   labelsGT.txt    --   Node & edge labels in ground truth files"
echo "   FileMetrics.csv --   Raw metrics file"
echo "   ConfusionMatrices.html  -- Readable web page with confusion matrices (HTML)"
echo "   ConfusionMatrices.csv   -- Confusion matrix (CSV format)"
echo "   Metrics/                -- Individual file metrics (.csv) & differences (.diff)"
echo ""
Richard Zanibbi's avatar
Richard Zanibbi a validé