Nantes Université

Skip to content
Extraits de code Groupes Projets
evaluate 9,87 ko
Newer Older
Richard Zanibbi's avatar
Richard Zanibbi a validé
#!/bin/bash

# Make sure that CROHMELibDir and LgEvalDir are defined in
# your shell environment, e.g. by including:
#	
#	export LgEvalDir=<path_to_LgEval>
#	export CROHMELibDir=<path_to_CROHMELib>       		
#	export PATH=$PATH:$CROHMELibDir/bin:$LgEvalDir/bin
# 
# in your .bashrc file (the initialization file for bash shell). The PATH
# alteration will add the tools to your search path. 

Richard Zanibbi's avatar
Richard Zanibbi a validé
then
	echo "LgEval evaluate: Label graph evaluation tool"
	echo "Copyright (c) R. Zanibbi, H. Mouchere, M. Mahdavi, A.K. Shah 2012-2022"
Richard Zanibbi's avatar
Richard Zanibbi a validé
	echo ""
	echo "Usage: evaluate outputDir groundTruthDir [p/t/d/s/b] [png/pdf/both] OR"
	echo "       evaluate fileList [p/t/d/s/b] [png/pdf/both]"
Richard Zanibbi's avatar
Richard Zanibbi a validé
	echo ""
	echo "Evaluates all label graph (.lg) files in outputDir against"
	echo "corresponding files in groundTruthDir. groundTruthDir is used"
	echo "to generate the list of files to be compared (i.e. if a file is"
	echo "not in the ground truth directory, it will not be considered)."
	echo ""
	echo "If a list of file pairs is provided instead ('output target' on each line)"
	echo "then these file pairs are used for evaluation."
	echo ""
Richard Zanibbi's avatar
Richard Zanibbi a validé
	echo "Outputs"
	echo "-----------------------------"
	echo " Results<outputDir/fileListName>/"
	echo "    ConfusionMatrices.*:    confusion matrix spreadsheet (errors in csv/html)"
	echo "    FileMetrics.csv:        file metrics spreadsheet"
	echo "    Summary.txt:            summary of performance metrics"
	echo "    labelsGT.txt:           list of node and edge labels in ground truth"
	echo "    labelsOutput.txt:       list of node and edge labels in output files"
Richard Zanibbi's avatar
Richard Zanibbi a validé
	echo "" 
	echo "    graphErrors/: if dot output requested, visualizations for files with"
ayushkumarshah's avatar
ayushkumarshah a validé
	echo -e "\t\t  errors are stored here (.dot and .pdf[default] or .png or both as specified)."
Richard Zanibbi's avatar
Richard Zanibbi a validé
	echo ""
	echo "NOTE: the different visualizations of structural differences are described"
	echo "      if you run lg2dot without arguments (object (t)ree; (d)irected graph"
	echo "      over objects; primitive (s)egmentation graph; (b)ipartite graph over"
	echo "      primitives; (p): default directed graph over primitives."
Richard Zanibbi's avatar
Richard Zanibbi a validé
	exit 0
fi

Richard Zanibbi's avatar
Richard Zanibbi a validé
# Name the results directory after the first argument (output directory or
# file list). The argument is quoted so paths containing spaces survive;
# basename also drops any trailing slash.
BNAME=$(basename -- "$1")
TARGET_COUNT=0
NL=$'\n'

# Accumulates one "file, outcome" line per evaluated file.
OUTCOME_LIST=""
ResultsDir=Results_$BNAME

################################################################
# Compile the list of output files and ground truth files.
#
# NOTE: Ground truth files define the evaluation set, extra 
# output files are ignored.
################################################################

rlaz's avatar
rlaz a validé
# Tool banner, padded by one blank line above and one below.
printf '\n%s\n\n' "[ LgEval evaluate ]"
rlaz's avatar
rlaz a validé

# Case 1: Passed a list of file pairs
	MODE="List"

	LABEL_STRING="List File: $1"
	echo "$LABEL_STRING"
	# Get the targets
	OUTPUTS=`awk '{ print $1; }' $1`
	OUTARR=($OUTPUTS)
	TARGETS=`awk '{ print $2; }' $1`

	# Grab additional flags
	if [ $# -gt 2 ]
	then
		FORMAT=$3
	fi

# Case 2: Passed a pair of directories
	OUT_STRING="Output File Directory:  $1"
	GT_STRING="Ground Truth Directory: $2"
	# Join the two label lines with printf so the newline between them is preserved.
	LABEL_STRING=$(printf '%s\n%s' "$OUT_STRING" "$GT_STRING")
	echo "$LABEL_STRING"
Richard Zanibbi's avatar
Richard Zanibbi a validé

	# Grab additional flags

	# RZ: Debug -- output type ignored
	if [ $# -gt 3 ]
	then
		FORMAT=$4
	fi
rlaz's avatar
rlaz a validé
echo "* LgEval Results Directory: $ResultsDir"
# Count the whitespace-separated entries in TARGETS. The variable is quoted
# so glob characters in file names are not expanded before counting; the
# arithmetic expansion strips any padding that some wc implementations emit.
TARGET_COUNT=$(( $(printf '%s\n' "$TARGETS" | wc -w) ))

rlaz's avatar
rlaz a validé

################################################################
# Create output directory structure, compile class labels
################################################################
if ! [ -d $ResultsDir ]
then
	mkdir $ResultsDir
	mkdir $ResultsDir/Metrics
	# Create directories for dot error visualizations
		# RZ Debug: deleting FORMAT assignment (done above)
		mkdir $ResultsDir/errorGraphs
		mkdir $ResultsDir/errorGraphs/dot
		if [ "$FORMAT" == "pdf" ]; then
			mkdir $ResultsDir/errorGraphs/pdf
		elif [ "$FORMAT" == "png" ]; then
			mkdir $ResultsDir/errorGraphs/png
		elif [ "$FORMAT" == "both" ]; then
			mkdir $ResultsDir/errorGraphs/pdf
			mkdir $ResultsDir/errorGraphs/png
		fi
# Compile labels from ground truth. This is needed for confusion matrices to
# be properly defined, and for sanity checking results.
rlaz's avatar
rlaz a validé
# compileLabels.py is handed a scratch file holding the file list, so the
# same scratch file is written twice: first the targets, then the outputs.
# All paths are quoted so directory names containing spaces do not split.
printf '%s\n' "$TARGETS" > "$ResultsDir/temp_file_list"
python3 "$LgEvalDir/src/compileLabels.py" "$ResultsDir/temp_file_list" > "$ResultsDir/labelsGT.txt"
printf '%s\n' "$OUTPUTS" > "$ResultsDir/temp_file_list"
python3 "$LgEvalDir/src/compileLabels.py" "$ResultsDir/temp_file_list" > "$ResultsDir/labelsOutput.txt"
rm -- "$ResultsDir/temp_file_list"

################################################################
# Evaluate files
################################################################

# Compute all .csv metrics outputs (per-file), and .diff results (per-file).
echo ""
echo "Evaluating..."

# Iterate over ground truth files
Richard Zanibbi's avatar
Richard Zanibbi a validé
do
	FNAME=`basename $file .lg`
	nextFile="_ERROR_"
	if [ $MODE == "Dir" ]
	then
		nextFile=`echo "$1/$FNAME.lg" | perl -p -e "s/\/\//\//g"`
	else
		# Index to the next input file.
		nextFile=${OUTARR[INDEX]}
	fi

	if  [[ ! -e $ResultsDir/Metrics/$FNAME.csv ]]
Richard Zanibbi's avatar
Richard Zanibbi a validé
	then
		# NOTE: the script convertCrohmeLg can be used to convert
		#       crohme .inkml files to .lg files.
rlaz's avatar
rlaz a validé

		# RZ: Run evaluation once vs. twice
		OUT=`python3 $LgEvalDir/src/evallg.py $nextFile $file INTER`

		# Match asterisk at beginning of line to select differences/errors
		# WARNING: Double quotes are important to preserve newlines!
		DIFF=`echo "$OUT" | grep "\*"`
		echo "$DIFF" > $ResultsDir/Metrics/$FNAME.diff
		echo "$OUT" | grep -v "\*" > $ResultsDir/Metrics/$FNAME.csv

		# If differences reported, record files with errors, generate visualizations
		if [ "$DIFF" != "" ]
Richard Zanibbi's avatar
Richard Zanibbi a validé
		then
			CORRECT="Incorrect"

			# If a third argument is provided, generate a .pdf file to visualize
			# differences between graphs.
				if [ "$DOTARG" == "d" ]
					lg2dot $nextFile $file --format $FORMAT
					lg2dot $nextFile $file --graph_type "$DOTARG" --format $FORMAT
rlaz's avatar
rlaz a validé

				mv $FNAME.dot $ResultsDir/errorGraphs/dot
				if [ "$FORMAT" == "pdf" ]; then
					mv $FNAME.pdf $ResultsDir/errorGraphs/pdf
				elif [ "$FORMAT" == "png" ]; then
					mv $FNAME.png $ResultsDir/errorGraphs/png
				elif [ "$FORMAT" == "both" ]; then
					mv $FNAME.pdf $ResultsDir/errorGraphs/pdf
					mv $FNAME.png $ResultsDir/errorGraphs/png
				fi
Richard Zanibbi's avatar
Richard Zanibbi a validé
		fi
rlaz's avatar
rlaz a validé
		OUTCOME_LIST=`printf "%s\n%s" "$OUTCOME_LIST" "$nextFile, $CORRECT"`
Richard Zanibbi's avatar
Richard Zanibbi a validé
	fi
rlaz's avatar
rlaz a validé
	#else
	#	echo "  * Already processed: $file"
	#fi
	PERCENT=`echo "scale=1; 100 * $INDEX / $TARGET_COUNT" | bc`
	
rlaz's avatar
rlaz a validé
	if [ $((`expr $INDEX % 1`)) == 0 ]
	then
		echo -ne "  $PERCENT% complete ($INDEX of $TARGET_COUNT)\r"
	fi
Richard Zanibbi's avatar
Richard Zanibbi a validé
done
rlaz's avatar
rlaz a validé
echo -ne "  $PERCENT% complete ($INDEX of $TARGET_COUNT)\r"

Richard Zanibbi's avatar
Richard Zanibbi a validé

################################################################
# Compile metrics
# Including summaries and confusion matrices
rlaz's avatar
rlaz a validé
#
# Stored as individual files to prevent re-computation for user
################################################################
rlaz's avatar
rlaz a validé
# Append this run's per-file outcomes (FileResults.csv is kept across runs),
# then concatenate every per-file metrics file into a single CSV.
printf '%s\n' "$OUTCOME_LIST" >> "$ResultsDir/FileResults.csv"
cat -- "$ResultsDir"/Metrics/*.csv > "$ResultsDir/$BNAME.csv"
rlaz's avatar
rlaz a validé

Richard Zanibbi's avatar
Richard Zanibbi a validé
# Collect the names of the per-file .diff files. A glob is used instead of
# parsing `ls | grep .diff`: in that unanchored regex the dot matches any
# character, so any name merely containing "diff" would have counted.
ALLDIFFS=""
for diff_path in "$ResultsDir"/Metrics/*.diff
do
	# An unmatched glob is left as the literal pattern; skip it.
	[ -e "$diff_path" ] || continue
	ALLDIFFS+="${diff_path##*/}"$'\n'
done
ALLDIFFS=${ALLDIFFS%$'\n'}

if [ -n "$ALLDIFFS" ]
then
	cat -- "$ResultsDir"/Metrics/*.diff > "$ResultsDir/$BNAME.diff"
else
	touch "$ResultsDir/00_NoErrors"
	touch "$ResultsDir/$BNAME.diff"  # empty - no errors.
fi

rlaz's avatar
rlaz a validé
# Compute summaries 
rlaz's avatar
rlaz a validé
# Summarise the merged metrics, then render the merged differences as
# confusion matrices twice: once as HTML, once as CSV (no format argument).
# Paths are quoted so directory names containing spaces do not split.
python3 "$LgEvalDir/src/sumMetric.py" "$LABEL_STRING" "$ResultsDir/$BNAME.csv" > \
	"$ResultsDir/Summary.txt"
python3 "$LgEvalDir/src/sumDiff.py" "$ResultsDir/$BNAME.diff" "$ResultsDir/labelsGT.txt" html > \
	"$ResultsDir/ConfusionMatrices.html"
python3 "$LgEvalDir/src/sumDiff.py" "$ResultsDir/$BNAME.diff" "$ResultsDir/labelsGT.txt" > \
	"$ResultsDir/ConfusionMatrices.csv"

################################################################
# Create spreadsheet
################################################################
# RZ Oct. 2014: Create spreadsheet pairing file names with metrics.
# Clean up raw metric data to make the file smaller and simpler.
# Use awk and head to select every odd (headers) and even (data) columns,
# Concatenate one header row with data contents.
# awk program that prints every second comma-separated field, beginning at
# field number `start`, joined by commas and ended with a newline.
# start=1 selects the odd (header) columns, start=2 the even (data) columns.
select_columns='{ for (i = start; i <= NF; i += 2) printf("%s%c", $i, i + 2 <= NF ? "," : "\n") }'
awk -F',' -v start=1 "$select_columns" "$ResultsDir/$BNAME.csv" > "$ResultsDir/Headers.csv"
awk -F',' -v start=2 "$select_columns" "$ResultsDir/$BNAME.csv" > "$ResultsDir/Data.csv"

# Take the first row of Headers.csv as the column labels and prepend the
# "File" and "Result" labels for the two columns of FileResults.csv.
HEAD=$(head -n 1 "$ResultsDir/Headers.csv")
echo "File,Result,$HEAD" > "$ResultsDir/HeaderRow.csv"

# Pair each file/outcome row with its metrics row, then put the header on top.
paste -d , "$ResultsDir/FileResults.csv" "$ResultsDir/Data.csv" > "$ResultsDir/DataNew.csv"
cat "$ResultsDir/HeaderRow.csv" "$ResultsDir/DataNew.csv" > "$ResultsDir/FileMetrics.csv"
rlaz's avatar
rlaz a validé

rlaz's avatar
rlaz a validé
##################################
# Clean up 
##################################
# Remove the intermediate spreadsheet pieces. Paths are quoted and preceded
# by `--` so unusual directory names are neither split nor read as options.
rm -f -- "$ResultsDir/Headers.csv" "$ResultsDir/HeaderRow.csv" "$ResultsDir/Data.csv" \
	"$ResultsDir/DataNew.csv"
# RZ: not deleting FileResults, to ensure that all files are present.
#rm -f $ResultsDir/FileResults.csv
rm -f -- "$ResultsDir/$BNAME.csv" "$ResultsDir/$BNAME.diff"
Richard Zanibbi's avatar
Richard Zanibbi a validé

rlaz's avatar
rlaz a validé

##################################
# Remind user of outputs
##################################
# Print the closing summary listing what the results directory holds.
# Reads ResultsDir; writes only to stdout. The label-file entry names
# labelsOutput.txt, which is the file the label compilation step writes.
print_output_summary() {
	cat <<EOF

done.

$ResultsDir/ contents:
   Summary.txt     --   Readable metrics summary
   FileMetrics.csv --   Raw metrics file
   labelsOutput.txt --  Node & edge labels in output files
   labelsGT.txt    --   Node & edge labels in ground truth files
   ConfusionMatrices.html  -- Readable web page with confusion matrices (HTML)
   ConfusionMatrices.csv   -- Confusion matrix (CSV format)
   Metrics/                -- Individual file metrics (.csv) & differences (.diff)

EOF
}
print_output_summary
Richard Zanibbi's avatar
Richard Zanibbi a validé