Diffstat (limited to 'tests/ExecuteNetwork/evaluate_network.sh')
-rwxr-xr-x | tests/ExecuteNetwork/evaluate_network.sh | 358
1 file changed, 358 insertions, 0 deletions
diff --git a/tests/ExecuteNetwork/evaluate_network.sh b/tests/ExecuteNetwork/evaluate_network.sh
new file mode 100755
index 0000000000..931167dda8
--- /dev/null
+++ b/tests/ExecuteNetwork/evaluate_network.sh
@@ -0,0 +1,358 @@
+#!/bin/bash
+#set -x
+#
+# Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+# SPDX-License-Identifier: MIT
+#
+# This script will run a TfLite model through ExecuteNetwork, trying all available backends to measure
+# both speed and accuracy. In addition, it will try some of the performance options that are available.
+#
+# Prerequisites: ExecuteNetwork must be built with:
+#  * CpuRef enabled (-DARMNNREF=1)
+#  * TfLite delegate enabled (-DBUILD_CLASSIC_DELEGATE=1)
+#  * TfLite parser enabled (-DBUILD_TF_LITE_PARSER=1)
+#  * Any backend you want to test against. E.g. -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1
+# In addition, the model must be fully supported by Arm NN.
+#
+# Usage:
+# evaluate_network.sh -e <Path to ExecuteNetwork> -m <TfLite model to test>
+#
+# Sample usage:
+# evaluate_network.sh -e ./build/release/armnn/test -m ./my_tflite_model.tflite
+#
+
+CMD=$( basename "$0" )
+
+usage() {
+    echo "Usage: $CMD -e <Path to ExecuteNetwork> -m <Test model>"
+    echo "Options:        -e <Path to ExecuteNetwork>"
+    echo "                -m <Test model>"
+    exit 1
+}
+
+# Errors if the previous command had a non-zero exit code.
+function AssertZeroExitCode {
+    EXITCODE=$?
+    if [ $EXITCODE -ne 0 ]; then
+        echo -e "Previous command exited with code $EXITCODE"
+        exit 1
+    fi
+}
+
+OPTION_COUNTER=0
+while getopts "e:m:h" opt; do
+    ((OPTION_COUNTER+=1))
+    case "$opt" in
+        h|\?) usage;;
+        e) EXECUTE_NETWORK_PATH="$OPTARG";;
+        m) MODEL="$OPTARG";;
+    esac
+done
+shift $((OPTIND - 1))
+
+# Both parameters are mandatory.
+if [ -z "$EXECUTE_NETWORK_PATH" ] || [ -z "$MODEL" ]; then
+    usage
+    exit 1
+fi
+
+# Check that the given path contains the ExecuteNetwork executable.
+if [ -x "$EXECUTE_NETWORK_PATH/ExecuteNetwork" ]; then
+    echo -e "Using Execute Network from\t\t\t: $EXECUTE_NETWORK_PATH/ExecuteNetwork"
+    EXECUTE_NETWORK="$EXECUTE_NETWORK_PATH/ExecuteNetwork"
+else
+    echo "Execute Network does not exist at \"$EXECUTE_NETWORK_PATH/ExecuteNetwork\""
+    usage
+    exit 1
+fi
+
+# Check that the model exists and has a supported extension.
+if [ -f "$MODEL" ]; then
+    if [[ ! $MODEL =~ (tflite)$ ]]; then
+        echo "Only .tflite files are supported."
+        exit 1
+    fi
+else
+    echo "Model file \"$MODEL\" could not be found."
+    usage
+    exit 1
+fi
+
+# Find out the available backends. Unfortunately, the list of backends spans multiple lines,
+# so we have to extract it in several steps.
+echo -n -e "Available backends on this executable\t\t:"
+HELP_OUTPUT=`$EXECUTE_NETWORK --help`
+BACKENDS=`echo $HELP_OUTPUT | sed 's/.*: \[//' | sed 's/\].*//' | sed 's/,//g'`
+# Remove the leading space to make it look prettier.
+BACKENDS="${BACKENDS:1}"
+if [ -z "$BACKENDS" ]; then
+    echo ""
+    echo "Execute Network reported no available backends!"
+    exit 1
+else
+    echo " $BACKENDS"
+    # We really need CpuRef to be in there.
+    if [[ ! $BACKENDS =~ "CpuRef" ]]; then
+        echo ""
+        echo "Fatal: Please recompile ExecuteNetwork to include the CpuRef backend. (-DARMNNREF=1)"
+        exit 1
+    fi
+fi
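+
+# For illustration only, a sketch of what the extraction above does, assuming the help text
+# advertises the compute devices in one bracketed list (the exact wording may differ between
+# ExecuteNetwork versions):
+#     echo "... Possible choices: [ CpuAcc, CpuRef, GpuAcc ] ..." | sed 's/.*: \[//' | sed 's/\].*//' | sed 's/,//g'
+# would print " CpuAcc CpuRef GpuAcc ", and the leading space is then stripped by ${BACKENDS:1}.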
+echo -n -e "Is the delegate supported on this executable?\t:" +TFLITE_EXECUTION=`$EXECUTE_NETWORK -m $MODEL -T tflite -c CpuRef -N` +# Check for an error message about building with the delegate. +if [[ $TFLITE_EXECUTION =~ "Tensorflow-Lite delegate support" ]]; then + echo "" + echo "Fatal: Please recompile ExecuteNetwork with TfLite delegate support enabled. (-DBUILD_CLASSIC_DELEGATE=1)" + exit 1 +else + echo " Yes" +fi + +# Run through CpuRef to see if Arm NN supports the model. +echo -n -e "Is the model fully supported by Arm NN?\t\t:" +REF_EXECUTION=`$EXECUTE_NETWORK -m $MODEL -c CpuRef -N` +# If it failed look for the most common reason - an unsupported layer. +if [ $? -ne 0 ]; then + if [[ $REF_EXECUTION =~ "is not supported on requested backend CpuRef" ]]; then + echo -e " No - One or more layers are not supported by Arm NN" + else + echo -e " No - Execution using CpuRef backend failed." + fi + echo -e "The Reported problems were\t:" + echo `echo "$REF_EXECUTION" | sed '/Warning\|ERROR\|Fatal/!d'` + echo "To recreate this error try: \"$EXECUTE_NETWORK -m $MODEL -c CpuRef\" " + exit 1 +fi +echo " Yes" + +# This function will execute the model and return a string representation of the results. This is the +# first time the model will be executed. +# Is done wth -c $BACKEND,CpuRef to allow the odd layer to be supported by an unaccelerated backend. +# +# Parameters: +# $1 Backend string like CpuRef. +# $2 Additional ExecuteNetwork parameters. +# +function RunAccuracyOnBackendWithParameters { + BACKEND=$1 + ADDITIONAL_PARAM=$2 + # Run on BACKEND to check accuracy against TfLite runtime first. This will be a warning not a failure. + ACCURACY_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND $ADDITIONAL_PARAM -A -N` + # Start by checking the return code. + if [ $? -ne 0 ]; then + # Maybe this backend isn't supported. + if [[ $ACCURACY_RUN =~ "None of the preferred backends [$BACKEND ] are supported" ]]; then + echo -e "\t\t***Is not supported***" + return 1 + elif [[ $ACCURACY_RUN =~ "is not supported on requested backend" ]]; then + # One or more layers require a fall back. Run again with CpuRef fall back. + ACCURACY_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef $ADDITIONAL_PARAM -A -N` + REQUIRES_CPUREF="*" + else + # In the case of a general failure against this backend tell the user what we tried and then + # ignore this backend. + echo -e "\t***Execution failed. Ignoring this backend. Command was: \"$EXECUTE_NETWORK -m $MODEL -c $BACKEND -A -N\"" + return 1 + fi + fi + # Now check the RMS value. If it isn't 0 then mark this as questionable accuracy. + ACCURACY_VALUE=`echo "$ACCURACY_RUN" | grep 'Byte level'` + if [[ ! $ACCURACY_VALUE == *0 ]]; then + ACCURACY=!`echo $ACCURACY_VALUE | sed 's/[a-zA-Z:]*//g'` + else + ACCURACY="OK" + fi + # Add on the * if we needed to add CpuRef. + if [ -z $REQUIRES_CPUREF ]; then + echo -e "$ACCURACY $REQUIRES_CPUREF\t\t" + else + echo -e "$ACCURACY\t\t" + fi +} + +# This function will execute the model and return a string representation of the results. The execution +# Is done wth -c $BACKEND,CpuRef to allow the odd layer to ot be supported by an accelerated backend. +# +# Parameters: +# $1 Backend string like CpuRef. +# $2 Additional ExecuteNetwork parameters. +# +function RunPerformanceOnBackendWithParameters { + BACKEND=$1 + ADDITIONAL_PARAM=$2 + # Execute with 6 inferences. Mark the first as initial inference. Average the rest. 
+
+# This function will execute the model and return a string representation of the results. The execution
+# is done with -c $BACKEND,CpuRef to allow the odd layer that is not supported by the accelerated
+# backend to fall back to CpuRef.
+#
+# Parameters:
+#  $1 Backend string like CpuRef.
+#  $2 Additional ExecuteNetwork parameters.
+#
+function RunPerformanceOnBackendWithParameters {
+    BACKEND=$1
+    ADDITIONAL_PARAM=$2
+    # Execute with 6 inferences. Mark the first as the initial inference. Average the rest.
+    SPEED_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef -I 6 -N $ADDITIONAL_PARAM`
+
+    # Extract the model load time
+    MODEL_LOAD_TIME=`echo "$SPEED_RUN" | grep "Initialization time" | sed 's/[a-zA-Z:]*//g'`
+    MODEL_LOAD_TIME=`echo ${MODEL_LOAD_TIME::-2}` # Remove the trailing space and full stop.
+    # and the optimization time.
+    OPTIMIZATION_TIME=`echo "$SPEED_RUN" | grep "Optimization time" | sed 's/[a-zA-Z:]*//g'`
+    OPTIMIZATION_TIME=`echo ${OPTIMIZATION_TIME::-1}` # Remove the trailing space.
+
+    # All 6 inference times.
+    RAW_INFERENCE=`echo "$SPEED_RUN" | grep "Inference time"`
+    # This will take "Info: Inference time: 0.03 ms Info: ..." and transform it into "0.03 0.01 0.01 ..."
+    INFERENCE_TIMES=`echo $RAW_INFERENCE | sed 's/[a-zA-Z:]*//g'`
+    INITIAL_INFERENCE_TIME=`echo $INFERENCE_TIMES | cut -d ' ' -f 1`
+    # Now remove the initial inference time as it would skew the average.
+    INFERENCE_TIMES=`echo $INFERENCE_TIMES | sed 's/[^ ]* //'`
+    # Use awk to sum and average the remaining 5 numbers.
+    AVERAGE_INFERENCE_TIME=`echo $INFERENCE_TIMES | awk '{s+=$1}END{print s/NR}' RS=" "`
+
+    # Result format is: MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
+    echo -e "$MODEL_LOAD_TIME\t\t$OPTIMIZATION_TIME\t\t\t$INITIAL_INFERENCE_TIME\t\t\t$AVERAGE_INFERENCE_TIME\t"
+}
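+
+# For illustration only, a sketch of the averaging above with made-up numbers. If the six runs
+# reported "0.90 0.03 0.01 0.02 0.02 0.02", the initial inference (0.90) is cut off first, then
+#     echo "0.03 0.01 0.02 0.02 0.02" | awk '{s+=$1}END{print s/NR}' RS=" "
+# prints the average of the remaining five runs, 0.02.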
+ if [[ "$PARAM_AVERAGE_INFERENCE_TIME" < "$AVERAGE_INFERENCE_TIME" ]]; then + DELTA=`echo $AVERAGE_INFERENCE_TIME - $PARAM_AVERAGE_INFERENCE_TIME | bc` + echo -e "$RESULT\t\t\t$DELTA ($PARAM_AVERAGE_INFERENCE_TIME v $AVERAGE_INFERENCE_TIME)" + else + echo -e "$RESULT\t\t\t**No improvment**" + fi +} + + +# Start with CpuAcc. Three knobs to twiddle, threads, fast-math and fp16. +if [[ $BACKENDS =~ "CpuAcc" ]]; then + echo + echo "CpuAcc optimizations." + echo "============================" + echo "The value of \"number-of-threads\" parameter by default is decided on by the backend." + echo "Cycle through number-of-threads=1 -> 12 and see if any are faster than the default." + echo + for i in {1..12} + do + RESULT=$(RunPerformanceOnBackendWithParameters "CpuAcc,CpuRef" "--number-of-threads $i") + AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4` + # Print something out if the returned average is less than the previously saved average. + if (( $(echo "$AVERAGE_INFERENCE_TIME < $CPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then + DELTA=`echo $CPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc` + echo " \"--number-of-threads $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $CPUACC_AVERAGE_INFERENCE_TIME)" + FASTER=1 + fi + done + if [ -z $FASTER ]; then + echo "No value of \"number-of-threads\" was faster than the default." + fi + # Next is fp16-turbo-mode. We do both accuracy and speed on this one. + echo + echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models." + echo + RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--fp16-turbo-mode" $CPUACC_AVERAGE_INFERENCE_TIME + + # Next is enable-fast-math. Again both accuracy and speed on this one. + echo + echo -n "Now trying \"enable-fast-math\"." + echo + RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--enable-fast-math" $CPUACC_AVERAGE_INFERENCE_TIME +fi + +# GpuAcc. +# Options to check enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path. +if [[ $BACKENDS =~ "GpuAcc" ]]; then + echo + echo "GpuAcc optimizations." + echo "============================" + + # fp16-turbo-mode. We do both accuracy and speed on this one. + echo + echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models." + echo + RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" $GPUACC_AVERAGE_INFERENCE_TIME + + # Next is enable-fast-math. Again both accuracy and speed on this one. + echo + echo -n "Now trying \"enable-fast-math\"." + echo + RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" $GPUACC_AVERAGE_INFERENCE_TIME + + # Next is tuning levels. Just speed on this one. + echo + echo -n "Now trying \"tuning-level/tuning-path\"." + echo + for i in {1..3} + do + touch ./tuned-network.bin + # Create tuned network file with the first run. + OUTPUT=`$EXECUTE_NETWORK -m $MODEL -c $GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level $i -N` + AssertZeroExitCode + # Now run the perforance test reusing that saved network. + RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc,CpuRef" "--tuning-path ./tuned-network.bin") + AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4` + if (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then + DELTA=`echo $AVERAGE_INFERENCE_TIME - $GPUACC_AVERAGE_INFERENCE_TIME | bc` + echo " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. 
+
+# GpuAcc.
+# Options to check: enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path.
+if [[ $BACKENDS =~ "GpuAcc" ]]; then
+    echo
+    echo "GpuAcc optimizations."
+    echo "============================"
+
+    # fp16-turbo-mode. We do both accuracy and speed on this one.
+    echo
+    echo -n "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
+    echo
+    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" $GPUACC_AVERAGE_INFERENCE_TIME
+
+    # Next is enable-fast-math. Again, both accuracy and speed on this one.
+    echo
+    echo -n "Now trying \"enable-fast-math\"."
+    echo
+    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" $GPUACC_AVERAGE_INFERENCE_TIME
+
+    # Next is tuning levels. Just speed on this one.
+    echo
+    echo -n "Now trying \"tuning-level/tuning-path\"."
+    echo
+    for i in {1..3}
+    do
+        touch ./tuned-network.bin
+        # Create the tuned network file with the first run.
+        OUTPUT=`$EXECUTE_NETWORK -m $MODEL -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level $i -N`
+        AssertZeroExitCode
+        # Now run the performance test reusing that saved network.
+        RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc,CpuRef" "--tuning-path ./tuned-network.bin")
+        AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
+        if (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
+            DELTA=`echo $GPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc`
+            echo " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
+        else
+            echo " \"--tuning-level $i\" did not result in a faster average inference time. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
+        fi
+        rm ./tuned-network.bin
+    done
+fi
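+
+# For illustration only, one tuning pass above amounts to two commands (model path hypothetical,
+# flags as used elsewhere in this script). The first run writes the tuned network file; the
+# second reuses it while timing 6 inferences:
+#     ./ExecuteNetwork -m model.tflite -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level 2 -N
+#     ./ExecuteNetwork -m model.tflite -c GpuAcc,CpuRef -I 6 -N --tuning-path ./tuned-network.bin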