#!/bin/bash
#set -x
#
# Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
# This script will run a TfLite model through ExecuteNetwork, trying all available backends to measure
# both speed and accuracy. In addition, it will try some of the performance options that are available.
#
# Prerequisites: ExecuteNetwork must be built with:
#  * CpuRef enabled (-DARMNNREF=1)
#  * TfLite delegate enabled (-DBUILD_CLASSIC_DELEGATE=1)
#  * TfLite parser enabled (-DBUILD_TF_LITE_PARSER=1)
#  * Any backend you want to test against. E.g. -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1
#  * The model must be fully supported by Arm NN.
#
# Usage:
#   evaluate_network.sh -e <path to ExecuteNetwork> -m <TfLite model>
#
# Sample usage:
#   evaluate_network.sh -e ./build/release/armnn/test -m ./my_tflite_model.tflite
#

CMD=$( basename "$0" )

usage() {
    echo "Usage: $CMD -e <path to ExecuteNetwork> -m <TfLite model>"
    echo "Options:        -e <path to the directory containing ExecuteNetwork>"
    echo "                -m <TfLite model to test>"
    exit 1
}

# Errors if the previous command had a non-zero exit code.
function AssertZeroExitCode {
    EXITCODE=$?
    if [ $EXITCODE -ne 0 ]; then
        echo -e "Previous command exited with code $EXITCODE"
        exit 1
    fi
}

OPTION_COUNTER=0
while getopts "e:m:" opt; do
    ((OPTION_COUNTER+=1))
    case "$opt" in
        h|\?) usage;;
        e) EXECUTE_NETWORK_PATH="$OPTARG";;
        m) MODEL="$OPTARG";;
    esac
done
shift $((OPTIND - 1))

# Both parameters are mandatory.
if [ -z "$EXECUTE_NETWORK_PATH" ] || [ -z "$MODEL" ]; then
    usage
    exit 1
fi

# Check that the path to Execute Network will find the executable.
if [ -x "$EXECUTE_NETWORK_PATH/ExecuteNetwork" ]; then
    echo -e "Using Execute Network from\t\t\t: $EXECUTE_NETWORK_PATH/ExecuteNetwork"
    EXECUTE_NETWORK="$EXECUTE_NETWORK_PATH/ExecuteNetwork"
else
    echo "Execute Network does not exist at \"$EXECUTE_NETWORK_PATH/ExecuteNetwork\""
    usage
    exit 1
fi

# Check that the model exists and has a supported extension.
if [ -f "$MODEL" ]; then
    if [[ ! $MODEL =~ (tflite)$ ]]; then
        echo "Only .tflite files are supported."
        exit 1
    fi
else
    echo "Model file: \"$MODEL\" could not be found."
    usage
    exit 1
fi

# Find out the available backends. Unfortunately the list of backends spans multiple lines.
# This means we have to do this in several steps.
echo -n -e "Available backends on this executable\t\t:"
HELP_OUTPUT=`$EXECUTE_NETWORK --help`
BACKENDS=`echo $HELP_OUTPUT | sed 's/.*: \[//' | sed 's/\].*//' | sed 's/,//g'`
# Remove the leading space to make it look prettier.
BACKENDS="${BACKENDS:1}"
if [ -z "$BACKENDS" ]; then
    echo ""
    echo "Execute Network reported no available backends!"
    exit 1
else
    echo " $BACKENDS"
    # We really need CpuRef to be in there.
    if [[ ! $BACKENDS =~ "CpuRef" ]]; then
        echo ""
        echo "Fatal: Please recompile ExecuteNetwork to include the CpuRef backend. (-DARMNNREF=1)"
        exit 1
    fi
fi
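# To illustrate the parsing above (the exact help text can vary between versions of
# ExecuteNetwork): given a help line along the lines of
#     "... order of computation, e.g: [CpuAcc, CpuRef, GpuAcc] ..."
# the sed pipeline keeps only the text between the square brackets and strips the commas,
# leaving a space separated list such as:
#     CpuAcc CpuRef GpuAcc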
echo -n -e "Is the model fully supported by Arm NN?\t\t:" REF_EXECUTION=`$EXECUTE_NETWORK -m $MODEL -c CpuRef -N` # If it failed look for the most common reason - an unsupported layer. if [ $? -ne 0 ]; then if [[ $REF_EXECUTION =~ "is not supported on requested backend CpuRef" ]]; then echo -e " No - One or more layers are not supported by Arm NN" else echo -e " No - Execution using CpuRef backend failed." fi echo -e "The Reported problems were\t:" echo `echo "$REF_EXECUTION" | sed '/Warning\|ERROR\|Fatal/!d'` echo "To recreate this error try: \"$EXECUTE_NETWORK -m $MODEL -c CpuRef\" " exit 1 fi echo " Yes" # This function will execute the model and return a string representation of the results. This is the # first time the model will be executed. # Is done wth -c $BACKEND,CpuRef to allow the odd layer to be supported by an unaccelerated backend. # # Parameters: # $1 Backend string like CpuRef. # $2 Additional ExecuteNetwork parameters. # function RunAccuracyOnBackendWithParameters { BACKEND=$1 ADDITIONAL_PARAM=$2 # Run on BACKEND to check accuracy against TfLite runtime first. This will be a warning not a failure. ACCURACY_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND $ADDITIONAL_PARAM -A -N` # Start by checking the return code. if [ $? -ne 0 ]; then # Maybe this backend isn't supported. if [[ $ACCURACY_RUN =~ "None of the preferred backends [$BACKEND ] are supported" ]]; then echo -e "\t\t***Is not supported***" return 1 elif [[ $ACCURACY_RUN =~ "is not supported on requested backend" ]]; then # One or more layers require a fall back. Run again with CpuRef fall back. ACCURACY_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef $ADDITIONAL_PARAM -A -N` REQUIRES_CPUREF="*" else # In the case of a general failure against this backend tell the user what we tried and then # ignore this backend. echo -e "\t***Execution failed. Ignoring this backend. Command was: \"$EXECUTE_NETWORK -m $MODEL -c $BACKEND -A -N\"" return 1 fi fi # Now check the RMS value. If it isn't 0 then mark this as questionable accuracy. ACCURACY_VALUE=`echo "$ACCURACY_RUN" | grep 'Byte level'` if [[ ! $ACCURACY_VALUE == *0 ]]; then ACCURACY=!`echo $ACCURACY_VALUE | sed 's/[a-zA-Z:]*//g'` else ACCURACY="OK" fi # Add on the * if we needed to add CpuRef. if [ -z $REQUIRES_CPUREF ]; then echo -e "$ACCURACY $REQUIRES_CPUREF\t\t" else echo -e "$ACCURACY\t\t" fi } # This function will execute the model and return a string representation of the results. The execution # Is done wth -c $BACKEND,CpuRef to allow the odd layer to ot be supported by an accelerated backend. # # Parameters: # $1 Backend string like CpuRef. # $2 Additional ExecuteNetwork parameters. # function RunPerformanceOnBackendWithParameters { BACKEND=$1 ADDITIONAL_PARAM=$2 # Execute with 6 inferences. Mark the first as initial inference. Average the rest. SPEED_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef -I 6 -N $ADDITIONAL_PARAM` # Extract the model load time MODEL_LOAD_TIME=`echo "$SPEED_RUN" | grep "Initialization time" | sed 's/[a-zA-Z:]*//g'` MODEL_LOAD_TIME=`echo ${MODEL_LOAD_TIME::-2}` # Remove the tailing space and full stop. # and the optimization time. OPTIMIZATION_TIME=`echo "$SPEED_RUN" | grep "Optimization time" | sed 's/[a-zA-Z:]*//g'` OPTIMIZATION_TIME=`echo ${OPTIMIZATION_TIME::-1}` # Remove the tailing space. # All 6 inference times. RAW_INFERENCE=`echo "$SPEED_RUN" | grep "Inference time"` # This will take "Info: Inference time: 0.03 ms Info:..." 
# This function will execute the model and return a string representation of the results. The execution
# is done with -c $BACKEND,CpuRef to allow the odd layer that is not supported by the accelerated
# backend to fall back to CpuRef.
#
# Parameters:
#  $1 Backend string like CpuRef.
#  $2 Additional ExecuteNetwork parameters.
#
function RunPerformanceOnBackendWithParameters {
    BACKEND=$1
    ADDITIONAL_PARAM=$2
    # Execute with 6 inferences. Mark the first as the initial inference. Average the rest.
    SPEED_RUN=`$EXECUTE_NETWORK -m $MODEL -c $BACKEND,CpuRef -I 6 -N $ADDITIONAL_PARAM`

    # Extract the model load time
    MODEL_LOAD_TIME=`echo "$SPEED_RUN" | grep "Initialization time" | sed 's/[a-zA-Z:]*//g'`
    MODEL_LOAD_TIME=`echo ${MODEL_LOAD_TIME::-2}` # Remove the trailing space and full stop.
    # and the optimization time.
    OPTIMIZATION_TIME=`echo "$SPEED_RUN" | grep "Optimization time" | sed 's/[a-zA-Z:]*//g'`
    OPTIMIZATION_TIME=`echo ${OPTIMIZATION_TIME::-1}` # Remove the trailing space.

    # All 6 inference times.
    RAW_INFERENCE=`echo "$SPEED_RUN" | grep "Inference time"`
    # This will take "Info: Inference time: 0.03 ms Info: ..." and transform it to "0.03 0.01 0.01".
    INFERENCE_TIMES=`echo $RAW_INFERENCE | sed 's/[a-zA-Z:]*//g'`
    INITIAL_INFERENCE_TIME=`echo $INFERENCE_TIMES | cut -d ' ' -f 1`
    # Now remove the initial inference time as it would skew the average.
    INFERENCE_TIMES=`echo $INFERENCE_TIMES | sed 's/[^ ]* //'`
    # Use awk to sum and average the remaining 5 numbers.
    AVERAGE_INFERENCE_TIME=`echo $INFERENCE_TIMES | awk '{s+=$1}END{print s/NR}' RS=" "`

    # Result format is: MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
    echo -e "$MODEL_LOAD_TIME\t\t$OPTIMIZATION_TIME\t\t\t$INITIAL_INFERENCE_TIME\t\t\t$AVERAGE_INFERENCE_TIME\t"
}
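# The awk invocation above treats each space separated number as its own record (RS=" "),
# sums them and divides by the record count. A quick standalone illustration:
#     echo "0.01 0.02 0.03" | awk '{s+=$1}END{print s/NR}' RS=" "    # prints 0.02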
# Check execution on all available backends.
echo "==================================================================================="
echo -e "BACKEND\t\tACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)"
for backend in $BACKENDS
do
    echo -n -e "$backend\t\t"
    RESULT=$(RunAccuracyOnBackendWithParameters $backend)
    echo -n -e "$RESULT"
    if [[ $RESULT =~ "*" ]]; then
        REQUIRED_CPU_REF=1
    fi
    # It's possible the backend wasn't supported.
    if [[ ! "$RESULT" =~ "not supported" ]]; then
        # It was, continue.
        RESULT=$(RunPerformanceOnBackendWithParameters $backend)
        echo -n -e "$RESULT"
        # Save some specific values for use later.
        if [ "$backend" == "CpuAcc" ]; then
            # In the case of CpuAcc we save the average inference time.
            CPUACC_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        fi
        if [ "$backend" == "GpuAcc" ]; then
            # In the case of GpuAcc we save the average inference time.
            GPUACC_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        fi
    else
        # Remove this backend from future tests.
        BACKENDS=`echo $BACKENDS | sed "s/$backend//"`
    fi
    echo
done
# Only print this if it was required.
if [ ! -z "$REQUIRED_CPU_REF" ]; then
    echo "* denotes this backend required fallback to CpuRef."
    echo
fi

# Now it's time to look at backend specific parameters.

# This function first runs the accuracy test and then the performance test. It uses the average from
# earlier to compare against.
function RunAccuracyAndPerformanceWithExtraParameter {
    BACKEND=$1
    EXTRA_PARAM=$2
    AVERAGE_INFERENCE_TIME=$3
    echo -e "ACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)\t\tDELTA(ms)"
    RESULT=$(RunAccuracyOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM)
    echo -n "$RESULT"
    RESULT=$(RunPerformanceOnBackendWithParameters $BACKEND,CpuRef $EXTRA_PARAM)
    PARAM_AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
    # If adding the parameter was faster then include by how much.
    if (( $(echo "$PARAM_AVERAGE_INFERENCE_TIME < $AVERAGE_INFERENCE_TIME" | bc -l) )); then
        DELTA=`echo $AVERAGE_INFERENCE_TIME - $PARAM_AVERAGE_INFERENCE_TIME | bc`
        echo -e "$RESULT\t\t\t$DELTA ($PARAM_AVERAGE_INFERENCE_TIME v $AVERAGE_INFERENCE_TIME)"
    else
        echo -e "$RESULT\t\t\t**No improvement**"
    fi
}

# Start with CpuAcc. Three knobs to twiddle: threads, fast-math and fp16.
if [[ $BACKENDS =~ "CpuAcc" ]]; then
    echo
    echo "CpuAcc optimizations."
    echo "============================"
    echo "The value of the \"number-of-threads\" parameter is by default decided on by the backend."
    echo "Cycle through number-of-threads=1 -> 12 and see if any are faster than the default."
    echo
    for i in {1..12}
    do
        RESULT=$(RunPerformanceOnBackendWithParameters "CpuAcc,CpuRef" "--number-of-threads $i")
        AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        # Print something out if the returned average is less than the previously saved average.
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $CPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            DELTA=`echo $CPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc`
            echo " \"--number-of-threads $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $CPUACC_AVERAGE_INFERENCE_TIME)"
            FASTER=1
        fi
    done
    if [ -z "$FASTER" ]; then
        echo "No value of \"number-of-threads\" was faster than the default."
    fi

    # Next is fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--fp16-turbo-mode" $CPUACC_AVERAGE_INFERENCE_TIME

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo "Now trying \"enable-fast-math\"."
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--enable-fast-math" $CPUACC_AVERAGE_INFERENCE_TIME
fi

# GpuAcc.
# Options to check: enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path.
if [[ $BACKENDS =~ "GpuAcc" ]]; then
    echo
    echo "GpuAcc optimizations."
    echo "============================"

    # fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" $GPUACC_AVERAGE_INFERENCE_TIME

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo "Now trying \"enable-fast-math\"."
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" $GPUACC_AVERAGE_INFERENCE_TIME

    # Next is tuning levels. Just speed on this one.
    echo
    echo "Now trying \"tuning-level/tuning-path\"."
    for i in {1..3}
    do
        touch ./tuned-network.bin
        # Create the tuned network file with the first run.
        OUTPUT=`$EXECUTE_NETWORK -m $MODEL -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level $i -N`
        AssertZeroExitCode
        # Now run the performance test reusing that saved network.
        RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc,CpuRef" "--tuning-path ./tuned-network.bin")
        AVERAGE_INFERENCE_TIME=`echo $RESULT | cut -d ' ' -f 4`
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            DELTA=`echo $GPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME | bc`
            echo " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        else
            echo " \"--tuning-level $i\" did not result in a faster average inference time. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        fi
        rm ./tuned-network.bin
    done
fi
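# Note: the tuning workflow above can also be used outside this script. For example (flags as
# used above; the model and tuning file paths are illustrative):
#     ExecuteNetwork -m my_model.tflite -c GpuAcc --tuning-path ./tuned-network.bin --tuning-level 2
# generates the tuning data, and subsequent runs that pass only --tuning-path reuse it, as the
# performance run in the loop above does.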