tests/ExecuteNetwork/evaluate_network.sh


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358

#!/bin/bash
#set -x
#
# Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
# This script will run a TfLite model through ExecuteNetwork trying all available backends to measure
# both speed and accuracy. In addition, it will try some of the performance options that are available.
#
# Prerequisites: ExecuteNetwork must be built with:
# * CpuRef enabled (-DARMNNREF=1)
# * TfLite delegate enabled (-DBUILD_CLASSIC_DELEGATE=1)
# * TfLite parser enabled (-DBUILD_TF_LITE_PARSER=1)
# * Any backend you want to test against. E.g. -DARMCOMPUTENEON=1 -DARMCOMPUTECL=1
# * The model must be fully supported by Arm NN.
#
# Usage:
# evaluate_network.sh -e <Path to ExecuteNetwork> -m <TfLite model to test>
#
# Sample usage:
# evaluate_network.sh -e ./build/release/armnn/test -m ./my_tflite_model.tflite
#

# Name under which this script was started; shown in the usage text.
CMD=$(basename "$0")

# Prints the command line synopsis on stdout and ends the script with exit code 1.
usage() {
  printf '%s\n' \
    "Usage: $CMD -e <Path to ExecuteNetwork> -m <Test model>" \
    "Options:        -e <Path to ExecuteNetwork>" \
    "                -m <Test model>"
  exit 1
}

# Stops the script with exit code 1 when the command run immediately before this call failed.
# It reads $? on entry, so it has to be called directly after the command being checked.
# The observed status is left in EXITCODE.
function AssertZeroExitCode {
  EXITCODE=$?
  # Nothing to report when the previous command succeeded.
  if [ "$EXITCODE" -eq 0 ]; then
    return 0
  fi
  printf 'Previous command exited with code %s\n' "$EXITCODE"
  exit 1
}

# Parses the command line options given as arguments.
#   -e <dir>   Directory that contains ExecuteNetwork; stored in EXECUTE_NETWORK_PATH.
#   -m <file>  Model to evaluate; stored in MODEL.
#   -h         Print the usage text and exit.
# An unknown option, or an option without its argument, also ends in usage (which exits with code 1).
# OPTION_COUNTER holds the number of options processed and OPTIND is left pointing at the first
# non-option argument so that the caller can shift the parsed options away.
function ParseOptions {
  local opt
  OPTIND=1
  OPTION_COUNTER=0
  # "h" is part of the option string so that -h reaches the usage arm without getopts first
  # complaining about an illegal option.
  while getopts "he:m:" opt; do
    ((OPTION_COUNTER+=1))
    case "$opt" in
      h|\?) usage;;
      e) EXECUTE_NETWORK_PATH="$OPTARG";;
      m) MODEL="$OPTARG";;
    esac
  done
}

ParseOptions "$@"
shift $((OPTIND - 1))

# Both parameters are mandatory.
if [ -z "$EXECUTE_NETWORK_PATH" ] || [ -z "$MODEL" ]; then
    usage
    exit 1
fi

# Check the path to execute network will find the executable.
if [ -x "$EXECUTE_NETWORK_PATH/ExecuteNetwork" ]; then
    echo -e "Using Execute Network from\t\t\t: $EXECUTE_NETWORK_PATH/ExecuteNetwork"
    EXECUTE_NETWORK="$EXECUTE_NETWORK_PATH/ExecuteNetwork"
else
    echo "Execute Network does not exist at \"$EXECUTE_NETWORK_PATH/ExecuteNetwork\""
    usage
    exit 1
fi

# Check that the model exists and has a supported extension.
# "$MODEL" is quoted so that a path containing spaces or glob characters is tested as one file name
# instead of being split into several arguments to the test.
if [ -f "$MODEL" ]; then
    # Require a literal ".tflite" suffix, in line with the message printed below.
    if [[ "$MODEL" != *.tflite ]]; then
        echo "Only .tflite files are supported."
        exit 1
    fi
else
    echo Model file: "\"$MODEL\" could not be found."
    usage
    exit 1
fi

# Find out the available backends. Unfortunately the list of backends spans multiple lines.
# This means we have to do this in several steps.
echo -n -e "Available backends on this executable\t\t:"
# The executable path is quoted so that a build directory containing spaces still works.
HELP_OUTPUT=$("$EXECUTE_NETWORK" --help)
# Fold the help text onto a single line (squeezing runs of whitespace into one space), keep what sits
# between the last ": [" and the following "]", and drop the commas so that the backend names are
# separated by spaces only. tr is used rather than an unquoted echo so that the help text is never
# subject to pathname expansion.
BACKENDS=$(tr -s '[:space:]' ' ' <<<"$HELP_OUTPUT" | sed -e 's/.*: \[//' -e 's/\].*//' -e 's/,//g')
# Remove the leading space to make it look prettier.
BACKENDS="${BACKENDS:1}"
if [ -z "$BACKENDS" ]; then
    echo ""
    echo "Execute Network reported no available backends!"
    exit 1
else
    echo " $BACKENDS"
    # We really need the CpuRef to be in there.
    if [[ ! $BACKENDS =~ "CpuRef" ]]; then
        echo ""
        echo "Fatal: Please recompile ExecuteNetwork to include the CpuRef backend. (-DARMNNREF=1)"
        exit 1
    fi
fi


# This is where the real work starts.
# Model execution can take a long time, so ctrl-c is intercepted and the user is told what happened.

# SIGINT handler: prints a two line notice and ends the script with exit code 1.
ctrl_c() {
    printf '%s\n' "Interrupted." "No patience eh? Try a smaller model."
    exit 1
}

trap ctrl_c INT


# We need to check that the delegate is supported otherwise we can't run through the tf runtime.
# The executable and model paths are quoted so that paths containing spaces reach ExecuteNetwork intact.
echo -n -e "Is the delegate supported on this executable?\t:"
TFLITE_EXECUTION=$("$EXECUTE_NETWORK" -m "$MODEL" -T tflite -c CpuRef -N)
# Check for an error message about building with the delegate.
if [[ $TFLITE_EXECUTION == *"Tensorflow-Lite delegate support"* ]]; then
    echo ""
    echo "Fatal: Please recompile ExecuteNetwork with TfLite delegate support enabled. (-DBUILD_CLASSIC_DELEGATE=1)"
    exit 1
else
    echo " Yes"
fi

# Run through CpuRef to see if Arm NN supports the model.
echo -n -e "Is the model fully supported by Arm NN?\t\t:"
REF_EXECUTION=$("$EXECUTE_NETWORK" -m "$MODEL" -c CpuRef -N)
# If it failed look for the most common reason - an unsupported layer.
if [ $? -ne 0 ]; then
    if [[ $REF_EXECUTION == *"is not supported on requested backend CpuRef"* ]]; then
        echo -e " No - One or more layers are not supported by Arm NN"
    else
        echo -e " No - Execution using CpuRef backend failed."
    fi
    echo -e "The Reported problems were\t:"
    # Show every captured line that mentions Warning, ERROR or Fatal, one problem per line.
    grep -E 'Warning|ERROR|Fatal' <<<"$REF_EXECUTION"
    echo "To recreate this error try: \"$EXECUTE_NETWORK -m $MODEL -c CpuRef\" "
    exit 1
fi
echo " Yes"

# This function will execute the model and print a string representation of the accuracy result.
# This is the first time the model will be executed on the given backend.
# When the run fails because a layer is not supported on the backend, it is repeated with
# -c $BACKEND,CpuRef to allow the odd layer to be supported by an unaccelerated backend.
#
# Parameters:
# $1 Backend string like CpuRef.
# $2 Additional ExecuteNetwork parameters (optional).
#
# Output on stdout, always followed by two tabs:
#   OK           every "Byte level" line of the run reported a value that is numerically zero.
#   !<values>    at least one reported value is non-zero, or no "Byte level" line was found at all.
#                The values are listed separated by spaces.
#   " *" is appended to the above when the CpuRef fall back was required.
#   A ***...*** message is printed instead when the backend is not supported or the run failed.
#
# Returns 1 when the backend is not supported or the execution failed, 0 otherwise.
#
function RunAccuracyOnBackendWithParameters {
    local BACKEND=$1
    local ADDITIONAL_PARAM=$2
    local REQUIRES_CPUREF=""
    local ACCURACY_RUN
    local ACCURACY
    # Run on BACKEND to check accuracy against TfLite runtime first. This will be a warning not a failure.
    # $ADDITIONAL_PARAM is deliberately unquoted: it may carry an option together with its value.
    ACCURACY_RUN=$("$EXECUTE_NETWORK" -m "$MODEL" -c "$BACKEND" $ADDITIONAL_PARAM -A -N)
    # Start by checking the return code.
    if [ $? -ne 0 ]; then
        # Maybe this backend isn't supported.
        if [[ $ACCURACY_RUN == *"None of the preferred backends [$BACKEND ] are supported"* ]]; then
            echo -e "\t\t***Is not supported***"
            return 1
        elif [[ $ACCURACY_RUN == *"is not supported on requested backend"* ]]; then
            # One or more layers require a fall back. Run again with CpuRef fall back.
            ACCURACY_RUN=$("$EXECUTE_NETWORK" -m "$MODEL" -c "$BACKEND,CpuRef" $ADDITIONAL_PARAM -A -N)
            REQUIRES_CPUREF="*"
        else
            # In the case of a general failure against this backend tell the user what we tried and then
            # ignore this backend.
            echo -e "\t***Execution failed. Ignoring this backend. Command was: \"$EXECUTE_NETWORK -m $MODEL -c $BACKEND -A -N\""
            return 1
        fi
    fi
    # Now check the RMS value(s). The last field of every "Byte level" line is compared numerically
    # against zero, so a value such as 1.50 is not mistaken for zero just because it ends in a 0.
    ACCURACY=$(printf '%s\n' "$ACCURACY_RUN" | awk '
        /Byte level/ {
            seen = 1
            values = values sep $NF
            sep = " "
            if ($NF + 0 != 0) {
                nonzero = 1
            }
        }
        END {
            if (seen && !nonzero) {
                print "OK"
            } else {
                print "!" values
            }
        }')
    # Add on the * if we needed to add CpuRef.
    if [ -n "$REQUIRES_CPUREF" ]; then
        echo -e "$ACCURACY $REQUIRES_CPUREF\t\t"
    else
        echo -e "$ACCURACY\t\t"
    fi
}

# This function will execute the model and print a string representation of the timing results.
# The execution is done with -c $BACKEND,CpuRef to allow the odd layer to not be supported by an
# accelerated backend.
#
# Parameters:
# $1 Backend string like CpuRef.
# $2 Additional ExecuteNetwork parameters (optional).
#
# Output on stdout, one tab separated line:
#   MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
# Each value is the token that follows "time:" on the matching "Initialization time",
# "Optimization time" and "Inference time" lines of the ExecuteNetwork output. The first inference
# is reported on its own and the remaining ones are averaged. When the run produced no such lines
# the first three fields are empty and the average is 0.
#
function RunPerformanceOnBackendWithParameters {
    local BACKEND=$1
    local ADDITIONAL_PARAM=$2
    local SPEED_RUN
    # Execute with 6 inferences. Mark the first as initial inference. Average the rest.
    # $ADDITIONAL_PARAM is deliberately unquoted: it may carry an option together with its value.
    SPEED_RUN=$("$EXECUTE_NETWORK" -m "$MODEL" -c "$BACKEND,CpuRef" -I 6 -N $ADDITIONAL_PARAM)

    # A single awk pass picks out the numbers. Reading the token after "time:" avoids both the
    # negative length substring expansion, which raises an error on an empty string, and the
    # stripping of every letter from the line.
    printf '%s\n' "$SPEED_RUN" | awk '
        # Returns the token following "time:" on the current line, or "" when there is none.
        function TimeValue(   i) {
            for (i = 1; i < NF; i++) {
                if ($i == "time:") {
                    return $(i + 1)
                }
            }
            return ""
        }
        /Initialization time/ { load = TimeValue() }
        /Optimization time/   { optimization = TimeValue() }
        /Inference time/ {
            # The initial inference is kept apart as it would skew the average.
            if (++runs == 1) {
                initial = TimeValue()
            } else {
                sum += TimeValue()
            }
        }
        END {
            average = (runs > 1) ? sum / (runs - 1) : 0
            # Result format is: MODEL LOAD | OPTIMIZATION | INITIAL INFERENCE | AVERAGE INFERENCE
            printf "%s\t\t%s\t\t\t%s\t\t\t%s\t\n", load, optimization, initial, average
        }'
}


# Check execution in all available backends.
echo    "==================================================================================="
echo -e "BACKEND\t\tACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)"
for backend in $BACKENDS
do
    echo -n -e "$backend\t\t"
    RESULT=$(RunAccuracyOnBackendWithParameters "$backend")
    # RunAccuracyOnBackendWithParameters returns 1 both when the backend is not supported and when
    # the execution failed. Using the status (rather than searching the text for "not supported")
    # makes sure a failed backend really is ignored from here on.
    ACCURACY_STATUS=$?
    echo -n -e "$RESULT"
    if [ "$ACCURACY_STATUS" -eq 0 ]; then
        # Only a successful result can carry the CpuRef fall back marker; the failure messages
        # contain asterisks of their own and must not trigger the footnote.
        if [[ $RESULT == *"*"* ]]; then
            REQUIRED_CPU_REF=1
        fi
        RESULT=$(RunPerformanceOnBackendWithParameters "$backend")
        echo -n -e "$RESULT"
        # Save some specific values for use later.
        if [ "$backend" == "CpuAcc" ]; then
            # In the case of CpuAcc we save the average inference time.
            CPUACC_AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        fi
        if [ "$backend" == "GpuAcc" ]; then
            # In the case of GpuAcc we save the average inference time.
            GPUACC_AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        fi
    else
        # Remove this backend from future tests.
        BACKENDS=$(echo $BACKENDS | sed "s/$backend//")
    fi
    echo
done
# Only print this if it was required.
if [ -n "$REQUIRED_CPU_REF" ]; then
    echo "* denotes this backend required fallback to CpuRef."
    echo
fi

# Now its time to look at backend specific parameters.

# This function first runs the accuracy test and then the performance test for a backend with one
# extra ExecuteNetwork parameter and prints a header line followed by one result row. The average
# inference time of the run is compared to the average measured earlier without the parameter.
#
# Parameters:
# $1 Backend string like CpuAcc. ",CpuRef" is appended before it is passed on to the run functions.
# $2 Extra ExecuteNetwork parameter to evaluate, e.g. --fp16-turbo-mode.
# $3 Average inference time (ms) from the earlier run to compare against.
#
# The row ends with "<delta>  (<new> v <old>)" when the new average is numerically smaller than $3,
# otherwise with a "no improvement" marker.
#
function RunAccuracyAndPerformanceWithExtraParameter
{
    local BACKEND=$1
    local EXTRA_PARAM=$2
    local AVERAGE_INFERENCE_TIME=$3
    local RESULT
    local PARAM_AVERAGE_INFERENCE_TIME
    local DELTA
    echo -e "ACCURACY\tMODEL LOAD(ms)\tOPTIMIZATION(ms)\tINITIAL INFERENCE(ms)\tAVERAGE INFERENCE(ms)\t\tDELTA(ms)"
    RESULT=$(RunAccuracyOnBackendWithParameters "$BACKEND,CpuRef" "$EXTRA_PARAM")
    echo -n "$RESULT"
    RESULT=$(RunPerformanceOnBackendWithParameters "$BACKEND,CpuRef" "$EXTRA_PARAM")
    PARAM_AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
    # If adding the parameter was faster then include by how much.
    # The comparison has to be numeric: the < operator of [[ ]] compares strings, which would rank
    # "9.5" after "10.2". awk prints the difference only when the new average is the smaller number.
    DELTA=$(awk -v base="$AVERAGE_INFERENCE_TIME" -v now="$PARAM_AVERAGE_INFERENCE_TIME" '
        BEGIN {
            if (base != "" && now != "" && now + 0 < base + 0) {
                print base - now
            }
        }')
    if [ -n "$DELTA" ]; then
        echo -e "$RESULT\t\t\t$DELTA  ($PARAM_AVERAGE_INFERENCE_TIME v $AVERAGE_INFERENCE_TIME)"
    else
        echo -e "$RESULT\t\t\t**No improvment**"
    fi
}


# Start with CpuAcc. Three knobs to twiddle, threads, fast-math and fp16.
if [[ $BACKENDS == *CpuAcc* ]]; then
    printf '%s\n' \
        "" \
        "CpuAcc optimizations." \
        "============================" \
        "The value of \"number-of-threads\" parameter by default is decided on by the backend." \
        "Cycle through number-of-threads=1 -> 12 and see if any are faster than the default." \
        ""
    for thread_count in {1..12}; do
        RESULT=$(RunPerformanceOnBackendWithParameters "CpuAcc,CpuRef" "--number-of-threads $thread_count")
        AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        # bc prints 1 when the new average is below the average saved from the default run.
        is_faster=$(bc -l <<<"$AVERAGE_INFERENCE_TIME < $CPUACC_AVERAGE_INFERENCE_TIME")
        if (( is_faster )); then
            DELTA=$(bc <<<"$CPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME")
            echo " \"--number-of-threads $thread_count\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $CPUACC_AVERAGE_INFERENCE_TIME)"
            FASTER=1
        fi
    done
    # FASTER is only set when at least one thread count beat the default.
    if [[ -z $FASTER ]]; then
        echo "No value of \"number-of-threads\" was faster than the default."
    fi

    # fp16-turbo-mode comes next; both accuracy and speed are measured.
    echo
    echo "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--fp16-turbo-mode" "$CPUACC_AVERAGE_INFERENCE_TIME"

    # Finally enable-fast-math, again measuring both accuracy and speed.
    echo
    echo "Now trying \"enable-fast-math\"."
    RunAccuracyAndPerformanceWithExtraParameter CpuAcc "--enable-fast-math" "$CPUACC_AVERAGE_INFERENCE_TIME"
fi

# GpuAcc.
# Options to check enable-fast-math, fp16-turbo-mode, and tuning-level/tuning-path.
if [[ $BACKENDS =~ "GpuAcc" ]]; then
    echo
    echo    "GpuAcc optimizations."
    echo    "============================"

    # fp16-turbo-mode. We do both accuracy and speed on this one.
    echo
    echo -n  "Now trying to enable fp16-turbo-mode. This will only have positive results with fp32 models."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--fp16-turbo-mode" "$GPUACC_AVERAGE_INFERENCE_TIME"

    # Next is enable-fast-math. Again both accuracy and speed on this one.
    echo
    echo -n  "Now trying \"enable-fast-math\"."
    echo
    RunAccuracyAndPerformanceWithExtraParameter GpuAcc "--enable-fast-math" "$GPUACC_AVERAGE_INFERENCE_TIME"

    # Next is tuning levels. Just speed on this one.
    echo
    echo -n  "Now trying \"tuning-level/tuning-path\"."
    echo
    for i in {1..3}
    do
        touch ./tuned-network.bin
        # Create tuned network file with the first run. The backend list is the literal string
        # GpuAcc,CpuRef; it must not be written as a variable expansion, which would be empty.
        OUTPUT=$("$EXECUTE_NETWORK" -m "$MODEL" -c GpuAcc,CpuRef --tuning-path ./tuned-network.bin --tuning-level "$i" -N)
        AssertZeroExitCode
        # Now run the performance test reusing that saved network.
        RESULT=$(RunPerformanceOnBackendWithParameters "GpuAcc,CpuRef" "--tuning-path ./tuned-network.bin")
        AVERAGE_INFERENCE_TIME=$(echo $RESULT | cut -d ' ' -f 4)
        if (( $(echo "$AVERAGE_INFERENCE_TIME < $GPUACC_AVERAGE_INFERENCE_TIME" | bc -l) )); then
            # Improvement = earlier average minus new average, so a faster run yields a positive number.
            DELTA=$(echo "$GPUACC_AVERAGE_INFERENCE_TIME - $AVERAGE_INFERENCE_TIME" | bc)
            echo  " \"--tuning-level $i\" resulted in a faster average inference by $DELTA ms. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        else
            echo  " \"--tuning-level $i\" did not result in a faster average inference time. ($AVERAGE_INFERENCE_TIME v $GPUACC_AVERAGE_INFERENCE_TIME)"
        fi
        rm ./tuned-network.bin
    done
fi