From 998a20832fd511de75c2cd935b7a0e851e91156d Mon Sep 17 00:00:00 2001 From: Kevin May Date: Wed, 2 Mar 2022 12:11:31 +0000 Subject: Typo in Guide * Only file changed is shim/BuildGuideShimSupportLibrary.md * All other files are merge squash from Arm NN 22.02 Signed-off-by: Kevin May Change-Id: Id82a6e9ac8abf74c1073c08744712f50e98dece0 --- BuildGuideAndroidNDK.md | 17 +- BuildGuideCrossCompilation.md | 73 +++- delegate/BuildGuideNative.md | 20 +- delegate/DelegateQuickStartGuide.md | 16 +- docs/Doxyfile | 2 +- docs/FAQ.md | 16 - include/armnn/backends/IWorkload.hpp | 4 + include/armnn/backends/Workload.hpp | 11 +- .../armnnTestUtils/MockWorkloadFactoryHelper.hpp | 41 -- .../tests/testdata/quick_brown_fox_16000khz.wav | Bin 0 -> 196728 bytes shim/BuildGuideShimSupportLibrary.md | 12 +- src/armnn/LoadedNetwork.cpp | 447 +++++++++++---------- src/armnn/LoadedNetwork.hpp | 17 +- src/armnn/Runtime.cpp | 15 +- src/armnnTestUtils/CMakeLists.txt | 3 +- src/armnnTestUtils/UnitTests.hpp | 6 +- src/backends/backendsCommon/WorkloadData.cpp | 1 - .../backendsCommon/test/EndToEndTestImpl.hpp | 53 ++- .../ClBatchNormalizationFloatWorkload.cpp | 38 ++ .../ClBatchNormalizationFloatWorkload.hpp | 7 + .../cl/workloads/ClConvertFp16ToFp32Workload.cpp | 46 ++- .../cl/workloads/ClConvertFp16ToFp32Workload.hpp | 13 + .../cl/workloads/ClConvertFp32ToFp16Workload.cpp | 46 ++- .../cl/workloads/ClConvertFp32ToFp16Workload.hpp | 13 + .../cl/workloads/ClConvolution2dWorkload.cpp | 2 +- .../cl/workloads/ClConvolution2dWorkload.hpp | 2 + src/backends/cl/workloads/ClFloorFloatWorkload.cpp | 39 +- src/backends/cl/workloads/ClFloorFloatWorkload.hpp | 5 + .../workloads/ClL2NormalizationFloatWorkload.cpp | 38 ++ .../workloads/ClL2NormalizationFloatWorkload.hpp | 6 + src/backends/cl/workloads/ClLstmFloatWorkload.cpp | 38 ++ src/backends/cl/workloads/ClLstmFloatWorkload.hpp | 5 + .../cl/workloads/ClNormalizationFloatWorkload.cpp | 38 ++ .../cl/workloads/ClNormalizationFloatWorkload.hpp | 5 + src/backends/neon/NeonTimer.cpp | 3 +- .../workloads/NeonConvertBf16ToFp32Workload.cpp | 38 ++ .../workloads/NeonConvertBf16ToFp32Workload.hpp | 5 + .../workloads/NeonConvertFp16ToFp32Workload.cpp | 38 ++ .../workloads/NeonConvertFp16ToFp32Workload.hpp | 5 + .../workloads/NeonConvertFp32ToBf16Workload.cpp | 38 ++ .../workloads/NeonConvertFp32ToBf16Workload.hpp | 5 + .../workloads/NeonConvertFp32ToFp16Workload.cpp | 38 ++ .../workloads/NeonConvertFp32ToFp16Workload.hpp | 5 + .../neon/workloads/NeonFloorFloatWorkload.cpp | 39 ++ .../neon/workloads/NeonFloorFloatWorkload.hpp | 5 + .../workloads/NeonL2NormalizationFloatWorkload.cpp | 38 ++ .../workloads/NeonL2NormalizationFloatWorkload.hpp | 5 + .../neon/workloads/NeonLstmFloatWorkload.cpp | 38 ++ .../neon/workloads/NeonLstmFloatWorkload.hpp | 5 + .../workloads/NeonNormalizationFloatWorkload.cpp | 38 ++ .../workloads/NeonNormalizationFloatWorkload.hpp | 5 + .../reference/test/RefWorkloadFactoryHelper.hpp | 4 +- src/backends/reference/workloads/CMakeLists.txt | 1 + .../reference/workloads/RefActivationWorkload.hpp | 6 +- .../reference/workloads/RefArgMinMaxWorkload.cpp | 2 +- .../reference/workloads/RefArgMinMaxWorkload.hpp | 4 +- .../reference/workloads/RefBaseWorkload.hpp | 36 ++ .../workloads/RefBatchNormalizationWorkload.cpp | 2 +- .../workloads/RefBatchNormalizationWorkload.hpp | 4 +- .../workloads/RefBatchToSpaceNdWorkload.hpp | 6 +- .../reference/workloads/RefCastWorkload.hpp | 6 +- .../workloads/RefChannelShuffleWorkload.hpp | 6 +- .../reference/workloads/RefComparisonWorkload.cpp | 2 
+- .../reference/workloads/RefComparisonWorkload.hpp | 6 +- .../reference/workloads/RefConcatWorkload.hpp | 6 +- .../reference/workloads/RefConstantWorkload.cpp | 2 +- .../reference/workloads/RefConstantWorkload.hpp | 4 +- .../workloads/RefConvertBf16ToFp32Workload.hpp | 2 +- .../workloads/RefConvertFp16ToFp32Workload.hpp | 2 +- .../workloads/RefConvertFp32ToBf16Workload.hpp | 2 +- .../workloads/RefConvertFp32ToFp16Workload.hpp | 2 +- .../workloads/RefConvolution2dWorkload.cpp | 2 +- .../workloads/RefConvolution2dWorkload.hpp | 4 +- .../workloads/RefConvolution3dWorkload.cpp | 2 +- .../workloads/RefConvolution3dWorkload.hpp | 4 +- .../reference/workloads/RefDebugWorkload.hpp | 2 +- .../workloads/RefDepthToSpaceWorkload.hpp | 6 +- .../RefDepthwiseConvolution2dWorkload.cpp | 2 +- .../RefDepthwiseConvolution2dWorkload.hpp | 4 +- .../reference/workloads/RefDequantizeWorkload.hpp | 8 +- .../workloads/RefDetectionPostProcessWorkload.cpp | 2 +- .../workloads/RefDetectionPostProcessWorkload.hpp | 4 +- .../workloads/RefElementwiseUnaryWorkload.cpp | 2 +- .../workloads/RefElementwiseUnaryWorkload.hpp | 6 +- .../reference/workloads/RefElementwiseWorkload.cpp | 2 +- .../reference/workloads/RefElementwiseWorkload.hpp | 6 +- .../RefFakeQuantizationFloat32Workload.hpp | 2 +- .../reference/workloads/RefFillWorkload.hpp | 6 +- .../reference/workloads/RefFloorWorkload.hpp | 6 +- .../workloads/RefFullyConnectedWorkload.cpp | 2 +- .../workloads/RefFullyConnectedWorkload.hpp | 4 +- .../reference/workloads/RefGatherWorkload.hpp | 6 +- .../workloads/RefInstanceNormalizationWorkload.cpp | 2 +- .../workloads/RefInstanceNormalizationWorkload.hpp | 4 +- .../workloads/RefL2NormalizationWorkload.cpp | 2 +- .../workloads/RefL2NormalizationWorkload.hpp | 4 +- .../reference/workloads/RefLogSoftmaxWorkload.hpp | 6 +- .../workloads/RefLogicalBinaryWorkload.cpp | 2 +- .../workloads/RefLogicalBinaryWorkload.hpp | 6 +- .../workloads/RefLogicalUnaryWorkload.cpp | 2 +- .../workloads/RefLogicalUnaryWorkload.hpp | 6 +- .../reference/workloads/RefLstmWorkload.cpp | 2 +- .../reference/workloads/RefLstmWorkload.hpp | 4 +- .../reference/workloads/RefMeanWorkload.cpp | 2 +- .../reference/workloads/RefMeanWorkload.hpp | 4 +- .../workloads/RefNormalizationWorkload.cpp | 2 +- .../workloads/RefNormalizationWorkload.hpp | 4 +- .../reference/workloads/RefPadWorkload.hpp | 6 +- .../reference/workloads/RefPermuteWorkload.hpp | 2 +- .../reference/workloads/RefPooling2dWorkload.hpp | 6 +- .../reference/workloads/RefPooling3dWorkload.hpp | 6 +- .../reference/workloads/RefPreluWorkload.cpp | 2 +- .../reference/workloads/RefPreluWorkload.hpp | 4 +- .../reference/workloads/RefQLstmWorkload.cpp | 2 +- .../reference/workloads/RefQLstmWorkload.hpp | 4 +- .../reference/workloads/RefQuantizeWorkload.cpp | 2 +- .../reference/workloads/RefQuantizeWorkload.hpp | 4 +- .../reference/workloads/RefRankWorkload.hpp | 6 +- .../reference/workloads/RefReduceWorkload.cpp | 2 +- .../reference/workloads/RefReduceWorkload.hpp | 4 +- .../reference/workloads/RefReshapeWorkload.hpp | 6 +- .../reference/workloads/RefResizeWorkload.hpp | 6 +- .../reference/workloads/RefShapeWorkload.hpp | 6 +- .../reference/workloads/RefSliceWorkload.hpp | 6 +- .../reference/workloads/RefSoftmaxWorkload.hpp | 6 +- .../workloads/RefSpaceToBatchNdWorkload.hpp | 6 +- .../workloads/RefSpaceToDepthWorkload.hpp | 6 +- .../reference/workloads/RefSplitterWorkload.hpp | 6 +- .../reference/workloads/RefStackWorkload.cpp | 2 +- .../reference/workloads/RefStackWorkload.hpp | 4 +- 
.../workloads/RefStridedSliceWorkload.cpp | 2 +-
.../workloads/RefStridedSliceWorkload.hpp | 4 +-
.../RefTransposeConvolution2dWorkload.cpp | 2 +-
.../RefTransposeConvolution2dWorkload.hpp | 4 +-
.../reference/workloads/RefTransposeWorkload.hpp | 2 +-
.../RefUnidirectionalSequenceLstmWorkload.cpp | 2 +-
.../RefUnidirectionalSequenceLstmWorkload.hpp | 4 +-
137 files changed, 1295 insertions(+), 511 deletions(-)
delete mode 100644 include/armnnTestUtils/MockWorkloadFactoryHelper.hpp
create mode 100644 python/pyarmnn/examples/speech_recognition/tests/testdata/quick_brown_fox_16000khz.wav
create mode 100644 src/backends/reference/workloads/RefBaseWorkload.hpp

diff --git a/BuildGuideAndroidNDK.md b/BuildGuideAndroidNDK.md
index 9936bf2df1..5068a8f5c8 100644
--- a/BuildGuideAndroidNDK.md
+++ b/BuildGuideAndroidNDK.md
@@ -12,7 +12,7 @@
## Introduction
These are step-by-step instructions for using the Android NDK to build Arm NN.
-They have been tested on a clean install of Ubuntu 18.04, and should also work with other OS versions.
+They have been tested on a clean install of Ubuntu 18.04 and 20.04, and should also work with other OS versions.
The instructions show how to build the Arm NN core library. Building protobuf is optional. We have included steps should the user wish to build it (e.g. as an Onnx dependency).
All downloaded or generated files will be saved inside the `$HOME/armnn-devenv` directory.
@@ -37,7 +37,7 @@ All downloaded or generated files will be saved inside the `$HOME/armnn-devenv`

## Build Google's Protobuf library (Optional)

-* Clone protobuf:
+* Clone protobuf: (Requires Git if not previously installed: `sudo apt install git`)

```bash
mkdir $HOME/armnn-devenv/google
cd protobuf
git checkout -b v3.12.0 v3.12.0
```

-* Build a native (x86) version of the protobuf libraries and compiler (protoc):
+* Build a native (x86) version of the protobuf libraries and compiler (protoc): (Requires curl, autoconf, libtool, and other build dependencies if not previously installed: `sudo apt install curl autoconf libtool build-essential g++`)

```bash
./autogen.sh
cd ..
Note: The ANDROID_API variable should be set to the Android API version number you are using. E.g. "30" for Android R.

## Download Arm NN
-* Clone Arm NN:
+* Clone Arm NN: (Requires Git if not previously installed: `sudo apt install git`)

```bash
git clone https://github.com/ARM-software/ComputeLibrary.git
cd ComputeLibrary
git checkout <tag>
```
-Arm NN and Arm Compute Library are developed closely together. If you would like to use the Arm NN 21.11 release you will need the 21.11 release of ACL too. For example, if you want to checkout the 21.11 release tag:
+For example, if you want to checkout the 21.11 release tag:
```bash
git checkout v21.11
```
+
+Arm NN and Arm Compute Library are developed closely together. If you would like to use a particular release of Arm NN you will need the same release tag of ACL too.
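+
+For example, to check out the same release in both repositories (shown here with the v22.02 tags as an illustration; any matching pair of release tags works, and `git tag -l` lists what is available in each repository):
+```bash
+cd $HOME/armnn-devenv/armnn && git checkout v22.02
+cd $HOME/armnn-devenv/ComputeLibrary && git checkout v22.02
+```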
+
Arm NN provides a script that downloads the version of Arm Compute Library that Arm NN was tested with:
```bash
git checkout $(../armnn/scripts/get_compute_library.sh -p)
```
-* the Arm Compute Library:
+* Build the Arm Compute Library: (Requires SCons if not previously installed: `sudo apt install scons`)

```bash
scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" \
@@ -127,7 +130,7 @@ scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" \

## Build Arm NN

-* Build Arm NN:
+* Build Arm NN: (Requires CMake if not previously installed: `sudo apt install cmake`)

```bash
mkdir $HOME/armnn-devenv/armnn/build

diff --git a/BuildGuideCrossCompilation.md b/BuildGuideCrossCompilation.md
index 53a9d719bf..d5d1a227e1 100644
--- a/BuildGuideCrossCompilation.md
+++ b/BuildGuideCrossCompilation.md
@@ -2,6 +2,7 @@

- [Introduction](#introduction)
- [Cross-compiling ToolChain](#cross-compiling-toolchain)
+- [Install CMake](#install-cmake)
- [Build and install Google's Protobuf library](#build-and-install-google-s-protobuf-library)
- [Download Arm NN](#download-arm-nn)
- [Build Arm Compute Library](#build-arm-compute-library)
@@ -9,13 +10,14 @@
- [Build Onnx](#build-onnx)
- [Build TfLite](#build-tflite)
- [Build Arm NN](#build-armnn)
+- [Generate TF Lite Schema](#generate-tflite-schema)
- [Build Standalone Sample Dynamic Backend](#build-standalone-sample-dynamic-backend)
- [Run Unit Tests](#run-unit-tests)
- [Troubleshooting and Errors:](#troubleshooting-and-errors-)

## Introduction
-These are the step by step instructions on Cross-Compiling Arm NN under an x86_64 system to target an Arm64 system. This build flow has been tested with Ubuntu 18.04 and it depends on the same version of Ubuntu or Debian being installed on both the build host and target machines. The instructions assume you are using a bash shell and show how to build the Arm NN core library, Protobuf, Tflite, Flatbuffer and Compute Libraries.
+These are step-by-step instructions for cross-compiling Arm NN under an x86_64 system to target an Arm64 system. This build flow has been tested with Ubuntu 18.04 and 20.04 and it depends on the same version of Ubuntu or Debian being installed on both the build host and target machines. The instructions assume you are using a bash shell and show how to build the Arm NN core library, Protobuf, TfLite, Flatbuffer and Compute Libraries.

Start by creating a directory to contain all components:
```
mkdir $HOME/armnn-devenv
cd $HOME/armnn-devenv
@@ -29,6 +31,20 @@ cd $HOME/armnn-devenv
sudo apt install crossbuild-essential-arm64
```

+## Install CMake
+CMake 3.19.0-rc3 is required to build the TF Lite Delegate.
+
+```bash
+sudo apt-get install libssl-dev
+wget https://github.com/Kitware/CMake/releases/download/v3.19.0-rc3/cmake-3.19.0-rc3.tar.gz
+tar -zxvf cmake-3.19.0-rc3.tar.gz
+cd cmake-3.19.0-rc3
+./bootstrap --prefix=$HOME/armnn-devenv/cmake/install
+make all install
+cd ..
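+
+# Optional sanity check (illustrative; the install prefix matches the --prefix
+# chosen in the bootstrap step above): make sure the freshly built CMake is
+# found ahead of any older system copy.
+export PATH=$HOME/armnn-devenv/cmake/install/bin:$PATH
+cmake --version   # should report 3.19.0-rc3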
+```
+
+
## Build and install Google's Protobuf library

We support protobuf version 3.12.0
@@ -101,15 +117,14 @@ Arm NN provides a script that downloads the version of Arm Compute Library that
```bash
git checkout $(../armnn/scripts/get_compute_library.sh -p)
```
-* Build the Arm Compute Library:
+* Build the Arm Compute Library: (Requires SCons if not previously installed: `sudo apt install scons`)

```bash
-scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j4 internal_only=0
+scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j4
```

## Build Flatbuffer
* Building Flatbuffer version 1.12.0
- (Requires CMake if not previously installed: `sudo apt install cmake`)
```bash
cd $HOME/armnn-devenv
wget -O flatbuffers-1.12.0.tar.gz https://github.com/google/flatbuffers/archive/v1.12.0.tar.gz
@@ -157,8 +172,31 @@ cd $HOME/armnn-devenv
git clone https://github.com/tensorflow/tensorflow.git
cd tensorflow/
git checkout $(../armnn/scripts/get_tensorflow.sh -p) # Checks out the latest tested version of TF
-mkdir tflite
-cd tflite
+cd ..
```

+
+* You will need to download the gcc-arm-8.3-2019.03 toolchain and continue building TF Lite as follows:
+```
+curl -LO https://storage.googleapis.com/mirror.tensorflow.org/developer.arm.com/media/Files/downloads/gnu-a/8.3-2019.03/binrel/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz
+mkdir tflite-toolchains
+tar xvf gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu.tar.xz -C tflite-toolchains
+mkdir -p tflite/build
+cd tflite/build
+ARMCC_PREFIX=$HOME/armnn-devenv/tflite-toolchains/gcc-arm-8.3-2019.03-x86_64-aarch64-linux-gnu/bin/aarch64-linux-gnu- \
+ARMCC_FLAGS="-funsafe-math-optimizations" \
+cmake -DCMAKE_C_COMPILER=${ARMCC_PREFIX}gcc \
+      -DCMAKE_CXX_COMPILER=${ARMCC_PREFIX}g++ \
+      -DCMAKE_C_FLAGS="${ARMCC_FLAGS}" -DCMAKE_CXX_FLAGS="${ARMCC_FLAGS}" \
+      -DCMAKE_VERBOSE_MAKEFILE:BOOL=ON -DCMAKE_SYSTEM_NAME=Linux \
+      -DTFLITE_ENABLE_XNNPACK=OFF \
+      -DCMAKE_SYSTEM_PROCESSOR=aarch64 \
+      $HOME/armnn-devenv/tensorflow/tensorflow/lite/
+cmake --build .
+```
+
+## Generate TF Lite Schema
+```
+cd $HOME/armnn-devenv/tflite
cp ../tensorflow/tensorflow/lite/schema/schema.fbs .
../flatbuffers-1.12.0/build/flatc -c --gen-object-api --reflect-types --reflect-names schema.fbs
```
@@ -181,7 +219,7 @@ CXX=aarch64-linux-gnu-g++ CC=aarch64-linux-gnu-gcc cmake .. \
-DONNX_GENERATED_SOURCES=$HOME/armnn-devenv/onnx \
-DBUILD_ONNX_PARSER=1 \
-DBUILD_TF_LITE_PARSER=1 \
--DTF_LITE_GENERATED_PATH=$HOME/armnn-devenv/tflite \
+-DTENSORFLOW_ROOT=$HOME/armnn-devenv/tensorflow \
-DFLATBUFFERS_ROOT=$HOME/armnn-devenv/flatbuffers-arm64 \
-DFLATC_DIR=$HOME/armnn-devenv/flatbuffers-1.12.0/build \
-DPROTOBUF_ROOT=$HOME/armnn-devenv/google/x86_64_pb_install \
@@ -195,6 +233,12 @@ CXX=aarch64-linux-gnu-g++ CC=aarch64-linux-gnu-gcc cmake .. \
-DSAMPLE_DYNAMIC_BACKEND=1 \
-DDYNAMIC_BACKEND_PATHS=$SAMPLE_DYNAMIC_BACKEND_PATH
```
+* If you want to build the Arm NN TF Lite Delegate, add the arguments:
+```bash
+-DTFLITE_LIB_ROOT=$HOME/armnn-devenv/tflite/build \
+-DTF_LITE_SCHEMA_INCLUDE_PATH=$HOME/armnn-devenv/tflite \
+-DBUILD_ARMNN_TFLITE_DELEGATE=1
+```
* Run the build
```bash
make -j32
```
@@ -241,9 +285,18 @@ ln -s libprotobuf.so.23.0.0 ./libprotobuf.so.23
```bash
LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./UnitTests
-Running 4493 test cases...
+[doctest] doctest version is "2.4.6"
+[doctest] run with "--help" for options
+===============================================================================
+[doctest] test cases: 4817 | 4817 passed | 0 failed | 0 skipped
+[doctest] assertions: 807634 | 807634 passed | 0 failed |
+[doctest] Status: SUCCESS!
+```
-*** No errors detected
+
+* Run the Delegate UnitTests:
+
+```bash
+LD_LIBRARY_PATH=./:$LD_LIBRARY_PATH ./delegate/DelegateUnitTests
```

## Troubleshooting and Errors:
@@ -309,5 +362,5 @@ cc1plus: error: unrecognized command line option ‘-Wno-implicit-fallthrough’
```
* Add Werror=0 to the scons command:
```
-scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j8 internal_only=0 Werror=0
+scons arch=arm64-v8a neon=1 opencl=1 embed_kernels=1 extra_cxx_flags="-fPIC" -j8 Werror=0
```
diff --git a/delegate/BuildGuideNative.md b/delegate/BuildGuideNative.md
index 932c74423a..4aa1af3ee9 100644
--- a/delegate/BuildGuideNative.md
+++ b/delegate/BuildGuideNative.md
@@ -65,7 +65,7 @@ found [here](https://docs.bazel.build/versions/master/install-compile-source.htm
compile with CMake. Depending on your operating system and architecture there might be an easier way.
```bash
wget -O cmake-3.16.0.tar.gz https://cmake.org/files/v3.16/cmake-3.16.0.tar.gz
-tar -xzf cmake-3.16.0.tar.gz -C $BASEDIR/cmake-3.16.0
+tar -xzf cmake-3.16.0.tar.gz -C $BASEDIR/

# If you have an older CMake, remove the installed version in order to upgrade
yes | sudo apt-get purge cmake
@@ -89,8 +89,10 @@ git checkout $(../armnn/scripts/get_tensorflow.sh -p) # Minimum version required
Now the build process can be started. When calling "cmake", as below, you can specify a number of build flags. But if you have no need to configure your tensorflow build, you can follow the exact commands below:
```bash
-cmake $BASEDIR/tensorflow
-cmake --build $BASEDIR/tflite-output # This will be your DTFLITE_LIB_ROOT directory
+mkdir build # You are already inside $BASEDIR/tensorflow at this point
+cd build
+cmake $BASEDIR/tensorflow/tensorflow/lite -DTFLITE_ENABLE_XNNPACK=OFF
+cmake --build . # This will be your TFLITE_LIB_ROOT directory
```

## Build Flatbuffers
@@ -123,7 +125,7 @@ To build the Arm Compute Library on your platform, download the Arm Compute Libr
the version you want to use. Build it using `scons`.
```bash
-cd $HOME/armnn-devenv
+cd $BASEDIR
git clone https://review.mlplatform.org/ml/ComputeLibrary
cd ComputeLibrary/
git checkout $(../armnn/scripts/get_compute_library.sh -p) # e.g. v21.11
@@ -152,7 +154,7 @@ with the additional cmake arguments shown below
cd $BASEDIR/armnn/delegate && mkdir build && cd build
cmake .. -DCMAKE_BUILD_TYPE=release # A release build rather than a debug build.
         -DTENSORFLOW_ROOT=$BASEDIR/tensorflow \ # The root directory where tensorflow can be found.
-        -DTFLITE_LIB_ROOT=$BASEDIR/tflite-output \ # Directory where tensorflow libraries can be found.
+        -DTFLITE_LIB_ROOT=$BASEDIR/tensorflow/build \ # Directory where tensorflow libraries can be found.
         -DFLATBUFFERS_ROOT=$BASEDIR/flatbuffers-1.12.0/install \ # Flatbuffers install directory.
         -DArmnn_DIR=$BASEDIR/armnn/build \ # Directory where the Arm NN library can be found
         -DARMNN_SOURCE_DIR=$BASEDIR/armnn # The top directory of the Arm NN repository.
@@ -199,7 +201,7 @@ cmake ..
-DARMCOMPUTE_ROOT=$BASEDIR/ComputeLibrary \ -DBUILD_UNIT_TESTS=0 \ -DBUILD_ARMNN_TFLITE_DELEGATE=1 \ -DTENSORFLOW_ROOT=$BASEDIR/tensorflow \ - -DTFLITE_LIB_ROOT=$BASEDIR/tflite-output \ + -DTFLITE_LIB_ROOT=$BASEDIR/tensorflow/build \ -DFLATBUFFERS_ROOT=$BASEDIR/flatbuffers-1.12.0/install make ``` @@ -227,11 +229,11 @@ wget https://github.com/ARM-software/ML-zoo/blob/master/models/image_classificat ``` ## Execute the benchmarking tool with the Arm NN delegate +You are already at $BASEDIR/benchmarking from the previous stage. ```bash -cd $BASEDIR/benchmarking LD_LIBRARY_PATH=../armnn/build ./benchmark_model --graph=mobilenet_v2_1.0_224_quantized_1_default_1.tflite --external_delegate_path="../armnn/build/delegate/libarmnnDelegate.so" --external_delegate_options="backends:CpuAcc;logging-severity:info" ``` -The "external_delegate_options" here are specific to the Arm NN delegate. They are used to specify a target Arm NN backend or to enable/disable various options in Arm NN. A full description can be found in the parameters of function [tflite_plugin_create_delegate](namespacetflite.xhtml). +The "external_delegate_options" here are specific to the Arm NN delegate. They are used to specify a target Arm NN backend or to enable/disable various options in Arm NN. A full description can be found in the parameters of function tflite_plugin_create_delegate. # Integrate the Arm NN TfLite Delegate into your project @@ -256,4 +258,4 @@ armnnDelegateInterpreter->ModifyGraphWithDelegate(theArmnnDelegate.get()); For further information on using TfLite Delegates please visit the [tensorflow website](https://www.tensorflow.org/lite/guide) -For more details of the kind of options you can pass to the Arm NN delegate please check the parameters of function [tflite_plugin_create_delegate](namespacetflite.xhtml). +For more details of the kind of options you can pass to the Arm NN delegate please check the parameters of function tflite_plugin_create_delegate. diff --git a/delegate/DelegateQuickStartGuide.md b/delegate/DelegateQuickStartGuide.md index b581bce62c..ce58624677 100644 --- a/delegate/DelegateQuickStartGuide.md +++ b/delegate/DelegateQuickStartGuide.md @@ -1,5 +1,5 @@ # TfLite Delegate Quick Start Guide -If you have downloaded the ArmNN Github binaries or built the TfLite delegate yourself, then this tutorial will show you how you can +If you have downloaded the Arm NN Github binaries or built the TfLite delegate yourself, then this tutorial will show you how you can integrate it into TfLite to run models using python. Here is an example python script showing how to do this. 
In this script we are making use of the @@ -13,7 +13,7 @@ import tflite_runtime.interpreter as tflite # (if you are using the complete tensorflow package you can find load_delegate in tf.experimental.load_delegate) armnn_delegate = tflite.load_delegate( library="/libarmnnDelegate.so", options={"backends": "CpuAcc,GpuAcc,CpuRef", "logging-severity":"info"}) -# Delegates/Executes all operations supported by ArmNN to/with ArmNN +# Delegates/Executes all operations supported by Arm NN to/with Arm NN interpreter = tflite.Interpreter(model_path="/delegate/python/test/test_data/mock_model.tflite", experimental_delegates=[armnn_delegate]) interpreter.allocate_tensors() @@ -36,14 +36,14 @@ print(output_data) # Prepare the environment Pre-requisites: - * Dynamically build Arm NN Delegate library or download the ArmNN binaries + * Dynamically build Arm NN Delegate library or download the Arm NN binaries * python3 (Depends on TfLite version) * virtualenv * numpy (Depends on TfLite version) * tflite_runtime (>=2.5, depends on Arm NN Delegate) If you haven't built the delegate yet then take a look at the [build guide](./BuildGuideNative.md). Otherwise, -you can download the binaries [here](https://github.com/ARM-software/armnn/releases/tag/v21.11) +you can download the binaries [here](https://github.com/ARM-software/armnn/releases/) We recommend creating a virtual environment for this tutorial. For the following code to work python3 is needed. Please also check the documentation of the TfLite version you want to use. There might be additional prerequisites for the python @@ -88,16 +88,16 @@ python ExternalDelegatePythonTutorial.py ``` The output should look similar to this: ```bash -Info: ArmNN v27.0.0 +Info: Arm NN v28.0.0 Info: Initialization time: 0.56 ms -INFO: TfLiteArmnnDelegate: Created TfLite ArmNN delegate. +INFO: TfLiteArmnnDelegate: Created TfLite Arm NN delegate. [[ 12 123 16 12 11 14 20 16 20 12]] Info: Shutdown time: 0.28 ms ``` -For more details of the kind of options you can pass to the Arm NN delegate please check the parameters of function [tflite_plugin_create_delegate](namespacetflite.xhtml). +For more details of the kind of options you can pass to the Arm NN delegate please check the parameters of function tflite_plugin_create_delegate. You can also test the functionality of the external delegate adaptor by running some unit tests: ```bash @@ -105,4 +105,4 @@ pip install pytest cd armnn/delegate/python/test # You can deselect tests that require backends that your hardware doesn't support using markers e.g. -m "not GpuAccTest" pytest --delegate-dir="/libarmnnDelegate.so" -m "not GpuAccTest" -``` \ No newline at end of file +``` diff --git a/docs/Doxyfile b/docs/Doxyfile index 7dd51b50bc..dd87daad4e 100644 --- a/docs/Doxyfile +++ b/docs/Doxyfile @@ -61,7 +61,7 @@ PROJECT_NAME = "ArmNN" # could be handy for archiving the generated documentation or if some version # control system is used. -PROJECT_NUMBER = 21.11 +PROJECT_NUMBER = 22.02 # Using the PROJECT_BRIEF tag one can provide an optional one line description # for a project that appears at the top of each page and should give viewer a diff --git a/docs/FAQ.md b/docs/FAQ.md index 6771e50f19..0e6fcf84b9 100644 --- a/docs/FAQ.md +++ b/docs/FAQ.md @@ -1,10 +1,6 @@ Frequently asked questions ========================== -These are issues that have been commonly seen when using Arm NN. - -Note: The 21.08 release of Arm NN removes Boost library dependency. You are not required to install boost library with 21.08 and newer releases. 
-
Problems seen when trying to build armnn and ComputeLibrary obtained from GitHub
-----------------------------------------------------------------------------

@@ -32,18 +28,6 @@ Many DynamicBackendTests fail with "Base path for shared objects does not exist"
---------------------------------------------------------
This problem most commonly occurs when the compile and runtime environments for the unit tests differ. These dynamic backend tests rely on a set of test files and directories at runtime. These files are created by default during the cmake build. At runtime the tests will look for these files in src/backends/backendsCommon/test/ relative to where the Unittests executable was built. The usual solution is to copy these files and directories into the new unit test execution environment. You can also specify a new root path for these files by adding a command line parameter to the Unittests executable: Unittests -- --dynamic-backend-build-dir "new path"

-UnitTests fail on Max OS-X with errors in boost::program_options
----------------------------------------------------------
-When executing Arm NN UnitTests built in an OS-X environment, runtime errors occur around boost::program_options. When compiling boost the "b2" tool appears to have a bug where it defaults to using the native ranlib even when the correct tool is specified in the user_config.jam.
-
-To validate that this issue is occurring inspect the boost build log looking for warnings like:
-warning: ranlib: warning for library: libboost_atomic.a the table of contents is empty (no object file members in the library define global symbols)
-
-This problem has previously been reported to the boostorg GitHub project. The solution is to manually execute the correct ranlib on each static library. https://github.com/boostorg/build/issues/160
-
-Arm NN fails to build on Ubuntu 20.04
----------------------------------------------------------
-The compiler version update on Ubuntu 20.04 resulted in build errors in Flat buffers 1.10.0. Update to Flatbuffers 1.12.0 to resolve this problem. In addition when building flatbuffers specify -fPIC CXX flag to allow the libraries to be used in our shared objects. Without this the the Arm NN build can fail with libflatbuffers.a(util.cpp.o): relocation R_X86_64_PC32 against symbol `_ZN11flatbuffers9DirExistsEPKc' can not be used when making a shared object; recompile with -fPIC

Tensorflow Lite benchmarking tool fails with segmentation fault when using the Arm NN delegate.
--------------------------------------------------------- diff --git a/include/armnn/backends/IWorkload.hpp b/include/armnn/backends/IWorkload.hpp index d63e0acc72..ce3914bc5a 100644 --- a/include/armnn/backends/IWorkload.hpp +++ b/include/armnn/backends/IWorkload.hpp @@ -31,6 +31,10 @@ public: virtual profiling::ProfilingGuid GetGuid() const = 0; + // SupportsTensorHandleReplacement signals that a given workload is capable of + // replacing any of its I/O tensors via ReplaceInput/OutputTensorHandle + virtual bool SupportsTensorHandleReplacement() const = 0; + // Replace input tensor handle with the given TensorHandle virtual void ReplaceInputTensorHandle(ITensorHandle* /*input*/, unsigned int /*slot*/) = 0; diff --git a/include/armnn/backends/Workload.hpp b/include/armnn/backends/Workload.hpp index 07e1abb392..21109480dc 100644 --- a/include/armnn/backends/Workload.hpp +++ b/include/armnn/backends/Workload.hpp @@ -54,16 +54,23 @@ public: profiling::ProfilingGuid GetGuid() const final { return m_Guid; } + virtual bool SupportsTensorHandleReplacement() const override + { + return false; + } + // Replace input tensor handle with the given TensorHandle void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override { - m_Data.m_Inputs[slot] = tensorHandle; + armnn::IgnoreUnused(tensorHandle, slot); + throw armnn::UnimplementedException("ReplaceInputTensorHandle not implemented for this workload"); } // Replace output tensor handle with the given TensorHandle void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override { - m_Data.m_Outputs[slot] = tensorHandle; + armnn::IgnoreUnused(tensorHandle, slot); + throw armnn::UnimplementedException("ReplaceOutputTensorHandle not implemented for this workload"); } protected: diff --git a/include/armnnTestUtils/MockWorkloadFactoryHelper.hpp b/include/armnnTestUtils/MockWorkloadFactoryHelper.hpp deleted file mode 100644 index e9f2b2f6cf..0000000000 --- a/include/armnnTestUtils/MockWorkloadFactoryHelper.hpp +++ /dev/null @@ -1,41 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT
-//
-
-#pragma once
-
-#include "MockBackend.hpp"
-#include "MockTensorHandleFactory.hpp"
-#include
-
-namespace
-{
-
-template <>
-struct WorkloadFactoryHelper<armnn::MockWorkloadFactory>
-{
-    static armnn::IBackendInternal::IMemoryManagerSharedPtr GetMemoryManager()
-    {
-        armnn::MockBackend backend;
-        return backend.CreateMemoryManager();
-    }
-
-    static armnn::MockWorkloadFactory
-    GetFactory(const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr)
-    {
-        IgnoreUnused(memoryManager);
-        return armnn::MockWorkloadFactory();
-    }
-
-    static armnn::MockTensorHandleFactory
-    GetTensorHandleFactory(const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr)
-    {
-
-        return armnn::MockTensorHandleFactory(std::static_pointer_cast<armnn::MockMemoryManager>(memoryManager));
-    }
-};
-
-using MockWorkloadFactoryHelper = WorkloadFactoryHelper<armnn::MockWorkloadFactory>;
-
-} // anonymous namespace
diff --git a/python/pyarmnn/examples/speech_recognition/tests/testdata/quick_brown_fox_16000khz.wav b/python/pyarmnn/examples/speech_recognition/tests/testdata/quick_brown_fox_16000khz.wav
new file mode 100644
index 0000000000..761c36062e
Binary files /dev/null and b/python/pyarmnn/examples/speech_recognition/tests/testdata/quick_brown_fox_16000khz.wav differ
diff --git a/shim/BuildGuideShimSupportLibrary.md b/shim/BuildGuideShimSupportLibrary.md
index 05306a2e5b..1d8fb43cae 100644
--- a/shim/BuildGuideShimSupportLibrary.md
+++ b/shim/BuildGuideShimSupportLibrary.md
@@ -39,7 +39,7 @@ export PATH=$NDK_TOOLCHAIN_ROOT/bin/:$PATH
```bash
mkdir -p $AOSP_ROOT/vendor/arm/
-cd $AOSP_SOURCE/vendor/arm/
+cd $AOSP_ROOT/vendor/arm/
git clone https://github.com/ARM-software/armnn.git
```
@@ -139,10 +139,10 @@ mm
```
The built libraries and manifest file can be found here:
-$AOSP_SOURCE/out/target/product/<device>/vendor/lib64/libarmnn_support_library.so
-$AOSP_SOURCE/out/target/product/<device>/vendor/bin/hw/android.hardware.neuralnetworks-shim-service-armnn
-$AOSP_SOURCE/out/target/product/<device>/vendor/etc/vintf/manifest/android.hardware.neuralnetworks-shim-service-armnn.xml
+$AOSP_ROOT/out/target/product/<device>/vendor/lib64/libarmnn_support_library.so
+$AOSP_ROOT/out/target/product/<device>/vendor/bin/hw/android.hardware.neuralnetworks-shim-service-armnn
+$AOSP_ROOT/out/target/product/<device>/vendor/etc/vintf/manifest/android.hardware.neuralnetworks-shim-service-armnn.xml
Currently the Arm NN libraries are shared libraries and therefore will need to be pushed to the device:
-$AOSP_SOURCE/vendor/arm/armnn/build/libarmnnSerializer.so
-$AOSP_SOURCE/vendor/arm/armnn/build/libarmnn.so
+$AOSP_ROOT/vendor/arm/armnn/build/libarmnnSerializer.so
+$AOSP_ROOT/vendor/arm/armnn/build/libarmnn.so
diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp
index fd7279a294..bcceaf4a99 100644
--- a/src/armnn/LoadedNetwork.cpp
+++ b/src/armnn/LoadedNetwork.cpp
@@ -139,6 +139,13 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net,
    bool useExternalMemoryManager = false;
    bool useInternalMemoryManager = false;
    Graph& order = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort();
+
+    if (!networkProperties.m_AsyncEnabled)
+    {
+        m_IsInputImported = std::vector<bool>(order.GetNumInputs(), false);
+        m_IsOutputImported = std::vector<bool>(order.GetNumOutputs(), false);
+    }
+
    for (auto&& layer : order)
    {
        auto const& backendId = layer->GetBackendId();
@@ -312,44 +319,6 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net,
            }
            else
            {
-                if
(inputSlot.GetConnectedOutputSlot()->GetOwningLayer().GetType() == LayerType::Input) - { - auto inputLayer = - PolymorphicDowncast( - &inputSlot.GetConnectedOutputSlot()->GetOwningLayer()); - m_InputWorkloadSlotPairs[inputLayer->GetBindingId()] = - std::make_pair(m_WorkloadQueue.size(), inputSlotIndex); - } - ++inputSlotIndex; - } - } - - if (layer->GetNumOutputSlots() >= 1) - { - unsigned int outputSlotIndex = 0; - for (auto& outputSlot : layer->GetOutputSlots()) - { - for (unsigned int i = 0; i < outputSlot.GetNumConnections(); i++) - { - // If any of the connections on this outputSlot are connected to an Output then - // Add its index within layer->GetOutputSlots() to m_OutputWorkloadSlotPairs - if (outputSlot.GetConnection(i)->GetOwningLayer().GetType() == LayerType::Output) - { - auto outputLayer = PolymorphicDowncast( - &outputSlot.GetConnection(i)->GetOwningLayer()); - m_OutputWorkloadSlotPairs[outputLayer->GetBindingId()] = - std::make_pair(m_WorkloadQueue.size(), outputSlotIndex); - continue; - } - } - ++outputSlotIndex; - } - } m_WorkloadQueue.push_back(std::move(workload)); } @@ -361,6 +330,100 @@ LoadedNetwork::LoadedNetwork(std::unique_ptr net, } } + // Gather information about workloads for inputs & outputs + if (!networkProperties.m_AsyncEnabled && m_WorkloadQueue.size() != 0) + { + const int noOfInputs = armnn::numeric_cast(order.GetNumInputs()); + + // Get indices of all workloads connected to each input and + // check if they support tensor handle replacement + for (const BindableLayer* layer: order.GetInputLayers()) + { + const auto bindingId = layer->GetBindingId(); + + bool supportsReplacement = true; + + for (const auto inputSlot: layer->GetOutputSlot(0).GetConnections()) + { + auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(inputSlot->GetOwningLayer())); + workloadIndex -= noOfInputs; + + m_InputWorkloadSlotPairs[bindingId].emplace_back(WorkloadIndices{ + armnn::numeric_cast(workloadIndex), inputSlot->GetSlotIndex()}); + + auto workload = m_WorkloadQueue[m_InputWorkloadSlotPairs[bindingId].back().m_WorkloadIndex].get(); + supportsReplacement &= workload->SupportsTensorHandleReplacement(); + } + + ITensorHandleFactory::FactoryId factoryId = layer->GetOutputSlot(0).GetTensorHandleFactoryId(); + // Get matching import factory Id + ITensorHandleFactory::FactoryId importFactoryId = + m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId); + + ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId); + + if (supportsReplacement && importFactory) + { + m_PreImportedInputHandles.emplace_back( + bindingId, importFactory->CreateTensorHandle(layer->GetOutputSlot(0).GetTensorInfo(), false)); + } + else + { + m_PreImportedInputHandles.emplace_back(bindingId, nullptr); + } + } + + // Get indices of all workloads connected to each output and + // check if they support tensor handle replacement + for (const BindableLayer* layer: order.GetOutputLayers()) + { + const auto bindingId = layer->GetBindingId(); + + const auto outputSlot = layer->GetInputSlot(0).GetConnectedOutputSlot(); + auto& indices = m_OutputWorkloadSlotPairs[bindingId]; + + auto workloadIndex = std::distance(order.begin(), order.GetPosInGraph(outputSlot->GetOwningLayer())); + workloadIndex -= noOfInputs; + + indices.m_OutputSlotIndices = WorkloadIndices{numeric_cast(workloadIndex), + outputSlot->CalculateIndexOnOwner()}; + + bool supportsReplacement = true; + auto outputWorkload = 
m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get(); + supportsReplacement &= outputWorkload->SupportsTensorHandleReplacement(); + + for (auto &inputSlot: outputSlot->GetConnections()) + { + if(inputSlot->GetOwningLayer().GetType() != LayerType::Output) + { + auto inWorkloadIndex = std::distance(order.begin(), + order.GetPosInGraph(inputSlot->GetOwningLayer())); + inWorkloadIndex -= noOfInputs; + indices.m_InputSlotIndices.emplace_back(WorkloadIndices{numeric_cast(inWorkloadIndex), + inputSlot->GetSlotIndex()}); + auto inputWorkload = m_WorkloadQueue[indices.m_InputSlotIndices.back().m_WorkloadIndex].get(); + supportsReplacement &= inputWorkload->SupportsTensorHandleReplacement(); + } + } + + ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId(); + // Get matching import factory Id + ITensorHandleFactory::FactoryId importFactoryId = + m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId); + ITensorHandleFactory *importFactory = m_TensorHandleFactoryRegistry.GetFactory(importFactoryId); + + if (supportsReplacement && importFactory) + { + m_PreImportedOutputHandles.emplace_back( + bindingId, importFactory->CreateTensorHandle(outputSlot->GetTensorInfo(), false)); + } + else + { + m_PreImportedOutputHandles.emplace_back(bindingId, nullptr); + } + } + } + for (auto&& workloadFactory : m_WorkloadFactories) { workloadFactory.second->AfterWorkloadsCreated(); @@ -699,77 +762,133 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, m_InputQueue.clear(); m_InputQueue.reserve(graph.GetNumInputs()); + if (preImportedInputIds.size() > graph.GetNumInputs()) + { + throw InvalidArgumentException("Invalid number of preImportedInputIds"); + } + + unsigned int inputIndex = 0; + unsigned int importedInputIdIndex = 0; + std::sort(preImportedInputIds.begin(), preImportedInputIds.end()); for (const BindableLayer* inputLayer : graph.GetInputLayers()) { - if (preImportedInputIds.size() > graph.GetNumInputs()) + if (importedInputIdIndex < preImportedInputIds.size() && + inputIndex == preImportedInputIds[importedInputIdIndex]) { - throw InvalidArgumentException("Invalid number of preImportedInputIds"); + // Only replace tensorhandles if they have not already been replaced + if (!m_IsInputImported[inputIndex]) + { + auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get(); + + for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()]) + { + auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get(); + workload->ReplaceInputTensorHandle(outputTensorHandle, workloadInfo.m_SlotIndex); + } + m_IsInputImported[inputIndex] = true; + } + importedInputIdIndex++; } - auto layerBindingId = inputLayer->GetBindingId(); - auto it = std::find_if(preImportedInputIds.begin(), preImportedInputIds.end(), - [=](auto preImportedInputId) + else { - return m_PreImportedInputHandles[preImportedInputId].m_LayerBindingId == layerBindingId; - }); + if (m_IsInputImported[inputIndex]) + { + OutputHandler& handler = const_cast(inputLayer->GetOutputHandler(0)); + + for (const auto& workloadInfo: m_InputWorkloadSlotPairs[inputLayer->GetBindingId()]) + { + auto workload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get(); + workload->ReplaceInputTensorHandle(handler.GetData(), workloadInfo.m_SlotIndex); + } + + m_IsInputImported[inputIndex] = false; + } - if (it == preImportedInputIds.end()) - { // InputTensorHandle is not imported yet, process to enqueue input const TensorPin& pin = 
workloadData.GetInputTensorPin(inputLayer->GetBindingId()); EnqueueInput(*inputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); } + inputIndex++; } } - // For each output to the network, call EnqueueOutput with the data passed by the user. { ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "PrepareOutputs"); m_OutputQueue.clear(); m_OutputQueue.reserve(graph.GetNumOutputs()); + if (preImportedOutputIds.size() > graph.GetNumOutputs()) + { + throw InvalidArgumentException("Invalid number of preImportedOutputIds"); + } + + unsigned int outputIndex = 0; + unsigned int importedOutputIdIndex = 0; + std::sort(preImportedOutputIds.begin(), preImportedOutputIds.end()); for (const BindableLayer* outputLayer : graph.GetOutputLayers()) { - if (preImportedOutputIds.size() > graph.GetNumOutputs()) - { - throw InvalidArgumentException("Invalid number of preImportedOutputIds"); - } - auto layerBindingId = outputLayer->GetBindingId(); - auto it = std::find_if(preImportedOutputIds.begin(), preImportedOutputIds.end(), - [=](auto preImportedOutputId) + if (importedOutputIdIndex < preImportedOutputIds.size() && + outputIndex == preImportedOutputIds[importedOutputIdIndex]) { - return m_PreImportedOutputHandles[preImportedOutputId].m_LayerBindingId == layerBindingId; - }); + // Only replace tensorhandles if they have not already been replaced + ITensorHandle* inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get(); - const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId()); + if (!m_IsOutputImported[outputIndex]) + { + const auto bindingId = outputLayer->GetBindingId(); + const auto& indices = m_OutputWorkloadSlotPairs[bindingId]; - if (it == preImportedOutputIds.end()) - { - // OutputTensorHandle is not imported yet, process to enqueue Output - EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); - } - else - { - // Insert synchronization workload for the imported output - OutputQueueDescriptor outputQueueDescriptor; - WorkloadInfo info; + auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get(); - outputQueueDescriptor.m_Outputs.push_back(pin.GetTensorHandle()); - info.m_OutputTensorInfos.push_back(pin.GetTensorInfo()); + outputWorkload->ReplaceOutputTensorHandle(inputTensorHandle, + indices.m_OutputSlotIndices.m_SlotIndex); - // Gets the output handler from the previous node. 
- const OutputHandler& outputHandler = - outputLayer->GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler(); + for (const auto& workloadInfo: indices.m_InputSlotIndices) + { + auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get(); + inputWorkload->ReplaceInputTensorHandle(inputTensorHandle, workloadInfo.m_SlotIndex); + } + m_IsOutputImported[outputIndex] = true; + } - const TensorInfo& inputTensorInfo = outputHandler.GetTensorInfo(); - ITensorHandle* inputTensorHandle = outputHandler.GetData(); ARMNN_ASSERT_MSG(inputTensorHandle != nullptr, "Data should have been allocated."); MemSyncQueueDescriptor syncDesc; syncDesc.m_Inputs.push_back(inputTensorHandle); - info.m_InputTensorInfos.push_back(inputTensorInfo); + WorkloadInfo info; + info.m_InputTensorInfos.push_back( + outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo()); auto syncWorkload = std::make_unique(syncDesc, info); ARMNN_ASSERT_MSG(syncWorkload, "No sync workload created"); m_OutputQueue.push_back(move(syncWorkload)); + importedOutputIdIndex++; } + else + { + if (m_IsOutputImported[outputIndex]) + { + const auto bindingId = outputLayer->GetBindingId(); + const auto& indices = m_OutputWorkloadSlotPairs[bindingId]; + + auto outputWorkload = m_WorkloadQueue[indices.m_OutputSlotIndices.m_WorkloadIndex].get(); + const OutputHandler& outputHandler = + outputLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetOutputHandler(); + + outputWorkload->ReplaceOutputTensorHandle( + outputHandler.GetData(), indices.m_OutputSlotIndices.m_SlotIndex); + + for (const auto& workloadInfo: indices.m_InputSlotIndices) + { + auto inputWorkload = m_WorkloadQueue[workloadInfo.m_WorkloadIndex].get(); + inputWorkload->ReplaceInputTensorHandle(outputHandler.GetData(), workloadInfo.m_SlotIndex); + } + m_IsOutputImported[outputIndex] = false; + } + + const TensorPin& pin = workloadData.GetOutputTensorPin(outputLayer->GetBindingId()); + // OutputTensorHandle is not imported yet, process to enqueue Output + EnqueueOutput(*outputLayer, pin.GetTensorHandle(), pin.GetTensorInfo()); + } + outputIndex++; } } @@ -806,6 +925,7 @@ Status LoadedNetwork::EnqueueWorkload(const InputTensors& inputTensors, timelineUtils->RecordEvent(inferenceGuid, LabelsAndEventClasses::ARMNN_PROFILING_EOL_EVENT_CLASS); timelineUtils->Commit(); } + return executionSucceeded ? 
Status::Success : Status::Failure; } @@ -1186,14 +1306,13 @@ const armnn::Tensor GetOutputTensor(const LayerBindingId layerId, const OutputTe std::vector LoadedNetwork::ImportInputs(const InputTensors& inputTensors, MemorySource forceImportMemorySource) { - if (!m_NetworkProperties.m_ImportEnabled) + if (!m_NetworkProperties.m_AsyncEnabled) { // Cannot import if import is not enabled and forceImportMemorySource is undefined if (forceImportMemorySource == MemorySource::Undefined) { throw MemoryImportException("ImportInputs: Memory Import failed, NetworkProperties.m_ImportEnabled"); } - // If forceImportMemorySource is defined, try import if memory is aligned if (inputTensors.size() != m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().GetNumInputs()) { throw MemoryImportException("ImportInputs: Force Import failed, incorrect number of tensors"); @@ -1201,85 +1320,42 @@ std::vector LoadedNetwork::ImportInputs(const InputTensors& inp std::vector importedInputs; Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); - for (auto inputTensor : inputTensors) + unsigned int inputIndex = 0; + for (const BindableLayer* inputLayer : graph.GetInputLayers()) { - auto layerBindingId = inputTensor.first; - auto it = std::find_if(graph.GetInputLayers().begin(), graph.GetInputLayers().end(), [=](auto* layer) - { - return layer->GetBindingId() == layerBindingId; - }); + auto outputTensorHandle = m_PreImportedInputHandles[inputIndex].m_TensorHandle.get(); - if (it == graph.GetInputLayers().end()) + if (!outputTensorHandle) { - throw MemoryImportException(fmt::format( - "ImportInputs: Memory Import failed, unknown LayerBindingId: {}", layerBindingId)); + inputIndex++; + continue; } - const Layer* layer = *it; - if (layer->GetType() != LayerType::Input) + auto layerBindingId = inputLayer->GetBindingId(); + auto it = std::find_if(inputTensors.begin(), inputTensors.end(), [=](const auto& inputTensor) { - throw InvalidArgumentException("ImportInputs: given layer not an InputLayer"); - } - const OutputSlot& outputSlot = layer->GetOutputSlots()[0]; - ITensorHandleFactory::FactoryId factoryId = outputSlot.GetTensorHandleFactoryId(); - // Get matching import factory Id - ITensorHandleFactory::FactoryId importFactoryId = - m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId); - ITensorHandleFactory* importFactory = - m_TensorHandleFactoryRegistry.GetFactory(importFactoryId, forceImportMemorySource); - if (!importFactory) + return inputTensor.first == layerBindingId; + }); + + if (it == inputTensors.end()) { - throw MemoryImportException("ImportInputs: Force Import failed, cannot find matching Import Factory"); + inputIndex++; + continue; } - OutputHandler& handler = const_cast(layer->GetOutputHandler(0)); - handler.SetAllocatedData(); - handler.CreateTensorHandles(*importFactory, false); - ITensorHandle* outputTensorHandle = handler.GetData(); + const auto& inputTensor = *it; std::unique_ptr passThroughTensorHandle = std::make_unique(inputTensor.second.GetInfo(), inputTensor.second.GetMemoryArea()); - // Check if the input memory can be imported - if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource)) - { - passThroughTensorHandle->Unmap(); - if (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource)) - { - passThroughTensorHandle->Unmap(); - try - { - m_WorkloadQueue[m_InputWorkloadSlotPairs[layerBindingId].first].get()->ReplaceInputTensorHandle( - outputTensorHandle, 
m_InputWorkloadSlotPairs[layerBindingId].second); - importedInputs.push_back(m_CurImportedInputId++); - // For force import, we want OutputHandler to own the TensorHandle, - // so we do not move the TensorHandle to m_PreImportedInputHandles as in AsyncEnabled networks - ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, nullptr}; - m_PreImportedInputHandles.push_back(std::move(importedTensorHandlePin)); - } - catch(armnn::UnimplementedException& e) - { - IgnoreUnused(e); - // Method not implement, cannot use import tensor and have to use allocated data instead - handler.UseAllocatedData(); - } - } - } - else + + if (outputTensorHandle->CanBeImported(passThroughTensorHandle->Map(), forceImportMemorySource) + && (outputTensorHandle->Import(passThroughTensorHandle->Map(), forceImportMemorySource))) { - // Cannot import, use allocated data - handler.UseAllocatedData(); - // Ensure that the workload get correct tensor - try - { - m_WorkloadQueue[m_InputWorkloadSlotPairs[layerBindingId].first].get()->ReplaceInputTensorHandle( - handler.GetData(), m_InputWorkloadSlotPairs[layerBindingId].second); - } - catch(armnn::UnimplementedException& e) - { - IgnoreUnused(e); - } + importedInputs.push_back(inputIndex); } + passThroughTensorHandle->Unmap(); + inputIndex++; } return importedInputs; @@ -1363,7 +1439,7 @@ std::vector LoadedNetwork::ImportInputs(const InputTensors& inp std::vector LoadedNetwork::ImportOutputs(const OutputTensors& outputTensors, MemorySource forceImportMemorySource) { - if (!m_NetworkProperties.m_ExportEnabled) + if (!m_NetworkProperties.m_AsyncEnabled) { // Cannot import if import is not enabled and forceImportMemorySource is undefined if (forceImportMemorySource == MemorySource::Undefined) @@ -1377,85 +1453,38 @@ std::vector LoadedNetwork::ImportOutputs(const OutputTensors& } std::vector importedOutputs; Graph& graph = m_OptimizedNetwork->pOptimizedNetworkImpl->GetGraph().TopologicalSort(); - for (auto outputTensor : outputTensors) + + unsigned int outputIndex = 0; + for (const BindableLayer* const outputLayer : graph.GetOutputLayers()) { - auto layerBindingId = outputTensor.first; - auto it = std::find_if(graph.GetOutputLayers().begin(), graph.GetOutputLayers().end(), [=](auto* layer) - { - return layer->GetBindingId() == layerBindingId; - }); + auto inputTensorHandle = m_PreImportedOutputHandles[outputIndex].m_TensorHandle.get(); - if (it == graph.GetOutputLayers().end()) + if (!inputTensorHandle) { - throw MemoryImportException(fmt::format("ImportOutputs: Memory Import failed, " - "unknown LayerBindingId: {}", - layerBindingId)); + outputIndex++; + continue; } - const Layer* layer = *it; - if (layer->GetType() != LayerType::Output) + auto layerBindingId = outputLayer->GetBindingId(); + auto it = std::find_if(outputTensors.begin(), outputTensors.end(), [=] (const auto& outputTensor) { - throw InvalidArgumentException("ImportOutputs: given layer not an OutputLayer"); - } + return outputTensor.first == layerBindingId; + }); - const OutputSlot* outputSlot = layer->GetInputSlots()[0].GetConnectedOutputSlot(); - ITensorHandleFactory::FactoryId factoryId = outputSlot->GetTensorHandleFactoryId(); - ITensorHandleFactory::FactoryId importFactoryId = - m_TensorHandleFactoryRegistry.GetMatchingImportFactoryId(factoryId); - ITensorHandleFactory* importFactory = - m_TensorHandleFactoryRegistry.GetFactory(importFactoryId, forceImportMemorySource); - if (!importFactory) + if (it == outputTensors.end()) { - throw MemoryImportException("ImportOutputs: Force Import failed, cannot 
find matching Import Factory"); + outputIndex++; + continue; } - OutputHandler& outputHandler = - const_cast(layer->GetInputSlots()[0].GetConnectedOutputSlot()->GetOutputHandler()); - outputHandler.SetAllocatedData(); - ITensorHandle* inputTensorHandle = outputHandler.GetData(); - outputHandler.CreateTensorHandles(*importFactory, false); - inputTensorHandle = outputHandler.GetData(); - + const auto outputTensor = *it; // Check if the output memory can be imported - if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource)) - { - if (inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource)) - { - try - { - m_WorkloadQueue[m_OutputWorkloadSlotPairs[layerBindingId].first].get()-> - ReplaceOutputTensorHandle(inputTensorHandle, - m_OutputWorkloadSlotPairs[layerBindingId].second); - importedOutputs.push_back(m_CurImportedOutputId++); - // For force import, we want OutputHandler to own the TensorHandle, - // so we do not move the TensorHandle to m_PreImportedOutputHandles as in AsyncEnabled networks - ImportedTensorHandlePin importedTensorHandlePin{layerBindingId, nullptr}; - m_PreImportedOutputHandles.push_back(std::move(importedTensorHandlePin)); - } - catch(armnn::UnimplementedException& e) - { - IgnoreUnused(e); - // Method not implement, cannot use import tensor and have to use allocated data instead - outputHandler.UseAllocatedData(); - } - } - } - else + if (inputTensorHandle->CanBeImported(outputTensor.second.GetMemoryArea(), forceImportMemorySource) + && inputTensorHandle->Import(outputTensor.second.GetMemoryArea(), forceImportMemorySource)) { - // Cannot import, use allocated memory - outputHandler.UseAllocatedData(); - // Ensure that the workload get correct tensor - try - { - m_WorkloadQueue[m_OutputWorkloadSlotPairs[layerBindingId].first].get()-> - ReplaceOutputTensorHandle(outputHandler.GetData(), - m_OutputWorkloadSlotPairs[layerBindingId].second); - } - catch(armnn::UnimplementedException& e) - { - IgnoreUnused(e); - } + importedOutputs.push_back(outputIndex); } + outputIndex++; } return importedOutputs; } diff --git a/src/armnn/LoadedNetwork.hpp b/src/armnn/LoadedNetwork.hpp index f637dec8eb..dc2f4dc10f 100644 --- a/src/armnn/LoadedNetwork.hpp +++ b/src/armnn/LoadedNetwork.hpp @@ -204,8 +204,21 @@ private: // A set of vectors to record the workload queue indexes and their corresponding Input/Output Slot indexes // which are connected to Inputs and Outputs for the network. 
-    std::unordered_map<LayerBindingId, std::pair<size_t, unsigned int>> m_InputWorkloadSlotPairs;
-    std::unordered_map<LayerBindingId, std::pair<size_t, unsigned int>> m_OutputWorkloadSlotPairs;
+    struct WorkloadIndices
+    {
+        unsigned int m_WorkloadIndex;
+        unsigned int m_SlotIndex;
+    };
+
+    struct OutputWorkloadIndices
+    {
+        WorkloadIndices m_OutputSlotIndices;
+        std::vector<WorkloadIndices> m_InputSlotIndices;
+    };
+    std::unordered_map<LayerBindingId, std::vector<WorkloadIndices>> m_InputWorkloadSlotPairs;
+    std::unordered_map<LayerBindingId, OutputWorkloadIndices> m_OutputWorkloadSlotPairs;
+    std::vector<bool> m_IsInputImported;
+    std::vector<bool> m_IsOutputImported;
};
diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp
index 1abe0f394b..a91368140c 100644
--- a/src/armnn/Runtime.cpp
+++ b/src/armnn/Runtime.cpp
@@ -630,15 +630,18 @@ Status RuntimeImpl::EnqueueWorkload(NetworkId networkId,
    auto status = loadedNetwork->EnqueueWorkload(inputTensors, outputTensors,
                                                 preImportedInputIds, preImportedOutputIds);

-    ARMNN_LOG(info) << "Execution time: " << std::setprecision(2)
-                    << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
-    // Call After EnqueueWorkload events
-    for (auto&& context : m_BackendContexts)
+    // Check if we imported; if not, there's no need to call the After EnqueueWorkload events
+    if (!preImportedInputIds.empty() || !preImportedOutputIds.empty())
    {
-        context.second->AfterEnqueueWorkload(networkId);
+        // Call After EnqueueWorkload events
+        for (auto&& context : m_BackendContexts)
+        {
+            context.second->AfterEnqueueWorkload(networkId);
+        }
    }
-
+    ARMNN_LOG(info) << "Execution time: " << std::setprecision(2)
+                    << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms.";
    return status;
}
diff --git a/src/armnnTestUtils/CMakeLists.txt b/src/armnnTestUtils/CMakeLists.txt
index e33fed77d7..061dd90d3d 100755
--- a/src/armnnTestUtils/CMakeLists.txt
+++ b/src/armnnTestUtils/CMakeLists.txt
@@ -11,7 +11,6 @@ list(APPEND armnnTestUtils_sources
    ../../include/armnnTestUtils/MockBackend.hpp
    ../../include/armnnTestUtils/MockMemoryManager.hpp
    ../../include/armnnTestUtils/MockTensorHandle.hpp
-    ../../include/armnnTestUtils/MockWorkloadFactoryHelper.hpp
    ../../include/armnnTestUtils/PredicateResult.hpp
    ../../include/armnnTestUtils/TensorCopyUtils.hpp
    ../../include/armnnTestUtils/WorkloadTestUtils.hpp
@@ -57,4 +56,4 @@ install(TARGETS armnnTestUtils
        ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
        RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})

-add_library(Armnn::armnnTestUtils ALIAS armnnTestUtils)
\ No newline at end of file
+add_library(Armnn::armnnTestUtils ALIAS armnnTestUtils)
diff --git a/src/armnnTestUtils/UnitTests.hpp b/src/armnnTestUtils/UnitTests.hpp
index f560720c9b..71324dbf81 100644
--- a/src/armnnTestUtils/UnitTests.hpp
+++ b/src/armnnTestUtils/UnitTests.hpp
@@ -155,11 +155,11 @@ void CompareRefTestFunctionUsingTensorHandleFactory(const char* testName, TFuncP
{
    auto memoryManager = WorkloadFactoryHelper<FactoryType>::GetMemoryManager();
    FactoryType workloadFactory = WorkloadFactoryHelper<FactoryType>::GetFactory(memoryManager);
+    auto tensorHandleFactory = WorkloadFactoryHelper<FactoryType>::GetTensorHandleFactory(memoryManager);

    armnn::RefWorkloadFactory refWorkloadFactory;
-    auto tensorHandleFactory = WorkloadFactoryHelper<FactoryType>::GetTensorHandleFactory(memoryManager);
-    auto refTensorHandleFactory =
-        RefWorkloadFactoryHelper::GetTensorHandleFactory(memoryManager);
+    auto refMemoryManager = WorkloadFactoryHelper<armnn::RefWorkloadFactory>::GetMemoryManager();
+    auto refTensorHandleFactory = RefWorkloadFactoryHelper::GetTensorHandleFactory(refMemoryManager);

    auto testResult = (*testFunction)(
        workloadFactory, memoryManager, refWorkloadFactory, tensorHandleFactory, refTensorHandleFactory, args...);
diff --git
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 385affa5fa..fc48ffce28 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -583,7 +583,6 @@ void MemImportQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
 void MemSyncQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
 {
     ValidateNumInputs(workloadInfo, "MemSyncQueueDescriptor", 1);
-    ValidateNumOutputs(workloadInfo, "MemSyncQueueDescriptor" , 1);
 
     if (m_Inputs.size() != 1)
     {
diff --git a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
index c69a4a5052..77901df444 100644
--- a/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
+++ b/src/backends/backendsCommon/test/EndToEndTestImpl.hpp
@@ -951,11 +951,12 @@ inline void ForceImportWithMisalignedInputBuffersEndToEndTest(std::vector<BackendId> backends)
     // Check if our pointer is truly misaligned
     uintptr_t alignment = GetDataTypeSize(DataType::Float32);
     CHECK (reinterpret_cast<uintptr_t>(misalignedMemPtr) % alignment);
-    auto inputBuffer = reinterpret_cast<float*>(misalignedMemPtr);
-    for (int i = 0; i < 4; i++)
+    std::vector<float> inputData
     {
-        inputBuffer[i] = 1.0f + static_cast<float>(i);
-    }
+        1.0f, 2.0f, 3.0f, 4.0f
+    };
+
+    std::memcpy(misalignedMemPtr, inputData.data(), 4*sizeof(float));
 
     std::vector<float> outputData(4);
     // Check our output buffer is aligned
@@ -1129,9 +1130,11 @@ inline void ForceImportWithMisalignedOutputBuffersEndToEndTest(std::vector<BackendId> backends)
     // Check the output is correct
     unsigned int index = 0;
+    std::vector<float> outputData(expectedOutput.size(), 0);
+    std::memcpy(outputData.data(), misalignedMemPtr, expectedOutput.size() * sizeof(float));
     for (auto outputValue : expectedOutput)
     {
-        CHECK(outputValue == reinterpret_cast<float*>(misalignedMemPtr)[index]);
+        CHECK(outputValue == outputData[index]);
         ++index;
     }
     std::free(memPtr);
@@ -1183,11 +1186,11 @@ inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
     // Check if our pointer is truly misaligned
     uintptr_t alignment = GetDataTypeSize(DataType::Float32);
     CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
-    auto inputBuffer = reinterpret_cast<float*>(misalignedInputPtr);
-    for (int i = 0; i < 4; i++)
+    std::vector<float> inputData
     {
-        inputBuffer[i] = 1.0f + static_cast<float>(i);
-    }
+        1.0f, 2.0f, 3.0f, 4.0f
+    };
+    std::memcpy(misalignedInputPtr, inputData.data(), 4*sizeof(float));
 
     auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
     float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
@@ -1238,9 +1241,11 @@ inline void ForceImportWithMisalignedInputAndOutputBuffersEndToEndTest(std::vector<BackendId> backends)
     }
     // Check the output is correct
     unsigned int index = 0;
-    for (auto outputValue : expectedOutput)
+    std::vector<float> outputData(expectedOutput.size(), 0);
+    std::memcpy(outputData.data(), misalignedOutputPtr, expectedOutput.size() * sizeof(float));
+    for (auto expectedValue : expectedOutput)
     {
-        CHECK(outputValue == reinterpret_cast<float*>(misalignedOutputPtr)[index]);
+        CHECK(expectedValue == outputData[index]);
         ++index;
     }
     std::free(inputMemPtr);
@@ -1356,11 +1361,13 @@ inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
     // Check if our pointer is truly misaligned
     CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
-    auto inputBuffer = reinterpret_cast<float*>(misalignedInputPtr);
-    for (int i = 0; i < 4; i++)
+
+    std::vector<float> inputValues
     {
-        inputBuffer[i] = 2.0f + static_cast<float>(i);
-    }
+        2.0f, 3.0f, 4.0f, 5.0f
+    };
+
+    std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size()*sizeof(float));
 
     auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
     float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
@@ -1411,9 +1418,11 @@ inline void ForceImportRepeatedInferencesEndToEndTest(std::vector<BackendId> backends)
     }
     // Check the output is correct
     unsigned int index = 0;
+    std::vector<float> alignedOutputData(expectedMisalignedOutput.size(), 0);
+    std::memcpy(alignedOutputData.data(), misalignedOutputPtr, expectedMisalignedOutput.size() * sizeof(float));
     for (auto outputValue : expectedMisalignedOutput)
     {
-        CHECK(outputValue == reinterpret_cast<float*>(misalignedOutputPtr)[index]);
+        CHECK(outputValue == alignedOutputData[index]);
         ++index;
     }
     // Clean up to avoid interfering with other tests
@@ -1471,11 +1480,11 @@ inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
     CHECK (reinterpret_cast<uintptr_t>(misalignedInputPtr) % alignment);
-    auto inputBuffer = reinterpret_cast<float*>(misalignedInputPtr);
-    for (int i = 0; i < 4; i++)
+    std::vector<float> inputValues
     {
-        inputBuffer[i] = 2.0f + static_cast<float>(i);
-    }
+        2.0f, 3.0f, 4.0f, 5.0f
+    };
+    std::memcpy(misalignedInputPtr, inputValues.data(), inputValues.size() * sizeof(float));
 
     auto outputMemPtr = std::malloc(4 * sizeof(float) + sizeof(char));
     float* misalignedOutputPtr = reinterpret_cast<float*>(reinterpret_cast<char*>(outputMemPtr) + 1);
@@ -1530,9 +1539,11 @@ inline void ForceImportRepeatedInferencesInvertedEndToEndTest(std::vector<BackendId> backends)
     }
     // Check the output is correct
     unsigned int index = 0;
+    std::vector<float> alignedOutput(expectedMisalignedOutput.size());
+    std::memcpy(alignedOutput.data(), misalignedOutputPtr, expectedMisalignedOutput.size()*sizeof(float));
     for (auto outputValue : expectedMisalignedOutput)
     {
-        CHECK(outputValue == reinterpret_cast<float*>(misalignedOutputPtr)[index]);
+        CHECK(outputValue == alignedOutput[index]);
         ++index;
     }
     std::free(inputMemPtr);
diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
index 992abc2f56..389605f17d 100644
--- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
+++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.cpp
@@ -124,4 +124,42 @@ void ClBatchNormalizationFloatWorkload::FreeUnusedTensors()
     FreeTensorIfUnused(m_Beta);
 }
 
+void ClBatchNormalizationFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
+{
+    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
+    this->m_Data.m_Inputs[slot] = tensorHandle;
+    try
+    {
+        Reconfigure();
+    }
+    catch(armnn::UnimplementedException& e)
+    {
+        // Cannot reconfigure, revert the slot back and throw the exception.
+        this->m_Data.m_Inputs[slot] = backupHandle;
+        throw e;
+    }
+}
+
+// Replace output tensor handle with the given TensorHandle
+void ClBatchNormalizationFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot)
+{
+    ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot];
+    this->m_Data.m_Inputs[slot] = tensorHandle;
+    try
+    {
+        Reconfigure();
+    }
+    catch(armnn::UnimplementedException& e)
+    {
+        // Cannot reconfigure, revert the slot back and throw the exception.
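// Note on the pattern above: the swap is transactional. The new handle is
// installed first and kept only if Reconfigure() succeeds; on failure the catch
// restores the backup and rethrows. As committed, this ReplaceOutputTensorHandle
// backs up and restores m_Data.m_Inputs[slot], exactly like the input overload
// before it; an output-slot variant would apply the same steps to
// m_Data.m_Outputs[slot]. The same boilerplate recurs in each Cl*/Neon* workload
// below.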
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClBatchNormalizationFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp index dc76703382..d47663671e 100644 --- a/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClBatchNormalizationFloatWorkload.hpp @@ -32,6 +32,12 @@ public: using FloatWorkload::FloatWorkload; void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + private: mutable arm_compute::CLBatchNormalizationLayer m_Layer; @@ -41,6 +47,7 @@ private: std::unique_ptr m_Beta; void FreeUnusedTensors(); + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp index 867770a112..8ccf157aca 100644 --- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp +++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.cpp @@ -25,9 +25,13 @@ ClConvertFp16ToFp32Workload::ClConvertFp16ToFp32Workload( arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + // Create Proxy tensor and set the initial tensor handle to it + m_InputProxy = std::make_unique(&input); + m_OutputProxy = std::make_unique(&output); + { ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvertFp16ToFp32Workload_configure"); - m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0); + m_Layer.configure(clCompileContext, m_InputProxy.get(), m_OutputProxy.get(), g_AclConvertPolicy, 0); } } @@ -57,5 +61,45 @@ arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, return aclStatus; } +void ClConvertFp16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void ClConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
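// Unlike the workloads whose Reconfigure() is an UnimplementedException stub,
// the two CL convert workloads can honour a replacement: configure() was called
// against ICLTensorProxy objects rather than the ACL tensors themselves, so the
// Reconfigure() that follows only retargets the proxies, for example:
//
//     m_InputProxy->set(&input);    // proxy now forwards to the new tensor
//     m_OutputProxy->set(&output);
//
// and the already-configured CLDepthConvertLayer picks up the new buffers on the
// next Execute() without being reconfigured.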
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClConvertFp16ToFp32Workload::Reconfigure() +{ + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_InputProxy->set(&input); + m_OutputProxy->set(&output); +} } //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp index b392c0be2e..3c6fcd6c08 100644 --- a/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp +++ b/src/backends/cl/workloads/ClConvertFp16ToFp32Workload.hpp @@ -9,6 +9,8 @@ #include +#include + namespace armnn { @@ -21,8 +23,19 @@ public: const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; + bool SupportsTensorHandleReplacement() const override { return true;}; + + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: mutable arm_compute::CLDepthConvertLayer m_Layer; + virtual void Reconfigure(); + + std::unique_ptr m_InputProxy; + std::unique_ptr m_OutputProxy; }; arm_compute::Status ClConvertFp16ToFp32WorkloadValidate(const TensorInfo& input, const TensorInfo& output); diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp index 017fcaf454..a44a80c997 100644 --- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp +++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.cpp @@ -25,9 +25,13 @@ ClConvertFp32ToFp16Workload::ClConvertFp32ToFp16Workload( arm_compute::ICLTensor& input = static_cast(this->m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast(this->m_Data.m_Outputs[0])->GetTensor(); + // Create Proxy tensor and set the initial tensor handle to it + m_InputProxy = std::make_unique(&input); + m_OutputProxy = std::make_unique(&output); + { ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvertFp32ToFp16Workload_configure"); - m_Layer.configure(clCompileContext, &input, &output, g_AclConvertPolicy, 0); + m_Layer.configure(clCompileContext, m_InputProxy.get(), m_OutputProxy.get(), g_AclConvertPolicy, 0); } } @@ -57,5 +61,45 @@ arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, return aclStatus; } +void ClConvertFp32ToFp16Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void ClConvertFp32ToFp16Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClConvertFp32ToFp16Workload::Reconfigure() +{ + arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); + arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_InputProxy->set(&input); + m_OutputProxy->set(&output); +} } //namespace armnn diff --git a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp index 1d777b5256..6ce563e4f4 100644 --- a/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp +++ b/src/backends/cl/workloads/ClConvertFp32ToFp16Workload.hpp @@ -9,6 +9,8 @@ #include +#include + namespace armnn { @@ -21,8 +23,19 @@ public: const arm_compute::CLCompileContext& clCompileContext); virtual void Execute() const override; + bool SupportsTensorHandleReplacement() const override { return true;}; + + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: mutable arm_compute::CLDepthConvertLayer m_Layer; + virtual void Reconfigure(); + + std::unique_ptr m_InputProxy; + std::unique_ptr m_OutputProxy; }; arm_compute::Status ClConvertFp32ToFp16WorkloadValidate(const TensorInfo& input, const TensorInfo& output); diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp index cdfa885f67..bf82fbf255 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp @@ -180,9 +180,9 @@ void ClConvolution2dWorkload::FreeUnusedTensors() void ClConvolution2dWorkload::Reconfigure() { - ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClConvolution2dWorkload_Reconfigure"); arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); + m_InputProxy->set(&input); m_OutputProxy->set(&output); } diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp index 891d5096cd..e4177e4327 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp @@ -40,6 +40,8 @@ public: arm_compute::ConvolutionMethod GetConvolutionMethod() const; + bool SupportsTensorHandleReplacement() const override { return true;}; + protected: void Reconfigure() override; diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp index 679e225c63..0aae1a30e3 100644 --- a/src/backends/cl/workloads/ClFloorFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClFloorFloatWorkload.cpp @@ -29,7 +29,6 @@ ClFloorFloatWorkload::ClFloorFloatWorkload(const FloorQueueDescriptor& descripto arm_compute::ICLTensor& input = static_cast(m_Data.m_Inputs[0])->GetTensor(); arm_compute::ICLTensor& output = static_cast(m_Data.m_Outputs[0])->GetTensor(); - { ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "ClFloorFloatWorkload_configure"); m_Layer.configure(clCompileContext, &input, &output); @@ -42,4 +41,42 @@ void ClFloorFloatWorkload::Execute() const RunClFunction(m_Layer, CHECK_LOCATION()); } +void ClFloorFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + 
ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void ClFloorFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClFloorFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp index 5740c6887a..dbe5f6f163 100644 --- a/src/backends/cl/workloads/ClFloorFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClFloorFloatWorkload.hpp @@ -23,9 +23,14 @@ public: const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: mutable arm_compute::CLFloor m_Layer; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp index b34153fff0..d120fb28f6 100644 --- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.cpp @@ -60,4 +60,42 @@ void ClL2NormalizationFloatWorkload::Execute() const RunClFunction(m_Layer, CHECK_LOCATION()); } +void ClL2NormalizationFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void ClL2NormalizationFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
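// For workloads like this one, Reconfigure() (defined just below) only throws
// UnimplementedException, so Replace*TensorHandle amounts to: swap, fail,
// restore, rethrow. A caller is expected to treat that as "import not
// supported" and fall back to copying through the existing handle, as the
// LoadedNetwork changes earlier in this patch do; a hypothetical caller sketch:
//
//     try
//     {
//         workload->ReplaceOutputTensorHandle(importedHandle, slot);
//     }
//     catch (armnn::UnimplementedException&)
//     {
//         // keep the existing allocated handle and copy data instead
//     }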
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClL2NormalizationFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp index cfa1a97eec..67e7b8b7b1 100644 --- a/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClL2NormalizationFloatWorkload.hpp @@ -24,10 +24,16 @@ public: const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: // Purposely not a CLL2Normalize function. See constructor. mutable arm_compute::CLL2NormalizeLayer m_Layer; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp index d8d95f5c74..37dfab6a5f 100644 --- a/src/backends/cl/workloads/ClLstmFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.cpp @@ -446,4 +446,42 @@ void ClLstmFloatWorkload::FreeUnusedTensors() FreeTensorIfUnused(m_OutputLayerNormWeightsTensor); } +void ClLstmFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void ClLstmFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClLstmFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp index b9faca8b54..54c5c600dc 100644 --- a/src/backends/cl/workloads/ClLstmFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClLstmFloatWorkload.hpp @@ -22,9 +22,14 @@ public: const WorkloadInfo& info, const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: mutable arm_compute::CLLSTMLayer m_LstmLayer; + virtual void Reconfigure(); std::unique_ptr m_InputToInputWeightsTensor; std::unique_ptr m_InputToForgetWeightsTensor; diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp index d98532d7d1..8de8dd5c3b 100644 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp +++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.cpp @@ -62,4 +62,42 @@ void ClNormalizationFloatWorkload::Execute() const RunClFunction(m_NormalizationLayer, CHECK_LOCATION()); } +void ClNormalizationFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void ClNormalizationFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void ClNormalizationFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp index 40b2693cd4..d9db0f2de3 100644 --- a/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp +++ b/src/backends/cl/workloads/ClNormalizationFloatWorkload.hpp @@ -23,9 +23,14 @@ public: const WorkloadInfo& info, const arm_compute::CLCompileContext& clCompileContext); void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: mutable arm_compute::CLNormalizationLayer m_NormalizationLayer; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/neon/NeonTimer.cpp b/src/backends/neon/NeonTimer.cpp index 5cce0511ab..a7d3032b13 100644 --- a/src/backends/neon/NeonTimer.cpp +++ b/src/backends/neon/NeonTimer.cpp @@ -7,6 +7,7 @@ #include "NeonInterceptorScheduler.hpp" #include +#include #include @@ -29,7 +30,7 @@ void NeonTimer::Start() { // Keep the real schedule and add NeonInterceptorScheduler as an interceptor m_RealScheduler = &arm_compute::Scheduler::get(); - arm_compute::Scheduler::set(std::static_pointer_cast(g_Interceptor)); + arm_compute::Scheduler::set(armnn::PolymorphicPointerDowncast(g_Interceptor)); } } diff --git a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp index dcef025a3d..7a2ff9ac1a 100644 --- a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.cpp @@ -40,4 +40,42 @@ void NeonConvertBf16ToFp32Workload::Execute() const } } +void NeonConvertBf16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonConvertBf16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
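// Handle replacement is opt-in across the API. Reading this patch together with
// the small IWorkload.hpp change in the diffstat, IWorkload::SupportsTensorHandleReplacement()
// appears to default to false, with only workloads that can genuinely rebind
// their tensors overriding it to true (the CL convert workloads,
// ClConvolution2dWorkload, and the new RefBaseWorkload). The Neon workloads here
// implement Replace*TensorHandle without claiming support, so a careful caller
// probes first:
//
//     if (workload->SupportsTensorHandleReplacement())
//     {
//         workload->ReplaceInputTensorHandle(handle, slot);
//     }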
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonConvertBf16ToFp32Workload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.hpp b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.hpp index 9770fbdbb0..9d44ad2cac 100644 --- a/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.hpp +++ b/src/backends/neon/workloads/NeonConvertBf16ToFp32Workload.hpp @@ -17,10 +17,15 @@ class NeonConvertBf16ToFp32Workload : public BFloat16ToFloat32Workload; std::vector m_TensorHandlePairs; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp index 1b9e1bcfb5..ce6c785329 100644 --- a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.cpp @@ -40,4 +40,42 @@ void NeonConvertFp16ToFp32Workload::Execute() const } } +void NeonConvertFp16ToFp32Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonConvertFp16ToFp32Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonConvertFp16ToFp32Workload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp index 9159e51f8b..c0165eae78 100644 --- a/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp +++ b/src/backends/neon/workloads/NeonConvertFp16ToFp32Workload.hpp @@ -17,10 +17,15 @@ class NeonConvertFp16ToFp32Workload : public Float16ToFloat32Workload; std::vector m_TensorHandlePairs; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp index ac6a69d21a..acd1a1ea8f 100644 --- a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.cpp @@ -41,4 +41,42 @@ void NeonConvertFp32ToBf16Workload::Execute() const } } +void NeonConvertFp32ToBf16Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonConvertFp32ToBf16Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonConvertFp32ToBf16Workload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.hpp b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.hpp index 6c0118712f..2304f8a1d4 100644 --- a/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.hpp +++ b/src/backends/neon/workloads/NeonConvertFp32ToBf16Workload.hpp @@ -17,10 +17,15 @@ class NeonConvertFp32ToBf16Workload : public Float32ToBFloat16Workload; std::vector m_TensorHandlePairs; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp index d65cba046b..089716a4b4 100644 --- a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.cpp @@ -41,4 +41,42 @@ void NeonConvertFp32ToFp16Workload::Execute() const } } +void NeonConvertFp32ToFp16Workload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonConvertFp32ToFp16Workload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonConvertFp32ToFp16Workload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp index 8e9f11b857..666f48794b 100644 --- a/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp +++ b/src/backends/neon/workloads/NeonConvertFp32ToFp16Workload.hpp @@ -17,10 +17,15 @@ class NeonConvertFp32ToFp16Workload : public Float32ToFloat16Workload; std::vector m_TensorHandlePairs; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp index b97e3cef75..1d53245c5f 100644 --- a/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.cpp @@ -32,6 +32,45 @@ void NeonFloorFloatWorkload::Execute() const ARMNN_SCOPED_PROFILING_EVENT_NEON_GUID("NeonFloorFloatWorkload_Execute", this->GetGuid()); m_Layer->run(); } + +void NeonFloorFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonFloorFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
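// A small C++ point that applies to every catch block in this patch: `throw e;`
// rethrows a copy of the caught object (and would slice a derived exception
// caught by reference to a base), whereas a bare `throw;` re-raises the original
// exception unchanged:
//
//     catch (armnn::UnimplementedException& e)
//     {
//         this->m_Data.m_Inputs[slot] = backupHandle;
//         throw;    // preserves the original exception; `throw e;` copies it
//     }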
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonFloorFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp index 7113931673..8ba6b4a5c5 100644 --- a/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonFloorFloatWorkload.hpp @@ -20,9 +20,14 @@ class NeonFloorFloatWorkload : public FloatWorkload public: NeonFloorFloatWorkload(const FloorQueueDescriptor& descriptor, const WorkloadInfo& info); virtual void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: std::unique_ptr m_Layer; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp index 887f25a333..c0c6ed4982 100644 --- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.cpp @@ -60,4 +60,42 @@ void NeonL2NormalizationFloatWorkload::Execute() const m_Layer->run(); } +void NeonL2NormalizationFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonL2NormalizationFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonL2NormalizationFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp index 82f0639e9c..9c591fc7a7 100644 --- a/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonL2NormalizationFloatWorkload.hpp @@ -26,9 +26,14 @@ public: NeonL2NormalizationFloatWorkload(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr& memoryManager); virtual void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: std::unique_ptr m_Layer; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp index b8224e6ca1..2f14ab9022 100644 --- a/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.cpp @@ -464,4 +464,42 @@ void NeonLstmFloatWorkload::FreeUnusedTensors() FreeTensorIfUnused(m_OutputLayerNormWeightsTensor); } +void NeonLstmFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonLstmFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonLstmFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp b/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp index ebbf180371..4bb3ff823e 100644 --- a/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonLstmFloatWorkload.hpp @@ -21,7 +21,11 @@ class NeonLstmFloatWorkload : public FloatWorkload public: NeonLstmFloatWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info); virtual void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: mutable arm_compute::NELSTMLayer m_LstmLayer; @@ -51,6 +55,7 @@ private: std::unique_ptr m_OutputLayerNormWeightsTensor; void FreeUnusedTensors(); + virtual void Reconfigure(); }; arm_compute::Status NeonLstmFloatWorkloadValidate(const TensorInfo& input, const TensorInfo& outputStateIn, diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp index f811a0457e..01ac5c1b64 100644 --- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.cpp @@ -110,4 +110,42 @@ void NeonNormalizationFloatWorkload::Execute() const m_NormalizationLayer->run(); } +void NeonNormalizationFloatWorkload::ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. + this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +// Replace output tensor handle with the given TensorHandle +void NeonNormalizationFloatWorkload::ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) +{ + ITensorHandle* backupHandle = this->m_Data.m_Inputs[slot]; + this->m_Data.m_Inputs[slot] = tensorHandle; + try + { + Reconfigure(); + } + catch(armnn::UnimplementedException& e) + { + // Cannot reconfigure, revert the slot back and throw the exception. 
+ this->m_Data.m_Inputs[slot] = backupHandle; + throw e; + } +} + +void NeonNormalizationFloatWorkload::Reconfigure() +{ + throw armnn::UnimplementedException("Reconfigure not implemented for this workload"); +} + } //namespace armnn diff --git a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp index ed5453619e..9605ed1543 100644 --- a/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp +++ b/src/backends/neon/workloads/NeonNormalizationFloatWorkload.hpp @@ -26,9 +26,14 @@ public: NeonNormalizationFloatWorkload(const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info, std::shared_ptr& memoryManager); virtual void Execute() const override; + // Replace input tensor handle with the given TensorHandle + void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; + // Replace output tensor handle with the given TensorHandle + void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override; private: std::unique_ptr m_NormalizationLayer; + virtual void Reconfigure(); }; } //namespace armnn diff --git a/src/backends/reference/test/RefWorkloadFactoryHelper.hpp b/src/backends/reference/test/RefWorkloadFactoryHelper.hpp index e413d045f9..f0a842de57 100644 --- a/src/backends/reference/test/RefWorkloadFactoryHelper.hpp +++ b/src/backends/reference/test/RefWorkloadFactoryHelper.hpp @@ -7,6 +7,8 @@ #include +#include + #include #include #include "reference/RefTensorHandleFactory.hpp" @@ -34,7 +36,7 @@ struct WorkloadFactoryHelper const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) { - return armnn::RefTensorHandleFactory(std::static_pointer_cast(memoryManager)); + return armnn::RefTensorHandleFactory(armnn::PolymorphicPointerDowncast(memoryManager)); } }; diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt index 60d8255454..46c2706742 100644 --- a/src/backends/reference/workloads/CMakeLists.txt +++ b/src/backends/reference/workloads/CMakeLists.txt @@ -68,6 +68,7 @@ list(APPEND armnnRefBackendWorkloads_sources RefActivationWorkload.hpp RefArgMinMaxWorkload.cpp RefArgMinMaxWorkload.hpp + RefBaseWorkload.hpp RefBatchNormalizationWorkload.cpp RefBatchNormalizationWorkload.hpp RefBatchToSpaceNdWorkload.cpp diff --git a/src/backends/reference/workloads/RefActivationWorkload.hpp b/src/backends/reference/workloads/RefActivationWorkload.hpp index 9814ac172b..8dc2d52d9b 100644 --- a/src/backends/reference/workloads/RefActivationWorkload.hpp +++ b/src/backends/reference/workloads/RefActivationWorkload.hpp @@ -5,16 +5,16 @@ #pragma once -#include +#include "RefBaseWorkload.hpp" #include namespace armnn { -class RefActivationWorkload : public BaseWorkload +class RefActivationWorkload : public RefBaseWorkload { public: - using BaseWorkload::BaseWorkload; + using RefBaseWorkload::RefBaseWorkload; void Execute() const override; void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp index 2d635bf6c2..d724273287 100644 --- a/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp +++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.cpp @@ -16,7 +16,7 @@ namespace armnn RefArgMinMaxWorkload::RefArgMinMaxWorkload( const ArgMinMaxQueueDescriptor& descriptor, const WorkloadInfo& info) - : BaseWorkload(descriptor, info) {} + : 
RefBaseWorkload(descriptor, info) {}
 
 void RefArgMinMaxWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp
index f3c264469b..97c4b45d60 100644
--- a/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp
+++ b/src/backends/reference/workloads/RefArgMinMaxWorkload.hpp
@@ -5,12 +5,12 @@
 #pragma once
 
-#include <armnn/backends/Workload.hpp>
+#include "RefBaseWorkload.hpp"
 #include <armnn/backends/WorkloadData.hpp>
 
 namespace armnn
 {
-class RefArgMinMaxWorkload : public BaseWorkload<ArgMinMaxQueueDescriptor>
+class RefArgMinMaxWorkload : public RefBaseWorkload<ArgMinMaxQueueDescriptor>
 {
 public:
     explicit RefArgMinMaxWorkload(const ArgMinMaxQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefBaseWorkload.hpp b/src/backends/reference/workloads/RefBaseWorkload.hpp
new file mode 100644
index 0000000000..824b4ccc67
--- /dev/null
+++ b/src/backends/reference/workloads/RefBaseWorkload.hpp
@@ -0,0 +1,36 @@
+//
+// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include <armnn/backends/Workload.hpp>
+
+namespace armnn
+{
+    template <typename QueueDescriptor>
+    class RefBaseWorkload : public BaseWorkload<QueueDescriptor>
+    {
+    public:
+        RefBaseWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info)
+                : BaseWorkload<QueueDescriptor>(descriptor, info)
+        {}
+
+        virtual bool SupportsTensorHandleReplacement() const override
+        {
+            return true;
+        }
+        // Replace input tensor handle with the given TensorHandle
+        void ReplaceInputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override
+        {
+            this->m_Data.m_Inputs[slot] = tensorHandle;
+        }
+
+        // Replace output tensor handle with the given TensorHandle
+        void ReplaceOutputTensorHandle(ITensorHandle* tensorHandle, unsigned int slot) override
+        {
+            this->m_Data.m_Outputs[slot] = tensorHandle;
+        }
+    };
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
index 282374d89b..a6bd986f1d 100644
--- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.cpp
@@ -15,7 +15,7 @@ namespace armnn
 
 RefBatchNormalizationWorkload::RefBatchNormalizationWorkload(const BatchNormalizationQueueDescriptor& descriptor,
                                                              const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
     , m_Mean    (std::make_unique<ScopedTensorHandle>(*(descriptor.m_Mean)))
     , m_Variance(std::make_unique<ScopedTensorHandle>(*(descriptor.m_Variance)))
     , m_Beta    (std::make_unique<ScopedTensorHandle>(*(descriptor.m_Beta)))
diff --git a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
index 305c0ce573..60dd2a927c 100644
--- a/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefBatchNormalizationWorkload.hpp
@@ -5,13 +5,13 @@
 #pragma once
 
-#include <armnn/backends/Workload.hpp>
+#include "RefBaseWorkload.hpp"
 #include <armnn/backends/WorkloadData.hpp>
 
 namespace armnn
 {
-class RefBatchNormalizationWorkload : public BaseWorkload<BatchNormalizationQueueDescriptor>
+class RefBatchNormalizationWorkload : public RefBaseWorkload<BatchNormalizationQueueDescriptor>
 {
 public:
     explicit RefBatchNormalizationWorkload(const BatchNormalizationQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp
index 7d18c12476..d7ee6fc81c 100644
--- a/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp
+++ b/src/backends/reference/workloads/RefBatchToSpaceNdWorkload.hpp
@@ -5,16 +5,16 @@
#pragma once -#include +#include "RefBaseWorkload.hpp" #include namespace armnn { -class RefBatchToSpaceNdWorkload : public BaseWorkload +class RefBatchToSpaceNdWorkload : public RefBaseWorkload { public: - using BaseWorkload::BaseWorkload; + using RefBaseWorkload::RefBaseWorkload; void Execute() const override; void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; diff --git a/src/backends/reference/workloads/RefCastWorkload.hpp b/src/backends/reference/workloads/RefCastWorkload.hpp index ccafaafac9..6f7e56a6b6 100644 --- a/src/backends/reference/workloads/RefCastWorkload.hpp +++ b/src/backends/reference/workloads/RefCastWorkload.hpp @@ -5,7 +5,7 @@ #pragma once -#include +#include "RefBaseWorkload.hpp" #include #include "RefWorkloadUtils.hpp" @@ -13,10 +13,10 @@ namespace armnn { -class RefCastWorkload : public BaseWorkload +class RefCastWorkload : public RefBaseWorkload { public: - using BaseWorkload::BaseWorkload; + using RefBaseWorkload::RefBaseWorkload; void Execute() const override; void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; private: diff --git a/src/backends/reference/workloads/RefChannelShuffleWorkload.hpp b/src/backends/reference/workloads/RefChannelShuffleWorkload.hpp index 0c8037823a..b459b87592 100644 --- a/src/backends/reference/workloads/RefChannelShuffleWorkload.hpp +++ b/src/backends/reference/workloads/RefChannelShuffleWorkload.hpp @@ -5,16 +5,16 @@ #pragma once -#include +#include "RefBaseWorkload.hpp" #include namespace armnn { -class RefChannelShuffleWorkload : public BaseWorkload +class RefChannelShuffleWorkload : public RefBaseWorkload { public: - using BaseWorkload::BaseWorkload; + using RefBaseWorkload::RefBaseWorkload; void Execute() const override; void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override; diff --git a/src/backends/reference/workloads/RefComparisonWorkload.cpp b/src/backends/reference/workloads/RefComparisonWorkload.cpp index 03df7a4c4a..433e3e8ad8 100644 --- a/src/backends/reference/workloads/RefComparisonWorkload.cpp +++ b/src/backends/reference/workloads/RefComparisonWorkload.cpp @@ -21,7 +21,7 @@ namespace armnn RefComparisonWorkload::RefComparisonWorkload(const ComparisonQueueDescriptor& desc, const WorkloadInfo& info) - : BaseWorkload(desc, info) + : RefBaseWorkload(desc, info) {} void RefComparisonWorkload::PostAllocationConfigure() diff --git a/src/backends/reference/workloads/RefComparisonWorkload.hpp b/src/backends/reference/workloads/RefComparisonWorkload.hpp index f2780c7ae5..93cfd1f2b1 100644 --- a/src/backends/reference/workloads/RefComparisonWorkload.hpp +++ b/src/backends/reference/workloads/RefComparisonWorkload.hpp @@ -7,16 +7,16 @@ #include "BaseIterator.hpp" -#include +#include "RefBaseWorkload.hpp" #include namespace armnn { -class RefComparisonWorkload : public BaseWorkload +class RefComparisonWorkload : public RefBaseWorkload { public: - using BaseWorkload::m_Data; + using RefBaseWorkload::m_Data; RefComparisonWorkload(const ComparisonQueueDescriptor& descriptor, const WorkloadInfo& info); void PostAllocationConfigure() override; diff --git a/src/backends/reference/workloads/RefConcatWorkload.hpp b/src/backends/reference/workloads/RefConcatWorkload.hpp index cb1ecf06a7..11d6d016ed 100644 --- a/src/backends/reference/workloads/RefConcatWorkload.hpp +++ b/src/backends/reference/workloads/RefConcatWorkload.hpp @@ -5,16 +5,16 @@ #pragma once -#include +#include "RefBaseWorkload.hpp" #include namespace armnn { -class RefConcatWorkload : public BaseWorkload +class 
RefConcatWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefConstantWorkload.cpp b/src/backends/reference/workloads/RefConstantWorkload.cpp
index 6290237d69..571dbb219a 100644
--- a/src/backends/reference/workloads/RefConstantWorkload.cpp
+++ b/src/backends/reference/workloads/RefConstantWorkload.cpp
@@ -18,7 +18,7 @@ namespace armnn

 RefConstantWorkload::RefConstantWorkload(
     const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info) {}
+    : RefBaseWorkload(descriptor, info) {}

 void RefConstantWorkload::Execute() const
 {
diff --git a/src/backends/reference/workloads/RefConstantWorkload.hpp b/src/backends/reference/workloads/RefConstantWorkload.hpp
index c158983d7a..181d79d320 100644
--- a/src/backends/reference/workloads/RefConstantWorkload.hpp
+++ b/src/backends/reference/workloads/RefConstantWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include
 #include

@@ -14,7 +14,7 @@ namespace armnn
 {

 // Base class template providing an implementation of the Constant layer common to all data types.
-class RefConstantWorkload : public BaseWorkload
+class RefConstantWorkload : public RefBaseWorkload
 {
 public:
     RefConstantWorkload(const ConstantQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp
index b3af111fa3..8b5c6d56c2 100644
--- a/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertBf16ToFp32Workload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
index acb1995b9f..feb442ef5a 100644
--- a/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertFp16ToFp32Workload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp
index 97a138f49c..cd3cfa4cf3 100644
--- a/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertFp32ToBf16Workload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
diff --git a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
index 8cc822e7d8..fe137ed62f 100644
--- a/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
+++ b/src/backends/reference/workloads/RefConvertFp32ToFp16Workload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
index 20c5c08b17..d57040eaec 100644
--- a/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefConvolution2dWorkload.cpp
@@ -14,7 +14,7 @@ namespace armnn
 {
 RefConvolution2dWorkload::RefConvolution2dWorkload(
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {
     WorkloadInfo detailsInfo;
     detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
diff --git a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp
index 880547dc33..3335782f78 100644
--- a/src/backends/reference/workloads/RefConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefConvolution2dWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include
 #include "Decoders.hpp"
 #include "Encoders.hpp"

@@ -13,7 +13,7 @@ namespace armnn
 {

-class RefConvolution2dWorkload : public BaseWorkload
+class RefConvolution2dWorkload : public RefBaseWorkload
 {
 public:
     explicit RefConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefConvolution3dWorkload.cpp b/src/backends/reference/workloads/RefConvolution3dWorkload.cpp
index afab88f0a8..5f542807ed 100644
--- a/src/backends/reference/workloads/RefConvolution3dWorkload.cpp
+++ b/src/backends/reference/workloads/RefConvolution3dWorkload.cpp
@@ -14,7 +14,7 @@ namespace armnn
 {
 RefConvolution3dWorkload::RefConvolution3dWorkload(
     const Convolution3dQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {
     WorkloadInfo detailsInfo;
     detailsInfo.m_InputTensorInfos = info.m_InputTensorInfos;
diff --git a/src/backends/reference/workloads/RefConvolution3dWorkload.hpp b/src/backends/reference/workloads/RefConvolution3dWorkload.hpp
index 53ce309eb8..6c74675eec 100644
--- a/src/backends/reference/workloads/RefConvolution3dWorkload.hpp
+++ b/src/backends/reference/workloads/RefConvolution3dWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include
 #include "Decoders.hpp"
 #include "Encoders.hpp"

@@ -13,7 +13,7 @@ namespace armnn
 {

-class RefConvolution3dWorkload : public BaseWorkload
+class RefConvolution3dWorkload : public RefBaseWorkload
 {
 public:
     explicit RefConvolution3dWorkload(const Convolution3dQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefDebugWorkload.hpp b/src/backends/reference/workloads/RefDebugWorkload.hpp
index 66af9a0b0f..a1579599f4 100644
--- a/src/backends/reference/workloads/RefDebugWorkload.hpp
+++ b/src/backends/reference/workloads/RefDebugWorkload.hpp
@@ -7,7 +7,7 @@

 #include

-#include
+#include "RefBaseWorkload.hpp"

 namespace armnn
 {
diff --git a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
index 854a564062..bd179d3b9c 100644
--- a/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
+++ b/src/backends/reference/workloads/RefDepthToSpaceWorkload.hpp
@@ -5,15 +5,15 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"

 namespace armnn
 {

-class RefDepthToSpaceWorkload : public BaseWorkload
+class RefDepthToSpaceWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
index b447d1a441..ad5edde7e6 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.cpp
@@ -17,7 +17,7 @@ namespace armnn

 RefDepthwiseConvolution2dWorkload::RefDepthwiseConvolution2dWorkload(
     const DepthwiseConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {
     m_Weight = std::make_unique(*(descriptor.m_Weight));
     const TensorInfo& rFilterInfo = m_Weight->GetTensorInfo();
diff --git a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
index ae93d03656..5d4b483fa7 100644
--- a/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefDepthwiseConvolution2dWorkload.hpp
@@ -2,7 +2,7 @@
 // Copyright © 2017 Arm Ltd. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
-#include
+#include "RefBaseWorkload.hpp"
 #include
 #include "Decoders.hpp"
 #include "Encoders.hpp"

@@ -12,7 +12,7 @@ namespace armnn
 {

-class RefDepthwiseConvolution2dWorkload : public BaseWorkload {
+class RefDepthwiseConvolution2dWorkload : public RefBaseWorkload {
 public:
     explicit RefDepthwiseConvolution2dWorkload(const DepthwiseConvolution2dQueueDescriptor &descriptor,
                                                const WorkloadInfo &info);
diff --git a/src/backends/reference/workloads/RefDequantizeWorkload.hpp b/src/backends/reference/workloads/RefDequantizeWorkload.hpp
index 285c6496bb..8fa8951677 100644
--- a/src/backends/reference/workloads/RefDequantizeWorkload.hpp
+++ b/src/backends/reference/workloads/RefDequantizeWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"

 namespace armnn
 {

-class RefDequantizeWorkload : public BaseWorkload
+class RefDequantizeWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::m_Data;
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::m_Data;
+    using RefBaseWorkload::RefBaseWorkload;

     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
index 4bc9eb1704..5f01db3280 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.cpp
@@ -15,7 +15,7 @@ namespace armnn

 RefDetectionPostProcessWorkload::RefDetectionPostProcessWorkload(
     const DetectionPostProcessQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info),
+    : RefBaseWorkload(descriptor, info),
       m_Anchors(std::make_unique(*(descriptor.m_Anchors))) {}

 void RefDetectionPostProcessWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
index 4c3ad42b0f..53b2971063 100644
--- a/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
+++ b/src/backends/reference/workloads/RefDetectionPostProcessWorkload.hpp
@@ -5,13 +5,13 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefDetectionPostProcessWorkload : public BaseWorkload
+class RefDetectionPostProcessWorkload : public RefBaseWorkload
 {
 public:
     explicit RefDetectionPostProcessWorkload(const DetectionPostProcessQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp
index be153636f9..3ea51b9f69 100644
--- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp
+++ b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.cpp
@@ -27,7 +27,7 @@ namespace armnn

 RefElementwiseUnaryWorkload::RefElementwiseUnaryWorkload(const ElementwiseUnaryQueueDescriptor& desc,
                                                          const WorkloadInfo& info)
-    : BaseWorkload(desc, info)
+    : RefBaseWorkload(desc, info)
 {}

 void RefElementwiseUnaryWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp
index e055fd012c..91229b3c58 100644
--- a/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp
+++ b/src/backends/reference/workloads/RefElementwiseUnaryWorkload.hpp
@@ -7,16 +7,16 @@

 #include "BaseIterator.hpp"

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefElementwiseUnaryWorkload : public BaseWorkload
+class RefElementwiseUnaryWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::m_Data;
+    using RefBaseWorkload::m_Data;

     RefElementwiseUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
     void Execute() const override;
diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.cpp b/src/backends/reference/workloads/RefElementwiseWorkload.cpp
index dd7d325ca5..d14ce075b0 100644
--- a/src/backends/reference/workloads/RefElementwiseWorkload.cpp
+++ b/src/backends/reference/workloads/RefElementwiseWorkload.cpp
@@ -21,7 +21,7 @@ template
 ::RefElementwiseWorkload(
     const ParentDescriptor& desc,
     const WorkloadInfo& info)
-    : BaseWorkload(desc, info)
+    : RefBaseWorkload(desc, info)
 {
 }
diff --git a/src/backends/reference/workloads/RefElementwiseWorkload.hpp b/src/backends/reference/workloads/RefElementwiseWorkload.hpp
index 4b108e4363..065a7833d7 100644
--- a/src/backends/reference/workloads/RefElementwiseWorkload.hpp
+++ b/src/backends/reference/workloads/RefElementwiseWorkload.hpp
@@ -6,7 +6,7 @@
 #pragma once

 #include
-#include
+#include "RefBaseWorkload.hpp"
 #include
 #include "BaseIterator.hpp"
 #include "ElementwiseFunction.hpp"
@@ -18,12 +18,12 @@ namespace armnn
 {

 template
-class RefElementwiseWorkload : public BaseWorkload
+class RefElementwiseWorkload : public RefBaseWorkload
 {
 public:
     using InType = typename ElementwiseBinaryFunction::InType;
     using OutType = typename ElementwiseBinaryFunction::OutType;
-    using BaseWorkload::m_Data;
+    using RefBaseWorkload::m_Data;

     RefElementwiseWorkload(const ParentDescriptor& descriptor, const WorkloadInfo& info);
     void Execute() const override;
diff --git a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
index 53b3375a50..85dc6af326 100644
--- a/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
+++ b/src/backends/reference/workloads/RefFakeQuantizationFloat32Workload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
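Every header hunk above follows the same two-line pattern: the base class changes from BaseWorkload to RefBaseWorkload, and any inherited-constructor declaration has to change with it, because a using-declaration names a concrete base type and is not rebased automatically when the base class is swapped. A minimal self-contained sketch of the pattern (FooQueueDescriptor, WorkloadInfo and RefFooWorkload are placeholders, not names from this patch):

    // Stand-in types so the sketch compiles on its own.
    struct FooQueueDescriptor {};
    struct WorkloadInfo {};

    template <typename QueueDescriptor>
    class RefBaseWorkload
    {
    public:
        RefBaseWorkload(const QueueDescriptor& /*descriptor*/, const WorkloadInfo& /*info*/) {}
    };

    class RefFooWorkload : public RefBaseWorkload<FooQueueDescriptor>
    {
    public:
        // Must name the new base: 'using BaseWorkload<...>::BaseWorkload;' would
        // no longer refer to an actual base class and would fail to compile.
        using RefBaseWorkload<FooQueueDescriptor>::RefBaseWorkload;
    };

    int main()
    {
        RefFooWorkload workload(FooQueueDescriptor{}, WorkloadInfo{});  // inherited constructor
        (void)workload;
    }
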
diff --git a/src/backends/reference/workloads/RefFillWorkload.hpp b/src/backends/reference/workloads/RefFillWorkload.hpp
index 56d44b85f7..d1e00581cd 100644
--- a/src/backends/reference/workloads/RefFillWorkload.hpp
+++ b/src/backends/reference/workloads/RefFillWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefFillWorkload : public BaseWorkload
+class RefFillWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefFloorWorkload.hpp b/src/backends/reference/workloads/RefFloorWorkload.hpp
index 1a532f7a49..6237ff0c61 100644
--- a/src/backends/reference/workloads/RefFloorWorkload.hpp
+++ b/src/backends/reference/workloads/RefFloorWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefFloorWorkload : public BaseWorkload
+class RefFloorWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
index 5a7951ec48..c6ea147043 100644
--- a/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
+++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.cpp
@@ -14,7 +14,7 @@ namespace armnn
 {
 RefFullyConnectedWorkload::RefFullyConnectedWorkload(
     const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {
 }
diff --git a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp
index 3ee4a4a83c..432a8879a0 100644
--- a/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp
+++ b/src/backends/reference/workloads/RefFullyConnectedWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include
 #include "BaseIterator.hpp"
 #include "Decoders.hpp"

@@ -15,7 +15,7 @@ namespace armnn
 {

-class RefFullyConnectedWorkload : public BaseWorkload
+class RefFullyConnectedWorkload : public RefBaseWorkload
 {
 public:
     explicit RefFullyConnectedWorkload(const FullyConnectedQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefGatherWorkload.hpp b/src/backends/reference/workloads/RefGatherWorkload.hpp
index a2698e3a25..ec880a5109 100644
--- a/src/backends/reference/workloads/RefGatherWorkload.hpp
+++ b/src/backends/reference/workloads/RefGatherWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include

@@ -16,10 +16,10 @@ namespace armnn
 {

-class RefGatherWorkload : public BaseWorkload
+class RefGatherWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
index e642dc9b9a..c103a6b9d3 100644
--- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.cpp
@@ -16,7 +16,7 @@ namespace armnn

 RefInstanceNormalizationWorkload::RefInstanceNormalizationWorkload(
     const InstanceNormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info) {}
+    : RefBaseWorkload(descriptor, info) {}

 void RefInstanceNormalizationWorkload::Execute() const
 {
diff --git a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp
index 3283c444d2..a4b2dd39cb 100644
--- a/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefInstanceNormalizationWorkload.hpp
@@ -5,13 +5,13 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefInstanceNormalizationWorkload : public BaseWorkload
+class RefInstanceNormalizationWorkload : public RefBaseWorkload
 {
 public:
     explicit RefInstanceNormalizationWorkload(const InstanceNormalizationQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
index ca31503620..f6fcff3cc5 100644
--- a/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.cpp
@@ -22,7 +22,7 @@ namespace armnn

 RefL2NormalizationWorkload::RefL2NormalizationWorkload(
     const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info) {}
+    : RefBaseWorkload(descriptor, info) {}

 void RefL2NormalizationWorkload::Execute() const
 {
diff --git a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp
index dd129c663e..c64e2ea0fd 100644
--- a/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefL2NormalizationWorkload.hpp
@@ -5,13 +5,13 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefL2NormalizationWorkload : public BaseWorkload
+class RefL2NormalizationWorkload : public RefBaseWorkload
 {
 public:
     explicit RefL2NormalizationWorkload(const L2NormalizationQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp
index 9f87def1bd..91ad5f6c36 100644
--- a/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp
+++ b/src/backends/reference/workloads/RefLogSoftmaxWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefLogSoftmaxWorkload : public BaseWorkload
+class RefLogSoftmaxWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp
index f187e0ca31..f0cb846acf 100644
--- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp
+++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.cpp
@@ -19,7 +19,7 @@ namespace armnn

 RefLogicalBinaryWorkload::RefLogicalBinaryWorkload(const LogicalBinaryQueueDescriptor& desc,
                                                    const WorkloadInfo& info)
-    : BaseWorkload(desc, info)
+    : RefBaseWorkload(desc, info)
 {}

 void RefLogicalBinaryWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp
index 053de7daf9..797d937d80 100644
--- a/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp
+++ b/src/backends/reference/workloads/RefLogicalBinaryWorkload.hpp
@@ -7,16 +7,16 @@

 #include "BaseIterator.hpp"

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefLogicalBinaryWorkload : public BaseWorkload
+class RefLogicalBinaryWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::m_Data;
+    using RefBaseWorkload::m_Data;

     RefLogicalBinaryWorkload(const LogicalBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
     void Execute() const override;
diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp
index bef2bdc668..ec0aa0e454 100644
--- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp
+++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.cpp
@@ -19,7 +19,7 @@ namespace armnn

 RefLogicalUnaryWorkload::RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDescriptor& desc,
                                                  const WorkloadInfo& info)
-    : BaseWorkload(desc, info)
+    : RefBaseWorkload(desc, info)
 {}

 void RefLogicalUnaryWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp
index 008d24fef8..ebd5826cc5 100644
--- a/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp
+++ b/src/backends/reference/workloads/RefLogicalUnaryWorkload.hpp
@@ -7,16 +7,16 @@

 #include "BaseIterator.hpp"

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefLogicalUnaryWorkload : public BaseWorkload
+class RefLogicalUnaryWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::m_Data;
+    using RefBaseWorkload::m_Data;

     RefLogicalUnaryWorkload(const ElementwiseUnaryQueueDescriptor& descriptor, const WorkloadInfo& info);
     void Execute() const override;
diff --git a/src/backends/reference/workloads/RefLstmWorkload.cpp b/src/backends/reference/workloads/RefLstmWorkload.cpp
index 1ff6f50ed5..8609811253 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.cpp
@@ -15,7 +15,7 @@ namespace armnn
 {

 RefLstmWorkload::RefLstmWorkload(const LstmQueueDescriptor &descriptor, const WorkloadInfo &info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
     , m_InputToInputWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToInputWeights))
     , m_InputToForgetWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToForgetWeights))
     , m_InputToCellWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToCellWeights))
diff --git a/src/backends/reference/workloads/RefLstmWorkload.hpp b/src/backends/reference/workloads/RefLstmWorkload.hpp
index 72f6360281..57526c9ba2 100644
--- a/src/backends/reference/workloads/RefLstmWorkload.hpp
+++ b/src/backends/reference/workloads/RefLstmWorkload.hpp
@@ -7,13 +7,13 @@

 #include

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefLstmWorkload : public BaseWorkload
+class RefLstmWorkload : public RefBaseWorkload
 {
 public:
     explicit RefLstmWorkload(const LstmQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/backends/reference/workloads/RefMeanWorkload.cpp b/src/backends/reference/workloads/RefMeanWorkload.cpp
index 7941ce2c36..23abaf8ff4 100644
--- a/src/backends/reference/workloads/RefMeanWorkload.cpp
+++ b/src/backends/reference/workloads/RefMeanWorkload.cpp
@@ -16,7 +16,7 @@ namespace armnn
 {

 RefMeanWorkload::RefMeanWorkload(const MeanQueueDescriptor& descriptor, const WorkloadInfo& info)
-    :BaseWorkload(descriptor, info) {}
+    :RefBaseWorkload(descriptor, info) {}

 void RefMeanWorkload::Execute() const
 {
diff --git a/src/backends/reference/workloads/RefMeanWorkload.hpp b/src/backends/reference/workloads/RefMeanWorkload.hpp
index 2825d669c4..c4c6a1261c 100644
--- a/src/backends/reference/workloads/RefMeanWorkload.hpp
+++ b/src/backends/reference/workloads/RefMeanWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include "Decoders.hpp"

@@ -14,7 +14,7 @@ namespace armnn
 {

-class RefMeanWorkload : public BaseWorkload
+class RefMeanWorkload : public RefBaseWorkload
 {
 public:
     explicit RefMeanWorkload (const MeanQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.cpp b/src/backends/reference/workloads/RefNormalizationWorkload.cpp
index 36828acfb3..613868de57 100644
--- a/src/backends/reference/workloads/RefNormalizationWorkload.cpp
+++ b/src/backends/reference/workloads/RefNormalizationWorkload.cpp
@@ -158,7 +158,7 @@ namespace armnn

 RefNormalizationWorkload::RefNormalizationWorkload(const NormalizationQueueDescriptor& descriptor,
                                                    const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {}

 void RefNormalizationWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefNormalizationWorkload.hpp b/src/backends/reference/workloads/RefNormalizationWorkload.hpp
index b152072496..5218e1e43a 100644
--- a/src/backends/reference/workloads/RefNormalizationWorkload.hpp
+++ b/src/backends/reference/workloads/RefNormalizationWorkload.hpp
@@ -5,13 +5,13 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefNormalizationWorkload : public BaseWorkload
+class RefNormalizationWorkload : public RefBaseWorkload
 {
 public:
     explicit RefNormalizationWorkload(const NormalizationQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefPadWorkload.hpp b/src/backends/reference/workloads/RefPadWorkload.hpp
index 18c406a4de..c5871059cc 100644
--- a/src/backends/reference/workloads/RefPadWorkload.hpp
+++ b/src/backends/reference/workloads/RefPadWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefPadWorkload : public BaseWorkload
+class RefPadWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefPermuteWorkload.hpp b/src/backends/reference/workloads/RefPermuteWorkload.hpp
index 9424441c37..d1e44520a1 100644
--- a/src/backends/reference/workloads/RefPermuteWorkload.hpp
+++ b/src/backends/reference/workloads/RefPermuteWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"

 #include
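Several of the workloads above also re-export m_Data (RefDequantizeWorkload, RefElementwiseWorkload, RefLogicalBinaryWorkload, RefLogicalUnaryWorkload). For the class templates among them the using-declaration is what makes the member of the dependent base visible to unqualified name lookup; for the non-template classes it simply documents where m_Data comes from. Either way it must be renamed along with the base. A self-contained sketch (all names are placeholders, not from this patch):

    // Stand-in descriptor; Arm NN's queue descriptors carry inputs/outputs etc.
    struct BarQueueDescriptor { int m_Value = 0; };

    template <typename QueueDescriptor>
    class RefBaseWorkload
    {
    public:
        QueueDescriptor m_Data;  // the member the derived workloads re-export
    };

    template <typename QueueDescriptor>
    class RefSketchWorkload : public RefBaseWorkload<QueueDescriptor>
    {
    public:
        // In a class template the base is dependent, so an unqualified m_Data
        // would not be found without this using-declaration (or this->m_Data).
        using RefBaseWorkload<QueueDescriptor>::m_Data;

        int Read() const { return m_Data.m_Value; }
    };

    int main()
    {
        RefSketchWorkload<BarQueueDescriptor> w;
        return w.Read();
    }
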
diff --git a/src/backends/reference/workloads/RefPooling2dWorkload.hpp b/src/backends/reference/workloads/RefPooling2dWorkload.hpp
index 125fea8d4e..a073e3921b 100644
--- a/src/backends/reference/workloads/RefPooling2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefPooling2dWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include "Decoders.hpp"

@@ -13,10 +13,10 @@ namespace armnn
 {

-class RefPooling2dWorkload : public BaseWorkload
+class RefPooling2dWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;

     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
diff --git a/src/backends/reference/workloads/RefPooling3dWorkload.hpp b/src/backends/reference/workloads/RefPooling3dWorkload.hpp
index 911c438627..92bc4766cf 100644
--- a/src/backends/reference/workloads/RefPooling3dWorkload.hpp
+++ b/src/backends/reference/workloads/RefPooling3dWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include "Decoders.hpp"

@@ -13,10 +13,10 @@ namespace armnn
 {

-class RefPooling3dWorkload : public BaseWorkload
+class RefPooling3dWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;

     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
diff --git a/src/backends/reference/workloads/RefPreluWorkload.cpp b/src/backends/reference/workloads/RefPreluWorkload.cpp
index c1d8de2d01..94eeea1884 100644
--- a/src/backends/reference/workloads/RefPreluWorkload.cpp
+++ b/src/backends/reference/workloads/RefPreluWorkload.cpp
@@ -15,7 +15,7 @@ namespace armnn

 RefPreluWorkload::RefPreluWorkload(const PreluQueueDescriptor& descriptor,
                                    const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {}

 void RefPreluWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefPreluWorkload.hpp b/src/backends/reference/workloads/RefPreluWorkload.hpp
index b5c97dfa90..51ba2c15a7 100644
--- a/src/backends/reference/workloads/RefPreluWorkload.hpp
+++ b/src/backends/reference/workloads/RefPreluWorkload.hpp
@@ -5,13 +5,13 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefPreluWorkload : public BaseWorkload
+class RefPreluWorkload : public RefBaseWorkload
 {
 public:
     explicit RefPreluWorkload(const PreluQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.cpp b/src/backends/reference/workloads/RefQLstmWorkload.cpp
index dc29d0b92d..74f5f1ef4c 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.cpp
@@ -14,7 +14,7 @@ namespace armnn
 {

 RefQLstmWorkload::RefQLstmWorkload(const QLstmQueueDescriptor &descriptor, const WorkloadInfo &info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
     , m_InputToInputWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToInputWeights))
     , m_InputToForgetWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToForgetWeights))
     , m_InputToCellWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToCellWeights))
diff --git a/src/backends/reference/workloads/RefQLstmWorkload.hpp b/src/backends/reference/workloads/RefQLstmWorkload.hpp
index 093cfd16af..0e64a38ac9 100644
--- a/src/backends/reference/workloads/RefQLstmWorkload.hpp
+++ b/src/backends/reference/workloads/RefQLstmWorkload.hpp
@@ -7,13 +7,13 @@

 #include

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefQLstmWorkload : public BaseWorkload
+class RefQLstmWorkload : public RefBaseWorkload
 {
 public:
     explicit RefQLstmWorkload(const QLstmQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.cpp b/src/backends/reference/workloads/RefQuantizeWorkload.cpp
index 35791e65fb..10ef0e5e15 100644
--- a/src/backends/reference/workloads/RefQuantizeWorkload.cpp
+++ b/src/backends/reference/workloads/RefQuantizeWorkload.cpp
@@ -29,7 +29,7 @@ void QuantizeImpl(Decoder& in, Encoder& out, size_t numValues)

 } //namespace

 RefQuantizeWorkload::RefQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo &info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
     , m_NumElements(info.m_InputTensorInfos[0].GetNumElements())
 {
 }
diff --git a/src/backends/reference/workloads/RefQuantizeWorkload.hpp b/src/backends/reference/workloads/RefQuantizeWorkload.hpp
index a32efa7dd7..e38241067d 100644
--- a/src/backends/reference/workloads/RefQuantizeWorkload.hpp
+++ b/src/backends/reference/workloads/RefQuantizeWorkload.hpp
@@ -5,14 +5,14 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include "Decoders.hpp"
 #include "Encoders.hpp"

 namespace armnn
 {

-class RefQuantizeWorkload : public BaseWorkload
+class RefQuantizeWorkload : public RefBaseWorkload
 {
 public:
     RefQuantizeWorkload(const QuantizeQueueDescriptor& descriptor, const WorkloadInfo &info);
diff --git a/src/backends/reference/workloads/RefRankWorkload.hpp b/src/backends/reference/workloads/RefRankWorkload.hpp
index e1f30c5ba5..000828f9e4 100644
--- a/src/backends/reference/workloads/RefRankWorkload.hpp
+++ b/src/backends/reference/workloads/RefRankWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include "RefWorkloadUtils.hpp"

@@ -13,10 +13,10 @@ namespace armnn
 {

-struct RefRankWorkload : public BaseWorkload
+struct RefRankWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     virtual void Execute() const override
     {
         Execute(m_Data.m_Inputs, m_Data.m_Outputs);
diff --git a/src/backends/reference/workloads/RefReduceWorkload.cpp b/src/backends/reference/workloads/RefReduceWorkload.cpp
index 821e828b6e..62881daaf7 100644
--- a/src/backends/reference/workloads/RefReduceWorkload.cpp
+++ b/src/backends/reference/workloads/RefReduceWorkload.cpp
@@ -16,7 +16,7 @@ namespace armnn

 RefReduceWorkload::RefReduceWorkload(
     const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info) {}
+    : RefBaseWorkload(descriptor, info) {}

 void RefReduceWorkload::Execute() const
 {
diff --git a/src/backends/reference/workloads/RefReduceWorkload.hpp b/src/backends/reference/workloads/RefReduceWorkload.hpp
index d2280cc660..d759bc2ef1 100644
--- a/src/backends/reference/workloads/RefReduceWorkload.hpp
+++ b/src/backends/reference/workloads/RefReduceWorkload.hpp
@@ -5,13 +5,13 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefReduceWorkload : public BaseWorkload
+class RefReduceWorkload : public RefBaseWorkload
 {
 public:
     explicit RefReduceWorkload(const ReduceQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefReshapeWorkload.hpp b/src/backends/reference/workloads/RefReshapeWorkload.hpp
index 26a86c1d11..7596685336 100644
--- a/src/backends/reference/workloads/RefReshapeWorkload.hpp
+++ b/src/backends/reference/workloads/RefReshapeWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefReshapeWorkload : public BaseWorkload
+class RefReshapeWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefResizeWorkload.hpp b/src/backends/reference/workloads/RefResizeWorkload.hpp
index 82949ed639..f7747193ec 100644
--- a/src/backends/reference/workloads/RefResizeWorkload.hpp
+++ b/src/backends/reference/workloads/RefResizeWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefResizeWorkload : public BaseWorkload
+class RefResizeWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefShapeWorkload.hpp b/src/backends/reference/workloads/RefShapeWorkload.hpp
index 209cccda68..b7ed761e0c 100644
--- a/src/backends/reference/workloads/RefShapeWorkload.hpp
+++ b/src/backends/reference/workloads/RefShapeWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include "RefWorkloadUtils.hpp"

@@ -13,10 +13,10 @@ namespace armnn
 {

-struct RefShapeWorkload : public BaseWorkload
+struct RefShapeWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     virtual void Execute() const override
     {
         Execute(m_Data.m_Inputs, m_Data.m_Outputs);
diff --git a/src/backends/reference/workloads/RefSliceWorkload.hpp b/src/backends/reference/workloads/RefSliceWorkload.hpp
index 69dae5a1aa..b9dca86c4e 100644
--- a/src/backends/reference/workloads/RefSliceWorkload.hpp
+++ b/src/backends/reference/workloads/RefSliceWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefSliceWorkload : public BaseWorkload
+class RefSliceWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;

     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
diff --git a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp
index 42dbb53373..cac102a2bb 100644
--- a/src/backends/reference/workloads/RefSoftmaxWorkload.hpp
+++ b/src/backends/reference/workloads/RefSoftmaxWorkload.hpp
@@ -5,16 +5,16 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefSoftmaxWorkload : public BaseWorkload
+class RefSoftmaxWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp
index ec764c75bb..eb2d93fb86 100644
--- a/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp
+++ b/src/backends/reference/workloads/RefSpaceToBatchNdWorkload.hpp
@@ -4,17 +4,17 @@
 //
 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefSpaceToBatchNdWorkload : public BaseWorkload
+class RefSpaceToBatchNdWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
index bc71fde20d..17f8d2f61e 100644
--- a/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
+++ b/src/backends/reference/workloads/RefSpaceToDepthWorkload.hpp
@@ -4,17 +4,17 @@
 //
 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefSpaceToDepthWorkload : public BaseWorkload
+class RefSpaceToDepthWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefSplitterWorkload.hpp b/src/backends/reference/workloads/RefSplitterWorkload.hpp
index 28dc83db36..0b72bb9fdc 100644
--- a/src/backends/reference/workloads/RefSplitterWorkload.hpp
+++ b/src/backends/reference/workloads/RefSplitterWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include
 #include "Decoders.hpp"
 #include "Encoders.hpp"

@@ -13,10 +13,10 @@ namespace armnn
 {

-class RefSplitterWorkload : public BaseWorkload
+class RefSplitterWorkload : public RefBaseWorkload
 {
 public:
-    using BaseWorkload::BaseWorkload;
+    using RefBaseWorkload::RefBaseWorkload;
     void Execute() const override;
     void ExecuteAsync(WorkingMemDescriptor& workingMemDescriptor) override;
 private:
diff --git a/src/backends/reference/workloads/RefStackWorkload.cpp b/src/backends/reference/workloads/RefStackWorkload.cpp
index 3f7fd7bda2..f57e6e0f1e 100644
--- a/src/backends/reference/workloads/RefStackWorkload.cpp
+++ b/src/backends/reference/workloads/RefStackWorkload.cpp
@@ -15,7 +15,7 @@ namespace armnn

 RefStackWorkload::RefStackWorkload(const StackQueueDescriptor& descriptor,
                                    const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {}

 void RefStackWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefStackWorkload.hpp b/src/backends/reference/workloads/RefStackWorkload.hpp
index fbca11b2fa..19f4a7be67 100644
--- a/src/backends/reference/workloads/RefStackWorkload.hpp
+++ b/src/backends/reference/workloads/RefStackWorkload.hpp
@@ -5,13 +5,13 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"
 #include

 namespace armnn
 {

-class RefStackWorkload : public BaseWorkload
+class RefStackWorkload : public RefBaseWorkload
 {
 public:
     explicit RefStackWorkload(const StackQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
index 336a687d5c..41fe4c3a1c 100644
--- a/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.cpp
@@ -12,7 +12,7 @@ namespace armnn

 RefStridedSliceWorkload::RefStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor,
                                                  const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
 {}

 void RefStridedSliceWorkload::Execute() const
diff --git a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
index d2ffca7414..ea443cf80d 100644
--- a/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
+++ b/src/backends/reference/workloads/RefStridedSliceWorkload.hpp
@@ -5,12 +5,12 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"

 namespace armnn
 {

-class RefStridedSliceWorkload : public BaseWorkload
+class RefStridedSliceWorkload : public RefBaseWorkload
 {
 public:
     RefStridedSliceWorkload(const StridedSliceQueueDescriptor& descriptor, const WorkloadInfo& info);
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
index 8665648fe6..64a2d4c7b2 100644
--- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.cpp
@@ -15,7 +15,7 @@ namespace armnn

 RefTransposeConvolution2dWorkload::RefTransposeConvolution2dWorkload(
     const TransposeConvolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) :
-    BaseWorkload(descriptor, info)
+    RefBaseWorkload(descriptor, info)
 {
     // set up weights decoder
     m_Weights = std::make_unique(*(descriptor.m_Weight));
diff --git a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
index aa2546f420..6bcee9a838 100644
--- a/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
+++ b/src/backends/reference/workloads/RefTransposeConvolution2dWorkload.hpp
@@ -9,12 +9,12 @@
 #include "Encoders.hpp"

 #include
-#include
+#include "RefBaseWorkload.hpp"

 namespace armnn
 {

-class RefTransposeConvolution2dWorkload : public BaseWorkload
+class RefTransposeConvolution2dWorkload : public RefBaseWorkload
 {
 public:
     RefTransposeConvolution2dWorkload(const TransposeConvolution2dQueueDescriptor& descriptor,
diff --git a/src/backends/reference/workloads/RefTransposeWorkload.hpp b/src/backends/reference/workloads/RefTransposeWorkload.hpp
index bf59de7813..b8c3649745 100644
--- a/src/backends/reference/workloads/RefTransposeWorkload.hpp
+++ b/src/backends/reference/workloads/RefTransposeWorkload.hpp
@@ -5,7 +5,7 @@

 #pragma once

-#include
+#include "RefBaseWorkload.hpp"

 #include
diff --git a/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.cpp b/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.cpp
index 311fa18f91..d447a46b23 100644
--- a/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.cpp
+++ b/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.cpp
@@ -19,7 +19,7 @@ namespace armnn

 RefUnidirectionalSequenceLstmWorkload::RefUnidirectionalSequenceLstmWorkload(
     const UnidirectionalSequenceLstmQueueDescriptor& descriptor, const WorkloadInfo& info)
-    : BaseWorkload(descriptor, info)
+    : RefBaseWorkload(descriptor, info)
     , m_InputToInputWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToInputWeights))
     , m_InputToForgetWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToForgetWeights))
     , m_InputToCellWeightsTensor (AssignScopedTensorHandle(descriptor.m_InputToCellWeights))
diff --git a/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.hpp b/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.hpp
index d0c000f20d..7a91cee642 100644
--- a/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.hpp
+++ b/src/backends/reference/workloads/RefUnidirectionalSequenceLstmWorkload.hpp
@@ -7,7 +7,7 @@

 #include

-#include
+#include "RefBaseWorkload.hpp"
 #include

 #include "Encoders.hpp"

@@ -16,7 +16,7 @@ namespace armnn
 {

-class RefUnidirectionalSequenceLstmWorkload : public BaseWorkload
+class RefUnidirectionalSequenceLstmWorkload : public RefBaseWorkload
 {
 public:
     explicit RefUnidirectionalSequenceLstmWorkload(const UnidirectionalSequenceLstmQueueDescriptor& descriptor,
-- 
cgit v1.2.1
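
For context, the RefBaseWorkload that all of these hunks migrate to is added elsewhere in this same patch (src/backends/reference/workloads/RefBaseWorkload.hpp in the diffstat); its hunk is not quoted above. A rough sketch of what a wrapper of this shape plausibly looks like, with stand-in types so it compiles on its own — the tensor-handle-replacement methods are an assumption based on the similar additions to the Cl and Neon workload files in the diffstat, not text taken from this patch:

    #include <vector>

    // Stand-ins so the sketch is self-contained; in Arm NN these are real types.
    struct ITensorHandle {};
    struct WorkloadInfo {};
    struct FooQueueDescriptor
    {
        std::vector<ITensorHandle*> m_Inputs;
        std::vector<ITensorHandle*> m_Outputs;
    };

    template <typename QueueDescriptor>
    class BaseWorkload
    {
    public:
        BaseWorkload(const QueueDescriptor& descriptor, const WorkloadInfo&) : m_Data(descriptor) {}
        virtual ~BaseWorkload() = default;
        QueueDescriptor m_Data;
    };

    // Hypothetical shape of the new reference-backend base: forward construction
    // to BaseWorkload and centralise tensor-handle replacement for all Ref workloads.
    template <typename QueueDescriptor>
    class RefBaseWorkload : public BaseWorkload<QueueDescriptor>
    {
    public:
        RefBaseWorkload(const QueueDescriptor& descriptor, const WorkloadInfo& info)
            : BaseWorkload<QueueDescriptor>(descriptor, info)
        {}

        // Assumed API: swap an input/output tensor handle between executions.
        virtual void ReplaceInputTensorHandle(ITensorHandle* handle, unsigned int slot)
        {
            this->m_Data.m_Inputs[slot] = handle;
        }
        virtual void ReplaceOutputTensorHandle(ITensorHandle* handle, unsigned int slot)
        {
            this->m_Data.m_Outputs[slot] = handle;
        }
    };

With a base of that shape, the roughly forty per-workload edits above reduce to renaming the base class and its using-declarations; none of the workloads' Execute paths change.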