From d69c1c595375b904a7f19f562ac1d54098184b4e Mon Sep 17 00:00:00 2001 From: Cathal Corbett Date: Thu, 12 Jan 2023 11:17:03 +0000 Subject: Merge 'main' onto 'experimental/GpuFsa'. * I6c71be11e9b73694747b27fe9febab8d9669b4d4 Signed-off-by: Cathal Corbett Change-Id: Iccaf50e2484559979d801ee9d0e130e848554733 --- CMakeLists.txt | 6 +- README.md | 2 +- cmake/GlobalConfig.cmake | 48 +- delegate/CMakeLists.txt | 2 +- delegate/src/armnn_delegate.cpp | 25 +- delegate/src/test/BatchMatMulTest.cpp | 17 +- delegate/src/test/ControlTest.cpp | 4 +- delegate/src/test/Convolution2dTest.cpp | 14 +- docs/02_operator_list.dox | 9 +- include/armnn/backends/OptimizationViews.hpp | 5 +- include/armnnOnnxParser/IOnnxParser.hpp | 9 +- include/armnnUtils/TensorUtils.hpp | 5 + profiling/client/src/ProfilingService.cpp | 64 +-- python/pyarmnn/test/test_modeloption.py | 11 - samples/CMakeLists.txt | 4 +- scripts/get_compute_library.sh | 4 +- shim/sl/canonical/ArmnnDevice.hpp | 8 +- shim/sl/canonical/ArmnnDriver.hpp | 84 +-- src/armnn/AsyncExecutionCallback.cpp | 4 +- src/armnn/AsyncExecutionCallback.hpp | 3 +- src/armnn/Graph.cpp | 18 +- src/armnn/LoadedNetwork.cpp | 2 + src/armnn/Network.cpp | 14 +- src/armnn/Runtime.cpp | 25 +- src/armnn/Runtime.hpp | 6 +- src/armnn/TypesUtils.cpp | 6 +- src/armnn/test/SubgraphViewTests.cpp | 29 + src/armnnOnnxParser/OnnxParser.cpp | 51 +- src/armnnOnnxParser/OnnxParser.hpp | 10 +- src/armnnSerializer/CMakeLists.txt | 25 +- src/armnnTestUtils/CMakeLists.txt | 6 +- src/armnnTfLiteParser/CMakeLists.txt | 8 +- src/armnnTfLiteParser/TfLiteParser.cpp | 106 ++-- src/armnnTfLiteParser/TfLiteParser.hpp | 8 + src/armnnTfLiteParser/test/Conv2D.cpp | 2 +- src/armnnUtils/TensorUtils.cpp | 91 ++- src/armnnUtils/test/TensorUtilsTest.cpp | 173 +++++- src/backends/aclCommon/ArmComputeTuningUtils.cpp | 60 ++ src/backends/aclCommon/ArmComputeTuningUtils.hpp | 84 +++ src/backends/aclCommon/CMakeLists.txt | 5 +- src/backends/aclCommon/IClTensorHandle.hpp | 22 + src/backends/aclCommon/common.mk | 1 + src/backends/backendsCommon/CMakeLists.txt | 4 +- .../backendsCommon/test/BackendProfilingTests.cpp | 7 +- src/backends/backendsCommon/test/CMakeLists.txt | 4 +- .../test/MultiplicationEndToEndTestImpl.hpp | 96 ++++ .../backendsCommon/test/OptimizationViewsTests.cpp | 13 +- .../test/SubtractionEndToEndTestImpl.hpp | 96 ++++ src/backends/cl/CMakeLists.txt | 3 +- src/backends/cl/ClBackendContext.cpp | 133 +---- src/backends/cl/ClBackendContext.hpp | 5 +- src/backends/cl/ClContextControl.cpp | 53 +- src/backends/cl/ClContextControl.hpp | 22 +- src/backends/cl/ClImportTensorHandle.hpp | 4 +- src/backends/cl/ClLayerSupport.cpp | 21 + src/backends/cl/ClLayerSupport.hpp | 6 + src/backends/cl/ClTensorHandle.hpp | 4 +- src/backends/cl/ClTensorHandleFactory.cpp | 6 +- src/backends/cl/ClTensorHandleFactory.hpp | 6 +- src/backends/cl/ClWorkloadFactory.cpp | 5 + src/backends/cl/IClTensorHandle.hpp | 22 - src/backends/cl/backend.mk | 1 + src/backends/cl/test/CMakeLists.txt | 4 +- src/backends/cl/test/ClDefaultAllocatorTests.cpp | 194 +++++++ src/backends/cl/test/ClLayerTests.cpp | 23 + src/backends/cl/test/DefaultAllocatorTests.cpp | 194 ------- src/backends/cl/workloads/CMakeLists.txt | 2 + .../cl/workloads/ClBatchMatMulWorkload.cpp | 203 +++++++ .../cl/workloads/ClBatchMatMulWorkload.hpp | 41 ++ src/backends/cl/workloads/ClWorkloads.hpp | 1 + src/backends/dynamic/reference/CMakeLists.txt | 6 +- src/backends/neon/NeonLayerSupport.cpp | 608 ++++++++++++--------- src/backends/tosaCommon/TosaMappings.cpp | 9 +- 
.../operatorMappings/AdditionOperator.cpp | 72 --- .../operatorMappings/AdditionOperator.hpp | 20 - .../tosaCommon/operatorMappings/CMakeLists.txt | 6 +- .../operatorMappings/ElementwiseBinaryOperator.cpp | 103 ++++ .../operatorMappings/ElementwiseBinaryOperator.hpp | 20 + .../operatorMappings/TosaCommonOperators.hpp | 7 +- .../operatorMappings/TransposeOperator.cpp | 65 +++ .../operatorMappings/TransposeOperator.hpp | 20 + .../tosaCommon/test/OneToOneMappingTests.cpp | 106 ++++ src/backends/tosaCommon/test/TosaTestUtils.hpp | 8 + src/backends/tosaReference/TosaRefLayerSupport.cpp | 4 +- .../tosaReference/test/TosaRefEndToEndTests.cpp | 38 ++ .../test/TosaRefLayerSupportTests.cpp | 130 +++++ tests/CMakeLists.txt | 44 +- tests/ExecuteNetwork/ArmNNExecutor.cpp | 4 +- tests/ExecuteNetwork/ExecuteNetworkParams.cpp | 75 +-- .../ExecuteNetworkProgramOptions.cpp | 8 +- tests/InferenceModel.hpp | 14 +- 91 files changed, 2534 insertions(+), 1086 deletions(-) create mode 100644 src/backends/aclCommon/ArmComputeTuningUtils.cpp create mode 100644 src/backends/aclCommon/ArmComputeTuningUtils.hpp create mode 100644 src/backends/aclCommon/IClTensorHandle.hpp create mode 100644 src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp create mode 100644 src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp delete mode 100644 src/backends/cl/IClTensorHandle.hpp create mode 100644 src/backends/cl/test/ClDefaultAllocatorTests.cpp delete mode 100644 src/backends/cl/test/DefaultAllocatorTests.cpp create mode 100644 src/backends/cl/workloads/ClBatchMatMulWorkload.cpp create mode 100644 src/backends/cl/workloads/ClBatchMatMulWorkload.hpp delete mode 100644 src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp delete mode 100644 src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp create mode 100644 src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp create mode 100644 src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp create mode 100644 src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp create mode 100644 src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp diff --git a/CMakeLists.txt b/CMakeLists.txt index 51c4cf2c97..476e080442 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2018-2023 Arm Ltd and Contributors. All rights reserved. # Copyright 2020 NXP # SPDX-License-Identifier: MIT # @@ -446,7 +446,7 @@ if (BUILD_ARMNN_TFLITE_DELEGATE) add_definitions(-DARMNN_TFLITE_DELEGATE) endif() -if(BUILD_BARE_METAL) +if(BUILD_BARE_METAL OR EXECUTE_NETWORK_STATIC) add_library_ex(armnn STATIC ${armnn_sources}) else() if (BUILD_SHARED_LIBS) @@ -484,7 +484,7 @@ target_link_libraries(armnn PUBLIC armnnUtils) target_link_libraries(armnn PUBLIC pipeCommon) target_link_libraries(armnn PUBLIC pipeClient) -if(NOT BUILD_BARE_METAL) +if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC) target_link_libraries(armnn PUBLIC ${CMAKE_DL_LIBS}) endif() diff --git a/README.md b/README.md index 5900625329..52e109e9b8 100644 --- a/README.md +++ b/README.md @@ -15,7 +15,7 @@ # Arm NN -**_Arm NN_** is the **most performant** machine learning (ML) inference engine for Android and Linux, accelerating ML +**Arm NN** is the **most performant** machine learning (ML) inference engine for Android and Linux, accelerating ML on **Arm Cortex-A CPUs and Arm Mali GPUs**. 
This ML inference engine is an open source SDK which bridges the gap between existing neural network frameworks and power-efficient Arm IP. diff --git a/cmake/GlobalConfig.cmake b/cmake/GlobalConfig.cmake index bc6cd32385..bc9117f702 100644 --- a/cmake/GlobalConfig.cmake +++ b/cmake/GlobalConfig.cmake @@ -1,5 +1,5 @@ # -# Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. # Copyright 2020 NXP # SPDX-License-Identifier: MIT # @@ -43,6 +43,19 @@ option(BUILD_BARE_METAL "Disable features requiring operating system support" OF option(BUILD_SHARED_LIBS "Determines if Armnn will be built statically or dynamically. This is an experimental feature and not fully supported. Only the ArmNN core and the Delegate can be built statically." ON) +option(EXECUTE_NETWORK_STATIC " This is a limited experimental build that is entirely static. + It currently only supports being set by changing the current CMake default options like so: + BUILD_TF_LITE_PARSER=1/0 + BUILD_ARMNN_SERIALIZER=1/0 + ARMCOMPUTENEON=1/0 + ARMNNREF=1/0 + ARMCOMPUTECL=0 + BUILD_ONNX_PARSER=0 + BUILD_ARMNN_TFLITE_DELEGATE=0 + BUILD_TIMELINE_DECODER=0 + BUILD_BASE_PIPE_SERVER=0 + BUILD_UNIT_TESTS=0 + BUILD_GATORD_MOCK=0" OFF) include(SelectLibraryConfigurations) @@ -138,6 +151,21 @@ set(CMAKE_MODULE_PATH ${CMAKE_CURRENT_SOURCE_DIR}/cmake/modules ${CMAKE_MODULE_P include(CMakeFindDependencyMacro) + +if(EXECUTE_NETWORK_STATIC) + add_definitions(-DARMNN_DISABLE_SOCKETS + -DBUILD_SHARED_LIBS=0 + -DARMNN_EXECUTE_NETWORK_STATIC) +endif() + +if(BUILD_BARE_METAL) + add_definitions(-DARMNN_BUILD_BARE_METAL + -DARMNN_DISABLE_FILESYSTEM + -DARMNN_DISABLE_PROCESSES + -DARMNN_DISABLE_THREADS + -DARMNN_DISABLE_SOCKETS) +endif() + if (NOT BUILD_PIPE_ONLY) # cxxopts (Alternative to boost::program_options) find_path(CXXOPTS_INCLUDE cxxopts/cxxopts.hpp PATHS third-party NO_CMAKE_FIND_ROOT_PATH) @@ -150,11 +178,19 @@ if (NOT BUILD_PIPE_ONLY) include_directories(SYSTEM "${GHC_INCLUDE}") endif() +if(NOT BUILD_SHARED_LIBS) + set(CMAKE_FIND_LIBRARY_SUFFIXES .a .lib) +endif() + # pthread if (NOT BUILD_BARE_METAL) find_package(Threads) endif() +if (EXECUTE_NETWORK_STATIC) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -static-libstdc++ -static-libgcc -static -pthread") +endif() + # Favour the protobuf passed on command line if(BUILD_ONNX_PARSER) find_library(PROTOBUF_LIBRARY_DEBUG NAMES "protobufd" @@ -389,7 +425,7 @@ if(PROFILING_BACKEND_STREAMLINE) add_definitions(-DARMNN_STREAMLINE_ENABLED) endif() -if(NOT BUILD_BARE_METAL) +if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC) if(HEAP_PROFILING OR LEAK_CHECKING) find_path(HEAP_PROFILER_INCLUDE gperftools/heap-profiler.h PATHS ${GPERFTOOLS_ROOT}/include @@ -446,13 +482,5 @@ if(BUILD_PYTHON_WHL OR BUILD_PYTHON_SRC) endif() endif() -if(BUILD_BARE_METAL) - add_definitions(-DARMNN_BUILD_BARE_METAL - -DARMNN_DISABLE_FILESYSTEM - -DARMNN_DISABLE_PROCESSES - -DARMNN_DISABLE_THREADS - -DARMNN_DISABLE_SOCKETS) -endif() - # ArmNN source files required for all build options include_directories(SYSTEM third-party) diff --git a/delegate/CMakeLists.txt b/delegate/CMakeLists.txt index fe5c962321..d044ed991d 100644 --- a/delegate/CMakeLists.txt +++ b/delegate/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. 
# SPDX-License-Identifier: MIT # diff --git a/delegate/src/armnn_delegate.cpp b/delegate/src/armnn_delegate.cpp index 4d95522dbd..aa6c1be37d 100644 --- a/delegate/src/armnn_delegate.cpp +++ b/delegate/src/armnn_delegate.cpp @@ -137,13 +137,12 @@ Delegate::Delegate(armnnDelegate::DelegateOptions options) m_Options(std::move(options)) { // Configures logging for ARMNN - if (options.IsLoggingEnabled()) + if (m_Options.IsLoggingEnabled()) { - armnn::ConfigureLogging(true, true, options.GetLoggingSeverity()); + armnn::ConfigureLogging(true, true, m_Options.GetLoggingSeverity()); } - // Create ArmNN Runtime - m_Runtime = armnn::IRuntime::Create(options.GetRuntimeOptions()); + m_Runtime = armnn::IRuntime::Create(m_Options.GetRuntimeOptions()); std::vector backends; if (m_Runtime) @@ -206,8 +205,20 @@ TfLiteIntArray* Delegate::IdentifyOperatorsToDelegate(TfLiteContext* tfLiteConte continue; } - if (ArmnnSubgraph::VisitNode( - delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex) != kTfLiteOk) + TfLiteStatus visitStatus; + + try + { + visitStatus = ArmnnSubgraph::VisitNode( + delegateData, tfLiteContext, tfLiteRegistration, tfLiteNode, nodeIndex); + } + catch(std::exception& ex) + { + ARMNN_LOG(error) << "ArmNN Failed to visit node with error: " << ex.what(); + visitStatus = kTfLiteError; + } + + if ( visitStatus != kTfLiteOk) { // node is not supported by ArmNN unsupportedOperators.insert(tfLiteRegistration->builtin_code); @@ -377,7 +388,7 @@ ArmnnSubgraph* ArmnnSubgraph::Create(TfLiteContext* tfLiteContext, ARMNN_LOG(info) << "Optimize ArmnnSubgraph time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(optimizeStartTime).count() << " ms"; } - catch (std::exception &ex) + catch (std::exception& ex) { std::stringstream exMessage; exMessage << "TfLiteArmnnDelegate: Exception (" << ex.what() << ") caught from optimize."; diff --git a/delegate/src/test/BatchMatMulTest.cpp b/delegate/src/test/BatchMatMulTest.cpp index e5cb976c45..d13d8dcf43 100644 --- a/delegate/src/test/BatchMatMulTest.cpp +++ b/delegate/src/test/BatchMatMulTest.cpp @@ -268,7 +268,7 @@ namespace armnnDelegate { // Set input data std::vector LHSInputShape { 2,2,2 }; - std::vector RHSInputShape { 1,2,2 }; + std::vector RHSInputShape { 2,2 }; std::vector outputShape { 2,2,2 }; std::vector LHSInputValues = { 1, 2, @@ -670,4 +670,19 @@ namespace armnnDelegate BatchMatMul2DFp32SimpleAdjointTest(backends); } } + TEST_SUITE("BATCH_MATMUL_GpuAccTests") + { + TEST_CASE("BATCH_MATMUL_Fp32_GpuAccTests") + { + std::vector backends = {armnn::Compute::GpuAcc}; + BatchMatMul2DFp32SimpleTest (backends); + BatchMatMul3DFp32SimpleTest (backends); + BatchMatMul3DFp32BatchTest (backends); + BatchMatMul3DFp32BroadcastTest (backends); + BatchMatMul3D2DFp32BroadcastTest (backends); + BatchMatMul2DFp32TinyTest (backends); + BatchMatMulNonSquareFp32Test (backends); + BatchMatMul2DFp32SimpleAdjointTest(backends); + } + } } diff --git a/delegate/src/test/ControlTest.cpp b/delegate/src/test/ControlTest.cpp index 43491be982..18bbc5a9a8 100644 --- a/delegate/src/test/ControlTest.cpp +++ b/delegate/src/test/ControlTest.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020,2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -286,7 +286,7 @@ TEST_CASE ("Concatenation_Three_Inputs_GpuAcc_Test") ConcatThreeInputsTest(backends); } -TEST_CASE ("Concatenation_Axis_CpuRef_Test") +TEST_CASE ("Concatenation_Axis_GpuAcc_Test") { std::vector backends = {armnn::Compute::GpuAcc}; ConcatAxisTest(backends); diff --git a/delegate/src/test/Convolution2dTest.cpp b/delegate/src/test/Convolution2dTest.cpp index b2e5fad8df..10510792a1 100644 --- a/delegate/src/test/Convolution2dTest.cpp +++ b/delegate/src/test/Convolution2dTest.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020,2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -438,13 +438,13 @@ void TransposeConvFp32Test(std::vector& backends) TEST_SUITE("TransposeConv_CpuRef_Test") { -TEST_CASE ("TransposeConv_Fp32_Test") +TEST_CASE ("TransposeConv_CpuRef_Fp32_Test") { std::vector backends = {armnn::Compute::CpuRef}; TransposeConvFp32Test(backends); } -TEST_CASE ("TransposeConv_Int8_Test") +TEST_CASE ("TransposeConv_CpuRef_Int8_Test") { std::vector backends = {armnn::Compute::CpuRef}; TransposeConvInt8Test(backends); @@ -455,13 +455,13 @@ TEST_CASE ("TransposeConv_Int8_Test") TEST_SUITE("TransposeConv_CpuAcc_Test") { -TEST_CASE ("TransposeConv_Fp32_Test") +TEST_CASE ("TransposeConv_CpuAcc_Fp32_Test") { std::vector backends = {armnn::Compute::CpuAcc}; TransposeConvFp32Test(backends); } -TEST_CASE ("TransposeConv_Int8_Test") +TEST_CASE ("TransposeConv_CpuAcc_Int8_Test") { std::vector backends = {armnn::Compute::CpuAcc}; TransposeConvInt8Test(backends); @@ -472,13 +472,13 @@ TEST_CASE ("TransposeConv_Int8_Test") TEST_SUITE("TransposeConv_GpuAcc_Test") { -TEST_CASE ("TransposeConv_Fp32_Test") +TEST_CASE ("TransposeConv_GpuAcc_Fp32_Test") { std::vector backends = {armnn::Compute::GpuAcc}; TransposeConvFp32Test(backends); } -TEST_CASE ("TransposeConv_Int8_Test") +TEST_CASE ("TransposeConv_GpuAcc_Int8_Test") { std::vector backends = {armnn::Compute::GpuAcc}; TransposeConvInt8Test(backends); diff --git a/docs/02_operator_list.dox b/docs/02_operator_list.dox index d9a3d2c83b..007d4f5e35 100644 --- a/docs/02_operator_list.dox +++ b/docs/02_operator_list.dox @@ -304,12 +304,13 @@ where N = batches, C = channels, H = height, W = width GpuAcc
     <td>
         <ul>
-         <li>N/A
+         <li>All
         </ul>
     <td>
-        <ul>
-         <li>N/A
-        </ul>
+      <table>
+       <tr><th>
+       <tr><td>FLOAT32
+      </table>
BatchNormalizationLayer Layer to perform batch normalization. diff --git a/include/armnn/backends/OptimizationViews.hpp b/include/armnn/backends/OptimizationViews.hpp index 0357adaa27..59d71ca560 100644 --- a/include/armnn/backends/OptimizationViews.hpp +++ b/include/armnn/backends/OptimizationViews.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2019, 2021-2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -64,7 +64,8 @@ public: ARMNN_DEPRECATED_MSG_REMOVAL_DATE("GetGraph is deprecated. Use GetINetwork instead.", "23.08") Graph& GetGraph() { return m_Graph; } - INetworkPtr& GetINetwork() { return m_INetwork; } + INetwork* GetINetwork() { return m_INetwork.get(); } + INetwork& GetINetworkRef() { return *m_INetwork; } private: Substitutions m_SuccesfulOptimizations; ///< Proposed substitutions from successful optimizations diff --git a/include/armnnOnnxParser/IOnnxParser.hpp b/include/armnnOnnxParser/IOnnxParser.hpp index ba7fc83f93..89c22c03de 100644 --- a/include/armnnOnnxParser/IOnnxParser.hpp +++ b/include/armnnOnnxParser/IOnnxParser.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017,2022 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once @@ -27,6 +27,13 @@ public: static IOnnxParserPtr Create(); static void Destroy(IOnnxParser* parser); + /// Create the network from a protobuf binary vector + armnn::INetworkPtr CreateNetworkFromBinary(const std::vector& binaryContent); + + /// Create the network from a protobuf binary vector, with inputShapes specified + armnn::INetworkPtr CreateNetworkFromBinary(const std::vector& binaryContent, + const std::map& inputShapes); + /// Create the network from a protobuf binary file on disk armnn::INetworkPtr CreateNetworkFromBinaryFile(const char* graphFile); diff --git a/include/armnnUtils/TensorUtils.hpp b/include/armnnUtils/TensorUtils.hpp index f7f20bd065..2d6ec2fea4 100644 --- a/include/armnnUtils/TensorUtils.hpp +++ b/include/armnnUtils/TensorUtils.hpp @@ -55,4 +55,9 @@ unsigned int GetNumElementsAfter(const armnn::TensorShape& shape, unsigned int a std::pair> GetPerAxisParams(const armnn::TensorInfo& info); +template +std::unique_ptr ToFloatArray(const std::vector& data, const armnn::TensorInfo& tensorInfo); + +std::unique_ptr ToFloatArray(const std::vector& data, const armnn::TensorInfo& tensorInfo); + } // namespace armnnUtils diff --git a/profiling/client/src/ProfilingService.cpp b/profiling/client/src/ProfilingService.cpp index b8e034809b..37f0de9f6f 100644 --- a/profiling/client/src/ProfilingService.cpp +++ b/profiling/client/src/ProfilingService.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2019 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2019, 2022-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -10,7 +10,7 @@ #include #include -#if defined(ARMNN_BUILD_BARE_METAL) +#if defined(ARMNN_BUILD_BARE_METAL) || defined(ARMNN_EXECUTE_NETWORK_STATIC) #include #endif @@ -26,7 +26,7 @@ namespace pipe void ProfilingService::ResetExternalProfilingOptions(const arm::pipe::ProfilingOptions& options, bool resetProfilingService) { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // Update the profiling options m_Options = options; m_TimelineReporting = options.m_TimelineEnabled; @@ -41,23 +41,23 @@ void ProfilingService::ResetExternalProfilingOptions(const arm::pipe::ProfilingO #else IgnoreUnused(options); IgnoreUnused(resetProfilingService); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL || ARMNN_EXECUTE_NETWORK_STATIC } bool ProfilingService::IsProfilingEnabled() const { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) return m_Options.m_EnableProfiling; #else return false; -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } ProfilingState ProfilingService::ConfigureProfilingService( const ProfilingOptions& options, bool resetProfilingService) { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) ResetExternalProfilingOptions(options, resetProfilingService); ProfilingState currentState = m_StateMachine.GetCurrentState(); if (options.m_EnableProfiling) @@ -106,12 +106,12 @@ ProfilingState ProfilingService::ConfigureProfilingService( IgnoreUnused(options); IgnoreUnused(resetProfilingService); return ProfilingState::Uninitialised; -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::Update() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) if (!m_Options.m_EnableProfiling) { // Don't run if profiling is disabled @@ -189,12 +189,12 @@ void ProfilingService::Update() throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}", static_cast(currentState))); } -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::Disconnect() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) ProfilingState currentState = m_StateMachine.GetCurrentState(); switch (currentState) { @@ -211,7 +211,7 @@ void ProfilingService::Disconnect() throw arm::pipe::ProfilingException(fmt::format("Unknown profiling service state: {}", static_cast(currentState))); } -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } // Store a profiling context returned from a backend that support profiling, and register its counters @@ -219,7 +219,7 @@ void ProfilingService::AddBackendProfilingContext( const std::string& backendId, std::shared_ptr profilingContext) { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) ARM_PIPE_ASSERT(profilingContext != nullptr); // Register the backend counters m_MaxGlobalCounterId = profilingContext->RegisterCounters(m_MaxGlobalCounterId); @@ -227,7 +227,7 @@ void ProfilingService::AddBackendProfilingContext( #else IgnoreUnused(backendId); IgnoreUnused(profilingContext); -#endif // 
ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } const ICounterDirectory& ProfilingService::GetCounterDirectory() const { @@ -343,14 +343,14 @@ std::unique_ptr ProfilingService::GetSendTimelinePacket() c void ProfilingService::Initialize() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) m_Initialiser.InitialiseProfilingService(*this); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::InitializeCounterValue(uint16_t counterUid) { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // Increase the size of the counter index if necessary if (counterUid >= m_CounterIndex.size()) { @@ -365,12 +365,12 @@ void ProfilingService::InitializeCounterValue(uint16_t counterUid) m_CounterIndex.at(counterUid) = counterValuePtr; #else IgnoreUnused(counterUid); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::Reset() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // Stop the profiling service... Stop(); @@ -384,12 +384,12 @@ void ProfilingService::Reset() // ...finally reset the profiling state machine m_StateMachine.Reset(); m_BackendProfilingContexts.clear(); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::Stop() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) { // only lock when we are updating the inference completed variable std::unique_lock lck(m_ServiceActiveMutex); m_ServiceActive = false; @@ -411,24 +411,24 @@ void ProfilingService::Stop() // ...then move to the "NotConnected" state m_StateMachine.TransitionToState(ProfilingState::NotConnected); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } inline void ProfilingService::CheckCounterUid(uint16_t counterUid) const { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) if (!IsCounterRegistered(counterUid)) { throw arm::pipe::InvalidArgumentException(fmt::format("Counter UID {} is not registered", counterUid)); } #else IgnoreUnused(counterUid); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::NotifyBackendsForTimelineReporting() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) BackendProfilingContext::iterator it = m_BackendProfilingContexts.begin(); while (it != m_BackendProfilingContexts.end()) { @@ -437,23 +437,23 @@ void ProfilingService::NotifyBackendsForTimelineReporting() // Increment the Iterator to point to next entry it++; } -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::NotifyProfilingServiceActive() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) { // only lock when we are updating the inference completed variable std::unique_lock lck(m_ServiceActiveMutex); m_ServiceActive = true; } m_ServiceActiveConditionVariable.notify_one(); -#endif // ARMNN_BUILD_BARE_METAL +#endif // 
ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } void ProfilingService::WaitForProfilingServiceActivation(unsigned int timeout) { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) std::unique_lock lck(m_ServiceActiveMutex); auto start = std::chrono::high_resolution_clock::now(); @@ -475,14 +475,14 @@ void ProfilingService::WaitForProfilingServiceActivation(unsigned int timeout) } #else IgnoreUnused(timeout); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } ProfilingService::~ProfilingService() { -#if !defined(ARMNN_BUILD_BARE_METAL) +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) Stop(); -#endif // ARMNN_BUILD_BARE_METAL +#endif // ARMNN_BUILD_BARE_METAL && ARMNN_EXECUTE_NETWORK_STATIC } } // namespace pipe diff --git a/python/pyarmnn/test/test_modeloption.py b/python/pyarmnn/test/test_modeloption.py index a47d2da358..4773d90e08 100644 --- a/python/pyarmnn/test/test_modeloption.py +++ b/python/pyarmnn/test/test_modeloption.py @@ -118,17 +118,6 @@ def test_optimizer_options_fail(): assert "Wrong number or type of arguments" in str(err.value) - with pytest.raises(RuntimeError) as err: - OptimizerOptions(True, - False, - True, - ShapeInferenceMethod_InferAndValidate, - True, - [a], - True) - - assert "BFloat16 and Float16 optimization cannot be enabled at the same time" in str(err.value) - with pytest.raises(TypeError) as err: oo = OptimizerOptions(True, False, diff --git a/samples/CMakeLists.txt b/samples/CMakeLists.txt index 31ff5086e1..e5ee711627 100644 --- a/samples/CMakeLists.txt +++ b/samples/CMakeLists.txt @@ -1,8 +1,8 @@ # -# Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2018-2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # -if (NOT BUILD_BARE_METAL) +if (NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC) if(BUILD_SAMPLE_APP AND ARMNNREF) add_executable(SimpleSample SimpleSample.cpp) target_link_libraries(SimpleSample armnn ${CMAKE_THREAD_LIBS_INIT}) diff --git a/scripts/get_compute_library.sh b/scripts/get_compute_library.sh index 53e13d761d..14f71a452c 100755 --- a/scripts/get_compute_library.sh +++ b/scripts/get_compute_library.sh @@ -1,6 +1,6 @@ #!/bin/bash # -# Copyright © 2018-2022 Arm Ltd. All rights reserved. +# Copyright © 2018-2023 Arm Ltd. All rights reserved. # SPDX-License-Identifier: MIT # @@ -10,7 +10,7 @@ CMD=$( basename "$0" ) #DEFAULT_CLFRAMEWORKREVISION="branches/arm_compute_22_11" # Release 22.11 # # For pinning to a revision use this: -DEFAULT_CLFRAMEWORKREVISION="a0ae8d2e6c57fd95c0edaf659b9df8b8c540d051" #8792: Optimize Transposed Convolution for CL backend (Quantized) | https://review.mlplatform.org/c/ml/ComputeLibrary/+/8792 +DEFAULT_CLFRAMEWORKREVISION="f800adf185e966b16385f65b9c7250766949dbe4" #8881: Implement dynamic fusion reshape operator | https://review.mlplatform.org/c/ml/ComputeLibrary/+/8881 usage() { echo -e "get_compute_library.sh: Clones the Arm Compute Library (ACL) repo from the ML Platform server and checks out diff --git a/shim/sl/canonical/ArmnnDevice.hpp b/shim/sl/canonical/ArmnnDevice.hpp index 9597bfc013..93109696f7 100644 --- a/shim/sl/canonical/ArmnnDevice.hpp +++ b/shim/sl/canonical/ArmnnDevice.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -14,11 +14,11 @@ namespace armnn_driver class ArmnnDevice { +friend class ArmnnDriver; -protected: +public: ArmnnDevice(DriverOptions options); - virtual ~ArmnnDevice() {} - + ~ArmnnDevice() {} protected: armnn::IRuntimePtr m_Runtime; armnn::IGpuAccTunedParametersPtr m_ClTunedParameters; diff --git a/shim/sl/canonical/ArmnnDriver.hpp b/shim/sl/canonical/ArmnnDriver.hpp index bf5565a219..6cb06604d2 100644 --- a/shim/sl/canonical/ArmnnDriver.hpp +++ b/shim/sl/canonical/ArmnnDriver.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -28,18 +28,26 @@ namespace armnn_driver //using namespace android::nn; -class ArmnnDriver : public ArmnnDevice, public IDevice +class ArmnnDriver : public IDevice { +private: + std::unique_ptr m_Device; public: - ArmnnDriver(DriverOptions options) - : ArmnnDevice(std::move(options)) - { - VLOG(DRIVER) << "ArmnnDriver::ArmnnDriver()"; - } - ~ArmnnDriver() { - VLOG(DRIVER) << "ArmnnDriver::~ArmnnDriver()"; + try + { + VLOG(DRIVER) << "ArmnnDriver::ArmnnDriver()"; + m_Device = std::unique_ptr(new ArmnnDevice(std::move(options))); + } + catch (armnn::InvalidArgumentException& ex) + { + VLOG(DRIVER) << "ArmnnDevice failed to initialise: " << ex.what(); + } + catch (...) + { + VLOG(DRIVER) << "ArmnnDevice failed to initialise with an unknown error"; + } } public: @@ -80,17 +88,18 @@ public: const Capabilities& getCapabilities() const override { VLOG(DRIVER) << "ArmnnDriver::GetCapabilities()"; - return ArmnnDriverImpl::GetCapabilities(m_Runtime); + return ArmnnDriverImpl::GetCapabilities(m_Device->m_Runtime); } std::pair getNumberOfCacheFilesNeeded() const override { VLOG(DRIVER) << "ArmnnDriver::getNumberOfCacheFilesNeeded()"; unsigned int numberOfCachedModelFiles = 0; - for (auto& backend : m_Options.GetBackends()) + for (auto& backend : m_Device->m_Options.GetBackends()) { numberOfCachedModelFiles += GetNumberOfCacheFiles(backend); - VLOG(DRIVER) << "ArmnnDriver::getNumberOfCacheFilesNeeded() = " << std::to_string(numberOfCachedModelFiles); + VLOG(DRIVER) << "ArmnnDriver::getNumberOfCacheFilesNeeded() = " + << std::to_string(numberOfCachedModelFiles); } return std::make_pair(numberOfCachedModelFiles, 1ul); } @@ -104,22 +113,26 @@ public: GeneralResult> getSupportedOperations(const Model& model) const override { VLOG(DRIVER) << "ArmnnDriver::getSupportedOperations()"; + if (m_Device.get() == nullptr) + { + return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device Unavailable!"; + } std::stringstream ss; ss << "ArmnnDriverImpl::getSupportedOperations()"; std::string fileName; std::string timestamp; - if (!m_Options.GetRequestInputsAndOutputsDumpDir().empty()) + if (!m_Device->m_Options.GetRequestInputsAndOutputsDumpDir().empty()) { ss << " : " - << m_Options.GetRequestInputsAndOutputsDumpDir() + << m_Device->m_Options.GetRequestInputsAndOutputsDumpDir() << "/" // << GetFileTimestamp() << "_getSupportedOperations.txt"; } VLOG(DRIVER) << ss.str().c_str(); - if (!m_Options.GetRequestInputsAndOutputsDumpDir().empty()) + if (!m_Device->m_Options.GetRequestInputsAndOutputsDumpDir().empty()) { //dump the marker file std::ofstream fileStream; @@ -133,7 +146,7 @@ public: } std::vector result; - if (!m_Runtime) + if (!m_Device->m_Runtime) { return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device Unavailable!"; } @@ -145,9 +158,9 @@ public: } // Attempt to convert the model to an ArmNN 
input network (INetwork). - ModelToINetworkTransformer modelConverter(m_Options.GetBackends(), + ModelToINetworkTransformer modelConverter(m_Device->m_Options.GetBackends(), model, - m_Options.GetForcedUnsupportedOperations()); + m_Device->m_Options.GetForcedUnsupportedOperations()); if (modelConverter.GetConversionResult() != ConversionResult::Success && modelConverter.GetConversionResult() != ConversionResult::UnsupportedFeature) @@ -179,6 +192,10 @@ public: { VLOG(DRIVER) << "ArmnnDriver::prepareModel()"; + if (m_Device.get() == nullptr) + { + return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device Unavailable!"; + } // Validate arguments. if (const auto result = validate(model); !result.ok()) { return NN_ERROR(ErrorStatus::INVALID_ARGUMENT) << "Invalid Model: " << result.error(); @@ -196,15 +213,15 @@ public: return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT); } - return ArmnnDriverImpl::PrepareArmnnModel(m_Runtime, - m_ClTunedParameters, - m_Options, - model, - modelCache, - dataCache, - token, - model.relaxComputationFloat32toFloat16 && m_Options.GetFp16Enabled(), - priority); + return ArmnnDriverImpl::PrepareArmnnModel(m_Device->m_Runtime, + m_Device->m_ClTunedParameters, + m_Device->m_Options, + model, + modelCache, + dataCache, + token, + model.relaxComputationFloat32toFloat16 && m_Device->m_Options.GetFp16Enabled(), + priority); } GeneralResult prepareModelFromCache(OptionalTimePoint deadline, @@ -213,20 +230,23 @@ public: const CacheToken& token) const override { VLOG(DRIVER) << "ArmnnDriver::prepareModelFromCache()"; - + if (m_Device.get() == nullptr) + { + return NN_ERROR(ErrorStatus::DEVICE_UNAVAILABLE) << "Device Unavailable!"; + } // Check if deadline has passed. if (hasDeadlinePassed(deadline)) { return NN_ERROR(ErrorStatus::MISSED_DEADLINE_PERSISTENT); } return ArmnnDriverImpl::PrepareArmnnModelFromCache( - m_Runtime, - m_ClTunedParameters, - m_Options, + m_Device->m_Runtime, + m_Device->m_ClTunedParameters, + m_Device->m_Options, modelCache, dataCache, token, - m_Options.GetFp16Enabled()); + m_Device->m_Options.GetFp16Enabled()); } GeneralResult allocate(const BufferDesc&, diff --git a/src/armnn/AsyncExecutionCallback.cpp b/src/armnn/AsyncExecutionCallback.cpp index 5b87927af2..73ce66b7fb 100644 --- a/src/armnn/AsyncExecutionCallback.cpp +++ b/src/armnn/AsyncExecutionCallback.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -11,6 +11,8 @@ namespace armnn namespace experimental { +InferenceId AsyncExecutionCallback::nextID = 0u; + void AsyncExecutionCallback::Notify(armnn::Status status, InferenceTimingPair timeTaken) { { diff --git a/src/armnn/AsyncExecutionCallback.hpp b/src/armnn/AsyncExecutionCallback.hpp index 9eab06b4fa..d48f80737d 100644 --- a/src/armnn/AsyncExecutionCallback.hpp +++ b/src/armnn/AsyncExecutionCallback.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2021-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -68,7 +68,6 @@ private: armnn::Status m_Status = Status::Failure; InferenceId m_InferenceId; }; -InferenceId AsyncExecutionCallback::nextID = 0u; // Manager to create and monitor AsyncExecutionCallbacks // GetNewCallback will create a callback for use in Threadpool::Schedule diff --git a/src/armnn/Graph.cpp b/src/armnn/Graph.cpp index b5769f75f3..e5d123830c 100644 --- a/src/armnn/Graph.cpp +++ b/src/armnn/Graph.cpp @@ -497,13 +497,19 @@ void Graph::ReplaceSubgraphConnections(const SubgraphView& subgraph, const Subgr IInputSlot* subgraphInputSlot = subgraphInputSlots.at(inputSlotIdx); ARMNN_ASSERT(subgraphInputSlot); - IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection(); - ARMNN_ASSERT(connectedOutputSlot); - connectedOutputSlot->Disconnect(*subgraphInputSlot); + // Only disconnect if the InputSlot has a connection, this might not be the case when + // dealing with working copies of SubgraphViews + // Note: we don't need this check for OutputSlot as it iterates over a vector of valid connections + if (subgraphInputSlot->GetConnection()) + { + IOutputSlot* connectedOutputSlot = subgraphInputSlot->GetConnection(); + ARMNN_ASSERT(connectedOutputSlot); + connectedOutputSlot->Disconnect(*subgraphInputSlot); - IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx); - ARMNN_ASSERT(substituteInputSlot); - connectedOutputSlot->Connect(*substituteInputSlot); + IInputSlot* substituteInputSlot = substituteSubgraphInputSlots.at(inputSlotIdx); + ARMNN_ASSERT(substituteInputSlot); + connectedOutputSlot->Connect(*substituteInputSlot); + } } // Step 2: process output slots diff --git a/src/armnn/LoadedNetwork.cpp b/src/armnn/LoadedNetwork.cpp index 7b24fd77b8..b42874f29d 100644 --- a/src/armnn/LoadedNetwork.cpp +++ b/src/armnn/LoadedNetwork.cpp @@ -1328,6 +1328,7 @@ void LoadedNetwork::EnqueueInput(const ConstTensor& inputTensor, ITensorHandle* } else { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyInput"); std::unique_ptr tensorHandle = std::make_unique(inputTensor.GetInfo(), inputTensor.GetMemoryArea()); @@ -1374,6 +1375,7 @@ void LoadedNetwork::ImportOutputTensor(const Tensor& outputTensor, ITensorHandle void CopyToOutputTensor(const Tensor& outputTensor, ITensorHandle* outputTensorHandle) { + ARMNN_SCOPED_PROFILING_EVENT(Compute::Undefined, "CopyOutput"); auto copyFunc = [](void* dst, const void* src, size_t size) { memcpy(dst, src, size); diff --git a/src/armnn/Network.cpp b/src/armnn/Network.cpp index 158142f48e..42388bfbd7 100644 --- a/src/armnn/Network.cpp +++ b/src/armnn/Network.cpp @@ -626,7 +626,14 @@ OptimizationResult AttemptBackendAssignment(BackendSettings& backendSettings, // need to set the compute device on the layer // before we can check if it is supported layer->SetBackendId(backend); - if (!IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported)) + + // To run FP16 operations on CpuAcc we need at least v8.2 architecture. If the available architecture + // is older than v8.2, we can check if the operator is supported by changing operator inputs & outputs + // to be FP32 and inserting convert layers around the FP32 operator. 
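+    // (The Float16 branch below implements that fallback: the layer is re-checked with
+    // FP32 tensor infos and, when supported, wrapped with ConvertFp16ToFp32 and
+    // ConvertFp32ToFp16 layers.)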
+ bool isLayerSupported = IWorkloadFactory::IsLayerSupported(*layer, EmptyOptional(), reasonIfUnsupported); + std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above"; + if (!isLayerSupported || + reasonIfUnsupported.find(checkStr) != std::string::npos) { if (dataTypeIn == DataType::Float16 || dataTypeOut == DataType::Float16) { @@ -1568,8 +1575,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, const OptimizerOptions& options, Optional&> messages) { - const auto start_time = armnn::GetTimeNow(); - ARMNN_LOG(debug) << options.ToString(); // Enable profiling @@ -1750,9 +1755,6 @@ IOptimizedNetworkPtr Optimize(const Graph& inGraph, optGraph.AddCompatibilityLayers(backends, tensorHandleFactoryRegistry); } - ARMNN_LOG(info) << "!! New time !! : " << std::setprecision(2) - << std::fixed << armnn::GetTimeDuration(start_time).count() << " ms."; - return optNet; } diff --git a/src/armnn/Runtime.cpp b/src/armnn/Runtime.cpp index 75b1ee8179..ff64e856f4 100644 --- a/src/armnn/Runtime.cpp +++ b/src/armnn/Runtime.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017, 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -20,7 +20,10 @@ #include #include +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) #include +#endif + #include #include @@ -334,11 +337,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) throw RuntimeException( "It is not possible to enable timeline reporting without profiling being enabled"); } - +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // Load any available/compatible dynamic backend before the runtime // goes through the backend registry LoadDynamicBackends(options.m_DynamicBackendsPath); - +#endif armnn::BackendIdSet supportedBackends; for (const auto& id : BackendRegistryInstance().GetBackendIds()) { @@ -354,9 +357,11 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) if (customAllocatorMapIterator != options.m_CustomAllocatorMap.end() && customAllocatorMapIterator->second == nullptr) { +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // We need to manually clean up the dynamic backends before throwing an exception. DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends()); m_DeviceSpec.ClearDynamicBackends(); +#endif throw armnn::Exception("Allocator associated with id " + id.Get() + " is null"); } @@ -393,6 +398,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) } // No errors so register the Custom Allocator with the BackendRegistry BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second); + m_AllocatorsAddedByThisRuntime.emplace(id); } else { @@ -428,6 +434,7 @@ RuntimeImpl::RuntimeImpl(const IRuntime::CreationOptions& options) } // No errors so register the Custom Allocator with the BackendRegistry BackendRegistryInstance().RegisterAllocator(id, customAllocatorMapIterator->second); + m_AllocatorsAddedByThisRuntime.emplace(id); } } @@ -577,13 +584,20 @@ RuntimeImpl::~RuntimeImpl() << std::endl; } } - +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) // Clear all dynamic backends. 
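    // (Skipped entirely for bare-metal and static ExecuteNetwork builds, which compile
    // out DynamicBackendUtils behind the #if guard above.)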
DynamicBackendUtils::DeregisterDynamicBackends(m_DeviceSpec.GetDynamicBackends()); m_DeviceSpec.ClearDynamicBackends(); +#endif m_BackendContexts.clear(); BackendRegistryInstance().SetProfilingService(armnn::EmptyOptional()); + // Remove custom allocators that this runtime has added. + // Note: that as backends can be per process and there can be many instances of a runtime in a process an allocator + // may have been overwritten by another runtime. + for_each(m_AllocatorsAddedByThisRuntime.begin(), m_AllocatorsAddedByThisRuntime.end(), + [](BackendId id) {BackendRegistryInstance().DeregisterAllocator(id);}); + ARMNN_LOG(info) << "Shutdown time: " << std::setprecision(2) << std::fixed << armnn::GetTimeDuration(startTime).count() << " ms."; } @@ -755,6 +769,7 @@ void RuntimeImpl::RegisterDebugCallback(NetworkId networkId, const DebugCallback loadedNetwork->RegisterDebugCallback(func); } +#if !defined(ARMNN_BUILD_BARE_METAL) && !defined(ARMNN_EXECUTE_NETWORK_STATIC) void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath) { // Get the paths where to load the dynamic backends from @@ -772,5 +787,5 @@ void RuntimeImpl::LoadDynamicBackends(const std::string& overrideBackendPath) // Add the registered dynamic backend ids to the list of supported backends m_DeviceSpec.AddSupportedBackends(registeredBackendIds, true); } - +#endif } // namespace armnn diff --git a/src/armnn/Runtime.hpp b/src/armnn/Runtime.hpp index f5dfadf948..9d47b7898d 100644 --- a/src/armnn/Runtime.hpp +++ b/src/armnn/Runtime.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once @@ -157,6 +157,10 @@ private: /// Profiling Service Instance std::unique_ptr m_ProfilingService; + + /// Keep track of backend ids of the custom allocators that this instance of the runtime added. The + /// destructor can then clean up for this runtime. 
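+    /// The destructor walks this set and calls BackendRegistryInstance().DeregisterAllocator(id)
+    /// for each recorded id (see RuntimeImpl::~RuntimeImpl in Runtime.cpp).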
+ std::set m_AllocatorsAddedByThisRuntime; }; } // namespace armnn diff --git a/src/armnn/TypesUtils.cpp b/src/armnn/TypesUtils.cpp index 4ba9ed19e1..74ac231bc9 100644 --- a/src/armnn/TypesUtils.cpp +++ b/src/armnn/TypesUtils.cpp @@ -81,4 +81,8 @@ float armnn::Dequantize(int16_t value, float scale, int32_t offset); /// Explicit specialization of Dequantize for int32_t template -float armnn::Dequantize(int32_t value, float scale, int32_t offset); \ No newline at end of file +float armnn::Dequantize(int32_t value, float scale, int32_t offset); + +/// Explicit specialization of Dequantize for int64_t +template +float armnn::Dequantize(int64_t value, float scale, int32_t offset); diff --git a/src/armnn/test/SubgraphViewTests.cpp b/src/armnn/test/SubgraphViewTests.cpp index 4ce67b0fec..9bb5e69bbb 100644 --- a/src/armnn/test/SubgraphViewTests.cpp +++ b/src/armnn/test/SubgraphViewTests.cpp @@ -2063,6 +2063,35 @@ TEST_CASE("SubgraphViewWorkingCopySubstituteSubgraph") CHECK_THROWS_AS(workingCopy.GetWorkingCopy(), Exception); } +TEST_CASE("SubgraphViewPartialWorkingCopySubstituteSubgraph") +{ + Graph graph; + + auto input = graph.AddLayer(0, "Input"); + auto activation = graph.AddLayer(ActivationDescriptor{}, "Activation"); + auto output = graph.AddLayer(1, "Output"); + + input->GetOutputSlot(0).Connect(activation->GetInputSlot(0)); + activation->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + //Add in out of order + auto view = CreateSubgraphViewFrom({activation}, + {&activation->GetInputSlot(0)}, + {&activation->GetOutputSlot(0)}); + + auto workingCopy = view->GetWorkingCopy(); + + // First (and only) layer in the subgraph is the Activation + CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation"); + + // Substitute the "Activation" layer for an equivalent layer + auto activation2 = graph.AddLayer(ActivationDescriptor{}, "Activation2"); + SubgraphView pattern(*workingCopy.beginIConnectable()); + workingCopy.SubstituteSubgraph(pattern, activation2); + + CHECK(std::string((*workingCopy.beginIConnectable())->GetName()) == "Activation2"); +} + TEST_CASE("SubgraphViewWorkingCopyOptimizationViews") { Graph graph; diff --git a/src/armnnOnnxParser/OnnxParser.cpp b/src/armnnOnnxParser/OnnxParser.cpp index 63fb60382c..552d4e4163 100644 --- a/src/armnnOnnxParser/OnnxParser.cpp +++ b/src/armnnOnnxParser/OnnxParser.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #include "OnnxParser.hpp" @@ -50,6 +50,17 @@ armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinaryFile(const char* graphFil return pOnnxParserImpl->CreateNetworkFromBinaryFile(graphFile); } +armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinary(const std::vector& binaryContent) +{ + return pOnnxParserImpl->CreateNetworkFromBinary(binaryContent); +} + +armnn::INetworkPtr IOnnxParser::CreateNetworkFromBinary(const std::vector& binaryContent, + const std::map& inputShapes) +{ + return pOnnxParserImpl->CreateNetworkFromBinary(binaryContent, inputShapes); +} + armnn::INetworkPtr IOnnxParser::CreateNetworkFromTextFile(const char* graphFile) { return pOnnxParserImpl->CreateNetworkFromTextFile(graphFile); @@ -731,6 +742,44 @@ INetworkPtr OnnxParserImpl::CreateNetworkFromTextFile(const char* graphFile, return CreateNetworkFromModel(*modelProto); } +INetworkPtr OnnxParserImpl::CreateNetworkFromBinary(const std::vector& binaryContent) +{ + ResetParser(); + ModelPtr modelProto = LoadModelFromBinary(binaryContent); + return CreateNetworkFromModel(*modelProto); +} + +INetworkPtr OnnxParserImpl::CreateNetworkFromBinary(const std::vector& binaryContent, + const std::map& inputShapes) +{ + ResetParser(); + m_InputShapes = inputShapes; + ModelPtr modelProto = LoadModelFromBinary(binaryContent); + return CreateNetworkFromModel(*modelProto); +} + +ModelPtr OnnxParserImpl::LoadModelFromBinary(const std::vector& binaryContent) +{ + if (binaryContent.size() == 0) + { + throw ParseException(fmt::format("Missing binary content", CHECK_LOCATION().AsString())); + } + // Parse the file into a message + ModelPtr modelProto = std::make_unique(); + + google::protobuf::io::CodedInputStream codedStream(binaryContent.data(), static_cast(binaryContent.size())); + codedStream.SetTotalBytesLimit(INT_MAX); + bool success = modelProto.get()->ParseFromCodedStream(&codedStream); + + if (!success) + { + std::stringstream error; + error << "Failed to parse graph"; + throw ParseException(fmt::format("{} {}", error.str(), CHECK_LOCATION().AsString())); + } + return modelProto; +} + ModelPtr OnnxParserImpl::LoadModelFromBinaryFile(const char* graphFile) { FILE* fd = fopen(graphFile, "rb"); diff --git a/src/armnnOnnxParser/OnnxParser.hpp b/src/armnnOnnxParser/OnnxParser.hpp index bb94472c6d..c9f321a5b5 100644 --- a/src/armnnOnnxParser/OnnxParser.hpp +++ b/src/armnnOnnxParser/OnnxParser.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #pragma once @@ -38,6 +38,13 @@ public: armnn::INetworkPtr CreateNetworkFromBinaryFile(const char* graphFile, const std::map& inputShapes); + /// Create the network from a protobuf binary + armnn::INetworkPtr CreateNetworkFromBinary(const std::vector& binaryContent); + + /// Create the network from a protobuf binary, with inputShapes specified + armnn::INetworkPtr CreateNetworkFromBinary(const std::vector& binaryContent, + const std::map& inputShapes); + /// Create the network from a protobuf text file on disk armnn::INetworkPtr CreateNetworkFromTextFile(const char* graphFile); @@ -64,6 +71,7 @@ public: OnnxParserImpl(); ~OnnxParserImpl() = default; + static ModelPtr LoadModelFromBinary(const std::vector& binaryContent); static ModelPtr LoadModelFromBinaryFile(const char * fileName); static ModelPtr LoadModelFromTextFile(const char * fileName); static ModelPtr LoadModelFromString(const std::string& inputString); diff --git a/src/armnnSerializer/CMakeLists.txt b/src/armnnSerializer/CMakeLists.txt index 8acdafbc28..01c51e65d7 100755 --- a/src/armnnSerializer/CMakeLists.txt +++ b/src/armnnSerializer/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017, 2019-2020, 2022 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2017, 2019-2020, 2022-2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # if(BUILD_ARMNN_SERIALIZER) @@ -36,7 +36,7 @@ if(BUILD_ARMNN_SERIALIZER) ../armnnDeserializer/Deserializer.cpp ) - if(BUILD_BARE_METAL) + if(BUILD_BARE_METAL OR EXECUTE_NETWORK_STATIC) add_library_ex(armnnSerializer STATIC ${armnn_serializer_sources}) else() # We're going to export both a STATIC library and a SHARED library here. @@ -52,9 +52,11 @@ if(BUILD_ARMNN_SERIALIZER) target_include_directories(armnnSerializer PRIVATE ../armnn) target_include_directories(armnnSerializer PRIVATE ../armnnUtils) target_include_directories(armnnSerializer PRIVATE ../../generated) - target_include_directories(armnnSerializer-static PRIVATE ../armnn) - target_include_directories(armnnSerializer-static PRIVATE ../armnnUtils) - target_include_directories(armnnSerializer-static PRIVATE ../../generated) + if (NOT BARE_METAL AND NOT EXECUTE_NETWORK_STATIC) + target_include_directories(armnnSerializer-static PRIVATE ../armnn) + target_include_directories(armnnSerializer-static PRIVATE ../armnnUtils) + target_include_directories(armnnSerializer-static PRIVATE ../../generated) + endif() list(APPEND armnn_serializer_sources ArmnnSchema_generated.h @@ -64,12 +66,13 @@ if(BUILD_ARMNN_SERIALIZER) target_include_directories(armnnSerializer SYSTEM PRIVATE ${CMAKE_CURRENT_BINARY_DIR}) target_link_libraries(armnnSerializer armnn ${FLATBUFFERS_LIBRARY}) - - install(TARGETS armnnSerializer-static - EXPORT armnn-targets - LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} - ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} - ) + if (NOT BARE_METAL AND NOT EXECUTE_NETWORK_STATIC) + install(TARGETS armnnSerializer-static + EXPORT armnn-targets + LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR} + ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR} + ) + endif() install(TARGETS armnnSerializer EXPORT armnn-targets diff --git a/src/armnnTestUtils/CMakeLists.txt b/src/armnnTestUtils/CMakeLists.txt index 3f6fb415a2..a4333cf306 100755 --- a/src/armnnTestUtils/CMakeLists.txt +++ b/src/armnnTestUtils/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2021 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved. 
# SPDX-License-Identifier: MIT # @@ -31,7 +31,7 @@ list(APPEND armnnTestUtils_sources TestUtils.hpp ) -if(NOT BUILD_BARE_METAL) +if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC) list(APPEND armnnTestUtils_sources UnitTests.cpp UnitTests.hpp @@ -41,6 +41,8 @@ endif() if(BUILD_BARE_METAL) add_library_ex(armnnTestUtils STATIC ${armnnTestUtils_sources}) +elseif(EXECUTE_NETWORK_STATIC) + add_library_ex(armnnTestUtils OBJECT ${armnnTestUtils_sources}) else() add_library_ex(armnnTestUtils SHARED ${armnnTestUtils_sources}) endif() diff --git a/src/armnnTfLiteParser/CMakeLists.txt b/src/armnnTfLiteParser/CMakeLists.txt index f9653b6752..6096d1bf8c 100755 --- a/src/armnnTfLiteParser/CMakeLists.txt +++ b/src/armnnTfLiteParser/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017, 2023 Arm Ltd. All rights reserved. # SPDX-License-Identifier: MIT # if(BUILD_TF_LITE_PARSER) @@ -11,7 +11,11 @@ if(BUILD_TF_LITE_PARSER) TfLiteParser.cpp ) - add_library_ex(armnnTfLiteParser SHARED ${armnn_tf_lite_parser_sources}) + if(EXECUTE_NETWORK_STATIC) + add_library_ex(armnnTfLiteParser OBJECT ${armnn_tf_lite_parser_sources}) + else() + add_library_ex(armnnTfLiteParser SHARED ${armnn_tf_lite_parser_sources}) + endif() include_directories(SYSTEM "${FLATBUFFERS_INCLUDE_PATH}") set_target_properties(armnnTfLiteParser PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${PROJECT_BINARY_DIR}) diff --git a/src/armnnTfLiteParser/TfLiteParser.cpp b/src/armnnTfLiteParser/TfLiteParser.cpp index 0484c6f478..f6c1ee9d38 100644 --- a/src/armnnTfLiteParser/TfLiteParser.cpp +++ b/src/armnnTfLiteParser/TfLiteParser.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -316,6 +316,14 @@ std::vector GetUIntBuffer(armnn::TensorInfo info, ::memcpy(uint64Buffer.data(), bufferPtr->data.data(), bufferPtr->data.size()); buffer.assign(std::begin(uint64Buffer), std::end(uint64Buffer)); } + else + { + CheckLocation location = CHECK_LOCATION(); + throw ParseException( + fmt::format("Unsupported data type for uint buffer {}, only Signed 32 or Signed 64 are supported. 
{}", + GetDataTypeName(info.GetDataType()), + location.AsString())); + } return buffer; } @@ -911,42 +919,16 @@ INetworkPtr TfLiteParserImpl::CreateNetworkFromModel() return std::move(m_Network); } -std::unique_ptr AsFloatArray(TfLiteParserImpl::BufferRawPtr bufferPtr, - const TensorInfo& tensorInfo) +bool TfLiteParserImpl::ShouldConstantTensorBeConverted(TfLiteParserImpl::TensorRawPtr tensorPtr, + armnn::DataType inputDataType, + armnn::DataType tensorDataType) { - if (tensorInfo.GetDataType() == DataType::QAsymmS8 || tensorInfo.GetDataType() == DataType::QSymmS8 || - tensorInfo.GetDataType() == DataType::QAsymmU8) - { - std::unique_ptr buffer(new float[tensorInfo.GetNumElements()]); - - if (tensorInfo.HasPerAxisQuantization()) - { - unsigned int axis = tensorInfo.GetQuantizationDim().value(); - auto axisDimensionality = tensorInfo.GetShape()[axis]; - auto axisFactor = armnnUtils::GetNumElementsAfter(tensorInfo.GetShape(), axis); - - for (unsigned int i = 0; i < tensorInfo.GetNumDimensions(); ++i) - { - unsigned int axisIndex = (i / axisFactor) % axisDimensionality; - buffer[i] = Dequantize(bufferPtr->data[i], tensorInfo.GetQuantizationScales()[axisIndex], - tensorInfo.GetQuantizationOffset()); - } - } - else - { - for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i) - { - buffer[i] = Dequantize(bufferPtr->data[i], tensorInfo.GetQuantizationScale(), - tensorInfo.GetQuantizationOffset()); - } - } - return buffer; - } - throw ParseException( - fmt::format("Unsupported input/weights combination: Input {} not supported with Weights {}", - GetDataTypeName(DataType::Float32), - GetDataTypeName(tensorInfo.GetDataType()), - CHECK_LOCATION().AsString())); + return (TfLiteParserImpl::IsConstTensor(tensorPtr) && inputDataType == DataType::Float32 && + (tensorDataType == DataType::QAsymmU8 || + tensorDataType == DataType::QAsymmS8 || + tensorDataType == DataType::QSymmS8 || + tensorDataType == DataType::Signed32 || + tensorDataType == DataType::Signed64)); } void TfLiteParserImpl::RegisterProducerOfTensor(size_t subgraphIndex, @@ -1136,9 +1118,7 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) auto layerName = fmt::format("Conv2D:{}:{}", subgraphIndex, operatorIndex); armnn::IConnectableLayer* layer = m_Network->AddConvolution2dLayer(desc, layerName.c_str()); - if (IsConstTensor(inputs[1]) && inputTensorInfo.GetDataType() == DataType::Float32 && - (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || - filterTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[1], inputTensorInfo.GetDataType(), filterTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[1]->buffer); } @@ -1150,9 +1130,7 @@ void TfLiteParserImpl::ParseConv2D(size_t subgraphIndex, size_t operatorIndex) // Add the biases input to the registration list, a constant layer will be added by SetupConstantLayers. 
tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); - if (IsConstTensor(inputs[2]) && inputTensorInfo.GetDataType() == DataType::Float32 && - (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || - filterTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[2], inputTensorInfo.GetDataType(), biasTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[2]->buffer); } @@ -3112,9 +3090,7 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator // Add the weights input to the registration list, constant layers will be added by SetupConstantLayers if constant. tensorIndexesToRegister.emplace_back(inputTensorIndexes[1]); - if (desc.m_ConstantWeights && inputTensorInfo.GetDataType() == DataType::Float32 && - (filterTensorInfo.GetDataType() == DataType::QAsymmU8 || - filterTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[1], inputTensorInfo.GetDataType(), filterTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[1]->buffer); } @@ -3127,9 +3103,7 @@ void TfLiteParserImpl::ParseFullyConnected(size_t subgraphIndex, size_t operator // Add the biases input to the registration list, constant layer will be added by SetupConstantLayers. tensorIndexesToRegister.emplace_back(inputTensorIndexes[2]); - if (desc.m_ConstantWeights && inputTensorInfo.GetDataType() == DataType::Float32 && - (biasTensorInfo.GetDataType() == DataType::QAsymmU8 || - biasTensorInfo.GetDataType() == DataType::QAsymmS8)) + if (ShouldConstantTensorBeConverted(inputs[2], inputTensorInfo.GetDataType(), biasTensorInfo.GetDataType())) { m_ConstantsToDequantize.emplace_back(inputs[2]->buffer); } @@ -4925,11 +4899,22 @@ TfLiteParserImpl::CreateConstTensorNonPermuted(TensorRawPtr tensorPtr, // Make sure isConstant flag is set. 
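// A minimal sketch of the conversion performed below, with hypothetical values
// (the TensorInfo constructor and ToFloatArray overload are the ones used in this patch):
//   armnn::TensorInfo weightInfo({ 2, 2 }, armnn::DataType::QAsymmU8, 0.1f, 0, true);
//   std::vector<uint8_t> raw { 10, 20, 30, 40 };
//   std::unique_ptr<float[]> floats = armnnUtils::ToFloatArray(raw, weightInfo);
//   // floats[0..3] == { 1.0f, 2.0f, 3.0f, 4.0f }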
tensorInfo.SetConstant(); - if (inputDataType == DataType::Float32 && tensorInfo.GetDataType() != DataType::Float32) + if (inputDataType == DataType::Float32 && tensorInfo.GetDataType() != DataType::Float32) { - TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true); - std::unique_ptr data = AsFloatArray(bufferPtr, tensorInfo); - return std::make_pair(ConstTensor(constTensorInfo, data.get()), std::move(data)); + try + { + TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true); + std::unique_ptr data = armnnUtils::ToFloatArray(bufferPtr->data, tensorInfo); + return std::make_pair(ConstTensor(constTensorInfo, data.get()), std::move(data)); + } + catch (InvalidArgumentException&) + { + throw ParseException( + fmt::format("Unsupported input/weights combination: Input {} not supported with Weights {}", + GetDataTypeName(DataType::Float32), + GetDataTypeName(tensorInfo.GetDataType()), + CHECK_LOCATION().AsString())); + } } else { @@ -4950,9 +4935,20 @@ TfLiteParserImpl::CreateConstTensorPtr(TensorRawPtr tensorPtr, armnn::TensorInfo if (inputTensorInfo.GetDataType() == DataType::Float32 && tensorInfo.GetDataType() != DataType::Float32) { - TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true); - std::unique_ptr data = AsFloatArray(bufferPtr, tensorInfo); - return std::make_pair(new ConstTensor(constTensorInfo, data.get()), std::move(data)); + try + { + TensorInfo constTensorInfo(tensorInfo.GetShape(), DataType::Float32, 0.0f, 0, true); + std::unique_ptr data = armnnUtils::ToFloatArray(bufferPtr->data, tensorInfo); + return std::make_pair(new ConstTensor(constTensorInfo, data.get()), std::move(data)); + } + catch (InvalidArgumentException&) + { + throw ParseException( + fmt::format("Unsupported input/weights combination: Input {} not supported with Weights {}", + GetDataTypeName(DataType::Float32), + GetDataTypeName(tensorInfo.GetDataType()), + CHECK_LOCATION().AsString())); + } } else { diff --git a/src/armnnTfLiteParser/TfLiteParser.hpp b/src/armnnTfLiteParser/TfLiteParser.hpp index f8ddc55649..7eb6c48501 100644 --- a/src/armnnTfLiteParser/TfLiteParser.hpp +++ b/src/armnnTfLiteParser/TfLiteParser.hpp @@ -242,7 +242,13 @@ private: }; bool ShouldConstantTensorBeCreated(unsigned int tensorIndex); + bool IsConstTensor(TensorRawPtr tensorPtr); + + bool ShouldConstantTensorBeConverted(TfLiteParserImpl::TensorRawPtr tensorPtr, + armnn::DataType inputDataType, + armnn::DataType filterDataType); + armnn::ConstTensor CreateConstTensorNonPermuted(TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo); @@ -250,6 +256,7 @@ private: CreateConstTensorPermuted(TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo, armnn::Optional permutationVector); + std::pair> CreateConstTensorNonPermuted(TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo, @@ -261,6 +268,7 @@ private: TfLiteParserImpl::TensorRawPtr tensorPtr, armnn::TensorInfo& tensorInfo, armnn::Optional permutationVector); + std::pair> CreateConstTensorPtr(TensorRawPtr tensorPtr, armnn::TensorInfo& inputTensorInfo); diff --git a/src/armnnTfLiteParser/test/Conv2D.cpp b/src/armnnTfLiteParser/test/Conv2D.cpp index 45c4a43519..334c102344 100644 --- a/src/armnnTfLiteParser/test/Conv2D.cpp +++ b/src/armnnTfLiteParser/test/Conv2D.cpp @@ -673,7 +673,7 @@ struct Conv2FloatWithInt8WeightsAndBiasesFixture : Conv2DWithBiasesFixture "[ 1, 2, 2, 1 ]", // filterShape "[ 2,1, 0,6 ]", // filterData "[ 1 ]", // biasShape - "[ 10, 0, 0, 0 ]", // biasData + "[ 10 ]", // biasData "1", // 
stride w and h "NONE", // activation "1.0", // filterScale diff --git a/src/armnnUtils/TensorUtils.cpp b/src/armnnUtils/TensorUtils.cpp index d77f5d74c3..9e3d719211 100644 --- a/src/armnnUtils/TensorUtils.cpp +++ b/src/armnnUtils/TensorUtils.cpp @@ -128,12 +128,11 @@ TensorShape ExpandDims(const TensorShape& tensorShape, int axis) } outputShape.insert(outputShape.begin() + axis, 1); - return TensorShape(outputDim, outputShape.data()); + return { outputDim, outputShape.data() }; } std::vector<unsigned int> SqueezeDims(const TensorShape& tensorShape) { - unsigned int outputDimSize = 0; std::vector<unsigned int> squeezedDims; for (unsigned int i = 0; i < tensorShape.GetNumDimensions(); ++i) { if (tensorShape[i] != 1) { squeezedDims.push_back(tensorShape[i]); - ++outputDimSize; } } return squeezedDims; @@ -201,4 +199,91 @@ std::pair<unsigned int, std::vector<float>> GetPerAxisParams(const armnn::Tensor return { axisFactor, scales }; } +template<typename T> +void CheckSizes(const std::vector<T>& data, const armnn::TensorInfo& tensorInfo, unsigned int size = 1) +{ + if (data.size() / size != tensorInfo.GetNumElements()) + { + throw InvalidArgumentException( + fmt::format("The data does not contain the expected number of elements {} != {}. {}", + data.size(), tensorInfo.GetNumElements(), CHECK_LOCATION().AsString())); + } +} + +template<typename T> +std::unique_ptr<float[]> ToFloatArray(const std::vector<T>& data, const armnn::TensorInfo& tensorInfo) +{ + CheckSizes(data, tensorInfo); + + std::unique_ptr<float[]> returnBuffer(new float[tensorInfo.GetNumElements()]); + + if (tensorInfo.HasPerAxisQuantization()) + { + unsigned int axis = tensorInfo.GetQuantizationDim().value(); + auto axisDimensionality = tensorInfo.GetShape()[axis]; + auto axisFactor = armnnUtils::GetNumElementsAfter(tensorInfo.GetShape(), axis); + + for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i) + { + unsigned int axisIndex; + + if (i < axisFactor) + { + axisIndex = 0; + } + else + { + axisIndex = (i / axisFactor) % axisDimensionality; + } + returnBuffer[i] = Dequantize(data[i], + tensorInfo.GetQuantizationScales()[axisIndex], + tensorInfo.GetQuantizationOffset()); + } + } + else + { + for (unsigned int i = 0; i < tensorInfo.GetNumElements(); ++i) + { + returnBuffer[i] = Dequantize(data[i], + tensorInfo.GetQuantizationScale(), + tensorInfo.GetQuantizationOffset()); + } + } + return returnBuffer; +} + +std::unique_ptr<float[]> ToFloatArray(const std::vector<uint8_t>& data, const armnn::TensorInfo& tensorInfo) +{ + if (tensorInfo.GetDataType() == DataType::QAsymmS8 || tensorInfo.GetDataType() == DataType::QSymmS8) + { + CheckSizes(data, tensorInfo); + std::vector<int8_t> buffer(tensorInfo.GetNumElements()); + ::memcpy(buffer.data(), data.data(), data.size()); + return ToFloatArray<int8_t>(buffer, tensorInfo); + } + else if (tensorInfo.GetDataType() == DataType::QAsymmU8) + { + CheckSizes(data, tensorInfo); + return ToFloatArray<uint8_t>(data, tensorInfo); + } + else if (tensorInfo.GetDataType() == DataType::Signed32) + { + CheckSizes(data, tensorInfo, 4); + std::vector<int32_t> buffer(tensorInfo.GetNumElements()); + ::memcpy(buffer.data(), data.data(), data.size()); + return ToFloatArray<int32_t>(buffer, tensorInfo); + } + else if (tensorInfo.GetDataType() == DataType::Signed64) + { + CheckSizes(data, tensorInfo, 8); + std::vector<int64_t> buffer(tensorInfo.GetNumElements()); + ::memcpy(buffer.data(), data.data(), data.size()); + return ToFloatArray<int64_t>(buffer, tensorInfo); + } + throw InvalidArgumentException( + fmt::format("Unsupported datatype {}. 
{}", + GetDataTypeName(tensorInfo.GetDataType()), + CHECK_LOCATION().AsString())); +} + } // namespace armnnUtils diff --git a/src/armnnUtils/test/TensorUtilsTest.cpp b/src/armnnUtils/test/TensorUtilsTest.cpp index 6d5f719eb1..16349c554e 100644 --- a/src/armnnUtils/test/TensorUtilsTest.cpp +++ b/src/armnnUtils/test/TensorUtilsTest.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2019 Arm Ltd. All rights reserved. +// Copyright © 2019,2021-2022 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -134,4 +134,175 @@ TEST_CASE("ExpandDimsInvalidNegativeAxisTest") CHECK_THROWS_AS(ExpandDims(inputShape, -5), armnn::InvalidArgumentException); } +TEST_CASE("ToFloatArrayInvalidDataType") +{ + armnn::TensorInfo info({ 2, 3, 4 }, armnn::DataType::BFloat16); + std::vector data {1,2,3,4,5,6,7,8,9,10}; + + // Invalid argument + CHECK_THROWS_AS(ToFloatArray(data, info), armnn::InvalidArgumentException); +} + +TEST_CASE("ToFloatArrayQSymmS8PerAxis") +{ + std::vector quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f }; + unsigned int quantizationDim = 1; + + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QSymmS8, quantizationScales, quantizationDim); + std::vector data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 }; + float expected[] { 10.0f, 24.0f, -37.8f, -46.4f, -10.6f, -19.2f, -25.8f, -30.4f, -6.6f, -11.2f, -13.8f, -14.4f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArrayQSymmS8") +{ + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QSymmS8, 0.1f); + std::vector data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 }; + float expected[] { 10.0f, 12.0f, -12.6f, -11.6f, -10.6f, -9.6f, -8.6f, -7.6f, -6.6f, -5.6f, -4.6f, -3.6f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArrayQAsymmS8PerAxis") +{ + std::vector quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f }; + unsigned int quantizationDim = 1; + + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmS8, quantizationScales, quantizationDim); + std::vector data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 }; + float expected[] { 10.0f, 24.0f, -37.8f, -46.4f, -10.6f, -19.2f, -25.8f, -30.4f, -6.6f, -11.2f, -13.8f, -14.4f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArrayQAsymmS8") +{ + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmS8, 0.1f); + std::vector data { 100, 120, 130, 140, 150, 160, 170 ,180, 190, 200, 210, 220 }; + float expected[] { 10.0f, 12.0f, -12.6f, -11.6f, -10.6f, -9.6f, -8.6f, -7.6f, -6.6f, -5.6f, -4.6f, -3.6f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArrayQASymmU8PerAxis") +{ + std::vector quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f }; + unsigned int quantizationDim = 1; + + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmU8, quantizationScales, quantizationDim); + std::vector data { 100, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220 }; + float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f }; + + 
std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArrayQAsymmU8") +{ + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::QAsymmU8, 0.1f); + std::vector data { 100, 120, 130, 140, 150, 160, 170, 180, 190, 200, 210, 220 }; + float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArraySigned32PerAxis") +{ + std::vector quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f }; + unsigned int quantizationDim = 1; + + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed32, quantizationScales, quantizationDim); + std::vector data { 100, 0, 0, 0, 120, 0, 0, 0, 130, 0, 0, 0, 140, 0, 0, 0, 150, 0, 0, 0, 160, 0, 0, 0, + 170, 0, 0, 0, 180, 0, 0, 0, 190, 0, 0, 0, 200, 0, 0, 0, 210, 0, 0, 0, 220, 0, 0, 0 }; + float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArraySigned32") +{ + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed32, 0.1f); + std::vector data { 100, 0, 0, 0, 120, 0, 0, 0, 130, 0, 0, 0, 140, 0, 0, 0, 150, 0, 0, 0, 160, 0, 0, 0, + 170, 0, 0, 0, 180, 0, 0, 0, 190, 0, 0, 0, 200, 0, 0, 0, 210, 0, 0, 0, 220, 0, 0, 0 }; + float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArraySigned64PerAxis") +{ + std::vector quantizationScales { 0.1f, 0.2f, 0.3f, 0.4f }; + unsigned int quantizationDim = 1; + + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed64, quantizationScales, quantizationDim); + std::vector data { 100, 0, 0, 0, 0, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0, 0, 0, 0, + 140, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0, + 170, 0, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0, + 200, 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0 }; + float expected[] { 10.0f, 24.0f, 39.0f, 56.0f, 15.0f, 32.0f, 51.0f, 72.0f, 19.0f, 40.0f, 63.0f, 88.0f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} + +TEST_CASE("ToFloatArraySigned64") +{ + armnn::TensorInfo info({ 3, 4 }, armnn::DataType::Signed64, 0.1f); + std::vector data { 100, 0, 0, 0, 0, 0, 0, 0, 120, 0, 0, 0, 0, 0, 0, 0, 130, 0, 0, 0, 0, 0, 0, 0, + 140, 0, 0, 0, 0, 0, 0, 0, 150, 0, 0, 0, 0, 0, 0, 0, 160, 0, 0, 0, 0, 0, 0, 0, + 170, 0, 0, 0, 0, 0, 0, 0, 180, 0, 0, 0, 0, 0, 0, 0, 190, 0, 0, 0, 0, 0, 0, 0, + 200, 0, 0, 0, 0, 0, 0, 0, 210, 0, 0, 0, 0, 0, 0, 0, 220, 0, 0, 0, 0, 0, 0, 0 }; + float expected[] { 10.0f, 12.0f, 13.0f, 14.0f, 15.0f, 16.0, 17.0f, 18.0f, 19.0f, 20.0f, 21.0f, 22.0f }; + + std::unique_ptr result = ToFloatArray(data, info); + + for (uint i = 0; i < info.GetNumElements(); ++i) + { + CHECK_EQ(result[i], doctest::Approx(expected[i])); + } +} } diff --git 
a/src/backends/aclCommon/ArmComputeTuningUtils.cpp b/src/backends/aclCommon/ArmComputeTuningUtils.cpp new file mode 100644 index 0000000000..4680541ae5 --- /dev/null +++ b/src/backends/aclCommon/ArmComputeTuningUtils.cpp @@ -0,0 +1,60 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ArmComputeTuningUtils.hpp" + +namespace armnn +{ + +IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(IGpuAccTunedParameters::Mode mode, + IGpuAccTunedParameters::TuningLevel tuningLevel) +{ + return new ClTunedParameters(mode, tuningLevel); +} + +IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(IGpuAccTunedParameters::Mode mode, + IGpuAccTunedParameters::TuningLevel tuningLevel) +{ + return IGpuAccTunedParametersPtr(CreateRaw(mode, tuningLevel), &IGpuAccTunedParameters::Destroy); +} + +void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) +{ + delete params; +} + +ClTunedParameters::ClTunedParameters(IGpuAccTunedParameters::Mode mode, + IGpuAccTunedParameters::TuningLevel tuningLevel) + : m_Mode(mode) + , m_TuningLevel(tuningLevel) + , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) +{ +} + +void ClTunedParameters::Load(const char* filename) +{ + try + { + m_Tuner.load_from_file(filename); + } + catch (const std::exception& e) + { + throw Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + e.what()); + } +} + +void ClTunedParameters::Save(const char* filename) const +{ + try + { + m_Tuner.save_to_file(filename); + } + catch (const std::exception& e) + { + throw Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + e.what()); + } +} + +} \ No newline at end of file diff --git a/src/backends/aclCommon/ArmComputeTuningUtils.hpp b/src/backends/aclCommon/ArmComputeTuningUtils.hpp new file mode 100644 index 0000000000..6d99d3f08e --- /dev/null +++ b/src/backends/aclCommon/ArmComputeTuningUtils.hpp @@ -0,0 +1,84 @@ +// +// Copyright © 2023 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace armnn +{ + +enum class TuningLevel +{ + None, + Rapid, + Normal, + Exhaustive +}; + +inline TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defaultValue) +{ + if (value.IsInt()) + { + int v = value.AsInt(); + if (v > static_cast(TuningLevel::Exhaustive) || + v < static_cast(TuningLevel::None)) + { + ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. " + "Using default(" << static_cast(defaultValue) << ")"; + } else + { + return static_cast(v); + } + } + return defaultValue; +} + +inline void ConfigureTuner(arm_compute::CLTuner &tuner, TuningLevel level) +{ + tuner.set_tune_new_kernels(true); // Turn on tuning initially. + + switch (level) + { + case TuningLevel::Rapid: + ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)"; + tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID); + break; + case TuningLevel::Normal: + ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Normal (2)"; + tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL); + break; + case TuningLevel::Exhaustive: + ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)"; + tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE); + break; + case TuningLevel::None: + default: + tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode. 
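// Sketch of how these levels are typically selected at runtime (values are
// illustrative; the option names match those parsed in ClBackendContext below):
//   armnn::IRuntime::CreationOptions options;
//   options.m_BackendOptions.emplace_back(armnn::BackendOptions("GpuAcc",
//       { { "TuningLevel", 2 }, { "TuningFile", "tuned_params.bin" } }));
//   armnn::IRuntimePtr runtime = armnn::IRuntime::Create(options);
// TuningLevel 2 maps to Normal, so ConfigureTuner enables CLTunerMode::NORMAL.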
+ break; + } +} + +class ClTunedParameters : public IGpuAccTunedParameters +{ +public: + ClTunedParameters(IGpuAccTunedParameters::Mode mode, IGpuAccTunedParameters::TuningLevel tuningLevel); + + virtual void Load(const char* filename); + virtual void Save(const char* filename) const; + + Mode m_Mode; + TuningLevel m_TuningLevel; + + arm_compute::CLTuner m_Tuner; + arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle; +}; + +} \ No newline at end of file diff --git a/src/backends/aclCommon/CMakeLists.txt b/src/backends/aclCommon/CMakeLists.txt index 05fbe6cca9..b3bf89e750 100644 --- a/src/backends/aclCommon/CMakeLists.txt +++ b/src/backends/aclCommon/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -8,9 +8,12 @@ list(APPEND armnnAclCommon_sources ArmComputeTensorHandle.hpp ArmComputeTensorUtils.hpp ArmComputeTensorUtils.cpp + ArmComputeTuningUtils.hpp + ArmComputeTuningUtils.cpp ArmComputeUtils.hpp BaseMemoryManager.cpp BaseMemoryManager.hpp + IClTensorHandle.hpp ) if(BUILD_UNIT_TESTS) diff --git a/src/backends/aclCommon/IClTensorHandle.hpp b/src/backends/aclCommon/IClTensorHandle.hpp new file mode 100644 index 0000000000..48cf5f57d6 --- /dev/null +++ b/src/backends/aclCommon/IClTensorHandle.hpp @@ -0,0 +1,22 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include +#include + +namespace armnn +{ + +class IClTensorHandle : public IAclTensorHandle +{ +public: + virtual arm_compute::ICLTensor& GetTensor() = 0; + virtual arm_compute::ICLTensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) = 0; +}; + +} //namespace armnn \ No newline at end of file diff --git a/src/backends/aclCommon/common.mk b/src/backends/aclCommon/common.mk index 0ba966af14..b113269df9 100644 --- a/src/backends/aclCommon/common.mk +++ b/src/backends/aclCommon/common.mk @@ -9,6 +9,7 @@ COMMON_SOURCES := \ ArmComputeTensorUtils.cpp \ + ArmComputeTuningUtils.cpp \ BaseMemoryManager.cpp # COMMON_TEST_SOURCES contains the list of files to be included diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt index b2ab932384..8d7e114fa5 100644 --- a/src/backends/backendsCommon/CMakeLists.txt +++ b/src/backends/backendsCommon/CMakeLists.txt @@ -1,9 +1,9 @@ # -# Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # -if(NOT BUILD_BARE_METAL) +if(NOT BUILD_BARE_METAL AND NOT EXECUTE_NETWORK_STATIC) list(APPEND armnnBackendsCommon_sources DynamicBackend.cpp DynamicBackend.hpp diff --git a/src/backends/backendsCommon/test/BackendProfilingTests.cpp b/src/backends/backendsCommon/test/BackendProfilingTests.cpp index d49fa7f2ec..9041b55c57 100644 --- a/src/backends/backendsCommon/test/BackendProfilingTests.cpp +++ b/src/backends/backendsCommon/test/BackendProfilingTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020, 2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -169,11 +169,6 @@ TEST_CASE("BackendProfilingCounterRegisterMockBackendTest") unsigned int shiftedId = 0; - if (armnn::BackendRegistryInstance().IsBackendRegistered("EthosNAcc")) - { - shiftedId = 4; - } - // Check if the MockBackends 3 dummy counters {0, 1, 2-5 (four cores)} are registered armnn::BackendId mockId = armnn::MockBackendId(); const ICounterMappings& counterMap = GetProfilingService(&runtime).GetCounterMappings(); diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index 5fcc8b592e..d251bd2597 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2017-2022 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -41,6 +41,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources LogSoftmaxEndToEndTestImpl.hpp MemoryManagerTests.cpp MockBackendId.hpp + MultiplicationEndToEndTestImpl.hpp OptimizeSubgraphViewTests.cpp OptimizationViewsTests.cpp PreluEndToEndTestImpl.hpp @@ -57,6 +58,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources SpaceToDepthEndToEndTestImpl.hpp SplitterEndToEndTestImpl.hpp StridedSliceAsyncEndToEndTest.hpp + SubtractionEndToEndTestImpl.hpp TransposeEndToEndTestImpl.hpp TensorCopyUtils.hpp WorkloadFactoryHelper.hpp diff --git a/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp new file mode 100644 index 0000000000..40442e2d47 --- /dev/null +++ b/src/backends/backendsCommon/test/MultiplicationEndToEndTestImpl.hpp @@ -0,0 +1,96 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include +#include + +#include + +namespace +{ + +template +armnn::INetworkPtr CreateMultiplicationNetwork(const armnn::TensorShape& inputXShape, + const armnn::TensorShape& inputYShape, + const armnn::TensorShape& outputShape, + const float qScale = 1.0f, + const int32_t qOffset = 0) +{ + using namespace armnn; + + INetworkPtr network(INetwork::Create()); + + TensorInfo inputXTensorInfo(inputXShape, DataType, qScale, qOffset, true); + TensorInfo inputYTensorInfo(inputYShape, DataType, qScale, qOffset, true); + + TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset); + + + IConnectableLayer* multiplication = network->AddMultiplicationLayer("multiplication"); + IConnectableLayer* inputX = network->AddInputLayer(0, "inputX"); + IConnectableLayer* inputY = network->AddInputLayer(1, "inputY"); + IConnectableLayer* output = network->AddOutputLayer(0, "output"); + + Connect(inputX, multiplication, inputXTensorInfo, 0, 0); + Connect(inputY, multiplication, inputYTensorInfo, 0, 1); + Connect(multiplication, output, outputTensorInfo, 0, 0); + + return network; +} + +template> +void MultiplicationEndToEnd(const std::vector& backends) +{ + using namespace armnn; + + const TensorShape& inputXShape = { 2, 2 }; + const TensorShape& inputYShape = { 2, 2 }; + const TensorShape& outputShape = { 2, 2 }; + + INetworkPtr network = CreateMultiplicationNetwork(inputXShape, inputYShape, outputShape); + + CHECK(network); + + std::vector inputXData{ 1, 2, 3, 4 }; + std::vector inputYData{ 5, 2, 6, 3 }; + std::vector expectedOutput{ 5, 4, 18, 12 }; + + std::map> inputTensorData = {{ 0, inputXData }, {1, inputYData}}; + std::map> expectedOutputData = { { 0, expectedOutput } }; + + EndToEndLayerTestImpl(std::move(network), inputTensorData, expectedOutputData, backends); +} + +template +void MultiplicationEndToEndFloat16(const std::vector& backends) +{ + using namespace armnn; + using namespace half_float::literal; + using Half = half_float::half; + + const TensorShape& inputXShape = { 2, 2 }; + const TensorShape& inputYShape = { 2, 2 }; + const TensorShape& outputShape = { 2, 2 }; + + INetworkPtr network = CreateMultiplicationNetwork(inputXShape, inputYShape, outputShape); + CHECK(network); + + std::vector inputXData{ 1._h, 2._h, + 3._h, 4._h }; + std::vector inputYData{ 1._h, 2._h, + 3._h, 4._h }; + std::vector expectedOutput{ 1._h, 4._h, + 9._h, 16._h }; + + std::map> inputTensorData = {{ 0, inputXData }, { 1, inputYData }}; + std::map> expectedOutputData = { { 0, expectedOutput } }; + + EndToEndLayerTestImpl(std::move(network), inputTensorData, expectedOutputData, backends); +} + +} // anonymous namespace diff --git a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp index 9b86784dce..ff84eea2de 100644 --- a/src/backends/backendsCommon/test/OptimizationViewsTests.cpp +++ b/src/backends/backendsCommon/test/OptimizationViewsTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2019-2022 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -288,4 +288,15 @@ TEST_CASE("OptimizeViewsValidateDeviceMockBackend") CheckLayers(graph); } +TEST_CASE("OptimizedViewsReturnsINetworkReference") +{ + OptimizationViews view; + + auto layer = view.GetINetworkRef().AddInputLayer(0, "input"); + + // Check layer has been added to the referenced INetwork + CHECK(layer); +} + + } \ No newline at end of file diff --git a/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp new file mode 100644 index 0000000000..747fe26df0 --- /dev/null +++ b/src/backends/backendsCommon/test/SubtractionEndToEndTestImpl.hpp @@ -0,0 +1,96 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include + +#include +#include + +#include + +namespace +{ + +template +armnn::INetworkPtr CreateSubtractionNetwork(const armnn::TensorShape& inputXShape, + const armnn::TensorShape& inputYShape, + const armnn::TensorShape& outputShape, + const float qScale = 1.0f, + const int32_t qOffset = 0) +{ + using namespace armnn; + + INetworkPtr network(INetwork::Create()); + + TensorInfo inputXTensorInfo(inputXShape, DataType, qScale, qOffset, true); + TensorInfo inputYTensorInfo(inputYShape, DataType, qScale, qOffset, true); + + TensorInfo outputTensorInfo(outputShape, DataType, qScale, qOffset); + + + IConnectableLayer* subtraction = network->AddSubtractionLayer("subtraction"); + IConnectableLayer* inputX = network->AddInputLayer(0, "inputX"); + IConnectableLayer* inputY = network->AddInputLayer(1, "inputY"); + IConnectableLayer* output = network->AddOutputLayer(0, "output"); + + Connect(inputX, subtraction, inputXTensorInfo, 0, 0); + Connect(inputY, subtraction, inputYTensorInfo, 0, 1); + Connect(subtraction, output, outputTensorInfo, 0, 0); + + return network; +} + +template> +void SubtractionEndToEnd(const std::vector& backends) +{ + using namespace armnn; + + const TensorShape& inputXShape = { 2, 2 }; + const TensorShape& inputYShape = { 2, 2 }; + const TensorShape& outputShape = { 2, 2 }; + + INetworkPtr network = CreateSubtractionNetwork(inputXShape, inputYShape, outputShape); + + CHECK(network); + + std::vector inputXData{ 10, 11, 12, 13 }; + std::vector inputYData{ 5, 7, 6, 8 }; + std::vector expectedOutput{ 5, 4, 6, 5 }; + + std::map> inputTensorData = {{ 0, inputXData }, {1, inputYData}}; + std::map> expectedOutputData = { { 0, expectedOutput } }; + + EndToEndLayerTestImpl(std::move(network), inputTensorData, expectedOutputData, backends); +} + +template +void SubtractionEndToEndFloat16(const std::vector& backends) +{ + using namespace armnn; + using namespace half_float::literal; + using Half = half_float::half; + + const TensorShape& inputXShape = { 2, 2 }; + const TensorShape& inputYShape = { 2, 2 }; + const TensorShape& outputShape = { 2, 2 }; + + INetworkPtr network = CreateSubtractionNetwork(inputXShape, inputYShape, outputShape); + CHECK(network); + + std::vector inputXData{ 11._h, 12._h, + 13._h, 14._h }; + std::vector inputYData{ 5._h, 7._h, + 6._h, 8._h }; + std::vector expectedOutput{ 6._h, 5._h, + 7._h, 6._h }; + + std::map> inputTensorData = {{ 0, inputXData }, { 1, inputYData }}; + std::map> expectedOutputData = { { 0, expectedOutput } }; + + EndToEndLayerTestImpl(std::move(network), inputTensorData, expectedOutputData, backends); +} + +} // anonymous namespace diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt index aeb90b069c..20c42061fc 100644 
--- a/src/backends/cl/CMakeLists.txt +++ b/src/backends/cl/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -44,7 +44,6 @@ if(ARMCOMPUTECL) ClTensorHandleFactory.hpp ClWorkloadFactory.cpp ClWorkloadFactory.hpp - IClTensorHandle.hpp ICLTensorProxy.hpp OpenClTimer.cpp OpenClTimer.hpp diff --git a/src/backends/cl/ClBackendContext.cpp b/src/backends/cl/ClBackendContext.cpp index 62c6b038da..adee2763ba 100644 --- a/src/backends/cl/ClBackendContext.cpp +++ b/src/backends/cl/ClBackendContext.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -20,20 +20,11 @@ namespace armnn struct ClBackendContext::ClContextControlWrapper { - ClContextControlWrapper() {} - - bool IsInitialised() - { - return m_Initialised; - } - - void Init(arm_compute::CLTuner* tuner, - arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, - bool profilingEnabled) - { - m_ClContextControl = ClContextControl(tuner, heuristicsHandle, profilingEnabled); - m_Initialised = true; - } + ClContextControlWrapper(arm_compute::CLTuner* tuner, + arm_compute::CLGEMMHeuristicsHandle* heuristicsHandle, + bool profilingEnabled) + : m_ClContextControl(tuner, heuristicsHandle, profilingEnabled) + {} bool Sync() { @@ -62,106 +53,12 @@ struct ClBackendContext::ClContextControlWrapper { // There are no loaded networks left, so clear the CL cache to free up memory m_ClContextControl.ClearClCache(); - m_Initialised = false; } } -private: - bool m_Initialised; ClContextControl m_ClContextControl; - }; -/** - * Returns a shared_ptr to the CLContextControlWrapper. This wraps the CLContextControl and ensures that we only create - * and use one at a time. - */ -std::shared_ptr ClBackendContext::Get() -{ - static std::shared_ptr instance - = std::make_shared(); - // Instantiated on first use. - return instance; -} - -std::string LowerString(std::string value) -{ - std::transform(value.begin(), value.end(), value.begin(), - [](unsigned char c){ return std::tolower(c); }); - - return value; -} - -enum class TuningLevel -{ - None, - Rapid, - Normal, - Exhaustive -}; - - -TuningLevel ParseTuningLevel(const BackendOptions::Var& value, TuningLevel defaultValue) -{ - if (value.IsInt()) - { - int v = value.AsInt(); - if (v > static_cast(TuningLevel::Exhaustive) || - v < static_cast(TuningLevel::None)) - { - ARMNN_LOG(warning) << "Invalid GpuAcc tuning level ("<< v << ") selected. " - "Using default(" << static_cast(defaultValue) << ")"; - } else - { - return static_cast(v); - } - } - return defaultValue; -} - -bool ParseBoolean(const BackendOptions::Var& value, bool defaultValue) -{ - if (value.IsBool()) - { - return value.AsBool(); - } - return defaultValue; -} - -std::string ParseFile(const BackendOptions::Var& value, std::string defaultValue) -{ - if (value.IsString()) - { - return value.AsString(); - } - return defaultValue; -} - -void ConfigureTuner(arm_compute::CLTuner &tuner, TuningLevel level) -{ - tuner.set_tune_new_kernels(true); // Turn on tuning initially. - - switch (level) - { - case TuningLevel::Rapid: - ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Rapid (1)"; - tuner.set_tuner_mode(arm_compute::CLTunerMode::RAPID); - break; - case TuningLevel::Normal: - ARMNN_LOG(info) << "Gpu tuning is activated. 
TuningLevel: Normal (2)"; - tuner.set_tuner_mode(arm_compute::CLTunerMode::NORMAL); - break; - case TuningLevel::Exhaustive: - ARMNN_LOG(info) << "Gpu tuning is activated. TuningLevel: Exhaustive (3)"; - tuner.set_tuner_mode(arm_compute::CLTunerMode::EXHAUSTIVE); - break; - case TuningLevel::None: - default: - tuner.set_tune_new_kernels(false); // Turn off tuning. Set to "use" only mode. - break; - } -} - ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) : IBackendContext(options) , m_TuningFile() @@ -171,7 +68,6 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) arm_compute::CLTuner* tuner = nullptr; arm_compute::CLGEMMHeuristicsHandle* mlgoTuner = nullptr; bool useLegacyTunerAPI = options.m_GpuAccTunedParameters.get() != nullptr; - if (useLegacyTunerAPI) { auto clTunerParams = PolymorphicDowncast( @@ -217,17 +113,17 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) { if (name == "KernelProfilingEnabled") { - kernelProfiling |= ParseBoolean(value, false); + kernelProfiling |= ParseBooleanBackendOption(value, false); } else if (name == "TuningFile") { - m_TuningFile = ParseFile(value, ""); + m_TuningFile = ParseStringBackendOption(value, ""); } else if (name == "TuningLevel") { tuningLevel = ParseTuningLevel(value, defaultTuningLevel); } else if (name == "MLGOTuningFilePath") { - m_MLGOTuningFile = ParseFile(value, ""); + m_MLGOTuningFile = ParseStringBackendOption(value, ""); } }); @@ -272,12 +168,11 @@ ClBackendContext::ClBackendContext(const IRuntime::CreationOptions& options) tuner = m_Tuner.get(); } - m_ClContextControlWrapper = Get(); - - if (!m_ClContextControlWrapper->IsInitialised()) - { - m_ClContextControlWrapper->Init(tuner, mlgoTuner, kernelProfiling); - } + m_ClContextControlWrapper = std::make_unique( + tuner, + mlgoTuner, + kernelProfiling + ); } bool ClBackendContext::BeforeLoadNetwork(NetworkId) diff --git a/src/backends/cl/ClBackendContext.hpp b/src/backends/cl/ClBackendContext.hpp index 276067727b..659d47b7c2 100644 --- a/src/backends/cl/ClBackendContext.hpp +++ b/src/backends/cl/ClBackendContext.hpp @@ -31,11 +31,8 @@ public: private: std::mutex m_Mutex; - struct ClContextControlWrapper; - static std::shared_ptr Get(); - - std::shared_ptr m_ClContextControlWrapper; + std::unique_ptr m_ClContextControlWrapper; std::unordered_set m_NetworkIds; diff --git a/src/backends/cl/ClContextControl.cpp b/src/backends/cl/ClContextControl.cpp index fd2d0f53eb..34eca961b4 100644 --- a/src/backends/cl/ClContextControl.cpp +++ b/src/backends/cl/ClContextControl.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -166,55 +166,4 @@ void ClContextControl::ClearClCache() DoLoadOpenClRuntime(true); } -armnn::IGpuAccTunedParameters* IGpuAccTunedParameters::CreateRaw(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) -{ - return new ClTunedParameters(mode, tuningLevel); -} - -armnn::IGpuAccTunedParametersPtr IGpuAccTunedParameters::Create(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) -{ - return IGpuAccTunedParametersPtr(CreateRaw(mode, tuningLevel), &IGpuAccTunedParameters::Destroy); -} - -void IGpuAccTunedParameters::Destroy(IGpuAccTunedParameters* params) -{ - delete params; -} - -ClTunedParameters::ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode, - armnn::IGpuAccTunedParameters::TuningLevel tuningLevel) - : m_Mode(mode) - , m_TuningLevel(tuningLevel) - , m_Tuner(mode == ClTunedParameters::Mode::UpdateTunedParameters) -{ -} - -void ClTunedParameters::Load(const char* filename) -{ - try - { - m_Tuner.load_from_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to load tuned parameters file '") + filename + "': " + - e.what()); - } -} - -void ClTunedParameters::Save(const char* filename) const -{ - try - { - m_Tuner.save_to_file(filename); - } - catch (const std::exception& e) - { - throw armnn::Exception(std::string("Failed to save tuned parameters file to '") + filename + "': " + - e.what()); - } -} - } // namespace armnn diff --git a/src/backends/cl/ClContextControl.hpp b/src/backends/cl/ClContextControl.hpp index 4a640cdf22..7520d102a5 100644 --- a/src/backends/cl/ClContextControl.hpp +++ b/src/backends/cl/ClContextControl.hpp @@ -1,13 +1,10 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once -#include "armnn/IRuntime.hpp" - -#include -#include +#include namespace armnn { @@ -42,19 +39,4 @@ private: bool m_ProfilingEnabled; }; -class ClTunedParameters : public IGpuAccTunedParameters -{ -public: - ClTunedParameters(armnn::IGpuAccTunedParameters::Mode mode, armnn::IGpuAccTunedParameters::TuningLevel tuningLevel); - - virtual void Load(const char* filename); - virtual void Save(const char* filename) const; - - Mode m_Mode; - TuningLevel m_TuningLevel; - - arm_compute::CLTuner m_Tuner; - arm_compute::CLGEMMHeuristicsHandle m_HeuristicsHandle; -}; - } // namespace armnn diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp index 889a2ad5f3..a03a4e9ea6 100644 --- a/src/backends/cl/ClImportTensorHandle.hpp +++ b/src/backends/cl/ClImportTensorHandle.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index a61a5bb640..cb2d756037 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -22,6 +22,7 @@ #include "workloads/ClAdditionWorkload.hpp" #include "workloads/ClActivationWorkload.hpp" #include "workloads/ClArgMinMaxWorkload.hpp" +#include "workloads/ClBatchMatMulWorkload.hpp" #include "workloads/ClBatchNormalizationFloatWorkload.hpp" #include "workloads/ClBatchToSpaceNdWorkload.hpp" #include "workloads/ClCastWorkload.hpp" @@ -201,6 +202,12 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type, infos[1], *(PolymorphicDowncast(&descriptor)), reasonIfUnsupported); + case LayerType::BatchMatMul: + return IsBatchMatMulSupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::BatchNormalization: return IsBatchNormalizationSupported(infos[0], infos[1], @@ -640,6 +647,20 @@ bool ClLayerSupport::IsArgMinMaxSupported(const TensorInfo& input, descriptor); } +bool ClLayerSupport::IsBatchMatMulSupported(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor, + Optional reasonIfUnsupported) const +{ + FORWARD_WORKLOAD_VALIDATE_FUNC(ClBatchMatMulValidate, + reasonIfUnsupported, + inputX, + inputY, + output, + descriptor); +} + bool ClLayerSupport::IsBatchNormalizationSupported(const TensorInfo& input, const TensorInfo& output, const TensorInfo& mean, diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp index 27311f74aa..2d784e3df8 100644 --- a/src/backends/cl/ClLayerSupport.hpp +++ b/src/backends/cl/ClLayerSupport.hpp @@ -40,6 +40,12 @@ public: const ArgMinMaxDescriptor& descriptor, Optional reasonIfUnsupported = EmptyOptional()) const override; + bool IsBatchMatMulSupported(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor, + Optional reasonIfUnsupported = EmptyOptional()) const; + bool IsBatchNormalizationSupported(const TensorInfo& input, const TensorInfo& output, const TensorInfo& mean, diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp index f63f1faa07..3d750f9059 100644 --- a/src/backends/cl/ClTensorHandle.hpp +++ b/src/backends/cl/ClTensorHandle.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // #pragma once @@ -18,7 +18,7 @@ #include #include -#include +#include namespace armnn { diff --git a/src/backends/cl/ClTensorHandleFactory.cpp b/src/backends/cl/ClTensorHandleFactory.cpp index b8ee57f0bf..82e41d3ff6 100644 --- a/src/backends/cl/ClTensorHandleFactory.cpp +++ b/src/backends/cl/ClTensorHandleFactory.cpp @@ -108,12 +108,12 @@ bool ClTensorHandleFactory::SupportsSubTensors() const MemorySourceFlags ClTensorHandleFactory::GetExportFlags() const { - return m_ExportFlags; + return MemorySourceFlags(MemorySource::Undefined); } MemorySourceFlags ClTensorHandleFactory::GetImportFlags() const { - return m_ImportFlags; + return MemorySourceFlags(MemorySource::Undefined); } -} // namespace armnn \ No newline at end of file +} // namespace armnn diff --git a/src/backends/cl/ClTensorHandleFactory.hpp b/src/backends/cl/ClTensorHandleFactory.hpp index 3acab0bce7..8e1c7a8a02 100644 --- a/src/backends/cl/ClTensorHandleFactory.hpp +++ b/src/backends/cl/ClTensorHandleFactory.hpp @@ -24,8 +24,6 @@ public: ClTensorHandleFactory(std::shared_ptr mgr) : m_MemoryManager(mgr) - , m_ImportFlags(static_cast(MemorySource::Undefined)) - , m_ExportFlags(static_cast(MemorySource::Undefined)) {} std::unique_ptr CreateSubTensorHandle(ITensorHandle& parent, @@ -56,8 +54,6 @@ public: private: mutable std::shared_ptr m_MemoryManager; - MemorySourceFlags m_ImportFlags; - MemorySourceFlags m_ExportFlags; }; -} // namespace armnn \ No newline at end of file +} // namespace armnn diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index d0079abd38..6bf510a2ef 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -265,6 +265,11 @@ std::unique_ptr ClWorkloadFactory::CreateWorkload(LayerType type, auto argMinMaxQueueDescriptor = PolymorphicDowncast(&descriptor); return MakeWorkload(*argMinMaxQueueDescriptor, info, m_CLCompileContext); } + case LayerType::BatchMatMul : + { + auto batchMatMulQueueDescriptor = PolymorphicDowncast(&descriptor); + return std::make_unique(*batchMatMulQueueDescriptor, info, m_CLCompileContext); + } case LayerType::BatchNormalization : { auto batchNormalizationQueueDescriptor diff --git a/src/backends/cl/IClTensorHandle.hpp b/src/backends/cl/IClTensorHandle.hpp deleted file mode 100644 index 48cf5f57d6..0000000000 --- a/src/backends/cl/IClTensorHandle.hpp +++ /dev/null @@ -1,22 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// -#pragma once - -#include -#include - -namespace armnn -{ - -class IClTensorHandle : public IAclTensorHandle -{ -public: - virtual arm_compute::ICLTensor& GetTensor() = 0; - virtual arm_compute::ICLTensor const& GetTensor() const = 0; - virtual arm_compute::DataType GetDataType() const = 0; - virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) = 0; -}; - -} //namespace armnn \ No newline at end of file diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk index 6fda16db05..1f97ae7cc8 100644 --- a/src/backends/cl/backend.mk +++ b/src/backends/cl/backend.mk @@ -30,6 +30,7 @@ BACKEND_SOURCES := \ workloads/ClActivationWorkload.cpp \ workloads/ClAdditionWorkload.cpp \ workloads/ClArgMinMaxWorkload.cpp \ + workloads/ClBatchMatMulWorkload.cpp \ workloads/ClBatchNormalizationFloatWorkload.cpp \ workloads/ClBatchToSpaceNdWorkload.cpp \ workloads/ClCastWorkload.cpp \ diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt index ec1d0a6c2f..6568d48ce5 100644 --- a/src/backends/cl/test/CMakeLists.txt +++ b/src/backends/cl/test/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # @@ -8,6 +8,7 @@ list(APPEND armnnClBackendUnitTests_sources ClContextControlFixture.hpp ClContextSerializerTests.cpp ClCustomAllocatorTests.cpp + ClDefaultAllocatorTests.cpp ClCreateWorkloadTests.cpp ClEndToEndTests.cpp ClImportTensorHandleFactoryTests.cpp @@ -18,7 +19,6 @@ list(APPEND armnnClBackendUnitTests_sources ClOptimizedNetworkTests.cpp ClRuntimeTests.cpp ClWorkloadFactoryHelper.hpp - DefaultAllocatorTests.cpp Fp16SupportTest.cpp ICLTensorProxyTests.cpp OpenClTimerTest.cpp diff --git a/src/backends/cl/test/ClDefaultAllocatorTests.cpp b/src/backends/cl/test/ClDefaultAllocatorTests.cpp new file mode 100644 index 0000000000..411a480815 --- /dev/null +++ b/src/backends/cl/test/ClDefaultAllocatorTests.cpp @@ -0,0 +1,194 @@ +// +// Copyright © 2021, 2023 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include +#include +#include +#include +#include +#include +// Requires the OpenCl backend to be included (GpuAcc) +#include +#include +#include +#include +#include + +using namespace armnn; + + +namespace +{ + +TEST_SUITE("DefaultAllocatorTests") +{ + +TEST_CASE("DefaultAllocatorTest") +{ + float number = 3; + + TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32); + + // Create ArmNN runtime + IRuntime::CreationOptions options; // default options + auto customAllocator = std::make_shared(); + options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; + IRuntimePtr run = IRuntime::Create(options); + + // Creates structures for input & output + unsigned int numElements = inputTensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); + + auto* inputPtr = reinterpret_cast(alignedInputPtr); + std::fill_n(inputPtr, numElements, number); + CHECK(inputPtr[0] == 3); + + auto& backendRegistry = armnn::BackendRegistryInstance(); + backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); +} + +TEST_CASE("DefaultAllocatorTestMulti") +{ + float number = 3; + + TensorInfo inputTensorInfo(TensorShape({2, 1}), DataType::Float32); + + // Create ArmNN runtime + IRuntime::CreationOptions options; // default options + auto customAllocator = std::make_shared(); + options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; + IRuntimePtr run = IRuntime::Create(options); + + // Creates structures for input & output + unsigned int numElements = inputTensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); + void* alignedInputPtr2 = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); + + auto* inputPtr = reinterpret_cast(alignedInputPtr); + std::fill_n(inputPtr, numElements, number); + CHECK(inputPtr[0] == 3); + CHECK(inputPtr[1] == 3); + + auto* inputPtr2 = reinterpret_cast(alignedInputPtr2); + std::fill_n(inputPtr2, numElements, number); + CHECK(inputPtr2[0] == 3); + CHECK(inputPtr2[1] == 3); + + // No overlap + CHECK(inputPtr[0] == 3); + CHECK(inputPtr[1] == 3); + + auto& backendRegistry = armnn::BackendRegistryInstance(); + backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); +} + +TEST_CASE("DefaultAllocatorTestMock") +{ + // Create ArmNN runtime + IRuntime::CreationOptions options; // default options + IRuntimePtr run = IRuntime::Create(options); + + // Initialize Mock Backend + MockBackendInitialiser initialiser; + auto factoryFun = BackendRegistryInstance().GetFactory(MockBackend().GetIdStatic()); + ARMNN_ASSERT(factoryFun != nullptr); + auto backend = factoryFun(); + auto defaultAllocator = backend->GetDefaultAllocator(); + + // GetMemorySourceType + CHECK(defaultAllocator->GetMemorySourceType() == MemorySource::Malloc); + + size_t totalBytes = 1 * sizeof(float); + // Allocate + void* ptr = defaultAllocator->allocate(totalBytes, 0); + + // GetMemoryRegionAtOffset + CHECK(defaultAllocator->GetMemoryRegionAtOffset(ptr, 0, 0)); + + // Free + defaultAllocator->free(ptr); + + // Clean up + auto& backendRegistry = armnn::BackendRegistryInstance(); + backendRegistry.Deregister(MockBackend().GetIdStatic()); + backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); +} + +} + + +TEST_SUITE("ClDefaultAllocatorTests") +{ + +TEST_CASE("ClDefaultAllocatorTest") +{ + float 
number = 3; + + TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32); + + // Create ArmNN runtime + IRuntime::CreationOptions options; // default options + auto customAllocator = std::make_shared(); + options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; + IRuntimePtr run = IRuntime::Create(options); + + // Creates structures for input & output + unsigned int numElements = inputTensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); + + auto* inputPtr = reinterpret_cast(alignedInputPtr); + std::fill_n(inputPtr, numElements, number); + CHECK(inputPtr[0] == 3); + + auto& backendRegistry = armnn::BackendRegistryInstance(); + backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); +} + +TEST_CASE("ClDefaultAllocatorTestMulti") +{ + float number = 3; + + TensorInfo inputTensorInfo(TensorShape({2, 1}), DataType::Float32); + + // Create ArmNN runtime + IRuntime::CreationOptions options; // default options + auto customAllocator = std::make_shared(); + options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; + IRuntimePtr run = IRuntime::Create(options); + + // Creates structures for input & output + unsigned int numElements = inputTensorInfo.GetNumElements(); + size_t totalBytes = numElements * sizeof(float); + + void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); + void* alignedInputPtr2 = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); + + auto* inputPtr = reinterpret_cast(alignedInputPtr); + std::fill_n(inputPtr, numElements, number); + CHECK(inputPtr[0] == 3); + CHECK(inputPtr[1] == 3); + + auto* inputPtr2 = reinterpret_cast(alignedInputPtr2); + std::fill_n(inputPtr2, numElements, number); + CHECK(inputPtr2[0] == 3); + CHECK(inputPtr2[1] == 3); + + // No overlap + CHECK(inputPtr[0] == 3); + CHECK(inputPtr[1] == 3); + + auto& backendRegistry = armnn::BackendRegistryInstance(); + backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); +} + +} + +} // namespace armnn \ No newline at end of file diff --git a/src/backends/cl/test/ClLayerTests.cpp b/src/backends/cl/test/ClLayerTests.cpp index 855697c9be..4ba2a9ec3b 100644 --- a/src/backends/cl/test/ClLayerTests.cpp +++ b/src/backends/cl/test/ClLayerTests.cpp @@ -73,6 +73,29 @@ ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Tanh, ClContextControlFixture, TanhTest) // Elu Activation ARMNN_AUTO_TEST_FIXTURE_WITH_THF(Elu, ClContextControlFixture, EluTest) +// Batch Mat Mul +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DSimpleFloat32, + ClContextControlFixture, + BatchMatMul2DSimpleTest); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DSimpleFloat32, + ClContextControlFixture, + BatchMatMul3DSimpleTest); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DBatchFloat32, + ClContextControlFixture, + BatchMatMul3DBatchTest); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3DBroadcastFloat32, + ClContextControlFixture, + BatchMatMul3DBroadcastTest); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul3D2DBroadcastFloat32, + ClContextControlFixture, + BatchMatMul3D2DBroadcastTest); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DTinyFloat32, + ClContextControlFixture, + BatchMatMul2DTinyTest); +ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchMatMul2DTranspSimpleFloat32, + ClContextControlFixture, + BatchMatMul2DTranspSimpleTest); + // Batch To Space ARMNN_AUTO_TEST_FIXTURE_WITH_THF(BatchToSpaceNdNhwcFloat321, ClContextControlFixture, diff --git 
a/src/backends/cl/test/DefaultAllocatorTests.cpp b/src/backends/cl/test/DefaultAllocatorTests.cpp deleted file mode 100644 index eaa30c8800..0000000000 --- a/src/backends/cl/test/DefaultAllocatorTests.cpp +++ /dev/null @@ -1,194 +0,0 @@ -// -// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include -#include -#include -#include -#include -#include -// Requires the OpenCl backend to be included (GpuAcc) -#include -#include -#include -#include -#include - -using namespace armnn; - - -namespace -{ - -TEST_SUITE("DefaultAllocatorTests") -{ - -TEST_CASE("DefaultAllocatorTest") -{ - float number = 3; - - TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32); - - // Create ArmNN runtime - IRuntime::CreationOptions options; // default options - auto customAllocator = std::make_shared(); - options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; - IRuntimePtr run = IRuntime::Create(options); - - // Creates structures for input & output - unsigned int numElements = inputTensorInfo.GetNumElements(); - size_t totalBytes = numElements * sizeof(float); - - void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); - - auto* inputPtr = reinterpret_cast(alignedInputPtr); - std::fill_n(inputPtr, numElements, number); - CHECK(inputPtr[0] == 3); - - auto& backendRegistry = armnn::BackendRegistryInstance(); - backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); -} - -TEST_CASE("DefaultAllocatorTestMulti") -{ - float number = 3; - - TensorInfo inputTensorInfo(TensorShape({2, 1}), DataType::Float32); - - // Create ArmNN runtime - IRuntime::CreationOptions options; // default options - auto customAllocator = std::make_shared(); - options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; - IRuntimePtr run = IRuntime::Create(options); - - // Creates structures for input & output - unsigned int numElements = inputTensorInfo.GetNumElements(); - size_t totalBytes = numElements * sizeof(float); - - void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); - void* alignedInputPtr2 = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); - - auto* inputPtr = reinterpret_cast(alignedInputPtr); - std::fill_n(inputPtr, numElements, number); - CHECK(inputPtr[0] == 3); - CHECK(inputPtr[1] == 3); - - auto* inputPtr2 = reinterpret_cast(alignedInputPtr2); - std::fill_n(inputPtr2, numElements, number); - CHECK(inputPtr2[0] == 3); - CHECK(inputPtr2[1] == 3); - - // No overlap - CHECK(inputPtr[0] == 3); - CHECK(inputPtr[1] == 3); - - auto& backendRegistry = armnn::BackendRegistryInstance(); - backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); -} - -TEST_CASE("DefaultAllocatorTestMock") -{ - // Create ArmNN runtime - IRuntime::CreationOptions options; // default options - IRuntimePtr run = IRuntime::Create(options); - - // Initialize Mock Backend - MockBackendInitialiser initialiser; - auto factoryFun = BackendRegistryInstance().GetFactory(MockBackend().GetIdStatic()); - ARMNN_ASSERT(factoryFun != nullptr); - auto backend = factoryFun(); - auto defaultAllocator = backend->GetDefaultAllocator(); - - // GetMemorySourceType - CHECK(defaultAllocator->GetMemorySourceType() == MemorySource::Malloc); - - size_t totalBytes = 1 * sizeof(float); - // Allocate - void* ptr = defaultAllocator->allocate(totalBytes, 0); - - // GetMemoryRegionAtOffset - CHECK(defaultAllocator->GetMemoryRegionAtOffset(ptr, 0, 0)); - - // Free - defaultAllocator->free(ptr); 
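// For reference, the allocator contract exercised above is: allocate() returns
// suitably aligned memory, GetMemoryRegionAtOffset() wraps that memory without
// releasing it, and the caller remains responsible for free(), as this test shows.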
- - // Clean up - auto& backendRegistry = armnn::BackendRegistryInstance(); - backendRegistry.Deregister(MockBackend().GetIdStatic()); - backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); -} - -} - - -TEST_SUITE("ClDefaultAllocatorTests") -{ - -TEST_CASE("ClDefaultAllocatorTest") -{ - float number = 3; - - TensorInfo inputTensorInfo(TensorShape({1, 1}), DataType::Float32); - - // Create ArmNN runtime - IRuntime::CreationOptions options; // default options - auto customAllocator = std::make_shared(); - options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; - IRuntimePtr run = IRuntime::Create(options); - - // Creates structures for input & output - unsigned int numElements = inputTensorInfo.GetNumElements(); - size_t totalBytes = numElements * sizeof(float); - - void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); - - auto* inputPtr = reinterpret_cast(alignedInputPtr); - std::fill_n(inputPtr, numElements, number); - CHECK(inputPtr[0] == 3); - - auto& backendRegistry = armnn::BackendRegistryInstance(); - backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); -} - -TEST_CASE("ClDefaultAllocatorTestMulti") -{ - float number = 3; - - TensorInfo inputTensorInfo(TensorShape({2, 1}), DataType::Float32); - - // Create ArmNN runtime - IRuntime::CreationOptions options; // default options - auto customAllocator = std::make_shared(); - options.m_CustomAllocatorMap = {{"GpuAcc", std::move(customAllocator)}}; - IRuntimePtr run = IRuntime::Create(options); - - // Creates structures for input & output - unsigned int numElements = inputTensorInfo.GetNumElements(); - size_t totalBytes = numElements * sizeof(float); - - void* alignedInputPtr = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); - void* alignedInputPtr2 = options.m_CustomAllocatorMap["GpuAcc"]->allocate(totalBytes, 0); - - auto* inputPtr = reinterpret_cast(alignedInputPtr); - std::fill_n(inputPtr, numElements, number); - CHECK(inputPtr[0] == 3); - CHECK(inputPtr[1] == 3); - - auto* inputPtr2 = reinterpret_cast(alignedInputPtr2); - std::fill_n(inputPtr2, numElements, number); - CHECK(inputPtr2[0] == 3); - CHECK(inputPtr2[1] == 3); - - // No overlap - CHECK(inputPtr[0] == 3); - CHECK(inputPtr[1] == 3); - - auto& backendRegistry = armnn::BackendRegistryInstance(); - backendRegistry.DeregisterAllocator(ClBackend::GetIdStatic()); -} - -} - -} // namespace armnn \ No newline at end of file diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt index aef7fc7ad2..8616dec078 100644 --- a/src/backends/cl/workloads/CMakeLists.txt +++ b/src/backends/cl/workloads/CMakeLists.txt @@ -12,6 +12,8 @@ list(APPEND armnnClBackendWorkloads_sources ClAdditionWorkload.hpp ClArgMinMaxWorkload.cpp ClArgMinMaxWorkload.hpp + ClBatchMatMulWorkload.cpp + ClBatchMatMulWorkload.hpp ClBatchNormalizationFloatWorkload.cpp ClBatchNormalizationFloatWorkload.hpp ClBatchToSpaceNdWorkload.cpp diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp new file mode 100644 index 0000000000..4acdef5e5c --- /dev/null +++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.cpp @@ -0,0 +1,203 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include "ClBatchMatMulWorkload.hpp" + +#include "ClWorkloadUtils.hpp" + +#include +#include + +#include + +#include + +#include + +#include + +#include +#include + + +namespace armnn +{ +arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor) +{ + if (descriptor.m_AdjointX || descriptor.m_AdjointY ) + { + throw Exception("Support for adjoint not implemented."); + } + if (descriptor.m_DataLayoutX != armnn::DataLayout::NCHW || descriptor.m_DataLayoutY != armnn::DataLayout::NCHW ) + { + throw Exception("Only supported the MatMul in the last 2 dimensions"); + } + + arm_compute::Status statusGEMM = arm_compute::Status(arm_compute::ErrorCode::OK); + arm_compute::Status statusPermuteX = arm_compute::Status(arm_compute::ErrorCode::OK); + arm_compute::Status statusPermuteY = arm_compute::Status(arm_compute::ErrorCode::OK); + + const auto aclInputXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputX, descriptor.m_DataLayoutX); + const auto aclInputYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(inputY, descriptor.m_DataLayoutY); + const auto aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo(); + arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo(); + + if (descriptor.m_TransposeX == true) + { + auto permutationXVector = GeneratePermutationVectorOnLastTwoDimensions(inputX.GetNumDimensions()); + const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector); + const TensorInfo permutedXInfo = armnnUtils::Permuted(inputX, permutationXVector); + aclPermutedXInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedXInfo); + + statusPermuteX = arm_compute::CLPermute::validate(&aclInputXInfo, + &aclPermutedXInfo, + aclPermutationXVector); + } + + if ( descriptor.m_TransposeY == true) + { + auto permutationYVector = GeneratePermutationVectorOnLastTwoDimensions(inputY.GetNumDimensions()); + const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector); + const TensorInfo permutedYInfo = armnnUtils::Permuted(inputY, permutationYVector); + aclPermutedYInfo = armcomputetensorutils::BuildArmComputeTensorInfo(permutedYInfo); + + statusPermuteY = arm_compute::CLPermute::validate(&aclInputYInfo, + &aclPermutedYInfo, + aclPermutationYVector); + + } + + const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped + false, // is inputY reshaped + false); // is inputY reshaped only 1st run + + + statusGEMM = arm_compute::CLGEMM::validate(descriptor.m_TransposeX ? &aclPermutedXInfo : &aclInputXInfo, + descriptor.m_TransposeY ? &aclPermutedYInfo : &aclInputYInfo, + nullptr, + &aclOutputInfo, + 1.0, + 0, + gemm_info); + + if (statusPermuteX.error_code() == arm_compute::ErrorCode::OK && + statusPermuteY.error_code() == arm_compute::ErrorCode::OK && + statusGEMM.error_code() == arm_compute::ErrorCode::OK) + { + return arm_compute::Status(arm_compute::ErrorCode::OK, + "All Batch Mat Mul layers validate status OK."); + } + else + { + return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, + "BatchMatMul layer validate status failed." 
+ + statusGEMM.error_description() + + statusPermuteX.error_description() + + statusPermuteY.error_description()); + } + +} + +ClBatchMatMulWorkload::ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext) + : ClBaseWorkload(descriptor, info) +{ + // Report Profiling Details + ARMNN_REPORT_PROFILING_WORKLOAD_DESC("ClBatchMatMulWorkload_Construct", + descriptor.m_Parameters, + info, + this->GetGuid()); + + if (descriptor.m_Parameters.m_AdjointX || descriptor.m_Parameters.m_AdjointY ) + { + throw Exception("Support for adjoint not implemented."); + } + if (descriptor.m_Parameters.m_DataLayoutX != armnn::DataLayout::NCHW || + descriptor.m_Parameters.m_DataLayoutY != armnn::DataLayout::NCHW ) + { + throw Exception("Only supported the MatMul in the last 2 dimensions"); + } + + m_Data.ValidateInputsOutputs("ClBatchMatMulWorkload", 2, 1); + + const arm_compute::ICLTensor& inputX = PolymorphicDowncast(m_Data.m_Inputs[0])->GetTensor(); + const arm_compute::ICLTensor& inputY = PolymorphicDowncast(m_Data.m_Inputs[1])->GetTensor(); + arm_compute::ICLTensor& output = PolymorphicDowncast(m_Data.m_Outputs[0])->GetTensor(); + + inputX.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutX)); + inputY.info()->set_data_layout(armcomputetensorutils::ConvertDataLayout(m_Data.m_Parameters.m_DataLayoutY)); + + arm_compute::TensorInfo aclPermutedXInfo = arm_compute::TensorInfo(); + arm_compute::TensorInfo aclPermutedYInfo = arm_compute::TensorInfo(); + + if (descriptor.m_Parameters.m_TransposeX == true) + { + armnn::PermutationVector permutationXVector + = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions()); + const TensorInfo permutedXInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationXVector); + const auto aclPermutationXVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationXVector); + armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorX, permutedXInfo); + armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorX); + + auto permuteLayerX = std::make_unique(); + permuteLayerX->configure(clCompileContext, + &inputX, + &m_PermutedTensorX, + aclPermutationXVector); + m_PermuteLayerX.reset(permuteLayerX.release()); + } + + if (descriptor.m_Parameters.m_TransposeY == true) + { + armnn::PermutationVector permutationYVector + = GeneratePermutationVectorOnLastTwoDimensions(info.m_InputTensorInfos[0].GetNumDimensions()); + const TensorInfo permutedYInfo = armnnUtils::Permuted(info.m_InputTensorInfos[0], permutationYVector); + const auto aclPermutationYVector = armcomputetensorutils::BuildArmComputePermutationVector(permutationYVector); + armcomputetensorutils::BuildArmComputeTensor(m_PermutedTensorY, permutedYInfo); + armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_PermutedTensorY); + + std::unique_ptr permuteLayerY(new arm_compute::CLPermute()); + permuteLayerY->configure(clCompileContext, + &inputY, + &m_PermutedTensorY, + aclPermutationYVector); + m_PermuteLayerY.reset(permuteLayerY.release()); + } + + const arm_compute::GEMMInfo& gemm_info = arm_compute::GEMMInfo(false, // is inputX reshaped + false, // is inputY reshaped + false); // is inputY reshaped only 1st run + auto gemmLayer = std::make_unique(); + gemmLayer->configure(clCompileContext, + descriptor.m_Parameters.m_TransposeX ? &m_PermutedTensorX : &inputX, + descriptor.m_Parameters.m_TransposeY ? 
&m_PermutedTensorY : &inputY, + nullptr, + &output, + 1.0, + 0, + gemm_info); + m_GEMMLayer.reset(gemmLayer.release()); +} + +void ClBatchMatMulWorkload::Execute() const +{ + ARMNN_SCOPED_PROFILING_EVENT_CL_GUID("ClBatchMatMulWorkload_Execute", this->GetGuid()); + if (m_PermuteLayerX) + { + m_PermuteLayerX->run(); + } + if (m_PermuteLayerY) + { + m_PermuteLayerY->run(); + } + m_GEMMLayer->run(); +} +} //namespace armnn diff --git a/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp b/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp new file mode 100644 index 0000000000..5277efc947 --- /dev/null +++ b/src/backends/cl/workloads/ClBatchMatMulWorkload.hpp @@ -0,0 +1,41 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "ClBaseWorkload.hpp" + +#include +#include +#include + +namespace armnn +{ + arm_compute::Status ClBatchMatMulValidate(const TensorInfo& inputX, + const TensorInfo& inputY, + const TensorInfo& output, + const BatchMatMulDescriptor& descriptor); + + class ClBatchMatMulWorkload : public ClBaseWorkload + { + public: + ClBatchMatMulWorkload(const BatchMatMulQueueDescriptor& descriptor, + const WorkloadInfo& info, + const arm_compute::CLCompileContext& clCompileContext); + virtual void Execute() const override; + + private: + // ACL layers required to fully form a Batch Mat Mul layer. + std::unique_ptr m_GEMMLayer; + std::unique_ptr m_PermuteLayerX; + std::unique_ptr m_PermuteLayerY; + + // Additional CL arm_compute::Tensors. + // Required to perform permutations. + arm_compute::CLTensor m_PermutedTensorX; + arm_compute::CLTensor m_PermutedTensorY; + + }; +} //namespace armnn diff --git a/src/backends/cl/workloads/ClWorkloads.hpp b/src/backends/cl/workloads/ClWorkloads.hpp index c3a79b7583..44f3798d7d 100644 --- a/src/backends/cl/workloads/ClWorkloads.hpp +++ b/src/backends/cl/workloads/ClWorkloads.hpp @@ -10,6 +10,7 @@ #include "ClArgMinMaxWorkload.hpp" #include "ClComparisonWorkload.hpp" #include "ClConstantWorkload.hpp" +#include "ClBatchMatMulWorkload.hpp" #include "ClBatchNormalizationFloatWorkload.hpp" #include "ClBatchToSpaceNdWorkload.hpp" #include "ClCastWorkload.hpp" diff --git a/src/backends/dynamic/reference/CMakeLists.txt b/src/backends/dynamic/reference/CMakeLists.txt index de46f7a5cb..fe875282f5 100644 --- a/src/backends/dynamic/reference/CMakeLists.txt +++ b/src/backends/dynamic/reference/CMakeLists.txt @@ -1,9 +1,9 @@ # -# Copyright © 2017 Arm Ltd. All rights reserved. +# Copyright © 2017, 2023 Arm Ltd. All rights reserved. 
# SPDX-License-Identifier: MIT # -if(NOT BUILD_BARE_METAL) +if((NOT BUILD_BARE_METAL) AND (NOT EXECUTE_NETWORK_STATIC)) # File needed to wrap the existing backend into a dynamic one list(APPEND armnnRefDynamicBackend_sources @@ -33,5 +33,5 @@ target_include_directories(Arm_CpuRef_backend PRIVATE ${PROJECT_SOURCE_DIR}/prof set_target_properties(Arm_CpuRef_backend PROPERTIES PREFIX "") target_link_libraries(Arm_CpuRef_backend armnn) -# BUILD_BARE_METAL +# BUILD_BARE_METAL && EXECUTE_NETWORK_STATIC endif() diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index 4c97855668..ee155a2c64 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -90,6 +90,19 @@ namespace armnn namespace { +const TensorInfo OverrideDataType(const TensorInfo& info, Optional type) +{ + if (!type) + { + return info; + } + return TensorInfo(info.GetShape(), + type.value(), + info.GetQuantizationScale(), + info.GetQuantizationOffset(), + info.IsConstant()); +} + template< typename ... Args> bool IsNeonBackendSupported(Optional reasonIfUnsupported, Args... args) { @@ -151,61 +164,64 @@ NeonLayerSupport::NeonLayerSupport() { } -bool NeonLayerSupport::IsLayerSupported(const LayerType& type, - const std::vector& infos, - const BaseDescriptor& descriptor, - const Optional& lstmParamsInfo, - const Optional& quantizedLstmParamsInfo, - Optional reasonIfUnsupported) const +bool IsLayerTypeSupported(const LayerType& type, + const std::vector& infos, + const BaseDescriptor& descriptor, + const Optional& lstmParamsInfo, + const Optional& quantizedLstmParamsInfo, + Optional reasonIfUnsupported, + const NeonLayerSupport& support) { switch (type) { case LayerType::Activation: - return IsActivationSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsActivationSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Addition: - return IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::ArgMinMax: - return IsArgMinMaxSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsArgMinMaxSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::BatchMatMul: - return IsBatchMatMulSupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsBatchMatMulSupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::BatchNormalization: - return IsBatchNormalizationSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - *(PolymorphicDowncast - (&descriptor)), - reasonIfUnsupported); + return support.IsBatchNormalizationSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::BatchToSpaceNd: - return IsBatchToSpaceNdSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsBatchToSpaceNdSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Cast: - return IsCastSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsCastSupported(infos[0], infos[1], 
reasonIfUnsupported); case LayerType::ChannelShuffle: - return IsChannelShuffleSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsChannelShuffleSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Comparison: - return IsComparisonSupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsComparisonSupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Concat: { std::vector inputInfos; @@ -213,17 +229,17 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, { inputInfos.push_back(&infos[i]); } - return IsConcatSupported(inputInfos, - infos[infos.size() - 1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsConcatSupported(inputInfos, + infos[infos.size() - 1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); } case LayerType::Constant: - return IsConstantSupported(infos[0], reasonIfUnsupported); + return support.IsConstantSupported(infos[0], reasonIfUnsupported); case LayerType::ConvertFp16ToFp32: - return IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported); + return support.IsConvertFp16ToFp32Supported(infos[0], infos[1], reasonIfUnsupported); case LayerType::ConvertFp32ToFp16: - return IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported); + return support.IsConvertFp32ToFp16Supported(infos[0], infos[1], reasonIfUnsupported); case LayerType::Convolution2d: { if (infos.size() != 4) @@ -235,21 +251,21 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast(&descriptor)); if (infos[3] == TensorInfo()) { - return IsConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return IsConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::Convolution3d: @@ -263,28 +279,28 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast(&descriptor)); if (infos[3] == TensorInfo()) { - return IsConvolution3dSupported(infos[0], - infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsConvolution3dSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return IsConvolution3dSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsConvolution3dSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::DepthToSpace: - return IsDepthToSpaceSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsDepthToSpaceSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::DepthwiseConvolution2d: { if (infos.size() != 4) @@ -296,217 +312,223 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast(&descriptor)); if (infos[3] == TensorInfo()) { - return IsDepthwiseConvolutionSupported(infos[0], 
- infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsDepthwiseConvolutionSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return IsDepthwiseConvolutionSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsDepthwiseConvolutionSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::Dequantize: - return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::DetectionPostProcess: { auto desc = *(PolymorphicDowncast(&descriptor)); - return LayerSupportBase::IsDetectionPostProcessSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - infos[6], - desc, - reasonIfUnsupported); + return support.IsDetectionPostProcessSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + infos[6], + desc, + reasonIfUnsupported); } case LayerType::Division: - return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::ElementwiseUnary: - return IsElementwiseUnarySupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsElementwiseUnarySupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Fill: - return IsFillSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsFillSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Floor: - return IsFloorSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsFloorSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::FullyConnected: - return IsFullyConnectedSupported(infos[0], + return support.IsFullyConnectedSupported(infos[0], + infos[1], + infos[2], + infos[3], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); + case LayerType::Gather: + return support.IsGatherSupported(infos[0], infos[1], infos[2], - infos[3], - *(PolymorphicDowncast(&descriptor)), + *(PolymorphicDowncast(&descriptor)), reasonIfUnsupported); - case LayerType::Gather: - return IsGatherSupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); case LayerType::GatherNd: - return IsGatherNdSupported(infos[0], - infos[1], - infos[2], - reasonIfUnsupported); + return support.IsGatherNdSupported(infos[0], + infos[1], + infos[2], + reasonIfUnsupported); case LayerType::Input: - return IsInputSupported(infos[0], reasonIfUnsupported); + return support.IsInputSupported(infos[0], reasonIfUnsupported); case LayerType::InstanceNormalization: - return IsInstanceNormalizationSupported(infos[0], - infos[1], - *(PolymorphicDowncast - (&descriptor)), - reasonIfUnsupported); + return support.IsInstanceNormalizationSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::L2Normalization: - return IsL2NormalizationSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsL2NormalizationSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::LogicalBinary: - return 
IsLogicalBinarySupported(infos[0], - infos[1], - infos[2], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsLogicalBinarySupported(infos[0], + infos[1], + infos[2], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::LogSoftmax: - return IsLogSoftmaxSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsLogSoftmaxSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Lstm: - return IsLstmSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - infos[6], - *(PolymorphicDowncast(&descriptor)), - lstmParamsInfo.value(), - reasonIfUnsupported); + return support.IsLstmSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + infos[6], + *(PolymorphicDowncast(&descriptor)), + lstmParamsInfo.value(), + reasonIfUnsupported); case LayerType::Map: return true; case LayerType::Maximum: - return IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsMaximumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Mean: - return IsMeanSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsMeanSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::MemCopy: - return LayerSupportBase::IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsMemCopySupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::MemImport: - return LayerSupportBase::IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported); + return support.IsMemImportSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::Merge: - return LayerSupportBase::IsMergeSupported(infos[0], + return support.IsMergeSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Minimum: - return IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsMinimumSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Multiplication: - return IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsMultiplicationSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Normalization: - return IsNormalizationSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsNormalizationSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Output: - return IsOutputSupported(infos[0], reasonIfUnsupported); + return support.IsOutputSupported(infos[0], reasonIfUnsupported); case LayerType::Pad: - return IsPadSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsPadSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Permute: - return IsPermuteSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsPermuteSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Pooling2d: - return IsPooling2dSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsPooling2dSupported(infos[0], + infos[1], + 
*(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Pooling3d: - return IsPooling3dSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsPooling3dSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Prelu: - return IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsPreluSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::QLstm: - return IsQLstmSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - *(PolymorphicDowncast(&descriptor)), - lstmParamsInfo.value(), - reasonIfUnsupported); - case LayerType::Quantize: - return IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported); - case LayerType::QuantizedLstm: - return IsQuantizedLstmSupported(infos[0], + return support.IsQLstmSupported(infos[0], infos[1], infos[2], infos[3], infos[4], - quantizedLstmParamsInfo.value(), + infos[5], + *(PolymorphicDowncast(&descriptor)), + lstmParamsInfo.value(), reasonIfUnsupported); + case LayerType::Quantize: + return support.IsQuantizeSupported(infos[0], infos[1], reasonIfUnsupported); + case LayerType::QuantizedLstm: + return support.IsQuantizedLstmSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + quantizedLstmParamsInfo.value(), + reasonIfUnsupported); case LayerType::Rank: return true; case LayerType::Reshape: - return IsReshapeSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsReshapeSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Resize: - return IsResizeSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsResizeSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Reduce: - return IsReduceSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsReduceSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Shape: - return LayerSupportBase::IsShapeSupported(infos[0], - infos[1], - reasonIfUnsupported); + return support.IsShapeSupported(infos[0], + infos[1], + reasonIfUnsupported); case LayerType::Slice: - return IsSliceSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsSliceSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Softmax: - return IsSoftmaxSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsSoftmaxSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::SpaceToBatchNd: - return IsSpaceToBatchNdSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsSpaceToBatchNdSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::SpaceToDepth: - return IsSpaceToDepthSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsSpaceToDepthSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Splitter: { std::vector outputInfos; @@ -514,10 +536,10 @@ 
bool NeonLayerSupport::IsLayerSupported(const LayerType& type, { outputInfos.push_back(infos[i]); } - return IsSplitterSupported(infos[0], - {outputInfos.begin(), outputInfos.end()}, - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsSplitterSupported(infos[0], + {outputInfos.begin(), outputInfos.end()}, + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); } case LayerType::Stack: { @@ -526,23 +548,23 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, { inputInfos.push_back(&infos[i]); } - return IsStackSupported(inputInfos, - infos[infos.size() - 1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsStackSupported(inputInfos, + infos[infos.size() - 1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); } case LayerType::StridedSlice: - return IsStridedSliceSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsStridedSliceSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::Subtraction: - return IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + return support.IsSubtractionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); case LayerType::Transpose: - return IsTransposeSupported(infos[0], - infos[1], - *(PolymorphicDowncast(&descriptor)), - reasonIfUnsupported); + return support.IsTransposeSupported(infos[0], + infos[1], + *(PolymorphicDowncast(&descriptor)), + reasonIfUnsupported); case LayerType::TransposeConvolution2d: { if (infos.size() != 4) @@ -554,34 +576,36 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, auto desc = *(PolymorphicDowncast(&descriptor)); if (infos[3] == TensorInfo()) { - return IsTransposeConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - EmptyOptional(), - reasonIfUnsupported); + return support.IsTransposeConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + EmptyOptional(), + reasonIfUnsupported); } else { - return IsTransposeConvolution2dSupported(infos[0], - infos[1], - desc, - infos[2], - infos[3], - reasonIfUnsupported); + return support.IsTransposeConvolution2dSupported(infos[0], + infos[1], + desc, + infos[2], + infos[3], + reasonIfUnsupported); } } case LayerType::UnidirectionalSequenceLstm: - return IsUnidirectionalSequenceLstmSupported(infos[0], - infos[1], - infos[2], - infos[3], - infos[4], - infos[5], - *(PolymorphicDowncast(&descriptor)), - lstmParamsInfo.value(), - reasonIfUnsupported); + { + auto desc = *(PolymorphicDowncast(&descriptor)); + return support.IsUnidirectionalSequenceLstmSupported(infos[0], + infos[1], + infos[2], + infos[3], + infos[4], + infos[5], + desc, + lstmParamsInfo.value(), + reasonIfUnsupported); + } case LayerType::Unmap: return true; default: @@ -592,6 +616,54 @@ bool NeonLayerSupport::IsLayerSupported(const LayerType& type, } } +bool NeonLayerSupport::IsLayerSupported(const LayerType& type, + const std::vector& infos, + const BaseDescriptor& descriptor, + const Optional& lstmParamsInfo, + const Optional& quantizedLstmParamsInfo, + Optional reasonIfUnsupported) const +{ + bool isSupported = IsLayerTypeSupported(type, + infos, + descriptor, + lstmParamsInfo, + quantizedLstmParamsInfo, + reasonIfUnsupported, + *this); + + // For android-nn-driver and support library, to run FP16 operations on CpuAcc we need at least v8.2 + // architecture. 
If the available architecture is older than v8.2, we can check whether the operator is + // supported by changing the operator's inputs & outputs to be FP32. + // This does not change the operator datatype in the above parsers to FP32. We are simply reporting + // to the parsers whether the operator can be supported in ArmNN. We will then re-enter ArmNN (Network.cpp) + // where we will recheck IsLayerSupported() on the FP16 datatype, update the operator to be FP32, + // and insert convert layers around the FP32 operator. + if (reasonIfUnsupported.has_value()) + { + std::string checkStr = "This CPU architecture does not support F16 data type, you need v8.2 or above"; + if (!isSupported + && reasonIfUnsupported.value().find(checkStr) != std::string::npos) + { + std::vector<TensorInfo> newInfos; + for (auto info: infos) + { + newInfos.emplace_back(OverrideDataType(info, DataType::Float32)); + } + + std::string tmpString; + return IsLayerTypeSupported(type, + newInfos, + descriptor, + lstmParamsInfo, + quantizedLstmParamsInfo, + tmpString, + *this); + } + } + + return isSupported; +} + bool NeonLayerSupport::IsActivationSupported(const TensorInfo& input, const TensorInfo& output, const ActivationDescriptor& descriptor, diff --git a/src/backends/tosaCommon/TosaMappings.cpp b/src/backends/tosaCommon/TosaMappings.cpp index 1452e4aefd..0b5fa1a158 100644 --- a/src/backends/tosaCommon/TosaMappings.cpp +++ b/src/backends/tosaCommon/TosaMappings.cpp @@ -24,8 +24,10 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer, switch (type) { case LayerType::Addition: + case LayerType::Multiplication: + case LayerType::Subtraction: { - return ConvertAdditionToTosaOperator(layer, inputs, outputs); + return ConvertElementwiseBinaryToTosaOperator(layer, type, inputs, outputs); } case LayerType::Concat: { @@ -77,6 +79,11 @@ TosaSerializationBasicBlock* GetTosaMapping(const Layer* layer, auto transposeConv2dDesc = PolymorphicDowncast(&descriptor); return ConvertTransposeConv2dToTosaOperator(layer, inputs, outputs, transposeConv2dDesc); } + case LayerType::Transpose: + { + auto transposeDesc = PolymorphicDowncast<const TransposeDescriptor*>(&descriptor); + return ConvertTransposeToTosaOperator(layer, inputs, outputs, transposeDesc); + } default: { return CreateEmptyTosaSerializationBasicBlock(); diff --git a/src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp b/src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp deleted file mode 100644 index 7014886d92..0000000000 --- a/src/backends/tosaCommon/operatorMappings/AdditionOperator.cpp +++ /dev/null @@ -1,72 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. -// SPDX-License-Identifier: MIT -// - -#include "AdditionOperator.hpp" - -TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer, - const std::vector& inputs, - const std::vector& outputs) -{ - std::string input0Name = std::string("input0_"); - std::string input1Name = std::string("input1_"); - std::string outputName = std::string("output0_"); - std::string blockName = std::string("Op_ADD_block_") + GetUniqueTosaMappingID(); - - // If a layer is present then the block will be used for execution, so input and output names need to be determined - // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. - if(layer != nullptr) - { - // Get the layers connected to the input slots and determine unique tensors names.
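
// [Editorial aside, not part of the patch] A hedged sketch of what the FP16 fall-back
// added to NeonLayerSupport::IsLayerSupported above means for a caller. The signature and
// the pattern of passing a std::string as the reason argument are taken from this patch;
// the include path, shapes, descriptor, and the pre-v8.2 scenario are illustrative
// assumptions.

#include <neon/NeonLayerSupport.hpp>  // backend-internal header; path is an assumption

#include <string>

bool Fp16FallbackProbe()
{
    using namespace armnn;

    NeonLayerSupport support;
    ActivationDescriptor desc;                      // default-constructed activation
    TensorInfo in ({1, 8}, DataType::Float16);
    TensorInfo out({1, 8}, DataType::Float16);
    std::string reason;

    // On a pre-v8.2 CPU the FP16 query initially fails with the "needs v8.2 or above"
    // message; the wrapper then repeats the query with both tensors overridden to
    // Float32 and returns that result, so the caller may still see "supported".
    // Network.cpp later switches the operator to FP32 and inserts the convert layers.
    return support.IsLayerSupported(LayerType::Activation,
                                    {in, out},
                                    desc,
                                    EmptyOptional(),    // no LSTM params
                                    EmptyOptional(),    // no quantized LSTM params
                                    reason);
}
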
- Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); - input0Name = GenerateUniqueName(connectedLayer0, 0); - - Layer& connectedLayer1 = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer(); - input1Name = GenerateUniqueName(connectedLayer1, 1); - - // Determine unique output tensor name. - outputName = GenerateUniqueOutputName(*layer, 0); - } - - auto* op = new TosaSerializationOperator(Op_ADD, - Attribute_NONE, - nullptr, - {input0Name, input1Name}, - {outputName}); - - - std::vector tensors; - - // Only add input tensors if connected layer is an input layer. - // As intermediate or constant tensors will be created separately. - // There also can't be duplicate tensor. - if(input0Name.find("input0_") != std::string::npos) - { - std::vector inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); - DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType()); - - tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {})); - } - - if(input1Name.find("input1_") != std::string::npos) - { - std::vector inputShape1 = GetTosaTensorShape(inputs[1]->GetShape()); - DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType()); - - tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {})); - } - - std::vector outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); - DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); - - tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); - - // operatorInputNames/operatorOutputNames ends up being the same as - // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings - return new TosaSerializationBasicBlock(blockName, // name - {op}, // operators - tensors, // tensors - {input0Name, input1Name}, // inputs - {outputName}); // outputs -} \ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp b/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp deleted file mode 100644 index 5eb7441531..0000000000 --- a/src/backends/tosaCommon/operatorMappings/AdditionOperator.hpp +++ /dev/null @@ -1,20 +0,0 @@ -// -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
-// SPDX-License-Identifier: MIT -// - -#pragma once - -#include "TosaOperatorUtils.hpp" - -#include - -#include - -using namespace armnn; -using namespace tosa; - -TosaSerializationBasicBlock* ConvertAdditionToTosaOperator(const Layer* layer, - const std::vector& inputs, - const std::vector& outputs); - diff --git a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt index 2443dc0585..2ec052cd43 100644 --- a/src/backends/tosaCommon/operatorMappings/CMakeLists.txt +++ b/src/backends/tosaCommon/operatorMappings/CMakeLists.txt @@ -4,8 +4,6 @@ # list(APPEND armnnTosaBackendOperators_sources - AdditionOperator.hpp - AdditionOperator.cpp AvgPool2DIgnoreValueOperator.hpp AvgPool2DIgnoreValueOperator.cpp ConcatOperator.hpp @@ -14,6 +12,8 @@ list(APPEND armnnTosaBackendOperators_sources ConstantOperator.cpp Conv2dOperator.hpp Conv2dOperator.cpp + ElementwiseBinaryOperator.hpp + ElementwiseBinaryOperator.cpp Pooling2DOperator.hpp Pooling2DOperator.cpp ReshapeOperator.hpp @@ -23,6 +23,8 @@ list(APPEND armnnTosaBackendOperators_sources TosaOperatorUtils.hpp TransposeConv2dOperator.hpp TransposeConv2dOperator.cpp + TransposeOperator.hpp + TransposeOperator.cpp ) add_library(armnnTosaBackendOperators OBJECT ${armnnTosaBackendOperators_sources}) diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp new file mode 100644 index 0000000000..9909e66a7d --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.cpp @@ -0,0 +1,103 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "ElementwiseBinaryOperator.hpp" + +TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer, + const LayerType type, + const std::vector& inputs, + const std::vector& outputs) +{ + std::string input0Name = std::string("input0_"); + std::string input1Name = std::string("input1_"); + std::string outputName = std::string("output0_"); + std::string blockName; + + // If a layer is present then the block will be used for execution, so input and output names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. + if(layer != nullptr) + { + // Get the layers connected to the input slots and determine unique tensor names. + Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); + input0Name = GenerateUniqueName(connectedLayer0, 0); + + Layer& connectedLayer1 = layer->GetInputSlot(1).GetConnectedOutputSlot()->GetOwningLayer(); + input1Name = GenerateUniqueName(connectedLayer1, 1); + + // Determine unique output tensor name. 
+ outputName = GenerateUniqueOutputName(*layer, 0); + } + + TosaSerializationOperator* op = nullptr; + switch(type) + { + case LayerType::Addition: + { + op = new TosaSerializationOperator(Op_ADD, + Attribute_NONE, + nullptr, + {input0Name, input1Name}, + {outputName}); + blockName = std::string("Op_ADD_block_") + GetUniqueTosaMappingID(); + break; + } + case LayerType::Multiplication: + { + int32_t shift = 0; + TosaMulAttribute mulAttribute(shift); + op = new TosaSerializationOperator(Op_MUL, + Attribute_MulAttribute, + &mulAttribute, + {input0Name, input1Name}, + {outputName}); + blockName = std::string("Op_MUL_block_") + GetUniqueTosaMappingID(); + break; + } + case LayerType::Subtraction: + { + op = new TosaSerializationOperator(Op_SUB, + Attribute_NONE, + nullptr, + {input0Name, input1Name}, + {outputName}); + blockName = std::string("Op_SUB_block_") + GetUniqueTosaMappingID(); + break; + } + default: + throw armnn::Exception("ConvertElementwiseBinaryToTosaOperator: Unsupported layer type."); + } + ARMNN_ASSERT(op != nullptr); + + std::vector tensors; + // Only add input tensors if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. + // There also can't be duplicate tensor. + if(input0Name.find("input0_") != std::string::npos) + { + std::vector inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType()); + tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {})); + } + if(input1Name.find("input1_") != std::string::npos) + { + std::vector inputShape1 = GetTosaTensorShape(inputs[1]->GetShape()); + DType inputDType1 = ArmNNToDType(inputs[1]->GetDataType()); + tensors.push_back(new TosaSerializationTensor(input1Name, inputShape1, inputDType1, {})); + } + + std::vector outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); + + tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); + + // operatorInputNames/operatorOutputNames ends up being the same as + // blockInputNames/blockOutputNames for one-to-one ArmNN to Tosa mappings + return new TosaSerializationBasicBlock(blockName, // name + {op}, // operators + tensors, // tensors + {input0Name, input1Name}, // inputs + {outputName}); // outputs +} + diff --git a/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp new file mode 100644 index 0000000000..86031c6e06 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/ElementwiseBinaryOperator.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +#include + +#include + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertElementwiseBinaryToTosaOperator(const Layer* layer, + const LayerType type, + const std::vector& inputs, + const std::vector& outputs); \ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp index 052c54c3af..3f27371295 100644 --- a/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp +++ b/src/backends/tosaCommon/operatorMappings/TosaCommonOperators.hpp @@ -5,12 +5,13 @@ #pragma once -#include "AdditionOperator.hpp" +#include "AvgPool2DIgnoreValueOperator.hpp" #include "ConcatOperator.hpp" #include "ConstantOperator.hpp" #include "Conv2dOperator.hpp" -#include "AvgPool2DIgnoreValueOperator.hpp" +#include "ElementwiseBinaryOperator.hpp" #include "Pooling2DOperator.hpp" #include "ReshapeOperator.hpp" #include "SliceOperator.hpp" -#include "TransposeConv2dOperator.hpp" \ No newline at end of file +#include "TransposeConv2dOperator.hpp" +#include "TransposeOperator.hpp" diff --git a/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp new file mode 100644 index 0000000000..56178e428b --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/TransposeOperator.cpp @@ -0,0 +1,65 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#include "TransposeOperator.hpp" + +TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer, + const std::vector& inputs, + const std::vector& outputs, + const TransposeDescriptor* transposeDescriptor) +{ + std::string input0Name = std::string("input0_"); + std::string outputName = std::string("output0_"); + std::string blockName = std::string("Op_TRANSPOSE_block_") + GetUniqueTosaMappingID(); + + // If a layer is present then the block will be used for execution, so input and output names need to be determined + // using the previous and following layers so the graph is connected correctly. For validation this doesn't matter. + if(layer != nullptr) + { + // Get the layers connected to the input slot and determine unique tensor name. + Layer& connectedLayer0 = layer->GetInputSlot(0).GetConnectedOutputSlot()->GetOwningLayer(); + input0Name = GenerateUniqueName(connectedLayer0, 0); + + // Determine unique output tensor name. + outputName = GenerateUniqueOutputName(*layer, 0); + } + + std::vector mappings(transposeDescriptor->m_DimMappings.begin(), + transposeDescriptor->m_DimMappings.end()); + TosaTransposeAttribute attribute(mappings); + + auto* op = new TosaSerializationOperator(Op_TRANSPOSE, + Attribute_TransposeAttribute, + &attribute, + {input0Name}, + {outputName}); + + + std::vector tensors; + + // Only add input tensors if connected layer is an input layer. + // As intermediate or constant tensors will be created separately. + // There also can't be duplicate tensor. 
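
// [Editorial aside, not part of the patch] The perms attribute built above uses the TOSA
// TRANSPOSE convention, which matches armnn::TransposeDescriptor::m_DimMappings: output
// dimension i takes its extent from input dimension perms[i]. A small sketch of that
// shape arithmetic; the helper name and the example shapes are illustrative only.

#include <cstdint>
#include <vector>

std::vector<int32_t> TransposedShape(const std::vector<int32_t>& input,
                                     const std::vector<int32_t>& perms)
{
    std::vector<int32_t> output(input.size());
    for (size_t i = 0; i < input.size(); ++i)
    {
        // Output dimension i is read from input dimension perms[i].
        output[i] = input[static_cast<size_t>(perms[i])];
    }
    return output;
}

// e.g. input {1, 1, 5, 3} with perms {0, 2, 1, 3} yields {1, 5, 1, 3}, matching the
// Transpose mapping tests later in this patch.
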
+ if(input0Name.find("input0_") != std::string::npos) + { + std::vector inputShape0 = GetTosaTensorShape(inputs[0]->GetShape()); + DType inputDType0 = ArmNNToDType(inputs[0]->GetDataType()); + + tensors.push_back(new TosaSerializationTensor(input0Name, inputShape0, inputDType0, {})); + } + + std::vector outputShape0 = GetTosaTensorShape(outputs[0]->GetShape()); + DType outputDType0 = ArmNNToDType(outputs[0]->GetDataType()); + + tensors.push_back(new TosaSerializationTensor(outputName, outputShape0, outputDType0, {})); + + // operatorInputNames/operatorOutputNames ends up being the same as + // blockInputNames/blockOutputNames for one-to-one ArmNN to TOSA mappings + return new TosaSerializationBasicBlock(blockName, // name + {op}, // operators + tensors, // tensors + {input0Name}, // inputs + {outputName}); // outputs +} \ No newline at end of file diff --git a/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp b/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp new file mode 100644 index 0000000000..3d1e2acd14 --- /dev/null +++ b/src/backends/tosaCommon/operatorMappings/TransposeOperator.hpp @@ -0,0 +1,20 @@ +// +// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// + +#pragma once + +#include "TosaOperatorUtils.hpp" + +#include + +#include + +using namespace armnn; +using namespace tosa; + +TosaSerializationBasicBlock* ConvertTransposeToTosaOperator(const Layer* layer, + const std::vector& inputs, + const std::vector& outputs, + const TransposeDescriptor* transposeDescriptor); diff --git a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp index b3ab14a774..4cc37918e5 100644 --- a/src/backends/tosaCommon/test/OneToOneMappingTests.cpp +++ b/src/backends/tosaCommon/test/OneToOneMappingTests.cpp @@ -253,6 +253,54 @@ TEST_CASE("GetTosaMappingFromLayer_Conv2dLayer") basicBlock, inputShape, outputShape, Op_CONV2D, Attribute_ConvAttribute, descriptor, LayerType::Convolution2d); } +TEST_CASE("GetTosaMapping_MultiplicationLayer") +{ + + const TensorInfo input0Info ({ 1, 2, 4, 2 }, DataType::Float32); + const TensorInfo input1Info ({ 1, 2, 4, 2 }, DataType::Float32); + const TensorInfo outputInfo ({ 1, 2, 4, 2 }, DataType::Float32); + + std::vector> inputShape = {{ 1, 2, 4, 2 }, { 1, 2, 4, 2 }}; + std::vector> outputShape = {{ 1, 2, 4, 2 }}; + + TosaSerializationBasicBlock* basicBlock = + GetTosaMapping(nullptr, LayerType::Multiplication, {&input0Info, &input1Info}, {&outputInfo}, BaseDescriptor()); + AssertTosaOneToOneMappingBasicBlock( basicBlock, inputShape, outputShape, + tosa::Op_MUL, tosa::Attribute_MulAttribute, BaseDescriptor(), LayerType::Multiplication); +} + +TEST_CASE("GetTosaMappingFromLayer_MultiplicationLayer") +{ + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+ INetworkPtr net(INetwork::Create()); + + IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); + IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); + IConnectableLayer* add = net->AddMultiplicationLayer("multiplication"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); + input1->GetOutputSlot(0).Connect(add->GetInputSlot(1)); + add->GetOutputSlot(0).Connect(output->GetInputSlot(0)); + + TensorInfo info = TensorInfo({ 2, 2 }, DataType::Float32, 0.0f, 0, true); + + input0->GetOutputSlot(0).SetTensorInfo(info); + input1->GetOutputSlot(0).SetTensorInfo(info); + add->GetOutputSlot(0).SetTensorInfo(info); + + std::vector> inputShape = {{ 2, 2 }, { 2, 2 }}; + std::vector> outputShape = {{ 2, 2 }}; + + TosaSerializationBasicBlock* basicBlock = + GetTosaMappingFromLayer(PolymorphicDowncast(add)); + AssertTosaOneToOneMappingBasicBlock( basicBlock, inputShape, outputShape, + tosa::Op_MUL, Attribute_MulAttribute, BaseDescriptor(), LayerType::Multiplication); +} + TEST_CASE("GetTosaMapping_AvgPool2DLayer") { Pooling2dDescriptor descriptor; @@ -616,6 +664,64 @@ TEST_CASE("GetTosaMappingFromLayer_TransposeConv2dLayer") LayerType::TransposeConvolution2d); } +TEST_CASE("GetTosaMapping_TransposeLayer") +{ + TensorInfo inputInfo = TensorInfo({ 1, 1, 5, 3 }, DataType::Float32, 0.0f, 0, true); + TensorInfo outputInfo = TensorInfo({ 1, 5, 1, 3 }, DataType::Float32, 0.0f, 0, true); + + std::vector> inputShape = {{ 1, 1, 5, 3 }}; + std::vector> outputShape = {{ 1, 5, 1, 3 }}; + + TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1 ,3 }); + + TosaSerializationBasicBlock* basicBlock = + GetTosaMapping(nullptr, LayerType::Transpose, {&inputInfo,}, {&outputInfo}, transposeDescriptor); + AssertTosaOneToOneMappingBasicBlock(basicBlock, + inputShape, + outputShape, + Op_TRANSPOSE, + Attribute_TransposeAttribute, + transposeDescriptor, + LayerType::Transpose); +} + +TEST_CASE("GetTosaMappingFromLayer_TransposeLayer") +{ + IRuntime::CreationOptions options; + IRuntimePtr runtime(IRuntime::Create(options)); + + // Builds up the structure of the network. 
+    INetworkPtr net(INetwork::Create());
+
+    TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1, 3 });
+
+    IConnectableLayer* input     = net->AddInputLayer(0, "input0");
+    IConnectableLayer* transpose = net->AddTransposeLayer(transposeDescriptor, "transpose");
+    IConnectableLayer* output    = net->AddOutputLayer(0, "output");
+
+    input->GetOutputSlot(0).Connect(transpose->GetInputSlot(0));
+    transpose->GetOutputSlot(0).Connect(output->GetInputSlot(0));
+
+    TensorInfo inputInfo  = TensorInfo({ 1, 1, 5, 3 }, DataType::Float32, 0.0f, 0, true);
+    TensorInfo outputInfo = TensorInfo({ 1, 5, 1, 3 }, DataType::Float32, 0.0f, 0, true);
+
+    input->GetOutputSlot(0).SetTensorInfo(inputInfo);
+    transpose->GetOutputSlot(0).SetTensorInfo(outputInfo);
+
+    std::vector<std::vector<int32_t>> inputShape  = {{ 1, 1, 5, 3 }};
+    std::vector<std::vector<int32_t>> outputShape = {{ 1, 5, 1, 3 }};
+
+    TosaSerializationBasicBlock* basicBlock =
+        GetTosaMappingFromLayer(PolymorphicDowncast<Layer*>(transpose));
+    AssertTosaOneToOneMappingBasicBlock(basicBlock,
+                                        inputShape,
+                                        outputShape,
+                                        Op_TRANSPOSE,
+                                        Attribute_TransposeAttribute,
+                                        transposeDescriptor,
+                                        LayerType::Transpose);
+}
+
 TEST_CASE("GetTosaMapping_Unimplemented")
 {
     TosaSerializationBasicBlock* basicBlock =
diff --git a/src/backends/tosaCommon/test/TosaTestUtils.hpp b/src/backends/tosaCommon/test/TosaTestUtils.hpp
index 140cb83983..e24055371f 100644
--- a/src/backends/tosaCommon/test/TosaTestUtils.hpp
+++ b/src/backends/tosaCommon/test/TosaTestUtils.hpp
@@ -158,6 +158,14 @@ inline void VerifyTosaAttribute(const BaseDescriptor& descriptor,
             CHECK(stride == transposeConvAttribute.stride());
             break;
         }
+        case LayerType::Transpose:
+        {
+            auto transposeDesc = PolymorphicDowncast<const TransposeDescriptor*>(&descriptor);
+            std::vector<int32_t> outPerm(transposeDesc->m_DimMappings.begin(),
+                                         transposeDesc->m_DimMappings.end());
+            TosaTransposeAttribute transposeAttribute(attribute);
+            CHECK(outPerm == transposeAttribute.perms());
+            break;
+        }
         default:
             break;
     }
diff --git a/src/backends/tosaReference/TosaRefLayerSupport.cpp b/src/backends/tosaReference/TosaRefLayerSupport.cpp
index 0d0d07a783..6113b5861a 100644
--- a/src/backends/tosaReference/TosaRefLayerSupport.cpp
+++ b/src/backends/tosaReference/TosaRefLayerSupport.cpp
@@ -38,6 +38,8 @@ bool TosaRefLayerSupport::IsLayerSupported(const LayerType& type,
         case LayerType::Output:
             return true;
         case LayerType::Addition:
+        case LayerType::Multiplication:
+        case LayerType::Subtraction:
             // Setup inputs and outputs
             inputInfos.push_back(&infos[0]);
             inputInfos.push_back(&infos[1]);
@@ -69,7 +71,7 @@ bool TosaRefLayerSupport::IsLayerSupported(const LayerType& type,
         case LayerType::Pooling2d:
         case LayerType::Reshape:
         case LayerType::Slice:
-            // Setup inputs and outputs
+        case LayerType::Transpose:
             inputInfos.push_back(&infos[0]);
             outputInfos.push_back(&infos[1]);
             break;
diff --git a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
index a377293fbf..e19462e986 100644
--- a/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefEndToEndTests.cpp
@@ -8,10 +8,13 @@
 #include "backendsCommon/test/AdditionEndToEndTestImpl.hpp"
 #include "backendsCommon/test/Convolution2dEndToEndTestImpl.hpp"
 #include "backendsCommon/test/ConcatEndToEndTestImpl.hpp"
+#include "backendsCommon/test/MultiplicationEndToEndTestImpl.hpp"
 #include "backendsCommon/test/Pooling2dEndToEndTestImpl.hpp"
 #include "backendsCommon/test/ReshapeEndToEndTestImpl.hpp"
 #include "backendsCommon/test/SliceEndToEndTestImpl.hpp"
+#include "backendsCommon/test/SubtractionEndToEndTestImpl.hpp"
 #include "backendsCommon/test/TransposeConvolution2dEndToEndTestImpl.hpp"
+#include "backendsCommon/test/TransposeEndToEndTestImpl.hpp"
 
 #include <doctest/doctest.h>
 
@@ -150,6 +153,35 @@ TEST_CASE("TosaRefSliceEndtoEndTestFloat16")
 {
     SliceEndToEndFloat16(tosaDefaultBackends);
 }
+
+TEST_CASE("TosaRefSubtractionEndtoEndTestFloat32")
+{
+    SubtractionEndToEnd<DataType::Float32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefSubtractionEndtoEndTestInt32")
+{
+    SubtractionEndToEnd<DataType::Signed32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefSubtractionEndtoEndTestFloat16")
+{
+    SubtractionEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestFloat32")
+{
+    MultiplicationEndToEnd<DataType::Float32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestInt32")
+{
+    MultiplicationEndToEnd<DataType::Signed32>(tosaDefaultBackends);
+}
+
+TEST_CASE("TosaRefMultiplicationEndtoEndTestFloat16")
+{
+    MultiplicationEndToEndFloat16<DataType::Float16>(tosaDefaultBackends);
+}
 
 // TransposeConvolution2d
 TEST_CASE("TosaRefTransposeConvolution2dEndToEndFloatNhwcTest")
@@ -164,4 +196,10 @@ TEST_CASE("TosaRefSimpleTransposeConvolution2dEndToEndFloatNhwcTest")
                                                               tosaDefaultBackends, armnn::DataLayout::NHWC);
 }
 
+// Transpose
+TEST_CASE("TosaRefTransposeEndtoEndTestFloat32")
+{
+    TransposeEndToEnd<armnn::DataType::Float32>(tosaDefaultBackends);
+}
+
 }
\ No newline at end of file
diff --git a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
index 051965f541..66dfbe8dff 100644
--- a/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
+++ b/src/backends/tosaReference/test/TosaRefLayerSupportTests.cpp
@@ -190,6 +190,50 @@ TEST_CASE("IsLayerSupportedTosaReferenceConv2dUnsupported")
     CHECK(!supported);
 }
 
+TEST_CASE("IsLayerSupportedTosaReferenceMultiplication")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {1,1,3,4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Float32);
+    TensorInfo in1(shape1, armnn::DataType::Float32);
+    TensorInfo out(outShape, armnn::DataType::Float32);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Multiplication,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceMultiplicationUnsupported")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {1,2,3,4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Signed64);
+    TensorInfo in1(shape1, armnn::DataType::Signed64);
+    TensorInfo out(outShape, armnn::DataType::Signed64);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Multiplication,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(!supported);
+}
+
 TEST_CASE("IsLayerSupportedTosaReferenceMaxPooling2d")
 {
     TensorShape inShape = {1,1,3,4};
@@ -376,6 +420,50 @@ TEST_CASE("IsLayerSupportedTosaReferenceSliceUnsupported")
     CHECK(!supported);
 }
 
+TEST_CASE("IsLayerSupportedTosaReferenceSubtraction")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {1,1,3,4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Float32);
+    TensorInfo in1(shape1, armnn::DataType::Float32);
+    TensorInfo out(outShape, armnn::DataType::Float32);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Subtraction,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceSubtractionUnsupported")
+{
+    TensorShape shape0 = {1,1,3,4};
+    TensorShape shape1 = {4};
+    TensorShape outShape = {1,1,3,4};
+    TensorInfo in0(shape0, armnn::DataType::Signed64);
+    TensorInfo in1(shape1, armnn::DataType::Signed64);
+    TensorInfo out(outShape, armnn::DataType::Signed64);
+
+    BaseDescriptor desc;
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(armnn::LayerType::Subtraction,
+                                                     {in0, in1, out},
+                                                     desc,
+                                                     armnn::EmptyOptional(),
+                                                     armnn::EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(!supported);
+}
+
 TEST_CASE("IsLayerSupportedTosaReferenceTransposeConv2d")
 {
     TensorInfo inputInfo ({ 1, 3, 3, 1 }, DataType::Float32);
@@ -421,4 +509,46 @@ TEST_CASE("IsLayerSupportedTosaReferenceTransposeConv2dUnsupported")
     CHECK(!supported);
 }
 
+TEST_CASE("IsLayerSupportedTosaReferenceTranspose")
+{
+    TensorShape inShape = { 1, 1, 5, 3 };
+    TensorShape outShape = { 1, 5, 1, 3 };
+    TensorInfo in(inShape, DataType::Float32);
+    TensorInfo out(outShape, DataType::Float32);
+
+    TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1, 3 });
+
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(LayerType::Transpose,
+                                                     {in, out},
+                                                     transposeDescriptor,
+                                                     EmptyOptional(),
+                                                     EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(supported);
+}
+
+TEST_CASE("IsLayerSupportedTosaReferenceTransposeUnsupported")
+{
+    TensorShape inShape = { 1, 1, 5, 3 };
+    TensorShape outShape = { 1, 5, 1, 3 };
+    TensorInfo in(inShape, DataType::Signed64);
+    TensorInfo out(outShape, DataType::Signed64);
+
+    TransposeDescriptor transposeDescriptor = TransposeDescriptor({ 0, 2, 1, 3 });
+
+    TosaRefLayerSupport supportChecker;
+    std::string reasonIfNotSupported;
+    auto supported = supportChecker.IsLayerSupported(LayerType::Transpose,
+                                                     {in, out},
+                                                     transposeDescriptor,
+                                                     EmptyOptional(),
+                                                     EmptyOptional(),
+                                                     reasonIfNotSupported);
+
+    CHECK(!supported);
+}
+
 }
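[Note: the tests above call TosaRefLayerSupport directly; application code normally reaches the same checks through the armnn::BackendHelper wrapper. A minimal sketch of that route, assuming the TOSA reference backend is registered under the id "TosaRef"; tensor shapes are illustrative:]

    // Sketch: query layer support the way an application would.
    #include <armnn/BackendHelper.hpp>
    #include <armnn/Optional.hpp>
    #include <string>

    bool IsSubtractionSupportedOnTosaRef()
    {
        using namespace armnn;

        TensorInfo in0({ 1, 1, 3, 4 }, DataType::Float32);
        TensorInfo in1({ 1, 1, 3, 4 }, DataType::Float32);
        TensorInfo out({ 1, 1, 3, 4 }, DataType::Float32);

        LayerSupportHandle handle = GetILayerSupportByBackendId("TosaRef");
        if (!handle.IsBackendRegistered())
        {
            return false; // Backend not compiled in or not registered.
        }

        std::string reason;
        return handle.IsSubtractionSupported(in0, in1, out, Optional<std::string&>(reason));
    }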
diff --git a/tests/CMakeLists.txt b/tests/CMakeLists.txt
index dd18a322ea..4c89267a1a 100644
--- a/tests/CMakeLists.txt
+++ b/tests/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2018-2022 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2018-2023 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
@@ -21,7 +21,7 @@ target_include_directories(inferenceTest PRIVATE ../src/armnnUtils)
 target_include_directories(inferenceTest PRIVATE ../src/backends)
 target_include_directories(inferenceTest PRIVATE ../third-party/stb)
 
-if (BUILD_TF_LITE_PARSER)
+if (BUILD_TF_LITE_PARSER AND NOT EXECUTE_NETWORK_STATIC)
     macro(TfLiteParserTest testName sources)
         add_executable_ex(${testName} ${sources})
         target_include_directories(${testName} PRIVATE ../src/armnnUtils)
@@ -112,7 +112,7 @@ if (BUILD_TF_LITE_PARSER)
 endif()
 
-if (BUILD_ONNX_PARSER)
+if (BUILD_ONNX_PARSER AND NOT EXECUTE_NETWORK_STATIC)
    macro(OnnxParserTest testName sources)
        add_executable_ex(${testName} ${sources})
        target_include_directories(${testName} PRIVATE ../src/armnnUtils)
@@ -139,7 +139,6 @@ if (BUILD_ONNX_PARSER)
 endif()
 
 if (BUILD_ARMNN_SERIALIZER
-    OR BUILD_TF_PARSER
     OR BUILD_TF_LITE_PARSER
     OR BUILD_ONNX_PARSER
     OR BUILD_ARMNN_TFLITE_DELEGATE)
@@ -168,21 +167,30 @@ if (BUILD_ARMNN_SERIALIZER
     target_include_directories(ExecuteNetwork PRIVATE ../src/armnnUtils)
     target_include_directories(ExecuteNetwork PRIVATE ../src/backends)
     target_include_directories(ExecuteNetwork PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
-
-    if (BUILD_ARMNN_SERIALIZER)
-        target_link_libraries(ExecuteNetwork armnnSerializer)
-    endif()
-
-    if (BUILD_TF_LITE_PARSER)
-        target_link_libraries(ExecuteNetwork armnnTfLiteParser)
-    endif()
-    if (BUILD_ONNX_PARSER)
-        target_link_libraries(ExecuteNetwork armnnOnnxParser)
-    endif()
-    if (BUILD_ARMNN_TFLITE_DELEGATE)
-        target_link_libraries(ExecuteNetwork ArmnnDelegate::ArmnnDelegate)
+    if(EXECUTE_NETWORK_STATIC)
+        target_link_libraries(ExecuteNetwork
+            -Wl,--whole-archive
+            armnnSerializer
+            armnnTfLiteParser
+            armnn
+            pthread
+            -Wl,--no-whole-archive
+        )
+    else()
+        if (BUILD_ARMNN_SERIALIZER)
+            target_link_libraries(ExecuteNetwork armnnSerializer)
+        endif()
+        if (BUILD_TF_LITE_PARSER)
+            target_link_libraries(ExecuteNetwork armnnTfLiteParser)
+        endif()
+        if (BUILD_ONNX_PARSER)
+            target_link_libraries(ExecuteNetwork armnnOnnxParser)
+        endif()
+        if (BUILD_ARMNN_TFLITE_DELEGATE)
+            target_link_libraries(ExecuteNetwork ArmnnDelegate::ArmnnDelegate)
+        endif()
+        target_link_libraries(ExecuteNetwork armnn)
     endif()
-    target_link_libraries(ExecuteNetwork armnn)
 
     target_link_libraries(ExecuteNetwork ${CMAKE_THREAD_LIBS_INIT})
     addDllCopyCommands(ExecuteNetwork)
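[Note: the EXECUTE_NETWORK_STATIC branch above wraps the parser and armnn archives in --whole-archive so that statically registered backends and parsers are not discarded by the linker. A possible configure invocation is sketched below; EXECUTE_NETWORK_STATIC, BUILD_SHARED_LIBS and the parser flags appear in this patch or in the existing Arm NN CMake options, while the exact combination of values is illustrative:]

    # Sketch: configure a statically linked ExecuteNetwork. Build dir/paths are placeholders.
    cmake .. \
        -DBUILD_SHARED_LIBS=OFF \
        -DEXECUTE_NETWORK_STATIC=1 \
        -DBUILD_TF_LITE_PARSER=1 \
        -DBUILD_ARMNN_SERIALIZER=1
    cmake --build . --target ExecuteNetwork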
diff --git a/tests/ExecuteNetwork/ArmNNExecutor.cpp b/tests/ExecuteNetwork/ArmNNExecutor.cpp
index 9657f05b6d..139da5f830 100644
--- a/tests/ExecuteNetwork/ArmNNExecutor.cpp
+++ b/tests/ExecuteNetwork/ArmNNExecutor.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -88,6 +88,7 @@ ArmNNExecutor::ArmNNExecutor(const ExecuteNetworkParams& params, armnn::IRuntime
 
 void ArmNNExecutor::ExecuteAsync()
 {
+#if !defined(ARMNN_DISABLE_THREADS)
     std::vector<std::shared_ptr<armnn::experimental::IWorkingMemHandle>> memHandles;
     std::unique_ptr<armnn::Threadpool> threadpool;
     armnn::AsyncCallbackManager callbackManager;
@@ -157,6 +158,7 @@ void ArmNNExecutor::ExecuteAsync()
 
     ARMNN_LOG(info) << "Overall Inference time: " << std::setprecision(2)
                     << std::fixed << totalInferenceDuration.count() << " ms\n";
+#endif
 }
 
 void ArmNNExecutor::ExecuteSync()
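[Note: ExecuteAsync is now compiled out entirely when ARMNN_DISABLE_THREADS is defined (bare-metal and static builds). When threads are available, the thread-pool path it guards follows the pattern sketched below; the function, its arguments and the single-thread pool size are illustrative, and the using-directives cover both the armnn and armnn::experimental namespaces used by the async API:]

    // Sketch of the guarded thread-pool execution path; error handling elided.
    #if !defined(ARMNN_DISABLE_THREADS)
    #include <armnn/ArmNN.hpp>
    #include <armnn/Threadpool.hpp>
    #include <memory>
    #include <vector>

    using namespace armnn;
    using namespace armnn::experimental;

    void RunOnThreadpool(IRuntime* runtime,
                         NetworkId netId,
                         const InputTensors& inputs,
                         const OutputTensors& outputs,
                         std::shared_ptr<IAsyncExecutionCallback> cb)
    {
        // One working-memory handle per pool thread (a single thread here).
        std::vector<std::shared_ptr<IWorkingMemHandle>> memHandles;
        memHandles.emplace_back(runtime->CreateWorkingMemHandle(netId));

        Threadpool threadpool(1, runtime, memHandles);

        // Queue the inference; completion is signalled through the callback.
        threadpool.Schedule(netId, inputs, outputs, QosExecPriority::Medium, cb);
    }
    #endif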
diff --git a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
index fa467c93f8..fbfd1bc936 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkParams.cpp
@@ -120,56 +120,67 @@ armnnDelegate::DelegateOptions ExecuteNetworkParams::ToDelegateOptions() const
     armnnDelegate::DelegateOptions delegateOptions(m_ComputeDevices);
     delegateOptions.SetDynamicBackendsPath(m_DynamicBackendsPath);
     delegateOptions.SetGpuProfilingState(m_EnableProfiling);
-
-    armnn::OptimizerOptions options;
-    options.m_ReduceFp32ToFp16 = m_EnableFp16TurboMode;
-    options.m_Debug = m_PrintIntermediate;
-    options.m_DebugToFile = m_PrintIntermediateOutputsToFile;
-    options.m_ProfilingEnabled = m_EnableProfiling;
     delegateOptions.SetInternalProfilingParams(m_EnableProfiling, armnn::ProfilingDetailsMethod::DetailsWithEvents);
-    options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly;
+
+    // GPU Backend options first.
+    {
+        armnn::BackendOptions gpuOption("GpuAcc", {{"TuningLevel", m_TuningLevel}});
+        delegateOptions.AddBackendOption(gpuOption);
+    }
+    {
+        armnn::BackendOptions gpuOption("GpuAcc", {{"TuningFile", m_TuningPath.c_str()}});
+        delegateOptions.AddBackendOption(gpuOption);
+    }
+    {
+        armnn::BackendOptions gpuOption("GpuAcc", {{"KernelProfilingEnabled", m_EnableProfiling}});
+        delegateOptions.AddBackendOption(gpuOption);
+    }
+
+    // Optimizer options next.
+    armnn::OptimizerOptions optimizerOptions;
+    optimizerOptions.m_ReduceFp32ToFp16 = m_EnableFp16TurboMode;
+    optimizerOptions.m_Debug = m_PrintIntermediate;
+    optimizerOptions.m_DebugToFile = m_PrintIntermediateOutputsToFile;
+    optimizerOptions.m_ProfilingEnabled = m_EnableProfiling;
+    optimizerOptions.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::ValidateOnly;
     if (m_InferOutputShape)
     {
-        options.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate;
+        optimizerOptions.m_shapeInferenceMethod = armnn::ShapeInferenceMethod::InferAndValidate;
+        armnn::BackendOptions networkOption("ShapeInferenceMethod",
+                                            {
+                                                {"InferAndValidate", true}
+                                            });
+        optimizerOptions.m_ModelOptions.push_back(networkOption);
     }
 
-    armnn::BackendOptions gpuAcc("GpuAcc",
-    {
-        { "FastMathEnabled", m_EnableFastMath },
-        { "SaveCachedNetwork", m_SaveCachedNetwork },
-        { "CachedNetworkFilePath", m_CachedNetworkFilePath },
-        { "TuningLevel", m_TuningLevel},
-        { "TuningFile", m_TuningPath.c_str()},
-        { "KernelProfilingEnabled", m_EnableProfiling},
-        { "MLGOTuningFilePath", m_MLGOTuningFilePath}
-    });
+    {
+        armnn::BackendOptions option("GpuAcc", {{"FastMathEnabled", m_EnableFastMath}});
+        optimizerOptions.m_ModelOptions.push_back(option);
+    }
+    {
+        armnn::BackendOptions option("GpuAcc", {{"CachedNetworkFilePath", m_CachedNetworkFilePath}});
+        optimizerOptions.m_ModelOptions.push_back(option);
+    }
+    {
+        armnn::BackendOptions option("GpuAcc", {{"MLGOTuningFilePath", m_MLGOTuningFilePath}});
+        optimizerOptions.m_ModelOptions.push_back(option);
+    }
 
     armnn::BackendOptions cpuAcc("CpuAcc",
     {
         { "FastMathEnabled", m_EnableFastMath },
         { "NumberOfThreads", m_NumberOfThreads }
     });
-    options.m_ModelOptions.push_back(gpuAcc);
-    options.m_ModelOptions.push_back(cpuAcc);
-
-    if (m_InferOutputShape)
-    {
-        armnn::BackendOptions networkOption("ShapeInferenceMethod",
-                                            {
-                                                {"InferAndValidate", true}
-                                            });
-        options.m_ModelOptions.push_back(networkOption);
-    }
+    optimizerOptions.m_ModelOptions.push_back(cpuAcc);
 
     if (m_AllowExpandedDims)
     {
         armnn::BackendOptions networkOption("AllowExpandedDims",
                                             {
                                                 {"AllowExpandedDims", true}
                                             });
-        options.m_ModelOptions.push_back(networkOption);
+        optimizerOptions.m_ModelOptions.push_back(networkOption);
     }
 
-    delegateOptions.SetOptimizerOptions(options);
-
+    delegateOptions.SetOptimizerOptions(optimizerOptions);
     return delegateOptions;
 }
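[Note: the rewrite above registers each GpuAcc option as its own armnn::BackendOptions object rather than one aggregate block, so individual options can be added, skipped or overridden independently. The same pattern applies to any option an application sets on the delegate; a minimal sketch, where the option names come from the diff above and the tuning level value is illustrative:]

    // Sketch: per-option BackendOptions, as used in ToDelegateOptions() above.
    #include <armnn/BackendOptions.hpp>
    #include <armnn_delegate.hpp> // delegate public header; include path is an assumption

    void ConfigureGpuTuning(armnnDelegate::DelegateOptions& delegateOptions)
    {
        // One key/value pair per BackendOptions object.
        armnn::BackendOptions tuningLevel("GpuAcc", {{"TuningLevel", 2}});
        delegateOptions.AddBackendOption(tuningLevel);

        armnn::BackendOptions profiling("GpuAcc", {{"KernelProfilingEnabled", true}});
        delegateOptions.AddBackendOption(profiling);
    }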
diff --git a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
index e9d77509e4..cba6748b45 100644
--- a/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
+++ b/tests/ExecuteNetwork/ExecuteNetworkProgramOptions.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -359,9 +359,9 @@ ProgramOptions::ProgramOptions() : m_CxxOptions{"ExecuteNetwork",
                 cxxopts::value<std::string>()->default_value("parser"))
 
         ("C, compare-output",
-                "Number of Arm NN threads to use when running the network asynchronously via the Arm NN thread pool. "
-                "The default is set to 0 which equals disabled. If 'thread-pool-size' is greater than 0 the "
-                "'concurrent' option is automatically set to true.",
+                "Compare the output of the network with an output file that has been previously "
+                "produced by running a network through ExecuteNetwork. See --write-outputs-to-file "
+                "to produce an output file for an execution.",
                 cxxopts::value<std::string>(m_ExNetParams.m_ComparisonFile))
 
         ("B, compare-output-with-backend",
diff --git a/tests/InferenceModel.hpp b/tests/InferenceModel.hpp
index 28069242f2..fa1b1b01b6 100644
--- a/tests/InferenceModel.hpp
+++ b/tests/InferenceModel.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -7,7 +7,12 @@
 
 #include <armnn/ArmNN.hpp>
+
+#if !defined(ARMNN_DISABLE_THREADS)
 #include <armnn/Threadpool.hpp>
+#include <common/include/LabelsAndEventClasses.hpp>
+#endif
+
 #include <armnn/Logging.hpp>
 #include <armnn/utility/Timer.hpp>
 #include <armnn/BackendRegistry.hpp>
@@ -511,7 +516,7 @@ public:
         ARMNN_LOG(info) << "Network loading time: " << std::setprecision(2)
                         << std::fixed << armnn::GetTimeDuration(loading_start_time).count() << " ms.";
-
+#if !defined(ARMNN_DISABLE_THREADS)
         if (params.m_AsyncEnabled && params.m_ThreadPoolSize > 0)
         {
             std::vector<std::shared_ptr<armnn::experimental::IWorkingMemHandle>> memHandles;
@@ -524,6 +529,7 @@ public:
                                                                m_Runtime.get(),
                                                                memHandles);
             }
+#endif
         }
 
         if (ret == armnn::Status::Failure)
@@ -683,6 +689,7 @@ public:
                   std::vector<armnnUtils::TContainer>& outputContainers,
                   std::shared_ptr<armnn::experimental::IAsyncExecutionCallback> cb)
     {
+#if !defined(ARMNN_DISABLE_THREADS)
         for (unsigned int i = 0; i < outputContainers.size(); ++i)
         {
             const unsigned int expectedOutputDataSize = GetOutputSize(i);
@@ -714,6 +721,7 @@ public:
         {
             profiler->Print(std::cout);
         }
+#endif
     }
 
     const armnn::BindingPointInfo& GetInputBindingInfo(unsigned int inputIndex = 0u) const
@@ -770,7 +778,9 @@ public:
 private:
     armnn::NetworkId m_NetworkIdentifier;
     std::shared_ptr<armnn::IRuntime> m_Runtime;
+#if !defined(ARMNN_DISABLE_THREADS)
     std::unique_ptr<armnn::Threadpool> m_Threadpool;
+#endif
 
     std::vector<armnn::BindingPointInfo> m_InputBindings;
     std::vector<armnn::BindingPointInfo> m_OutputBindings;
-- cgit v1.2.1
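[Note: the corrected --compare-output help text above describes a two-run workflow. A possible invocation is sketched below; --compare-output and --write-outputs-to-file are taken from the help text, while the model/backend flag spellings and file names are assumptions to be checked against ExecuteNetwork --help:]

    # First run: record reference outputs from a known-good backend.
    ExecuteNetwork -m model.tflite -c CpuRef --write-outputs-to-file ref_out

    # Second run: execute on the backend under test and compare against the file.
    ExecuteNetwork -m model.tflite -c GpuAcc --compare-output ref_out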