From 3ec3077b4eaedcc0c20ab5774bdbe365da541445 Mon Sep 17 00:00:00 2001
From: Mike Kelly
Date: Wed, 8 Mar 2023 13:47:17 +0000
Subject: IVGCVSW-3808 Add ElementwiseBinaryLayer

!android-nn-driver:9329

* Added ElementwiseBinaryLayer that can represent all ElementwiseBinary operations including Add, Div, Sub, Maximum, Mul and Minimum.
* Updated Delegate to use ElementwiseBinaryLayer instead of the Add, Div, Sub, Maximum, Mul and Minimum layers.
* Updated Deserializer to use ElementwiseBinaryLayer instead of the Add, Div, Sub, Maximum, Mul and Minimum layers.
* Updated OnnxParser to use ElementwiseBinaryLayer instead of the Add layer.
* Updated TfLiteParser to use ElementwiseBinaryLayer instead of the Add, Div, Sub, Maximum, Mul and Minimum layers.
* Updated CL and Neon tests to use ElementwiseBinaryLayer.
* Updated CL and Neon Backend Specific Optimizations to accept ElementwiseBinaryLayers as well as Add, Div, Mul, Sub, Maximum and Minimum layers.

Signed-off-by: Teresa Charlin
Signed-off-by: Mike Kelly
Change-Id: I7cbb96b60eb01f0e2b57b0541016d48a08b86c75
---
 src/backends/aclCommon/ArmComputeSubgraphUtils.hpp |  23 +++-
 src/backends/backendsCommon/CMakeLists.txt         |   2 +-
 src/backends/backendsCommon/WorkloadData.cpp       |  34 +++++-
 src/backends/backendsCommon/WorkloadFactory.cpp    |  41 ++++++-
 src/backends/backendsCommon/common.mk              |   2 +-
 src/backends/backendsCommon/test/CMakeLists.txt    |   1 +
 .../test/ElementwiseBinaryEndToEndTestImpl.hpp     | 107 ++++++++++++++++++
 .../test/IsLayerSupportedTestImpl.hpp              |   4 +-
 .../backendsCommon/test/OptimizedNetworkTests.cpp  |  10 +-
 .../test/mockBackend/MockImportLayerSupport.hpp    |   9 +-
 src/backends/cl/ClBackend.cpp                      |  89 ++++++++++++++-
 src/backends/cl/ClLayerSupport.cpp                 |  52 ++++++++-
 src/backends/cl/ClWorkloadFactory.cpp              |  71 +++++++++++-
 src/backends/cl/test/ClCreateWorkloadTests.cpp     |  46 +++-----
 src/backends/cl/test/ClFallbackTests.cpp           |  18 ++--
 src/backends/cl/test/Fp16SupportTest.cpp           |   4 +-
 src/backends/neon/NeonBackend.cpp                  |  88 ++++++++++++++-
 src/backends/neon/NeonLayerSupport.cpp             |  52 ++++++++-
 src/backends/neon/NeonWorkloadFactory.cpp          |  55 +++++++++-
 src/backends/neon/test/NeonFallbackTests.cpp       |  34 +++---
 src/backends/reference/RefLayerSupport.cpp         |  32 +++++-
 src/backends/reference/RefLayerSupport.hpp         |   2 +-
 src/backends/reference/RefWorkloadFactory.cpp      |   8 +-
 src/backends/reference/backend.mk                  |   1 +
 src/backends/reference/test/RefEndToEndTests.cpp   |  54 +++++++++-
 .../reference/test/RefOptimizedNetworkTests.cpp    |   4 +-
 src/backends/reference/workloads/CMakeLists.txt    |   4 +-
 .../workloads/RefElementwiseBinaryWorkload.cpp     | 120 +++++++++++++++++++++
 .../workloads/RefElementwiseBinaryWorkload.hpp     |  29 +++++
 src/backends/reference/workloads/RefWorkloads.hpp  |   3 +-
 30 files changed, 904 insertions(+), 95 deletions(-)
 create mode 100644 src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
 create mode 100644 src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
 create mode 100644 src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp

diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
index c9d6c71f18..599d3538eb 100644
--- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2020,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
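// Illustration (not part of this patch): the FuseElementwiseBinaryLayer helper added
// below replaces the per-operation fusion helpers. At the graph-building level the
// same consolidation looks like this (both calls appear elsewhere in this diff;
// 'net' is an armnn::INetworkPtr):
//
//     // Before: one layer type per arithmetic operation (now deprecated).
//     armnn::IConnectableLayer* add = net->AddAdditionLayer("add");
//
//     // After: a single layer type parameterised by BinaryOperation.
//     armnn::IConnectableLayer* add2 =
//         net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Add, "add");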
// SPDX-License-Identifier: MIT // @@ -146,6 +146,27 @@ LayerType* FuseMultiplicationLayer(OptimizationViews& optimizationViews, return replacementLayer; } +template +LayerType* FuseElementwiseBinaryLayer(OptimizationViews& optimizationViews, + LayerType* baseLayer, + ActivationLayer* activationLayer, + ActivationDescriptor& activationDesc, + BinaryOperation operation, + std::string name) +{ + IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddElementwiseBinaryLayer(operation, + name.c_str()); + LayerType* replacementLayer = PolymorphicDowncast(replacement); + + FuseLayer(optimizationViews, + baseLayer, + replacementLayer, + activationLayer, + activationDesc); + + return replacementLayer; +} + template LayerType* FuseBatchNormalizationLayer(OptimizationViews& optimizationViews, LayerType* baseLayer, diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt index 8d7e114fa5..28ff205d6e 100644 --- a/src/backends/backendsCommon/CMakeLists.txt +++ b/src/backends/backendsCommon/CMakeLists.txt @@ -1,5 +1,5 @@ # -# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved. +# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. # SPDX-License-Identifier: MIT # diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp index 62dfc6a38b..6a5963ddcb 100644 --- a/src/backends/backendsCommon/WorkloadData.cpp +++ b/src/backends/backendsCommon/WorkloadData.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -35,11 +35,8 @@ DataType GetBiasDataType(DataType inputDataType) case DataType::Float32: return DataType::Float32; case DataType::QAsymmS8: - return DataType::Signed32; case DataType::QAsymmU8: - return DataType::Signed32; case DataType::QSymmS8: - return DataType::Signed32; case DataType::QSymmS16: return DataType::Signed32; default: @@ -3668,6 +3665,35 @@ void ComparisonQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const } } +void ElementwiseBinaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const +{ + const std::string descriptorName{"ElementwiseBinaryQueueDescriptor"}; + + ValidateNumInputs(workloadInfo, descriptorName, 2); + ValidateNumOutputs(workloadInfo, descriptorName, 1); + + const TensorInfo& inputTensorInfo0 = workloadInfo.m_InputTensorInfos[0]; + const TensorInfo& inputTensorInfo1 = workloadInfo.m_InputTensorInfos[1]; + const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0]; + + std::vector supportedTypes = + { + DataType::BFloat16, + DataType::Float16, + DataType::Float32, + DataType::QAsymmS8, + DataType::QAsymmU8, + DataType::QSymmS16, + DataType::Signed32 + }; + + ValidateDataTypes(inputTensorInfo0, supportedTypes, descriptorName); + ValidateDataTypes(inputTensorInfo1, supportedTypes, descriptorName); + + ValidateTensorDataTypesMatch(inputTensorInfo0, outputTensorInfo, descriptorName, "input", "output"); + ValidateTensorDataTypesMatch(inputTensorInfo1, outputTensorInfo, descriptorName, "input", "output"); +} + void ElementwiseUnaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const { const std::string descriptorName{"ElementwiseUnaryQueueDescriptor"}; diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp index 1283f67660..51bc3e60cb 100644 --- 
a/src/backends/backendsCommon/WorkloadFactory.cpp +++ b/src/backends/backendsCommon/WorkloadFactory.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -15,7 +15,6 @@ #include #include -#include #include @@ -91,7 +90,8 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, auto backendFactory = backendRegistry.GetFactory(backendId); auto backendObject = backendFactory(); - auto layerSupportObject = LayerSupportHandle(backendObject->GetLayerSupport(modelOptions), backendId); + auto layerSupport = backendObject->GetLayerSupport(modelOptions); + auto layerSupportObject = LayerSupportHandle(layerSupport, backendId); switch(layer.GetType()) { @@ -109,6 +109,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, } case LayerType::Addition: { + ARMNN_NO_DEPRECATE_WARN_BEGIN const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); @@ -117,6 +118,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, OverrideDataType(input1, dataType), OverrideDataType(output, dataType), reason); + ARMNN_NO_DEPRECATE_WARN_END break; } case LayerType::ArgMinMax: @@ -392,6 +394,24 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, reason); break; } + case LayerType::ElementwiseBinary: + { + auto cLayer = PolymorphicDowncast(&layer); + + const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); + const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); + const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); + std::vector infos = { OverrideDataType(input0, dataType), + OverrideDataType(input1, dataType), + OverrideDataType(output, dataType) }; + result = layerSupport->IsLayerSupported(LayerType::ElementwiseBinary, + infos, + cLayer->GetParameters(), + EmptyOptional(), + EmptyOptional(), + reason); + break; + } case LayerType::ElementwiseUnary: { auto cLayer = PolymorphicDowncast(&layer); @@ -740,6 +760,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, } case LayerType::Maximum: { + ARMNN_NO_DEPRECATE_WARN_BEGIN const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); @@ -748,6 +769,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, OverrideDataType(input1, dataType), OverrideDataType(output, dataType), reason); + ARMNN_NO_DEPRECATE_WARN_END break; } case LayerType::MemCopy: @@ -814,6 +836,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, } case LayerType::Multiplication: { + ARMNN_NO_DEPRECATE_WARN_BEGIN const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); @@ -822,6 +845,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, OverrideDataType(input1, dataType), OverrideDataType(output, dataType), reason); + 
ARMNN_NO_DEPRECATE_WARN_END break; } case LayerType::Normalization: @@ -1052,6 +1076,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, } case LayerType::Division: { + ARMNN_NO_DEPRECATE_WARN_BEGIN const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); @@ -1060,6 +1085,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, OverrideDataType(input1, dataType), OverrideDataType(output, dataType), reason); + ARMNN_NO_DEPRECATE_WARN_END break; } case LayerType::Rank: @@ -1254,6 +1280,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, } case LayerType::Subtraction: { + ARMNN_NO_DEPRECATE_WARN_BEGIN const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); @@ -1262,6 +1289,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, OverrideDataType(input1, dataType), OverrideDataType(output, dataType), reason); + ARMNN_NO_DEPRECATE_WARN_END break; } case LayerType::Switch: @@ -1291,6 +1319,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, } case LayerType::Minimum: { + ARMNN_NO_DEPRECATE_WARN_BEGIN const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo(); const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo(); const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo(); @@ -1298,6 +1327,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId, OverrideDataType(input1, dataType), OverrideDataType(output, dataType), reason); + ARMNN_NO_DEPRECATE_WARN_END break; } case LayerType::Prelu: @@ -1670,6 +1700,11 @@ std::unique_ptr IWorkloadFactory::CreateWorkload(LayerType type, auto divisionQueueDescriptor = PolymorphicDowncast(&descriptor); return CreateDivision(*divisionQueueDescriptor, info); } + case LayerType::ElementwiseBinary: + { + auto queueDescriptor = PolymorphicDowncast(&descriptor); + return CreateWorkload(LayerType::ElementwiseBinary, *queueDescriptor, info); + } case LayerType::ElementwiseUnary: { auto elementwiseUnaryQueueDescriptor diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk index 3545331c8f..986d2530c2 100644 --- a/src/backends/backendsCommon/common.mk +++ b/src/backends/backendsCommon/common.mk @@ -1,5 +1,5 @@ # -# Copyright © 2017 ARM Ltd. All rights reserved. +# Copyright © 2017-2023 ARM Ltd and Contributors. All rights reserved. 
# SPDX-License-Identifier: MIT # diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt index 509157a378..77335d550a 100644 --- a/src/backends/backendsCommon/test/CMakeLists.txt +++ b/src/backends/backendsCommon/test/CMakeLists.txt @@ -24,6 +24,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources DetectionPostProcessEndToEndTestImpl.hpp DynamicBackendTests.cpp DynamicBackendTests.hpp + ElementwiseBinaryEndToEndTestImpl.hpp ElementwiseUnaryEndToEndTestImpl.hpp EndToEndTestImpl.hpp FillEndToEndTestImpl.hpp diff --git a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp new file mode 100644 index 0000000000..6546a6ae9e --- /dev/null +++ b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp @@ -0,0 +1,107 @@ +// +// Copyright © 2023 Arm Ltd and contributors. All rights reserved. +// SPDX-License-Identifier: MIT +// +#pragma once + +#include "CommonTestUtils.hpp" + +#include + +#include +#include + +#include + +#include + +namespace +{ + +template +INetworkPtr CreateElementwiseBinaryNetwork(const TensorShape& input1Shape, + const TensorShape& input2Shape, + const TensorShape& outputShape, + BinaryOperation operation, + const float qScale = 1.0f, + const int32_t qOffset = 0) +{ + using namespace armnn; + + INetworkPtr net(INetwork::Create()); + + TensorInfo input1TensorInfo(input1Shape, ArmnnTypeInput, qScale, qOffset, true); + TensorInfo input2TensorInfo(input2Shape, ArmnnTypeInput, qScale, qOffset, true); + TensorInfo outputTensorInfo(outputShape, ArmnnTypeInput, qScale, qOffset); + + IConnectableLayer* input1 = net->AddInputLayer(armnn::numeric_cast(0)); + IConnectableLayer* input2 = net->AddInputLayer(armnn::numeric_cast(1)); + IConnectableLayer* elementwiseBinaryLayer = net->AddElementwiseBinaryLayer(operation, "elementwiseUnary"); + IConnectableLayer* output = net->AddOutputLayer(0, "output"); + + Connect(input1, elementwiseBinaryLayer, input1TensorInfo, 0, 0); + Connect(input2, elementwiseBinaryLayer, input2TensorInfo, 0, 1); + Connect(elementwiseBinaryLayer, output, outputTensorInfo, 0, 0); + + return net; +} + +template> +void ElementwiseBinarySimpleEndToEnd(const std::vector& backends, + BinaryOperation operation) +{ + using namespace armnn; + + const float qScale = IsQuantizedType() ? 0.25f : 1.0f; + const int32_t qOffset = IsQuantizedType() ? 
50 : 0; + + const TensorShape& input1Shape = { 2, 2, 2, 2 }; + const TensorShape& input2Shape = { 1 }; + const TensorShape& outputShape = { 2, 2, 2, 2 }; + + // Builds up the structure of the network + INetworkPtr net = CreateElementwiseBinaryNetwork(input1Shape, input2Shape, outputShape, + operation, qScale, qOffset); + + CHECK(net); + + const std::vector input1({ 1, -1, 1, 1, 5, -5, 5, 5, -3, 3, 3, 3, 4, 4, -4, 4 }); + + const std::vector input2({ 2 }); + std::vector expectedOutput; + switch (operation) { + case armnn::BinaryOperation::Add: + expectedOutput = { 3, 1, 3, 3, 7, -3, 7, 7, -1, 5, 5, 5, 6, 6, -2, 6 }; + break; + case armnn::BinaryOperation::Div: + expectedOutput = {0.5f, -0.5f, 0.5f, 0.5f, 2.5f, -2.5f, 2.5f, 2.5f, -1.5f, 1.5f, 1.5f, 1.5f, 2, 2, -2, 2}; + break; + case armnn::BinaryOperation::Maximum: + expectedOutput = { 2, 2, 2, 2, 5, 2, 5, 5, 2, 3, 3, 3, 4, 4, 2, 4 }; + break; + case armnn::BinaryOperation::Minimum: + expectedOutput = { 1, -1, 1, 1, 2, -5, 2, 2, -3, 2, 2, 2, 2, 2, -4, 2 }; + break; + case armnn::BinaryOperation::Mul: + expectedOutput = { 2, -2, 2, 2, 10, -10, 10, 10, -6, 6, 6, 6, 8, 8, -8, 8 }; + break; + case armnn::BinaryOperation::Sub: + expectedOutput = { -1, -3, -1, -1, 3, -7, 3, 3, -5, 1, 1, 1, 2, 2, -6, 2 }; + break; + default: + throw("Invalid Elementwise Binary operation"); + } + const std::vector expectedOutput_const = expectedOutput; + // quantize data + std::vector qInput1Data = armnnUtils::QuantizedVector(input1, qScale, qOffset); + std::vector qInput2Data = armnnUtils::QuantizedVector(input2, qScale, qOffset); + std::vector qExpectedOutput = armnnUtils::QuantizedVector(expectedOutput_const, qScale, qOffset); + + std::map> inputTensorData = {{ 0, qInput1Data }, { 1, qInput2Data }}; + std::map> expectedOutputData = {{ 0, qExpectedOutput }}; + + EndToEndLayerTestImpl(std::move(net), inputTensorData, expectedOutputData, backends); +} + +} // anonymous namespace diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp index fb7a0271d4..5b95d3cd92 100644 --- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp +++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once @@ -664,6 +664,8 @@ DECLARE_LAYER_POLICY_1_PARAM(Dequantize) DECLARE_LAYER_POLICY_2_PARAM(DetectionPostProcess) +DECLARE_LAYER_POLICY_2_PARAM(ElementwiseBinary) + DECLARE_LAYER_POLICY_2_PARAM(ElementwiseUnary) DECLARE_LAYER_POLICY_2_PARAM(FakeQuantization) diff --git a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp index cd865def71..5e619df8dd 100644 --- a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp +++ b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -21,7 +21,7 @@ TEST_CASE("SerializeToDot") //Defines layers. auto input = net->AddInputLayer(0); - auto add = net->AddAdditionLayer(); + auto add = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Add); auto output = net->AddOutputLayer(0); // Connects layers. 
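// Illustration (not part of this patch), referring back to
// ElementwiseBinaryEndToEndTestImpl.hpp above: the quantized test variants reuse the
// float reference data by pushing it through armnnUtils::QuantizedVector with the
// qScale/qOffset chosen in the test (0.25f and 50 for quantized types). Assuming
// ArmNN's usual affine quantization:
//
//     // quantized = round(value / qScale) + qOffset, clamped to the target type,
//     // so for QAsymmU8: 1.0f -> round(1.0f / 0.25f) + 50 = 54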
@@ -54,7 +54,7 @@ TEST_CASE("SerializeToDot") " edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n" " " << inputId << " [label=\"{Input|Guid : " << inputId << "\\lLayerType : Input\\l" "BackendID : CpuRef\\l}\"];\n" - " " << addId << " [label=\"{Addition|Guid : " << addId << "\\lLayerType : Addition\\l" + " " << addId << " [label=\"{ElementwiseBinary|Guid : " << addId << "\\lLayerType : ElementwiseBinary\\l" "BackendID : CpuRef\\l}\"];\n" " " << outputId << " [label=\"{Output|Guid : " << outputId << "\\lLayerType : Output\\l" "BackendID : CpuRef\\l}\"];\n" @@ -187,7 +187,7 @@ TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDevice") layer->GetOutputSlot(0).SetTensorInfo(desc); armnn::IConnectableLayer* prevLayer = layer; - layer = net->AddMultiplicationLayer("ml"); + layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml"); prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); @@ -258,7 +258,7 @@ TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback") layer->GetOutputSlot(0).SetTensorInfo(desc); armnn::IConnectableLayer* prevLayer = layer; - layer = net->AddMultiplicationLayer("ml"); + layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml"); prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0)); normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1)); diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp b/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp index 380ce4a3f5..da4b7ab7d0 100644 --- a/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp +++ b/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020-2021,2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once @@ -16,7 +16,7 @@ class MockImportLayerSupport : public LayerSupportBase public: bool IsLayerSupported(const LayerType& type, const std::vector& infos, - const BaseDescriptor& /*descriptor*/, + const BaseDescriptor& descriptor, const Optional& /*lstmParamsInfo*/, const Optional& /*quantizedLstmParamsInfo*/, Optional reasonIfUnsupported) const override @@ -25,6 +25,11 @@ public: { case LayerType::Addition: return IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + case LayerType::ElementwiseBinary: + { + auto elementwiseDesc = *(PolymorphicDowncast(&descriptor)); + return (elementwiseDesc.m_Operation == BinaryOperation::Add); + } case LayerType::Input: return IsInputSupported(infos[0], reasonIfUnsupported); case LayerType::Output: diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index d2e8fbfe32..a10b6fbb43 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd. All rights reserved. +// Copyright © 2017-2023 Arm Ltd. All rights reserved. 
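// Illustration (not part of this patch): every BinaryOperation handled in the
// ClBackend fusion code below follows the same shape -- validate the fused
// workload, replace the layer pair, then drop both from 'untouched'. Condensed
// into a hypothetical helper (names assumed; 'untouched' is the
// std::map<LayerGuid, Layer*> used by OptimizeSubgraphView):
//
//     bool TryFuseBinary(const arm_compute::Status& status,
//                        OptimizationViews& views,
//                        ElementwiseBinaryLayer* baseLayer,
//                        ActivationLayer* activationLayer,
//                        ActivationDescriptor& activationDesc,
//                        BinaryOperation op,
//                        std::string name,
//                        std::map<LayerGuid, Layer*>& untouched)
//     {
//         if (!status) { return false; }    // backend rejected the fused workload
//         FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(views, baseLayer,
//                                                            activationLayer,
//                                                            activationDesc, op, name);
//         untouched.erase(baseLayer->GetGuid());
//         untouched.erase(activationLayer->GetGuid());
//         return true;
//     }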
// SPDX-License-Identifier: MIT // @@ -311,7 +311,8 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication - || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division) + || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division + || base.GetType() == LayerType::ElementwiseBinary) && (base.GetAdditionalInformation() == nullptr)) { for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output) @@ -542,6 +543,90 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, untouched.erase(activationLayer->GetGuid()); } } + else if (base.GetType() == LayerType::ElementwiseBinary) + { + ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast(&base); + + if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add) + { + arm_compute::Status status = ClAdditionValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Add, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div) + { + arm_compute::Status status = ClDivisionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Div, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul) + { + arm_compute::Status status = ClMultiplicationWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Mul, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub) + { + arm_compute::Status status = ClSubtractionValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Sub, + name); + } + } + // No fusion available for other BinaryOperations + } } } } diff --git 
a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index cb2d756037..89bcf9bc01 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -347,6 +347,56 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type, return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::Division: return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + case LayerType::ElementwiseBinary: + { + auto desc = *(PolymorphicDowncast(&descriptor)); + + switch (desc.m_Operation) + { + case BinaryOperation::Add: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClAdditionValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Div: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Minimum: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMinimumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Maximum: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMaximumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Mul: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Sub: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClSubtractionValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + default: + return false; + } + } case LayerType::ElementwiseUnary: return IsElementwiseUnarySupported(infos[0], infos[1], diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 6bf510a2ef..022867710c 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
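// Illustration (not part of this patch): ClWorkloadFactory below maps the unified
// ElementwiseBinaryQueueDescriptor onto the existing per-operation CL workloads
// instead of introducing a new kernel. Each case copies the bound tensors and the
// additional-info object across; condensed into a hypothetical helper:
//
//     template <typename TargetDescriptor, typename TargetWorkload>
//     std::unique_ptr<IWorkload> ForwardToLegacyWorkload(
//         const ElementwiseBinaryQueueDescriptor& src,
//         const WorkloadInfo& info,
//         const arm_compute::CLCompileContext& clCompileContext)
//     {
//         TargetDescriptor dst;
//         dst.m_Inputs  = src.m_Inputs;   // tensor handles carry over unchanged
//         dst.m_Outputs = src.m_Outputs;
//         dst.m_AdditionalInfoObject = src.m_AdditionalInfoObject;
//         return std::make_unique<TargetWorkload>(dst, info, clCompileContext);
//     }
//
//     // e.g. ForwardToLegacyWorkload<AdditionQueueDescriptor, ClAdditionWorkload>(...)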
// SPDX-License-Identifier: MIT // #include "ClWorkloadFactory.hpp" @@ -405,6 +405,75 @@ std::unique_ptr ClWorkloadFactory::CreateWorkload(LayerType type, auto divisionQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*divisionQueueDescriptor, info, m_CLCompileContext); } + case LayerType::ElementwiseBinary : + { + auto elementwiseBinaryQueueDescriptor + = PolymorphicDowncast(&descriptor); + + switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation) + { + case BinaryOperation::Add: + { + AdditionQueueDescriptor additionQueueDescriptor; + additionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + additionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + additionQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique(additionQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Div: + { + DivisionQueueDescriptor divisionQueueDescriptor; + divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + divisionQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique(divisionQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Maximum: + { + MaximumQueueDescriptor maximumQueueDescriptor; + maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + maximumQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique(maximumQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Minimum: + { + MinimumQueueDescriptor minimumQueueDescriptor; + minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + minimumQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique(minimumQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Mul: + { + MultiplicationQueueDescriptor multiplicationQueueDescriptor; + multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs; + multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs; + multiplicationQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique(multiplicationQueueDescriptor, + info, + m_CLCompileContext); + } + case BinaryOperation::Sub: + { + SubtractionQueueDescriptor subtractionQueueDescriptor; + subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + subtractionQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique(subtractionQueueDescriptor, + info, + m_CLCompileContext); + } + default: + return nullptr; + } + } case LayerType::ElementwiseUnary : { auto elementwiseUnaryQueueDescriptor diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index adea733582..c49ca23266 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -66,19 +66,17 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload") } template -static void ClCreateElementwiseWorkloadTest() +static void ClCreateElementwiseWorkloadTest(BinaryOperation binaryOperator) { Graph graph; ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - auto workload = CreateElementwiseWorkloadTest(factory, graph); + auto workload = CreateElementwiseBinaryWorkloadTest(factory, graph, binaryOperator); // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest). - DescriptorType queueDescriptor = workload->GetData(); + auto queueDescriptor = workload->GetData(); auto inputHandle1 = PolymorphicDowncast(queueDescriptor.m_Inputs[0]); auto inputHandle2 = PolymorphicDowncast(queueDescriptor.m_Inputs[1]); auto outputHandle = PolymorphicDowncast(queueDescriptor.m_Outputs[0]); @@ -93,73 +91,55 @@ static void ClCreateElementwiseWorkloadTest() TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float32>(BinaryOperation::Add); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float16>(BinaryOperation::Add); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float32>(BinaryOperation::Sub); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float16>(BinaryOperation::Sub); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float32>(BinaryOperation::Mul); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float16>(BinaryOperation::Mul); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::QAsymmU8>(BinaryOperation::Mul); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float32>(BinaryOperation::Div); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest") { ClCreateElementwiseWorkloadTest(); + armnn::DataType::Float16>(BinaryOperation::Div); } template AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -172,8 +172,8 @@ TEST_CASE("ClImportDisabledFallbackToNeon") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = 
net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -301,8 +301,8 @@ TEST_CASE("ClImportEnabledFallbackSubgraphToNeon") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); @@ -460,8 +460,8 @@ TEST_CASE("ClImportDisableFallbackSubgraphToNeon") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp index da6ea10926..28ae4795ab 100644 --- a/src/backends/cl/test/Fp16SupportTest.cpp +++ b/src/backends/cl/test/Fp16SupportTest.cpp @@ -28,7 +28,7 @@ TEST_CASE("Fp16DataTypeSupport") Layer* const inputLayer1 = graph.AddLayer(1, "input1"); Layer* const inputLayer2 = graph.AddLayer(2, "input2"); - Layer* const additionLayer = graph.AddLayer("addition"); + Layer* const additionLayer = graph.AddLayer(BinaryOperation::Add, "addition"); Layer* const outputLayer = graph.AddLayer(0, "output"); TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16); @@ -57,7 +57,7 @@ TEST_CASE("Fp16AdditionTest") IConnectableLayer* inputLayer1 = net->AddInputLayer(0); IConnectableLayer* inputLayer2 = net->AddInputLayer(1); - IConnectableLayer* additionLayer = net->AddAdditionLayer(); + IConnectableLayer* additionLayer = net->AddElementwiseBinaryLayer(BinaryOperation::Add); IConnectableLayer* outputLayer = net->AddOutputLayer(0); inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index 968bce48c8..cea2aa3eba 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -395,6 +395,92 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph untouched.erase(activationLayer->GetGuid()); } } + else if (base.GetType() == LayerType::ElementwiseBinary) + { + ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast(&base); + + if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add) + { + arm_compute::Status status = NeonAdditionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Add, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div) + { + arm_compute::Status status = NeonDivisionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Div, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul) + { + arm_compute::Status status = NeonMultiplicationWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Mul, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub) + { + arm_compute::Status status = NeonSubtractionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Sub, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + // No fusion available for other BinaryOperations + } } } } diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index ee155a2c64..672b2f377f 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
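// Illustration (not part of this patch): FORWARD_WORKLOAD_VALIDATE_FUNC, used by the
// CL and Neon support checks in this change, forwards the tensor infos to a
// workload's static Validate function and converts the resulting
// arm_compute::Status into the bool/reason pair of the IsLayerSupported interface.
// A conceptual stand-in (hypothetical helper; the real macro is defined elsewhere
// in the tree and also checks backend availability):
//
//     template <typename ValidateFunc, typename... Args>
//     bool IsSupportedViaValidate(ValidateFunc&& validate,
//                                 Optional<std::string&> reasonIfUnsupported,
//                                 Args&&... args)
//     {
//         arm_compute::Status status = validate(std::forward<Args>(args)...);
//         if (!status && reasonIfUnsupported.has_value())
//         {
//             reasonIfUnsupported.value() = status.error_description();
//         }
//         return static_cast<bool>(status);
//     }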
// SPDX-License-Identifier: MIT // @@ -346,6 +346,56 @@ bool IsLayerTypeSupported(const LayerType& type, } case LayerType::Division: return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + case LayerType::ElementwiseBinary: + { + auto desc = *(PolymorphicDowncast(&descriptor)); + + switch (desc.m_Operation) + { + case BinaryOperation::Add: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Div: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDivisionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Maximum: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Minimum: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Mul: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Sub: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + default: + return false; + } + } case LayerType::ElementwiseUnary: return support.IsElementwiseUnarySupported(infos[0], infos[1], diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index dccd4a3a36..08168eca2f 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -280,6 +280,59 @@ std::unique_ptr NeonWorkloadFactory::CreateWorkload(LayerType type, auto divisionQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*divisionQueueDescriptor, info); } + case LayerType::ElementwiseBinary : + { + auto elementwiseBinaryQueueDescriptor + = PolymorphicDowncast(&descriptor); + + switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation) + { + case BinaryOperation::Add: + { + AdditionQueueDescriptor additionQueueDescriptor; + additionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + additionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(additionQueueDescriptor, info); + } + case BinaryOperation::Div: + { + DivisionQueueDescriptor divisionQueueDescriptor; + divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(divisionQueueDescriptor, info); + } + case BinaryOperation::Maximum: + { + MaximumQueueDescriptor maximumQueueDescriptor; + maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(maximumQueueDescriptor, info); + } + case BinaryOperation::Minimum: + { + MinimumQueueDescriptor minimumQueueDescriptor; + minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(minimumQueueDescriptor, info); + } + case BinaryOperation::Mul: + { + MultiplicationQueueDescriptor multiplicationQueueDescriptor; + multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs; + multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(multiplicationQueueDescriptor, info); + } + case BinaryOperation::Sub: + { + SubtractionQueueDescriptor subtractionQueueDescriptor; + subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(subtractionQueueDescriptor, info); + } + default: + return nullptr; + } + } case LayerType::ElementwiseUnary : { auto elementwiseUnaryQueueDescriptor diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp index 8e0e0ab99b..40df2dc315 100644 --- a/src/backends/neon/test/NeonFallbackTests.cpp +++ b/src/backends/neon/test/NeonFallbackTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -38,8 +38,8 @@ TEST_CASE("FallbackImportToCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -183,7 +183,7 @@ TEST_CASE("FallbackPaddingCopyToCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); @@ -318,8 +318,8 @@ TEST_CASE("FallbackImportFromCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); @@ -465,7 +465,7 @@ TEST_CASE("FallbackPaddingCopyFromCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); @@ -599,8 +599,8 @@ TEST_CASE("FallbackDisableImportFromCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); @@ -725,8 +725,8 @@ TEST_CASE("NeonImportEnabledFallbackToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = 
net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -878,8 +878,8 @@ TEST_CASE("NeonImportDisabledFallbackToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -1013,8 +1013,8 @@ TEST_CASE("NeonImportEnabledFallbackSubgraphToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); @@ -1177,8 +1177,8 @@ TEST_CASE("NeonImportDisableFallbackSubgraphToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp index a5015a7376..cbc6723dbc 100644 --- a/src/backends/reference/RefLayerSupport.cpp +++ b/src/backends/reference/RefLayerSupport.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
 // SPDX-License-Identifier: MIT
 //
@@ -189,6 +189,36 @@ bool RefLayerSupport::IsLayerSupported(const LayerType& type,
             return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
         case LayerType::Division:
             return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+        case LayerType::ElementwiseBinary:
+        {
+            std::array<DataType, 6> supportedTypes =
+            {
+                DataType::Float32,
+                DataType::Float16,
+                DataType::QAsymmS8,
+                DataType::QAsymmU8,
+                DataType::QSymmS16,
+                DataType::Signed32
+            };
+
+            bool supported = true;
+            supported &= CheckSupportRule(TypeAnyOf(infos[0], supportedTypes), reasonIfUnsupported,
+                                          "Reference elementwise binary: input type not supported");
+
+            supported &= CheckSupportRule(TypeAnyOf(infos[1], supportedTypes), reasonIfUnsupported,
+                                          "Reference elementwise binary: input type not supported");
+
+            supported &= CheckSupportRule(TypeAnyOf(infos[2], supportedTypes), reasonIfUnsupported,
+                                          "Reference elementwise binary: output type not supported");
+
+            supported &= CheckSupportRule(TypesAreEqual(infos[0], infos[1]), reasonIfUnsupported,
+                                          "Reference elementwise binary: input types not matching");
+
+            supported &= CheckSupportRule(TypesAreEqual(infos[0], infos[2]), reasonIfUnsupported,
+                                          "Reference elementwise binary: input and output types not matching");
+
+            return supported;
+        }
         case LayerType::ElementwiseUnary:
             return IsElementwiseUnarySupported(infos[0],
                                                infos[1],

diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index f0e9e35978..8e1f68ebfc 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
 #pragma once

diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index bfe37d7bf5..10f623eef3 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT // #include @@ -302,6 +302,12 @@ std::unique_ptr RefWorkloadFactory::CreateWorkload(LayerType type, return std::make_unique>(*divisionQueueDescriptor, info); } } + case LayerType::ElementwiseBinary: + { + auto elementwiseBinaryQueueDescriptor + = PolymorphicDowncast(&descriptor); + return std::make_unique(*elementwiseBinaryQueueDescriptor, info); + } case LayerType::ElementwiseUnary: { auto elementwiseUnaryQueueDescriptor diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk index eb2ec2df44..c23984c3e9 100644 --- a/src/backends/reference/backend.mk +++ b/src/backends/reference/backend.mk @@ -67,6 +67,7 @@ BACKEND_SOURCES := \ workloads/RefDepthwiseConvolution2dWorkload.cpp \ workloads/RefDequantizeWorkload.cpp \ workloads/RefDetectionPostProcessWorkload.cpp \ + workloads/RefElementwiseBinaryWorkload.cpp \ workloads/RefElementwiseWorkload.cpp \ workloads/RefElementwiseUnaryWorkload.cpp \ workloads/RefFakeQuantizationFloat32Workload.cpp \ diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp index 6ff57716d0..8bf414fdb0 100644 --- a/src/backends/reference/test/RefEndToEndTests.cpp +++ b/src/backends/reference/test/RefEndToEndTests.cpp @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -185,7 +186,7 @@ TEST_CASE("TrivialAdd") IConnectableLayer* input1 = net->AddInputLayer(0); IConnectableLayer* input2 = net->AddInputLayer(1); - IConnectableLayer* add = net->AddAdditionLayer(); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(ElementwiseBinaryDescriptor(BinaryOperation::Add)); IConnectableLayer* output = net->AddOutputLayer(0); input1->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -347,7 +348,7 @@ TEST_CASE("TrivialMin") IConnectableLayer* input1 = net->AddInputLayer(0); IConnectableLayer* input2 = net->AddInputLayer(1); - IConnectableLayer* min = net->AddMinimumLayer(); + IConnectableLayer* min = net->AddElementwiseBinaryLayer(ElementwiseBinaryDescriptor(BinaryOperation::Minimum)); IConnectableLayer* output = net->AddOutputLayer(0); input1->GetOutputSlot(0).Connect(min->GetInputSlot(0)); @@ -1547,6 +1548,55 @@ TEST_CASE("RefAsyncFP32StridedSlicedScheduledMultiThreadedEndToEndTest") { armnn::experimental::StridedSlicedEndToEndTest(defaultBackends, 3); } + +TEST_CASE("RefAddEndToEndTestFloat32") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Add); +} +TEST_CASE("RefAddEndToEndTestUint8") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Add); +} +TEST_CASE("RefDivEndToEndTestFloat32") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Div); +} +TEST_CASE("RefDivEndToEndTestUint8") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Div); +} +TEST_CASE("RefMulEndToEndTestFloat32") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Mul); +} +TEST_CASE("RefMulEndToEndTestUint8") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Mul); +} +TEST_CASE("RefSubEndToEndTestFloat32") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Sub); +} +TEST_CASE("RefSubEndToEndTestUint8") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Sub); +} +TEST_CASE("RefMaximumEndToEndTestFloat32") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, BinaryOperation::Maximum); +} +TEST_CASE("RefMaximumEndToEndTestUint8") +{ + ElementwiseBinarySimpleEndToEnd(defaultBackends, 
diff --git a/src/backends/reference/test/RefOptimizedNetworkTests.cpp b/src/backends/reference/test/RefOptimizedNetworkTests.cpp
index 7ca1e0e505..7e8064fc76 100644
--- a/src/backends/reference/test/RefOptimizedNetworkTests.cpp
+++ b/src/backends/reference/test/RefOptimizedNetworkTests.cpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -53,7 +53,7 @@ TEST_CASE("OptimizeValidateCpuRefWorkloads")
     layer->GetOutputSlot(0).SetTensorInfo(desc);
 
     armnn::IConnectableLayer* prevLayer = layer;
-    layer = net->AddMultiplicationLayer("ml");
+    layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");
 
     prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
     normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index de6c042959..3592f2293d 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -1,5 +1,5 @@
 #
-# Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 # SPDX-License-Identifier: MIT
 #
@@ -108,6 +108,8 @@ list(APPEND armnnRefBackendWorkloads_sources
     RefDequantizeWorkload.hpp
     RefDetectionPostProcessWorkload.cpp
     RefDetectionPostProcessWorkload.hpp
+    RefElementwiseBinaryWorkload.cpp
+    RefElementwiseBinaryWorkload.hpp
     RefElementwiseUnaryWorkload.cpp
     RefElementwiseUnaryWorkload.hpp
     RefFakeQuantizationFloat32Workload.cpp
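[Editor's note: The one-line test change above shows the migration pattern this commit applies throughout the codebase: a single entry point parameterised by BinaryOperation replaces six operation-specific factory calls. The helper below is hypothetical -- not part of armnn -- and exists only to make that mapping explicit; it uses the operation-plus-name overload seen in the hunk above.]

    #include <armnn/INetwork.hpp>

    // One call replaces AddAdditionLayer, AddDivisionLayer, AddMaximumLayer,
    // AddMinimumLayer, AddMultiplicationLayer and AddSubtractionLayer.
    armnn::IConnectableLayer* AddBinaryOp(armnn::INetwork& net,
                                          armnn::BinaryOperation op,
                                          const char* name = nullptr)
    {
        return net.AddElementwiseBinaryLayer(op, name);
    }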
diff --git a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
new file mode 100644
index 0000000000..5dc77f8496
--- /dev/null
+++ b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
@@ -0,0 +1,120 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefElementwiseBinaryWorkload.hpp"
+
+#include "Decoders.hpp"
+#include "ElementwiseFunction.hpp"
+#include "Encoders.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "Maximum.hpp"
+#include "Minimum.hpp"
+
+#include <Profiling.hpp>
+
+#include <armnn/TypesUtils.hpp>
+
+#include <functional>
+
+namespace armnn
+{
+
+template<typename DataType>
+void ExecuteFunction(std::vector<ITensorHandle*> inputs,
+                     std::vector<ITensorHandle*> outputs,
+                     BinaryOperation operation)
+{
+    const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+    const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+    const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
+    const TensorShape& inShape0 = inputInfo0.GetShape();
+    const TensorShape& inShape1 = inputInfo1.GetShape();
+    const TensorShape& outShape = outputInfo.GetShape();
+
+    std::unique_ptr<Decoder<DataType>> input0 = MakeDecoder<DataType>(inputInfo0, inputs[0]->Map());
+    std::unique_ptr<Decoder<DataType>> input1 = MakeDecoder<DataType>(inputInfo1, inputs[1]->Map());
+    std::unique_ptr<Encoder<DataType>> output = MakeEncoder<DataType>(outputInfo, outputs[0]->Map());
+
+    using AddFunction     = ElementwiseBinaryFunction<std::plus<DataType>>;
+    using DivFunction     = ElementwiseBinaryFunction<std::divides<DataType>>;
+    using MaximumFunction = ElementwiseBinaryFunction<armnn::maximum<DataType>>;
+    using MinimumFunction = ElementwiseBinaryFunction<armnn::minimum<DataType>>;
+    using MulFunction     = ElementwiseBinaryFunction<std::multiplies<DataType>>;
+    using SubFunction     = ElementwiseBinaryFunction<std::minus<DataType>>;
+
+    switch (operation)
+    {
+        case BinaryOperation::Add:
+        {
+            AddFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
+        case BinaryOperation::Div:
+        {
+            DivFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
+        case BinaryOperation::Maximum:
+        {
+            MaximumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
+        case BinaryOperation::Minimum:
+        {
+            MinimumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
+        case BinaryOperation::Mul:
+        {
+            MulFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
+        case BinaryOperation::Sub:
+        {
+            SubFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+            break;
+        }
+        default:
+        {
+            throw InvalidArgumentException(std::string("Unsupported binary operation ") +
+                                           GetBinaryOperationAsCString(operation), CHECK_LOCATION());
+        }
+    }
+}
+
+RefElementwiseBinaryWorkload::RefElementwiseBinaryWorkload(const ElementwiseBinaryQueueDescriptor& desc,
+                                                           const WorkloadInfo& info)
+    : RefBaseWorkload<ElementwiseBinaryQueueDescriptor>(desc, info)
+{}
+
+void RefElementwiseBinaryWorkload::Execute() const
+{
+    Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefElementwiseBinaryWorkload::ExecuteAsync(ExecutionData& executionData)
+{
+    WorkingMemDescriptor* workingMemDescriptor = static_cast<WorkingMemDescriptor*>(executionData.m_Data);
+    Execute(workingMemDescriptor->m_Inputs, workingMemDescriptor->m_Outputs);
+}
+
+void RefElementwiseBinaryWorkload::Execute(std::vector<ITensorHandle*> inputs,
+                                           std::vector<ITensorHandle*> outputs) const
+{
+    ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefElementwiseBinaryWorkload_Execute");
+
+    if (GetTensorInfo(inputs[0]).GetDataType() == DataType::Signed32)
+    {
+        ExecuteFunction<int32_t>(inputs, outputs, m_Data.m_Parameters.m_Operation);
+    }
+    else
+    {
+        ExecuteFunction<float>(inputs, outputs, m_Data.m_Parameters.m_Operation);
+    }
+}
+
+} // namespace armnn
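[Editor's note: Stripped of the Decoder/Encoder plumbing and the broadcast handling that ElementwiseBinaryFunction provides, each branch of ExecuteFunction above reduces to applying a standard functor element by element. The following self-contained sketch -- illustrative, not armnn code -- shows that core for same-shape float tensors.]

    #include <cstddef>
    #include <functional>
    #include <vector>

    // Apply a binary functor element-wise over two equal-length inputs.
    template <typename Op>
    void ElementwiseSameShape(const std::vector<float>& in0,
                              const std::vector<float>& in1,
                              std::vector<float>& out,
                              Op op)
    {
        out.resize(in0.size());
        for (std::size_t i = 0; i < in0.size(); ++i)
        {
            out[i] = op(in0[i], in1[i]);
        }
    }

    // Usage mirroring the switch above: std::plus<float>{} for Add,
    // std::divides<float>{} for Div, std::minus<float>{} for Sub, and so on.
    // The real workload additionally handles broadcasting (inShape0 != inShape1)
    // and quantized types through the Decoder/Encoder abstraction.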
diff --git a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp
new file mode 100644
index 0000000000..37458a1705
--- /dev/null
+++ b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BaseIterator.hpp"
+
+#include "RefBaseWorkload.hpp"
+#include <armnn/backends/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefElementwiseBinaryWorkload : public RefBaseWorkload<ElementwiseBinaryQueueDescriptor>
+{
+public:
+    using RefBaseWorkload<ElementwiseBinaryQueueDescriptor>::m_Data;
+
+    RefElementwiseBinaryWorkload(const ElementwiseBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+    void Execute() const override;
+    void ExecuteAsync(ExecutionData& executionData) override;
+
+private:
+    void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
+};
+
+} // namespace armnn
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index afed71bfff..dba880bafc 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -1,5 +1,5 @@
 //
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
 // SPDX-License-Identifier: MIT
 //
@@ -26,6 +26,7 @@
 #include "RefDetectionPostProcessWorkload.hpp"
 #include "RefDequantizeWorkload.hpp"
 #include "RefElementwiseWorkload.hpp"
+#include "RefElementwiseBinaryWorkload.hpp"
 #include "RefElementwiseUnaryWorkload.hpp"
 #include "RefFakeQuantizationFloat32Workload.hpp"
 #include "RefFillWorkload.hpp"
-- 
cgit v1.2.1
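[Editor's note: The new header above follows the reference backend's usual workload shape: the public synchronous Execute() and the asynchronous ExecuteAsync() both funnel into one private Execute(inputs, outputs) overload, so the arithmetic is written exactly once. A skeletal sketch of that pattern follows; TensorHandle, WorkingMem and Workload are hypothetical stand-ins for the armnn types, not the library's API.]

    #include <vector>

    struct TensorHandle;  // stand-in for armnn::ITensorHandle

    struct WorkingMem     // stand-in for armnn::WorkingMemDescriptor
    {
        std::vector<TensorHandle*> inputs;
        std::vector<TensorHandle*> outputs;
    };

    class Workload
    {
    public:
        // Synchronous path: uses the tensor handles captured at construction.
        void Execute() const { Execute(m_Inputs, m_Outputs); }

        // Asynchronous path: uses per-invocation working memory instead.
        void ExecuteAsync(WorkingMem& mem) { Execute(mem.inputs, mem.outputs); }

    private:
        // Single implementation shared by both entry points.
        void Execute(const std::vector<TensorHandle*>& inputs,
                     const std::vector<TensorHandle*>& outputs) const
        {
            (void)inputs;   // per-element compute would happen here
            (void)outputs;
        }

        std::vector<TensorHandle*> m_Inputs;
        std::vector<TensorHandle*> m_Outputs;
    };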