author     Mike Kelly <mike.kelly@arm.com>    2023-03-08 13:47:17 +0000
committer  Francis Murtagh <francis.murtagh@arm.com>    2023-03-14 16:40:09 +0000
commit     3ec3077b4eaedcc0c20ab5774bdbe365da541445 (patch)
tree       d601d2000897dec8691bf64cbddc9036f26b8034 /src/backends
parent     a088cd00b3cce672d26cdcb4965fc2a86b48f339 (diff)
download   armnn-3ec3077b4eaedcc0c20ab5774bdbe365da541445.tar.gz
IVGCVSW-3808 Add ElementwiseBinaryLayer
!android-nn-driver:9329

 * Added ElementwiseBinaryLayer that can represent all ElementwiseBinary
   operations including Add, Div, Sub, Maximum, Mul and Minimum.
 * Updated Delegate to use ElementwiseBinaryLayer instead of the Add, Div,
   Sub, Maximum, Mul and Minimum layers.
 * Updated Deserializer to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated OnnxParser to use ElementwiseBinaryLayer instead of the Add layer.
 * Updated TfLiteParser to use ElementwiseBinaryLayer instead of the Add,
   Div, Sub, Maximum, Mul and Minimum layers.
 * Updated CL and Neon tests to use ElementwiseBinaryLayer.
 * Updated CL and Neon Backend Specific Optimizations to accept
   ElementwiseBinaryLayers as well as Add, Div, Mul, Sub, Maximum and
   Minimum layers.

Signed-off-by: Teresa Charlin <teresa.charlinreyes@arm.com>
Signed-off-by: Mike Kelly <mike.kelly@arm.com>
Change-Id: I7cbb96b60eb01f0e2b57b0541016d48a08b86c75
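For context, a minimal sketch (not part of this commit) of what the migration looks like for a caller: a network that previously used the deprecated AddAdditionLayer creates the same operation through the unified AddElementwiseBinaryLayer entry point. The layer names and bindings below are illustrative only.

    // Before this change (now deprecated):
    //     IConnectableLayer* add = net->AddAdditionLayer("add");
    // After, using the unified layer introduced here:
    using namespace armnn;
    INetworkPtr net = INetwork::Create();
    IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
    IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
    IConnectableLayer* add    = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
    IConnectableLayer* output = net->AddOutputLayer(0, "output");
    input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
    input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
    add->GetOutputSlot(0).Connect(output->GetInputSlot(0));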
Diffstat (limited to 'src/backends')
-rw-r--r--  src/backends/aclCommon/ArmComputeSubgraphUtils.hpp                        23
-rw-r--r--  src/backends/backendsCommon/CMakeLists.txt                                 2
-rw-r--r--  src/backends/backendsCommon/WorkloadData.cpp                              34
-rw-r--r--  src/backends/backendsCommon/WorkloadFactory.cpp                           41
-rw-r--r--  src/backends/backendsCommon/common.mk                                      2
-rw-r--r--  src/backends/backendsCommon/test/CMakeLists.txt                            1
-rw-r--r--  src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp  107
-rw-r--r--  src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp             4
-rw-r--r--  src/backends/backendsCommon/test/OptimizedNetworkTests.cpp               10
-rw-r--r--  src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp    9
-rw-r--r--  src/backends/cl/ClBackend.cpp                                             89
-rw-r--r--  src/backends/cl/ClLayerSupport.cpp                                        52
-rw-r--r--  src/backends/cl/ClWorkloadFactory.cpp                                     71
-rw-r--r--  src/backends/cl/test/ClCreateWorkloadTests.cpp                            46
-rw-r--r--  src/backends/cl/test/ClFallbackTests.cpp                                  18
-rw-r--r--  src/backends/cl/test/Fp16SupportTest.cpp                                   4
-rw-r--r--  src/backends/neon/NeonBackend.cpp                                         88
-rw-r--r--  src/backends/neon/NeonLayerSupport.cpp                                    52
-rw-r--r--  src/backends/neon/NeonWorkloadFactory.cpp                                 55
-rw-r--r--  src/backends/neon/test/NeonFallbackTests.cpp                              34
-rw-r--r--  src/backends/reference/RefLayerSupport.cpp                                32
-rw-r--r--  src/backends/reference/RefLayerSupport.hpp                                 2
-rw-r--r--  src/backends/reference/RefWorkloadFactory.cpp                              8
-rw-r--r--  src/backends/reference/backend.mk                                          1
-rw-r--r--  src/backends/reference/test/RefEndToEndTests.cpp                          54
-rw-r--r--  src/backends/reference/test/RefOptimizedNetworkTests.cpp                   4
-rw-r--r--  src/backends/reference/workloads/CMakeLists.txt                            4
-rw-r--r--  src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp       120
-rw-r--r--  src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp        29
-rw-r--r--  src/backends/reference/workloads/RefWorkloads.hpp                          3
30 files changed, 904 insertions, 95 deletions
diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
index c9d6c71f18..599d3538eb 100644
--- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -147,6 +147,27 @@ LayerType* FuseMultiplicationLayer(OptimizationViews& optimizationViews,
}
template<typename LayerType>
+LayerType* FuseElementwiseBinaryLayer(OptimizationViews& optimizationViews,
+ LayerType* baseLayer,
+ ActivationLayer* activationLayer,
+ ActivationDescriptor& activationDesc,
+ BinaryOperation operation,
+ std::string name)
+{
+ IConnectableLayer* replacement = optimizationViews.GetINetwork()->AddElementwiseBinaryLayer(operation,
+ name.c_str());
+ LayerType* replacementLayer = PolymorphicDowncast<LayerType*>(replacement);
+
+ FuseLayer(optimizationViews,
+ baseLayer,
+ replacementLayer,
+ activationLayer,
+ activationDesc);
+
+ return replacementLayer;
+}
+
+template<typename LayerType>
LayerType* FuseBatchNormalizationLayer(OptimizationViews& optimizationViews,
LayerType* baseLayer,
ActivationLayer* activationLayer,
diff --git a/src/backends/backendsCommon/CMakeLists.txt b/src/backends/backendsCommon/CMakeLists.txt
index 8d7e114fa5..28ff205d6e 100644
--- a/src/backends/backendsCommon/CMakeLists.txt
+++ b/src/backends/backendsCommon/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017, 2023 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index 62dfc6a38b..6a5963ddcb 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -35,11 +35,8 @@ DataType GetBiasDataType(DataType inputDataType)
case DataType::Float32:
return DataType::Float32;
case DataType::QAsymmS8:
- return DataType::Signed32;
case DataType::QAsymmU8:
- return DataType::Signed32;
case DataType::QSymmS8:
- return DataType::Signed32;
case DataType::QSymmS16:
return DataType::Signed32;
default:
@@ -3668,6 +3665,35 @@ void ComparisonQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
}
}
+void ElementwiseBinaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
+{
+ const std::string descriptorName{"ElementwiseBinaryQueueDescriptor"};
+
+ ValidateNumInputs(workloadInfo, descriptorName, 2);
+ ValidateNumOutputs(workloadInfo, descriptorName, 1);
+
+ const TensorInfo& inputTensorInfo0 = workloadInfo.m_InputTensorInfos[0];
+ const TensorInfo& inputTensorInfo1 = workloadInfo.m_InputTensorInfos[1];
+ const TensorInfo& outputTensorInfo = workloadInfo.m_OutputTensorInfos[0];
+
+ std::vector<DataType> supportedTypes =
+ {
+ DataType::BFloat16,
+ DataType::Float16,
+ DataType::Float32,
+ DataType::QAsymmS8,
+ DataType::QAsymmU8,
+ DataType::QSymmS16,
+ DataType::Signed32
+ };
+
+ ValidateDataTypes(inputTensorInfo0, supportedTypes, descriptorName);
+ ValidateDataTypes(inputTensorInfo1, supportedTypes, descriptorName);
+
+ ValidateTensorDataTypesMatch(inputTensorInfo0, outputTensorInfo, descriptorName, "input", "output");
+ ValidateTensorDataTypesMatch(inputTensorInfo1, outputTensorInfo, descriptorName, "input", "output");
+}
+
void ElementwiseUnaryQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
{
const std::string descriptorName{"ElementwiseUnaryQueueDescriptor"};
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 1283f67660..51bc3e60cb 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -15,7 +15,6 @@
#include <armnn/utility/TransformIterator.hpp>
#include <armnn/backends/WorkloadFactory.hpp>
-#include <armnn/backends/TensorHandle.hpp>
#include <sstream>
@@ -91,7 +90,8 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
auto backendFactory = backendRegistry.GetFactory(backendId);
auto backendObject = backendFactory();
- auto layerSupportObject = LayerSupportHandle(backendObject->GetLayerSupport(modelOptions), backendId);
+ auto layerSupport = backendObject->GetLayerSupport(modelOptions);
+ auto layerSupportObject = LayerSupportHandle(layerSupport, backendId);
switch(layer.GetType())
{
@@ -109,6 +109,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
}
case LayerType::Addition:
{
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -117,6 +118,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
OverrideDataType(input1, dataType),
OverrideDataType(output, dataType),
reason);
+ ARMNN_NO_DEPRECATE_WARN_END
break;
}
case LayerType::ArgMinMax:
@@ -392,6 +394,24 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
reason);
break;
}
+ case LayerType::ElementwiseBinary:
+ {
+ auto cLayer = PolymorphicDowncast<const ElementwiseBinaryLayer*>(&layer);
+
+ const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
+ const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
+ const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+ std::vector<TensorInfo> infos = { OverrideDataType(input0, dataType),
+ OverrideDataType(input1, dataType),
+ OverrideDataType(output, dataType) };
+ result = layerSupport->IsLayerSupported(LayerType::ElementwiseBinary,
+ infos,
+ cLayer->GetParameters(),
+ EmptyOptional(),
+ EmptyOptional(),
+ reason);
+ break;
+ }
case LayerType::ElementwiseUnary:
{
auto cLayer = PolymorphicDowncast<const ElementwiseUnaryLayer*>(&layer);
@@ -740,6 +760,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
}
case LayerType::Maximum:
{
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -748,6 +769,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
OverrideDataType(input1, dataType),
OverrideDataType(output, dataType),
reason);
+ ARMNN_NO_DEPRECATE_WARN_END
break;
}
case LayerType::MemCopy:
@@ -814,6 +836,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
}
case LayerType::Multiplication:
{
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -822,6 +845,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
OverrideDataType(input1, dataType),
OverrideDataType(output, dataType),
reason);
+ ARMNN_NO_DEPRECATE_WARN_END
break;
}
case LayerType::Normalization:
@@ -1052,6 +1076,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
}
case LayerType::Division:
{
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -1060,6 +1085,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
OverrideDataType(input1, dataType),
OverrideDataType(output, dataType),
reason);
+ ARMNN_NO_DEPRECATE_WARN_END
break;
}
case LayerType::Rank:
@@ -1254,6 +1280,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
}
case LayerType::Subtraction:
{
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -1262,6 +1289,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
OverrideDataType(input1, dataType),
OverrideDataType(output, dataType),
reason);
+ ARMNN_NO_DEPRECATE_WARN_END
break;
}
case LayerType::Switch:
@@ -1291,6 +1319,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
}
case LayerType::Minimum:
{
+ ARMNN_NO_DEPRECATE_WARN_BEGIN
const TensorInfo& input0 = layer.GetInputSlot(0).GetConnection()->GetTensorInfo();
const TensorInfo& input1 = layer.GetInputSlot(1).GetConnection()->GetTensorInfo();
const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
@@ -1298,6 +1327,7 @@ bool IWorkloadFactory::IsLayerConfigurationSupported(const BackendId& backendId,
OverrideDataType(input1, dataType),
OverrideDataType(output, dataType),
reason);
+ ARMNN_NO_DEPRECATE_WARN_END
break;
}
case LayerType::Prelu:
@@ -1670,6 +1700,11 @@ std::unique_ptr<IWorkload> IWorkloadFactory::CreateWorkload(LayerType type,
auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
return CreateDivision(*divisionQueueDescriptor, info);
}
+ case LayerType::ElementwiseBinary:
+ {
+ auto queueDescriptor = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+ return CreateWorkload(LayerType::ElementwiseBinary, *queueDescriptor, info);
+ }
case LayerType::ElementwiseUnary:
{
auto elementwiseUnaryQueueDescriptor
diff --git a/src/backends/backendsCommon/common.mk b/src/backends/backendsCommon/common.mk
index 3545331c8f..986d2530c2 100644
--- a/src/backends/backendsCommon/common.mk
+++ b/src/backends/backendsCommon/common.mk
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 ARM Ltd. All rights reserved.
+# Copyright © 2017-2023 ARM Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
diff --git a/src/backends/backendsCommon/test/CMakeLists.txt b/src/backends/backendsCommon/test/CMakeLists.txt
index 509157a378..77335d550a 100644
--- a/src/backends/backendsCommon/test/CMakeLists.txt
+++ b/src/backends/backendsCommon/test/CMakeLists.txt
@@ -24,6 +24,7 @@ list(APPEND armnnBackendsCommonUnitTests_sources
DetectionPostProcessEndToEndTestImpl.hpp
DynamicBackendTests.cpp
DynamicBackendTests.hpp
+ ElementwiseBinaryEndToEndTestImpl.hpp
ElementwiseUnaryEndToEndTestImpl.hpp
EndToEndTestImpl.hpp
FillEndToEndTestImpl.hpp
diff --git a/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
new file mode 100644
index 0000000000..6546a6ae9e
--- /dev/null
+++ b/src/backends/backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp
@@ -0,0 +1,107 @@
+//
+// Copyright © 2023 Arm Ltd and contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#pragma once
+
+#include "CommonTestUtils.hpp"
+
+#include <ResolveType.hpp>
+
+#include <armnn/INetwork.hpp>
+#include <armnn/utility/NumericCast.hpp>
+
+#include <doctest/doctest.h>
+
+#include <vector>
+
+namespace
+{
+
+template<armnn::DataType ArmnnTypeInput>
+INetworkPtr CreateElementwiseBinaryNetwork(const TensorShape& input1Shape,
+ const TensorShape& input2Shape,
+ const TensorShape& outputShape,
+ BinaryOperation operation,
+ const float qScale = 1.0f,
+ const int32_t qOffset = 0)
+{
+ using namespace armnn;
+
+ INetworkPtr net(INetwork::Create());
+
+ TensorInfo input1TensorInfo(input1Shape, ArmnnTypeInput, qScale, qOffset, true);
+ TensorInfo input2TensorInfo(input2Shape, ArmnnTypeInput, qScale, qOffset, true);
+ TensorInfo outputTensorInfo(outputShape, ArmnnTypeInput, qScale, qOffset);
+
+ IConnectableLayer* input1 = net->AddInputLayer(armnn::numeric_cast<LayerBindingId>(0));
+ IConnectableLayer* input2 = net->AddInputLayer(armnn::numeric_cast<LayerBindingId>(1));
+ IConnectableLayer* elementwiseBinaryLayer = net->AddElementwiseBinaryLayer(operation, "elementwiseBinary");
+ IConnectableLayer* output = net->AddOutputLayer(0, "output");
+
+ Connect(input1, elementwiseBinaryLayer, input1TensorInfo, 0, 0);
+ Connect(input2, elementwiseBinaryLayer, input2TensorInfo, 0, 1);
+ Connect(elementwiseBinaryLayer, output, outputTensorInfo, 0, 0);
+
+ return net;
+}
+
+template<armnn::DataType ArmnnInType,
+ typename TInput = armnn::ResolveType<ArmnnInType>>
+void ElementwiseBinarySimpleEndToEnd(const std::vector<BackendId>& backends,
+ BinaryOperation operation)
+{
+ using namespace armnn;
+
+ const float qScale = IsQuantizedType<TInput>() ? 0.25f : 1.0f;
+ const int32_t qOffset = IsQuantizedType<TInput>() ? 50 : 0;
+
+ const TensorShape& input1Shape = { 2, 2, 2, 2 };
+ const TensorShape& input2Shape = { 1 };
+ const TensorShape& outputShape = { 2, 2, 2, 2 };
+
+ // Builds up the structure of the network
+ INetworkPtr net = CreateElementwiseBinaryNetwork<ArmnnInType>(input1Shape, input2Shape, outputShape,
+ operation, qScale, qOffset);
+
+ CHECK(net);
+
+ const std::vector<float> input1({ 1, -1, 1, 1, 5, -5, 5, 5, -3, 3, 3, 3, 4, 4, -4, 4 });
+
+ const std::vector<float> input2({ 2 });
+ std::vector<float> expectedOutput;
+ switch (operation)
+ {
+ case armnn::BinaryOperation::Add:
+ expectedOutput = { 3, 1, 3, 3, 7, -3, 7, 7, -1, 5, 5, 5, 6, 6, -2, 6 };
+ break;
+ case armnn::BinaryOperation::Div:
+ expectedOutput = {0.5f, -0.5f, 0.5f, 0.5f, 2.5f, -2.5f, 2.5f, 2.5f, -1.5f, 1.5f, 1.5f, 1.5f, 2, 2, -2, 2};
+ break;
+ case armnn::BinaryOperation::Maximum:
+ expectedOutput = { 2, 2, 2, 2, 5, 2, 5, 5, 2, 3, 3, 3, 4, 4, 2, 4 };
+ break;
+ case armnn::BinaryOperation::Minimum:
+ expectedOutput = { 1, -1, 1, 1, 2, -5, 2, 2, -3, 2, 2, 2, 2, 2, -4, 2 };
+ break;
+ case armnn::BinaryOperation::Mul:
+ expectedOutput = { 2, -2, 2, 2, 10, -10, 10, 10, -6, 6, 6, 6, 8, 8, -8, 8 };
+ break;
+ case armnn::BinaryOperation::Sub:
+ expectedOutput = { -1, -3, -1, -1, 3, -7, 3, 3, -5, 1, 1, 1, 2, 2, -6, 2 };
+ break;
+ default:
+ throw armnn::InvalidArgumentException("Invalid Elementwise Binary operation");
+ }
+ // Quantize the input and expected output data
+ std::vector<TInput> qInput1Data = armnnUtils::QuantizedVector<TInput>(input1, qScale, qOffset);
+ std::vector<TInput> qInput2Data = armnnUtils::QuantizedVector<TInput>(input2, qScale, qOffset);
+ std::vector<TInput> qExpectedOutput = armnnUtils::QuantizedVector<TInput>(expectedOutput, qScale, qOffset);
+
+ std::map<int, std::vector<TInput>> inputTensorData = {{ 0, qInput1Data }, { 1, qInput2Data }};
+ std::map<int, std::vector<TInput>> expectedOutputData = {{ 0, qExpectedOutput }};
+
+ EndToEndLayerTestImpl<ArmnnInType, ArmnnInType>(std::move(net), inputTensorData, expectedOutputData, backends);
+}
+
+} // anonymous namespace
diff --git a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
index fb7a0271d4..5b95d3cd92 100644
--- a/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
+++ b/src/backends/backendsCommon/test/IsLayerSupportedTestImpl.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -664,6 +664,8 @@ DECLARE_LAYER_POLICY_1_PARAM(Dequantize)
DECLARE_LAYER_POLICY_2_PARAM(DetectionPostProcess)
+DECLARE_LAYER_POLICY_2_PARAM(ElementwiseBinary)
+
DECLARE_LAYER_POLICY_2_PARAM(ElementwiseUnary)
DECLARE_LAYER_POLICY_2_PARAM(FakeQuantization)
diff --git a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp
index cd865def71..5e619df8dd 100644
--- a/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp
+++ b/src/backends/backendsCommon/test/OptimizedNetworkTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -21,7 +21,7 @@ TEST_CASE("SerializeToDot")
//Defines layers.
auto input = net->AddInputLayer(0);
- auto add = net->AddAdditionLayer();
+ auto add = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Add);
auto output = net->AddOutputLayer(0);
// Connects layers.
@@ -54,7 +54,7 @@ TEST_CASE("SerializeToDot")
" edge [fontsize=8 fontcolor=\"blue\" fontname=\"arial-bold\"];\n"
" " << inputId << " [label=\"{Input|Guid : " << inputId << "\\lLayerType : Input\\l"
"BackendID : CpuRef\\l}\"];\n"
- " " << addId << " [label=\"{Addition|Guid : " << addId << "\\lLayerType : Addition\\l"
+ " " << addId << " [label=\"{ElementwiseBinary|Guid : " << addId << "\\lLayerType : ElementwiseBinary\\l"
"BackendID : CpuRef\\l}\"];\n"
" " << outputId << " [label=\"{Output|Guid : " << outputId << "\\lLayerType : Output\\l"
"BackendID : CpuRef\\l}\"];\n"
@@ -187,7 +187,7 @@ TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDevice")
layer->GetOutputSlot(0).SetTensorInfo(desc);
armnn::IConnectableLayer* prevLayer = layer;
- layer = net->AddMultiplicationLayer("ml");
+ layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");
prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
@@ -258,7 +258,7 @@ TEST_CASE("OptimizeValidateWorkloadsUndefinedComputeDeviceWithFallback")
layer->GetOutputSlot(0).SetTensorInfo(desc);
armnn::IConnectableLayer* prevLayer = layer;
- layer = net->AddMultiplicationLayer("ml");
+ layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");
prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
diff --git a/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp b/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp
index 380ce4a3f5..da4b7ab7d0 100644
--- a/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp
+++ b/src/backends/backendsCommon/test/mockBackend/MockImportLayerSupport.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2021,2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
@@ -16,7 +16,7 @@ class MockImportLayerSupport : public LayerSupportBase
public:
bool IsLayerSupported(const LayerType& type,
const std::vector<TensorInfo>& infos,
- const BaseDescriptor& /*descriptor*/,
+ const BaseDescriptor& descriptor,
const Optional<LstmInputParamsInfo>& /*lstmParamsInfo*/,
const Optional<QuantizedLstmInputParamsInfo>& /*quantizedLstmParamsInfo*/,
Optional<std::string&> reasonIfUnsupported) const override
@@ -25,6 +25,11 @@ public:
{
case LayerType::Addition:
return IsAdditionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ case LayerType::ElementwiseBinary:
+ {
+ auto elementwiseDesc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor*>(&descriptor));
+ return (elementwiseDesc.m_Operation == BinaryOperation::Add);
+ }
case LayerType::Input:
return IsInputSupported(infos[0], reasonIfUnsupported);
case LayerType::Output:
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index d2e8fbfe32..a10b6fbb43 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -311,7 +311,8 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
|| base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
|| base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
- || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+ || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
+ || base.GetType() == LayerType::ElementwiseBinary)
&& (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
{
for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
@@ -542,6 +543,90 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
untouched.erase(activationLayer->GetGuid());
}
}
+ else if (base.GetType() == LayerType::ElementwiseBinary)
+ {
+ ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
+
+ if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
+ {
+ arm_compute::Status status = ClAdditionValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Add,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
+ {
+ arm_compute::Status status = ClDivisionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Div,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
+ {
+ arm_compute::Status status = ClMultiplicationWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Mul,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
+ {
+ arm_compute::Status status = ClSubtractionValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Sub,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ // No fusion available for other BinaryOperations
+ }
}
}
}
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index cb2d756037..89bcf9bc01 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -347,6 +347,56 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type,
return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::Division:
return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ case LayerType::ElementwiseBinary:
+ {
+ auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor));
+
+ switch (desc.m_Operation)
+ {
+ case BinaryOperation::Add:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClAdditionValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Div:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Minimum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClMinimumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Maximum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClMaximumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Mul:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Sub:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClSubtractionValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ default:
+ return false;
+ }
+ }
case LayerType::ElementwiseUnary:
return IsElementwiseUnarySupported(infos[0],
infos[1],
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 6bf510a2ef..022867710c 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "ClWorkloadFactory.hpp"
@@ -405,6 +405,75 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type,
auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext);
}
+ case LayerType::ElementwiseBinary :
+ {
+ auto elementwiseBinaryQueueDescriptor
+ = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+
+ switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
+ {
+ case BinaryOperation::Add:
+ {
+ AdditionQueueDescriptor additionQueueDescriptor;
+ additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ additionQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Div:
+ {
+ DivisionQueueDescriptor divisionQueueDescriptor;
+ divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ divisionQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Maximum:
+ {
+ MaximumQueueDescriptor maximumQueueDescriptor;
+ maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ maximumQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Minimum:
+ {
+ MinimumQueueDescriptor minimumQueueDescriptor;
+ minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ minimumQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Mul:
+ {
+ MultiplicationQueueDescriptor multiplicationQueueDescriptor;
+ multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ multiplicationQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
+ info,
+ m_CLCompileContext);
+ }
+ case BinaryOperation::Sub:
+ {
+ SubtractionQueueDescriptor subtractionQueueDescriptor;
+ subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ subtractionQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
+ info,
+ m_CLCompileContext);
+ }
+ default:
+ return nullptr;
+ }
+ }
case LayerType::ElementwiseUnary :
{
auto elementwiseUnaryQueueDescriptor
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index adea733582..c49ca23266 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -66,19 +66,17 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload")
}
template <typename WorkloadType,
- typename DescriptorType,
- typename LayerType,
armnn::DataType DataType>
-static void ClCreateElementwiseWorkloadTest()
+static void ClCreateElementwiseWorkloadTest(BinaryOperation binaryOperator)
{
Graph graph;
ClWorkloadFactory factory =
ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
- auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
+ auto workload = CreateElementwiseBinaryWorkloadTest<WorkloadType, DataType>(factory, graph, binaryOperator);
// Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
- DescriptorType queueDescriptor = workload->GetData();
+ auto queueDescriptor = workload->GetData();
auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
@@ -93,73 +91,55 @@ static void ClCreateElementwiseWorkloadTest()
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload")
{
ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
- AdditionQueueDescriptor,
- AdditionLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Add);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload")
{
ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
- AdditionQueueDescriptor,
- AdditionLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Add);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload")
{
ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
- SubtractionQueueDescriptor,
- SubtractionLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Sub);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload")
{
ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
- SubtractionQueueDescriptor,
- SubtractionLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Sub);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
- MultiplicationQueueDescriptor,
- MultiplicationLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Mul);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
- MultiplicationQueueDescriptor,
- MultiplicationLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Mul);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
- MultiplicationQueueDescriptor,
- MultiplicationLayer,
- armnn::DataType::QAsymmU8>();
+ armnn::DataType::QAsymmU8>(BinaryOperation::Mul);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
- DivisionQueueDescriptor,
- DivisionLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Div);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
- DivisionQueueDescriptor,
- DivisionLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Div);
}
template <typename WorkloadType,
diff --git a/src/backends/cl/test/ClFallbackTests.cpp b/src/backends/cl/test/ClFallbackTests.cpp
index 51a983a681..9443116c92 100644
--- a/src/backends/cl/test/ClFallbackTests.cpp
+++ b/src/backends/cl/test/ClFallbackTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -24,8 +24,8 @@ TEST_CASE("ClImportEnabledFallbackToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -172,8 +172,8 @@ TEST_CASE("ClImportDisabledFallbackToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -301,8 +301,8 @@ TEST_CASE("ClImportEnabledFallbackSubgraphToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
@@ -460,8 +460,8 @@ TEST_CASE("ClImportDisableFallbackSubgraphToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp
index da6ea10926..28ae4795ab 100644
--- a/src/backends/cl/test/Fp16SupportTest.cpp
+++ b/src/backends/cl/test/Fp16SupportTest.cpp
@@ -28,7 +28,7 @@ TEST_CASE("Fp16DataTypeSupport")
Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1");
Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2");
- Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition");
+ Layer* const additionLayer = graph.AddLayer<ElementwiseBinaryLayer>(BinaryOperation::Add, "addition");
Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16);
@@ -57,7 +57,7 @@ TEST_CASE("Fp16AdditionTest")
IConnectableLayer* inputLayer1 = net->AddInputLayer(0);
IConnectableLayer* inputLayer2 = net->AddInputLayer(1);
- IConnectableLayer* additionLayer = net->AddAdditionLayer();
+ IConnectableLayer* additionLayer = net->AddElementwiseBinaryLayer(BinaryOperation::Add);
IConnectableLayer* outputLayer = net->AddOutputLayer(0);
inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 968bce48c8..cea2aa3eba 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -395,6 +395,92 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
untouched.erase(activationLayer->GetGuid());
}
}
+ else if (base.GetType() == LayerType::ElementwiseBinary)
+ {
+ ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
+
+ if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
+ {
+ arm_compute::Status status = NeonAdditionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Add,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
+ {
+ arm_compute::Status status = NeonDivisionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Div,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
+ {
+ arm_compute::Status status = NeonMultiplicationWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Mul,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
+ {
+ arm_compute::Status status = NeonSubtractionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Sub,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ // No fusion available for other BinaryOperations
+ }
}
}
}
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index ee155a2c64..672b2f377f 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -346,6 +346,56 @@ bool IsLayerTypeSupported(const LayerType& type,
}
case LayerType::Division:
return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ case LayerType::ElementwiseBinary:
+ {
+ auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor));
+
+ switch (desc.m_Operation)
+ {
+ case BinaryOperation::Add:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Div:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDivisionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Maximum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Minimum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Mul:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Sub:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ default:
+ return false;
+ }
+ }
case LayerType::ElementwiseUnary:
return support.IsElementwiseUnarySupported(infos[0],
infos[1],
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index dccd4a3a36..08168eca2f 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -280,6 +280,59 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateWorkload(LayerType type,
auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
return std::make_unique<NeonDivisionWorkload>(*divisionQueueDescriptor, info);
}
+ case LayerType::ElementwiseBinary :
+ {
+ auto elementwiseBinaryQueueDescriptor
+ = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+
+ switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
+ {
+ case BinaryOperation::Add:
+ {
+ AdditionQueueDescriptor additionQueueDescriptor;
+ additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonAdditionWorkload>(additionQueueDescriptor, info);
+ }
+ case BinaryOperation::Div:
+ {
+ DivisionQueueDescriptor divisionQueueDescriptor;
+ divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonDivisionWorkload>(divisionQueueDescriptor, info);
+ }
+ case BinaryOperation::Maximum:
+ {
+ MaximumQueueDescriptor maximumQueueDescriptor;
+ maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonMaximumWorkload>(maximumQueueDescriptor, info);
+ }
+ case BinaryOperation::Minimum:
+ {
+ MinimumQueueDescriptor minimumQueueDescriptor;
+ minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonMinimumWorkload>(minimumQueueDescriptor, info);
+ }
+ case BinaryOperation::Mul:
+ {
+ MultiplicationQueueDescriptor multiplicationQueueDescriptor;
+ multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonMultiplicationWorkload>(multiplicationQueueDescriptor, info);
+ }
+ case BinaryOperation::Sub:
+ {
+ SubtractionQueueDescriptor subtractionQueueDescriptor;
+ subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonSubtractionWorkload>(subtractionQueueDescriptor, info);
+ }
+ default:
+ return nullptr;
+ }
+ }
case LayerType::ElementwiseUnary :
{
auto elementwiseUnaryQueueDescriptor
diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp
index 8e0e0ab99b..40df2dc315 100644
--- a/src/backends/neon/test/NeonFallbackTests.cpp
+++ b/src/backends/neon/test/NeonFallbackTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -38,8 +38,8 @@ TEST_CASE("FallbackImportToCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -183,7 +183,7 @@ TEST_CASE("FallbackPaddingCopyToCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
@@ -318,8 +318,8 @@ TEST_CASE("FallbackImportFromCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
@@ -465,7 +465,7 @@ TEST_CASE("FallbackPaddingCopyFromCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
@@ -599,8 +599,8 @@ TEST_CASE("FallbackDisableImportFromCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
@@ -725,8 +725,8 @@ TEST_CASE("NeonImportEnabledFallbackToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -878,8 +878,8 @@ TEST_CASE("NeonImportDisabledFallbackToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -1013,8 +1013,8 @@ TEST_CASE("NeonImportEnabledFallbackSubgraphToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
@@ -1177,8 +1177,8 @@ TEST_CASE("NeonImportDisableFallbackSubgraphToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index a5015a7376..cbc6723dbc 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -189,6 +189,36 @@ bool RefLayerSupport::IsLayerSupported(const LayerType& type,
return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::Division:
return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ case LayerType::ElementwiseBinary:
+ {
+ std::array<DataType, 6> supportedTypes =
+ {
+ DataType::Float32,
+ DataType::Float16,
+ DataType::QAsymmS8,
+ DataType::QAsymmU8,
+ DataType::QSymmS16,
+ DataType::Signed32
+ };
+
+ bool supported = true;
+ supported &= CheckSupportRule(TypeAnyOf(infos[0], supportedTypes), reasonIfUnsupported,
+ "Reference ElementwiseBinary: input type not supported");
+
+ supported &= CheckSupportRule(TypeAnyOf(infos[1], supportedTypes), reasonIfUnsupported,
+ "Reference ElementwiseBinary: input type not supported");
+
+ supported &= CheckSupportRule(TypeAnyOf(infos[2], supportedTypes), reasonIfUnsupported,
+ "Reference ElementwiseBinary: output type not supported");
+
+ supported &= CheckSupportRule(TypesAreEqual(infos[0], infos[1]), reasonIfUnsupported,
+ "Reference ElementwiseBinary: input types not matching");
+
+ supported &= CheckSupportRule(TypesAreEqual(infos[0], infos[2]), reasonIfUnsupported,
+ "Reference ElementwiseBinary: input and output types not matching");
+
+ return supported;
+ }
case LayerType::ElementwiseUnary:
return IsElementwiseUnarySupported(infos[0],
infos[1],
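The ElementwiseBinary case above checks both inputs and the output against one shared type list and then requires all three types to match. A minimal stand-alone sketch of the same rule pattern, assuming simplified stand-ins for CheckSupportRule, TypeAnyOf and TypesAreEqual (whose real signatures are not shown in this diff):

    #include <algorithm>
    #include <array>
    #include <string>

    enum class DataType { Float32, Float16, QAsymmS8, QAsymmU8, QSymmS16, Signed32 };

    // Membership check plus pairwise equality, accumulating a reason on failure,
    // mirroring the CheckSupportRule chain in RefLayerSupport.cpp.
    bool IsElementwiseBinarySupportedSketch(DataType in0, DataType in1, DataType out,
                                            std::string& reason)
    {
        const std::array<DataType, 6> supportedTypes =
            { DataType::Float32, DataType::Float16, DataType::QAsymmS8,
              DataType::QAsymmU8, DataType::QSymmS16, DataType::Signed32 };

        auto typeAnyOf = [&](DataType t)
        {
            return std::find(supportedTypes.begin(), supportedTypes.end(), t)
                   != supportedTypes.end();
        };

        bool supported = true;
        if (!typeAnyOf(in0)) { supported = false; reason += "input type not supported; "; }
        if (!typeAnyOf(in1)) { supported = false; reason += "input type not supported; "; }
        if (!typeAnyOf(out)) { supported = false; reason += "output type not supported; "; }
        if (in0 != in1)      { supported = false; reason += "input types not matching; "; }
        if (in0 != out)      { supported = false; reason += "input and output types not matching; "; }
        return supported;
    }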
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index f0e9e35978..8e1f68ebfc 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index bfe37d7bf5..10f623eef3 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include <Layer.hpp>
@@ -302,6 +302,12 @@ std::unique_ptr<IWorkload> RefWorkloadFactory::CreateWorkload(LayerType type,
return std::make_unique<RefDivisionWorkload<float>>(*divisionQueueDescriptor, info);
}
}
+ case LayerType::ElementwiseBinary:
+ {
+ auto elementwiseBinaryQueueDescriptor
+ = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+ return std::make_unique<RefElementwiseBinaryWorkload>(*elementwiseBinaryQueueDescriptor, info);
+ }
case LayerType::ElementwiseUnary:
{
auto elementwiseUnaryQueueDescriptor
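CreateWorkload dispatches on LayerType and downcasts the generic queue descriptor to the layer-specific one before constructing the workload; unlike the per-operation arithmetic cases above it (which instantiate a templated workload per element type), the new ElementwiseBinary case builds a single untemplated workload and defers element-type selection to Execute. A hedged miniature of the dispatch pattern, using hypothetical stand-in types and a plain static_cast in place of PolymorphicDowncast:

    #include <memory>

    struct QueueDescriptorBase { virtual ~QueueDescriptorBase() = default; };
    struct ElementwiseBinaryDesc : QueueDescriptorBase { int m_Operation = 0; };

    struct IWorkloadSketch { virtual ~IWorkloadSketch() = default; };
    struct ElementwiseBinaryWorkloadSketch : IWorkloadSketch
    {
        explicit ElementwiseBinaryWorkloadSketch(const ElementwiseBinaryDesc&) {}
    };

    enum class LayerTypeSketch { ElementwiseBinary /* , ... */ };

    std::unique_ptr<IWorkloadSketch> CreateWorkloadSketch(LayerTypeSketch type,
                                                          const QueueDescriptorBase& descriptor)
    {
        switch (type)
        {
            case LayerTypeSketch::ElementwiseBinary:
            {
                // Arm NN uses PolymorphicDowncast, which asserts the cast is valid.
                auto desc = static_cast<const ElementwiseBinaryDesc*>(&descriptor);
                return std::make_unique<ElementwiseBinaryWorkloadSketch>(*desc);
            }
            default: return nullptr;
        }
    }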
diff --git a/src/backends/reference/backend.mk b/src/backends/reference/backend.mk
index eb2ec2df44..c23984c3e9 100644
--- a/src/backends/reference/backend.mk
+++ b/src/backends/reference/backend.mk
@@ -67,6 +67,7 @@ BACKEND_SOURCES := \
workloads/RefDepthwiseConvolution2dWorkload.cpp \
workloads/RefDequantizeWorkload.cpp \
workloads/RefDetectionPostProcessWorkload.cpp \
+ workloads/RefElementwiseBinaryWorkload.cpp \
workloads/RefElementwiseWorkload.cpp \
workloads/RefElementwiseUnaryWorkload.cpp \
workloads/RefFakeQuantizationFloat32Workload.cpp \
diff --git a/src/backends/reference/test/RefEndToEndTests.cpp b/src/backends/reference/test/RefEndToEndTests.cpp
index 6ff57716d0..8bf414fdb0 100644
--- a/src/backends/reference/test/RefEndToEndTests.cpp
+++ b/src/backends/reference/test/RefEndToEndTests.cpp
@@ -18,6 +18,7 @@
#include <backendsCommon/test/DepthToSpaceEndToEndTestImpl.hpp>
#include <backendsCommon/test/DequantizeEndToEndTestImpl.hpp>
#include <backendsCommon/test/DetectionPostProcessEndToEndTestImpl.hpp>
+#include <backendsCommon/test/ElementwiseBinaryEndToEndTestImpl.hpp>
#include <backendsCommon/test/ElementwiseUnaryEndToEndTestImpl.hpp>
#include <backendsCommon/test/FillEndToEndTestImpl.hpp>
#include <backendsCommon/test/FullyConnectedEndToEndTestImpl.hpp>
@@ -185,7 +186,7 @@ TEST_CASE("TrivialAdd")
IConnectableLayer* input1 = net->AddInputLayer(0);
IConnectableLayer* input2 = net->AddInputLayer(1);
- IConnectableLayer* add = net->AddAdditionLayer();
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(ElementwiseBinaryDescriptor(BinaryOperation::Add));
IConnectableLayer* output = net->AddOutputLayer(0);
input1->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -347,7 +348,7 @@ TEST_CASE("TrivialMin")
IConnectableLayer* input1 = net->AddInputLayer(0);
IConnectableLayer* input2 = net->AddInputLayer(1);
- IConnectableLayer* min = net->AddMinimumLayer();
+ IConnectableLayer* min = net->AddElementwiseBinaryLayer(ElementwiseBinaryDescriptor(BinaryOperation::Minimum));
IConnectableLayer* output = net->AddOutputLayer(0);
input1->GetOutputSlot(0).Connect(min->GetInputSlot(0));
@@ -1547,6 +1548,55 @@ TEST_CASE("RefAsyncFP32StridedSlicedScheduledMultiThreadedEndToEndTest")
{
armnn::experimental::StridedSlicedEndToEndTest<armnn::DataType::Float32>(defaultBackends, 3);
}
+
+TEST_CASE("RefAddEndToEndTestFloat32")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Add);
+}
+TEST_CASE("RefAddEndToEndTestUint8")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Add);
+}
+TEST_CASE("RefDivEndToEndTestFloat32")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Div);
+}
+TEST_CASE("RefDivEndToEndTestUint8")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Div);
+}
+TEST_CASE("RefMulEndToEndTestFloat32")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Mul);
+}
+TEST_CASE("RefMulEndToEndTestUint8")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Mul);
+}
+TEST_CASE("RefSubEndToEndTestFloat32")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Sub);
+}
+TEST_CASE("RefSubEndToEndTestUint8")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Sub);
+}
+TEST_CASE("RefMaximumEndToEndTestFloat32")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Maximum);
+}
+TEST_CASE("RefMaximumEndToEndTestUint8")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Maximum);
+}
+TEST_CASE("RefMinimumEndToEndTestFloat32")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::Float32>(defaultBackends, BinaryOperation::Minimum);
+}
+TEST_CASE("RefMinimumEndToEndTestUint8")
+{
+ ElementwiseBinarySimpleEndToEnd<armnn::DataType::QAsymmU8>(defaultBackends, BinaryOperation::Minimum);
+}
#endif
}
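Every test case above delegates to ElementwiseBinarySimpleEndToEnd, parameterised on the backend list, data type and BinaryOperation. Building such a network by hand follows the same calls the updated tests use; a minimal sketch, where the umbrella include, INetwork::Create and the tensor shape are assumptions not shown in this diff:

    #include <armnn/ArmNN.hpp>  // assumed umbrella header

    void BuildAddNetworkSketch()
    {
        using namespace armnn;

        INetworkPtr net = INetwork::Create();  // assumption: standard factory call

        IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
        IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
        IConnectableLayer* add    = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
        IConnectableLayer* output = net->AddOutputLayer(0, "output");

        input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
        input1->GetOutputSlot(0).Connect(add->GetInputSlot(1));
        add->GetOutputSlot(0).Connect(output->GetInputSlot(0));

        TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32);  // shape chosen for illustration
        input0->GetOutputSlot(0).SetTensorInfo(info);
        input1->GetOutputSlot(0).SetTensorInfo(info);
        add->GetOutputSlot(0).SetTensorInfo(info);
    }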
diff --git a/src/backends/reference/test/RefOptimizedNetworkTests.cpp b/src/backends/reference/test/RefOptimizedNetworkTests.cpp
index 7ca1e0e505..7e8064fc76 100644
--- a/src/backends/reference/test/RefOptimizedNetworkTests.cpp
+++ b/src/backends/reference/test/RefOptimizedNetworkTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -53,7 +53,7 @@ TEST_CASE("OptimizeValidateCpuRefWorkloads")
layer->GetOutputSlot(0).SetTensorInfo(desc);
armnn::IConnectableLayer* prevLayer = layer;
- layer = net->AddMultiplicationLayer("ml");
+ layer = net->AddElementwiseBinaryLayer(armnn::BinaryOperation::Mul, "ml");
prevLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(0));
normLayer->GetOutputSlot(0).Connect(layer->GetInputSlot(1));
diff --git a/src/backends/reference/workloads/CMakeLists.txt b/src/backends/reference/workloads/CMakeLists.txt
index de6c042959..3592f2293d 100644
--- a/src/backends/reference/workloads/CMakeLists.txt
+++ b/src/backends/reference/workloads/CMakeLists.txt
@@ -1,5 +1,5 @@
#
-# Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+# Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
# SPDX-License-Identifier: MIT
#
@@ -108,6 +108,8 @@ list(APPEND armnnRefBackendWorkloads_sources
RefDequantizeWorkload.hpp
RefDetectionPostProcessWorkload.cpp
RefDetectionPostProcessWorkload.hpp
+ RefElementwiseBinaryWorkload.cpp
+ RefElementwiseBinaryWorkload.hpp
RefElementwiseUnaryWorkload.cpp
RefElementwiseUnaryWorkload.hpp
RefFakeQuantizationFloat32Workload.cpp
diff --git a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
new file mode 100644
index 0000000000..5dc77f8496
--- /dev/null
+++ b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.cpp
@@ -0,0 +1,119 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "RefElementwiseBinaryWorkload.hpp"
+
+#include "Decoders.hpp"
+#include "ElementwiseFunction.hpp"
+#include "Encoders.hpp"
+#include "RefWorkloadUtils.hpp"
+#include "Maximum.hpp"
+#include "Minimum.hpp"
+
+#include <Profiling.hpp>
+
+#include <armnn/TypesUtils.hpp>
+
+#include <functional>
+
+namespace armnn
+{
+
+template<typename DataType>
+void ExecuteFunction(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs,
+ BinaryOperation operation)
+{
+ const TensorInfo& inputInfo0 = GetTensorInfo(inputs[0]);
+ const TensorInfo& inputInfo1 = GetTensorInfo(inputs[1]);
+ const TensorInfo& outputInfo = GetTensorInfo(outputs[0]);
+
+ const TensorShape& inShape0 = inputInfo0.GetShape();
+ const TensorShape& inShape1 = inputInfo1.GetShape();
+ const TensorShape& outShape = outputInfo.GetShape();
+
+ std::unique_ptr<Decoder<DataType>> input0 = MakeDecoder<DataType>(inputInfo0, inputs[0]->Map());
+ std::unique_ptr<Decoder<DataType>> input1 = MakeDecoder<DataType>(inputInfo1, inputs[1]->Map());
+ std::unique_ptr<Encoder<DataType>> output = MakeEncoder<DataType>(outputInfo, outputs[0]->Map());
+
+ using AddFunction = ElementwiseBinaryFunction<std::plus<DataType>>;
+ using DivFunction = ElementwiseBinaryFunction<std::divides<DataType>>;
+ using MaximumFunction = ElementwiseBinaryFunction<armnn::maximum<DataType>>;
+ using MinimumFunction = ElementwiseBinaryFunction<armnn::minimum<DataType>>;
+ using MulFunction = ElementwiseBinaryFunction<std::multiplies<DataType>>;
+ using SubFunction = ElementwiseBinaryFunction<std::minus<DataType>>;
+
+ switch (operation)
+ {
+ case BinaryOperation::Add:
+ {
+ AddFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+ break;
+ }
+ case BinaryOperation::Div:
+ {
+ DivFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+ break;
+ }
+ case BinaryOperation::Maximum:
+ {
+ MaximumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+ break;
+ }
+ case BinaryOperation::Minimum:
+ {
+ MinimumFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+ break;
+ }
+ case BinaryOperation::Mul:
+ {
+ MulFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+ break;
+ }
+ case BinaryOperation::Sub:
+ {
+ SubFunction(inShape0, inShape1, outShape, *input0, *input1, *output);
+ break;
+ }
+ default:
+ {
+ throw InvalidArgumentException(std::string("Unsupported binary operation ") +
+ GetBinaryOperationAsCString(operation), CHECK_LOCATION());
+ }
+ }
+}
+
+RefElementwiseBinaryWorkload::RefElementwiseBinaryWorkload(const ElementwiseBinaryQueueDescriptor& desc,
+ const WorkloadInfo& info)
+ : RefBaseWorkload<ElementwiseBinaryQueueDescriptor>(desc, info)
+{}
+
+void RefElementwiseBinaryWorkload::Execute() const
+{
+ Execute(m_Data.m_Inputs, m_Data.m_Outputs);
+}
+
+void RefElementwiseBinaryWorkload::ExecuteAsync(ExecutionData& executionData)
+{
+ WorkingMemDescriptor* workingMemDescriptor = static_cast<WorkingMemDescriptor*>(executionData.m_Data);
+ Execute(workingMemDescriptor->m_Inputs, workingMemDescriptor->m_Outputs);
+}
+
+void RefElementwiseBinaryWorkload::Execute(std::vector<ITensorHandle*> inputs,
+ std::vector<ITensorHandle*> outputs) const
+{
+ ARMNN_SCOPED_PROFILING_EVENT(Compute::CpuRef, "RefElementwiseBinaryWorkload_Execute");
+
+ if (GetTensorInfo(inputs[0]).GetDataType() == DataType::Signed32)
+ {
+ ExecuteFunction<int32_t>(inputs, outputs, m_Data.m_Parameters.m_Operation);
+ }
+ else
+ {
+ ExecuteFunction<float>(inputs, outputs, m_Data.m_Parameters.m_Operation);
+ }
+}
+
+} // namespace armnn
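ExecuteFunction binds each BinaryOperation to an ElementwiseBinaryFunction instantiated with a standard functor (std::plus, std::divides, std::multiplies, std::minus) or Arm NN's maximum/minimum, and Execute selects the element type at run time: Signed32 tensors take the int32_t path, while every other supported type (the quantized types are dequantized by the decoders) takes the float path. A stand-alone sketch of the functor selection, assuming flat equal-length arrays in place of the Decoder/Encoder machinery, with broadcasting omitted:

    #include <cstddef>
    #include <functional>
    #include <stdexcept>

    enum class BinaryOp { Add, Div, Maximum, Minimum, Mul, Sub };

    // Apply a binary operation elementwise; mirrors the switch in
    // ExecuteFunction but without shapes, broadcasting or encoders.
    template <typename T>
    void ElementwiseBinarySketch(const T* in0, const T* in1, T* out,
                                 std::size_t n, BinaryOp op)
    {
        std::function<T(T, T)> f;
        switch (op)
        {
            case BinaryOp::Add:     f = std::plus<T>();       break;
            case BinaryOp::Div:     f = std::divides<T>();    break;
            case BinaryOp::Maximum: f = [](T a, T b) { return a > b ? a : b; }; break;
            case BinaryOp::Minimum: f = [](T a, T b) { return a < b ? a : b; }; break;
            case BinaryOp::Mul:     f = std::multiplies<T>(); break;
            case BinaryOp::Sub:     f = std::minus<T>();      break;
            default: throw std::invalid_argument("Unsupported binary operation");
        }
        for (std::size_t i = 0; i < n; ++i)
        {
            out[i] = f(in0[i], in1[i]);
        }
    }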
diff --git a/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp
new file mode 100644
index 0000000000..37458a1705
--- /dev/null
+++ b/src/backends/reference/workloads/RefElementwiseBinaryWorkload.hpp
@@ -0,0 +1,29 @@
+//
+// Copyright © 2023 Arm Ltd and Contributors. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#pragma once
+
+#include "BaseIterator.hpp"
+
+#include "RefBaseWorkload.hpp"
+#include <armnn/backends/WorkloadData.hpp>
+
+namespace armnn
+{
+
+class RefElementwiseBinaryWorkload : public RefBaseWorkload<ElementwiseBinaryQueueDescriptor>
+{
+public:
+ using RefBaseWorkload<ElementwiseBinaryQueueDescriptor>::m_Data;
+
+ RefElementwiseBinaryWorkload(const ElementwiseBinaryQueueDescriptor& descriptor, const WorkloadInfo& info);
+ void Execute() const override;
+ void ExecuteAsync(ExecutionData& executionData) override;
+
+private:
+ void Execute(std::vector<ITensorHandle*> inputs, std::vector<ITensorHandle*> outputs) const;
+};
+
+} // namespace armnn
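The header follows the usual Ref workload shape: a synchronous Execute() that uses the tensors captured in m_Data, an ExecuteAsync() that takes them from caller-owned working memory, and one private overload doing the actual work. A generic illustration of that delegation pattern, with hypothetical types:

    #include <vector>

    struct TensorHandleSketch {};  // stand-in for ITensorHandle

    class DualExecuteWorkloadSketch
    {
    public:
        // Synchronous path: tensors captured at construction time.
        void Execute() const { Execute(m_Inputs, m_Outputs); }

        // Asynchronous path: tensors supplied via external working memory.
        void ExecuteAsync(std::vector<TensorHandleSketch*>& workingInputs,
                          std::vector<TensorHandleSketch*>& workingOutputs)
        {
            Execute(workingInputs, workingOutputs);
        }

    private:
        void Execute(const std::vector<TensorHandleSketch*>& inputs,
                     const std::vector<TensorHandleSketch*>& outputs) const
        {
            (void)inputs; (void)outputs;  // shared implementation goes here
        }

        std::vector<TensorHandleSketch*> m_Inputs;
        std::vector<TensorHandleSketch*> m_Outputs;
    };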
diff --git a/src/backends/reference/workloads/RefWorkloads.hpp b/src/backends/reference/workloads/RefWorkloads.hpp
index afed71bfff..dba880bafc 100644
--- a/src/backends/reference/workloads/RefWorkloads.hpp
+++ b/src/backends/reference/workloads/RefWorkloads.hpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -26,6 +26,7 @@
#include "RefDetectionPostProcessWorkload.hpp"
#include "RefDequantizeWorkload.hpp"
#include "RefElementwiseWorkload.hpp"
+#include "RefElementwiseBinaryWorkload.hpp"
#include "RefElementwiseUnaryWorkload.hpp"
#include "RefFakeQuantizationFloat32Workload.hpp"
#include "RefFillWorkload.hpp"