aboutsummaryrefslogtreecommitdiff
path: root/src/backends/cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/cl')
-rw-r--r--src/backends/cl/ClBackend.cpp89
-rw-r--r--src/backends/cl/ClLayerSupport.cpp52
-rw-r--r--src/backends/cl/ClWorkloadFactory.cpp71
-rw-r--r--src/backends/cl/test/ClCreateWorkloadTests.cpp46
-rw-r--r--src/backends/cl/test/ClFallbackTests.cpp18
-rw-r--r--src/backends/cl/test/Fp16SupportTest.cpp4
6 files changed, 232 insertions, 48 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index d2e8fbfe32..a10b6fbb43 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2022 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -311,7 +311,8 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
|| base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
|| base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
- || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+ || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
+ || base.GetType() == LayerType::ElementwiseBinary)
&& (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
{
for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
@@ -542,6 +543,92 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                         untouched.erase(activationLayer->GetGuid());
                     }
                 }
+                else if (base.GetType() == LayerType::ElementwiseBinary)
+                {
+                    ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
+
+                    if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
+                    {
+                        arm_compute::Status status = ClAdditionValidate(
+                                baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+                                activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                &activationDesc);
+
+                        if (status)
+                        {
+                            FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+                                                                               baseLayer,
+                                                                               activationLayer,
+                                                                               activationDesc,
+                                                                               BinaryOperation::Add,
+                                                                               name);
+                            untouched.erase(baseLayer->GetGuid());
+                            untouched.erase(activationLayer->GetGuid());
+                        }
+                    }
+                    else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
+                    {
+                        arm_compute::Status status = ClDivisionWorkloadValidate(
+                                baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+                                activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                &activationDesc);
+
+                        if (status)
+                        {
+                            FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+                                                                               baseLayer,
+                                                                               activationLayer,
+                                                                               activationDesc,
+                                                                               BinaryOperation::Div,
+                                                                               name);
+                            untouched.erase(baseLayer->GetGuid());
+                            untouched.erase(activationLayer->GetGuid());
+                        }
+                    }
+                    else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
+                    {
+                        arm_compute::Status status = ClMultiplicationWorkloadValidate(
+                                baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+                                activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                &activationDesc);
+
+                        if (status)
+                        {
+                            FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+                                                                               baseLayer,
+                                                                               activationLayer,
+                                                                               activationDesc,
+                                                                               BinaryOperation::Mul,
+                                                                               name);
+                            untouched.erase(baseLayer->GetGuid());
+                            untouched.erase(activationLayer->GetGuid());
+                        }
+                    }
+                    else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
+                    {
+                        arm_compute::Status status = ClSubtractionValidate(
+                                baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+                                activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                                &activationDesc);
+
+                        if (status)
+                        {
+                            FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+                                                                               baseLayer,
+                                                                               activationLayer,
+                                                                               activationDesc,
+                                                                               BinaryOperation::Sub,
+                                                                               name);
+                            untouched.erase(baseLayer->GetGuid());
+                            untouched.erase(activationLayer->GetGuid());
+                        }
+                    }
+                    // No fusion available for other BinaryOperations
+                }
}
}
}
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index cb2d756037..89bcf9bc01 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -347,6 +347,56 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type,
return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported);
case LayerType::Division:
return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ case LayerType::ElementwiseBinary:
+ {
+ auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor));
+
+ switch (desc.m_Operation)
+ {
+ case BinaryOperation::Add:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClAdditionValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Div:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Minimum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClMinimumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Maximum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClMaximumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Mul:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Sub:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(ClSubtractionValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ default:
+ return false;
+ }
+ }
case LayerType::ElementwiseUnary:
return IsElementwiseUnarySupported(infos[0],
infos[1],
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 6bf510a2ef..022867710c 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#include "ClWorkloadFactory.hpp"
@@ -405,6 +405,75 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type,
auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext);
}
+ case LayerType::ElementwiseBinary :
+ {
+ auto elementwiseBinaryQueueDescriptor
+ = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+
+ switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
+ {
+ case BinaryOperation::Add:
+ {
+ AdditionQueueDescriptor additionQueueDescriptor;
+ additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ additionQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Div:
+ {
+ DivisionQueueDescriptor divisionQueueDescriptor;
+ divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ divisionQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Maximum:
+ {
+ MaximumQueueDescriptor maximumQueueDescriptor;
+ maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ maximumQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Minimum:
+ {
+ MinimumQueueDescriptor minimumQueueDescriptor;
+ minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ minimumQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext);
+ }
+ case BinaryOperation::Mul:
+ {
+ MultiplicationQueueDescriptor multiplicationQueueDescriptor;
+ multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ multiplicationQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor,
+ info,
+ m_CLCompileContext);
+ }
+ case BinaryOperation::Sub:
+ {
+ SubtractionQueueDescriptor subtractionQueueDescriptor;
+ subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ subtractionQueueDescriptor.m_AdditionalInfoObject =
+ elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject;
+ return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor,
+ info,
+ m_CLCompileContext);
+ }
+ default:
+ return nullptr;
+ }
+ }
case LayerType::ElementwiseUnary :
{
auto elementwiseUnaryQueueDescriptor
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index adea733582..c49ca23266 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -66,19 +66,17 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload")
}
template <typename WorkloadType,
- typename DescriptorType,
- typename LayerType,
armnn::DataType DataType>
-static void ClCreateElementwiseWorkloadTest()
+static void ClCreateElementwiseWorkloadTest(BinaryOperation binaryOperator)
{
Graph graph;
ClWorkloadFactory factory =
ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager());
- auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph);
+ auto workload = CreateElementwiseBinaryWorkloadTest<WorkloadType, DataType>(factory, graph, binaryOperator);
// Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest).
- DescriptorType queueDescriptor = workload->GetData();
+ auto queueDescriptor = workload->GetData();
auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]);
auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]);
auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]);
@@ -93,73 +91,55 @@ static void ClCreateElementwiseWorkloadTest()
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload")
{
ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
- AdditionQueueDescriptor,
- AdditionLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Add);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload")
{
ClCreateElementwiseWorkloadTest<ClAdditionWorkload,
- AdditionQueueDescriptor,
- AdditionLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Add);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload")
{
ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
- SubtractionQueueDescriptor,
- SubtractionLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Sub);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload")
{
ClCreateElementwiseWorkloadTest<ClSubtractionWorkload,
- SubtractionQueueDescriptor,
- SubtractionLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Sub);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
- MultiplicationQueueDescriptor,
- MultiplicationLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Mul);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
- MultiplicationQueueDescriptor,
- MultiplicationLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Mul);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload,
- MultiplicationQueueDescriptor,
- MultiplicationLayer,
- armnn::DataType::QAsymmU8>();
+ armnn::DataType::QAsymmU8>(BinaryOperation::Mul);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
- DivisionQueueDescriptor,
- DivisionLayer,
- armnn::DataType::Float32>();
+ armnn::DataType::Float32>(BinaryOperation::Div);
}
TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest")
{
ClCreateElementwiseWorkloadTest<ClDivisionWorkload,
- DivisionQueueDescriptor,
- DivisionLayer,
- armnn::DataType::Float16>();
+ armnn::DataType::Float16>(BinaryOperation::Div);
}
template <typename WorkloadType,
diff --git a/src/backends/cl/test/ClFallbackTests.cpp b/src/backends/cl/test/ClFallbackTests.cpp
index 51a983a681..9443116c92 100644
--- a/src/backends/cl/test/ClFallbackTests.cpp
+++ b/src/backends/cl/test/ClFallbackTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -24,8 +24,8 @@ TEST_CASE("ClImportEnabledFallbackToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -172,8 +172,8 @@ TEST_CASE("ClImportDisabledFallbackToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -301,8 +301,8 @@ TEST_CASE("ClImportEnabledFallbackSubgraphToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
@@ -460,8 +460,8 @@ TEST_CASE("ClImportDisableFallbackSubgraphToNeon")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp
index da6ea10926..28ae4795ab 100644
--- a/src/backends/cl/test/Fp16SupportTest.cpp
+++ b/src/backends/cl/test/Fp16SupportTest.cpp
@@ -28,7 +28,7 @@ TEST_CASE("Fp16DataTypeSupport")
Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1");
Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2");
- Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition");
+ Layer* const additionLayer = graph.AddLayer<ElementwiseBinaryLayer>(BinaryOperation::Add, "addition");
Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output");
TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16);
@@ -57,7 +57,7 @@ TEST_CASE("Fp16AdditionTest")
IConnectableLayer* inputLayer1 = net->AddInputLayer(0);
IConnectableLayer* inputLayer2 = net->AddInputLayer(1);
- IConnectableLayer* additionLayer = net->AddAdditionLayer();
+ IConnectableLayer* additionLayer = net->AddElementwiseBinaryLayer(BinaryOperation::Add);
IConnectableLayer* outputLayer = net->AddOutputLayer(0);
inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0));