about | summary | refs | log | tree | commit | diff
path: root/src/backends/neon
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/neon')
-rw-r--r-- src/backends/neon/NeonBackend.cpp | 88
-rw-r--r-- src/backends/neon/NeonLayerSupport.cpp | 52
-rw-r--r-- src/backends/neon/NeonWorkloadFactory.cpp | 55
-rw-r--r-- src/backends/neon/test/NeonFallbackTests.cpp | 34
4 files changed, 209 insertions, 20 deletions
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 968bce48c8..cea2aa3eba 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -395,6 +395,92 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
untouched.erase(activationLayer->GetGuid());
}
}
+ else if (base.GetType() == LayerType::ElementwiseBinary)
+ {
+ ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base);
+
+ if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add)
+ {
+ arm_compute::Status status = NeonAdditionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Add,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div)
+ {
+ arm_compute::Status status = NeonDivisionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Div,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul)
+ {
+ arm_compute::Status status = NeonMultiplicationWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Mul,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub)
+ {
+ arm_compute::Status status = NeonSubtractionWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(),
+ activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ &activationDesc);
+
+ if (status)
+ {
+ FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews,
+ baseLayer,
+ activationLayer,
+ activationDesc,
+ BinaryOperation::Sub,
+ name);
+ untouched.erase(baseLayer->GetGuid());
+ untouched.erase(activationLayer->GetGuid());
+ }
+ }
+ // No fusion available for other BinaryOperations
+ }
}
}
}
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index ee155a2c64..672b2f377f 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -346,6 +346,56 @@ bool IsLayerTypeSupported(const LayerType& type,
}
case LayerType::Division:
return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported);
+ case LayerType::ElementwiseBinary:
+ {
+ auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor));
+
+ switch (desc.m_Operation)
+ {
+ case BinaryOperation::Add:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Div:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDivisionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Maximum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Minimum:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2]);
+ case BinaryOperation::Mul:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ case BinaryOperation::Sub:
+ FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate,
+ reasonIfUnsupported,
+ infos[0],
+ infos[1],
+ infos[2],
+ nullptr);
+ default:
+ return false;
+ }
+ }
case LayerType::ElementwiseUnary:
return support.IsElementwiseUnarySupported(infos[0],
infos[1],
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index dccd4a3a36..08168eca2f 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2017 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -280,6 +280,59 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateWorkload(LayerType type,
auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor);
return std::make_unique<NeonDivisionWorkload>(*divisionQueueDescriptor, info);
}
+ case LayerType::ElementwiseBinary :
+ {
+ auto elementwiseBinaryQueueDescriptor
+ = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor);
+
+ switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation)
+ {
+ case BinaryOperation::Add:
+ {
+ AdditionQueueDescriptor additionQueueDescriptor;
+ additionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ additionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonAdditionWorkload>(additionQueueDescriptor, info);
+ }
+ case BinaryOperation::Div:
+ {
+ DivisionQueueDescriptor divisionQueueDescriptor;
+ divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonDivisionWorkload>(divisionQueueDescriptor, info);
+ }
+ case BinaryOperation::Maximum:
+ {
+ MaximumQueueDescriptor maximumQueueDescriptor;
+ maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonMaximumWorkload>(maximumQueueDescriptor, info);
+ }
+ case BinaryOperation::Minimum:
+ {
+ MinimumQueueDescriptor minimumQueueDescriptor;
+ minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonMinimumWorkload>(minimumQueueDescriptor, info);
+ }
+ case BinaryOperation::Mul:
+ {
+ MultiplicationQueueDescriptor multiplicationQueueDescriptor;
+ multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonMultiplicationWorkload>(multiplicationQueueDescriptor, info);
+ }
+ case BinaryOperation::Sub:
+ {
+ SubtractionQueueDescriptor subtractionQueueDescriptor;
+ subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs;
+ subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs;
+ return std::make_unique<NeonSubtractionWorkload>(subtractionQueueDescriptor, info);
+ }
+ default:
+ return nullptr;
+ }
+ }
case LayerType::ElementwiseUnary :
{
auto elementwiseUnaryQueueDescriptor
diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp
index 8e0e0ab99b..40df2dc315 100644
--- a/src/backends/neon/test/NeonFallbackTests.cpp
+++ b/src/backends/neon/test/NeonFallbackTests.cpp
@@ -1,5 +1,5 @@
//
-// Copyright © 2020 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
@@ -38,8 +38,8 @@ TEST_CASE("FallbackImportToCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -183,7 +183,7 @@ TEST_CASE("FallbackPaddingCopyToCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
@@ -318,8 +318,8 @@ TEST_CASE("FallbackImportFromCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
@@ -465,7 +465,7 @@ TEST_CASE("FallbackPaddingCopyFromCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0));
@@ -599,8 +599,8 @@ TEST_CASE("FallbackDisableImportFromCpuAcc")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
- IConnectableLayer* add = net->AddAdditionLayer("add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0));
@@ -725,8 +725,8 @@ TEST_CASE("NeonImportEnabledFallbackToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -878,8 +878,8 @@ TEST_CASE("NeonImportDisabledFallbackToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
input0->GetOutputSlot(0).Connect(add->GetInputSlot(0));
@@ -1013,8 +1013,8 @@ TEST_CASE("NeonImportEnabledFallbackSubgraphToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");
@@ -1177,8 +1177,8 @@ TEST_CASE("NeonImportDisableFallbackSubgraphToCl")
IConnectableLayer* input0 = net->AddInputLayer(0, "input0");
IConnectableLayer* input1 = net->AddInputLayer(1, "input1");
IConnectableLayer* input2 = net->AddInputLayer(2, "input2");
- IConnectableLayer* add = net->AddAdditionLayer("add");
- IConnectableLayer* sub = net->AddSubtractionLayer("sub");
+ IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add");
+ IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub");
IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling");
IConnectableLayer* output = net->AddOutputLayer(0, "output");