From 3ec3077b4eaedcc0c20ab5774bdbe365da541445 Mon Sep 17 00:00:00 2001 From: Mike Kelly Date: Wed, 8 Mar 2023 13:47:17 +0000 Subject: IVGCVSW-3808 Add ElementwiseBinaryLayer !android-nn-driver:9329 * Added ElementwiseBinaryLayer that can represent all ElementwiseBinary operations including Add, Div, Sub, Maximum, Mul and Minimum. * Updated Delegate to use ElementwiseBinaryLayer instead of the Add, Div, Sub, Maximum, Mul and Minimum layers. * Updated Deserializer to use ElementwiseBinaryLayer instead of the Add, Div, Sub, Maximum, Mul and Minimum layers. * Updated OnnxParser to use ElementwiseBinaryLayer instead of the Add layer. * Updated TfLiteParser to use ElementwiseBinaryLayer instead of the Add, Div, Sub, Maximum, Mul and Minimum layers. * Updated CL and Neon tests to use ElementwiseBinaryLayer. * Updated CL and Neon Backend Specific Optimizations to accept ElementwiseBinaryLayers as well as Add, Div, Mul, Sub, Maximum and Minimum layers. Signed-off-by: Teresa Charlin Signed-off-by: Mike Kelly Change-Id: I7cbb96b60eb01f0e2b57b0541016d48a08b86c75 --- src/backends/neon/NeonBackend.cpp | 88 +++++++++++++++++++++++++++- src/backends/neon/NeonLayerSupport.cpp | 52 +++++++++++++++- src/backends/neon/NeonWorkloadFactory.cpp | 55 ++++++++++++++++- src/backends/neon/test/NeonFallbackTests.cpp | 34 +++++------ 4 files changed, 209 insertions(+), 20 deletions(-) (limited to 'src/backends/neon') diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index 968bce48c8..cea2aa3eba 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -395,6 +395,92 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph untouched.erase(activationLayer->GetGuid()); } } + else if (base.GetType() == LayerType::ElementwiseBinary) + { + ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast(&base); + + if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add) + { + arm_compute::Status status = NeonAdditionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Add, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div) + { + arm_compute::Status status = NeonDivisionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Div, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul) + { + arm_compute::Status status = NeonMultiplicationWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + 
FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Mul, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub) + { + arm_compute::Status status = NeonSubtractionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Sub, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + // No fusion available for other BinaryOperations + } } } } diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index ee155a2c64..672b2f377f 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -346,6 +346,56 @@ bool IsLayerTypeSupported(const LayerType& type, } case LayerType::Division: return support.IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + case LayerType::ElementwiseBinary: + { + auto desc = *(PolymorphicDowncast(&descriptor)); + + switch (desc.m_Operation) + { + case BinaryOperation::Add: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonAdditionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Div: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonDivisionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Maximum: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMaximumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Minimum: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMinimumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Mul: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMultiplicationWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Sub: + FORWARD_WORKLOAD_VALIDATE_FUNC(NeonSubtractionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + default: + return false; + } + } case LayerType::ElementwiseUnary: return support.IsElementwiseUnarySupported(infos[0], infos[1], diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index dccd4a3a36..08168eca2f 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. 
// SPDX-License-Identifier: MIT // @@ -280,6 +280,59 @@ std::unique_ptr NeonWorkloadFactory::CreateWorkload(LayerType type, auto divisionQueueDescriptor = PolymorphicDowncast(&descriptor); return std::make_unique(*divisionQueueDescriptor, info); } + case LayerType::ElementwiseBinary : + { + auto elementwiseBinaryQueueDescriptor + = PolymorphicDowncast(&descriptor); + + switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation) + { + case BinaryOperation::Add: + { + AdditionQueueDescriptor additionQueueDescriptor; + additionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + additionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(additionQueueDescriptor, info); + } + case BinaryOperation::Div: + { + DivisionQueueDescriptor divisionQueueDescriptor; + divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(divisionQueueDescriptor, info); + } + case BinaryOperation::Maximum: + { + MaximumQueueDescriptor maximumQueueDescriptor; + maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(maximumQueueDescriptor, info); + } + case BinaryOperation::Minimum: + { + MinimumQueueDescriptor minimumQueueDescriptor; + minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(minimumQueueDescriptor, info); + } + case BinaryOperation::Mul: + { + MultiplicationQueueDescriptor multiplicationQueueDescriptor; + multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs; + multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs; + return std::make_unique(multiplicationQueueDescriptor, info); + } + case BinaryOperation::Sub: + { + SubtractionQueueDescriptor subtractionQueueDescriptor; + subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + subtractionQueueDescriptor.m_Outputs = 
descriptor.m_Outputs; + return std::make_unique(subtractionQueueDescriptor, info); + } + default: + return nullptr; + } + } case LayerType::ElementwiseUnary : { auto elementwiseUnaryQueueDescriptor diff --git a/src/backends/neon/test/NeonFallbackTests.cpp b/src/backends/neon/test/NeonFallbackTests.cpp index 8e0e0ab99b..40df2dc315 100644 --- a/src/backends/neon/test/NeonFallbackTests.cpp +++ b/src/backends/neon/test/NeonFallbackTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -38,8 +38,8 @@ TEST_CASE("FallbackImportToCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -183,7 +183,7 @@ TEST_CASE("FallbackPaddingCopyToCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); @@ -318,8 +318,8 @@ TEST_CASE("FallbackImportFromCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - 
IConnectableLayer* sub = net->AddSubtractionLayer("sub"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); @@ -465,7 +465,7 @@ TEST_CASE("FallbackPaddingCopyFromCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(pooling->GetInputSlot(0)); @@ -599,8 +599,8 @@ TEST_CASE("FallbackDisableImportFromCpuAcc") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); - IConnectableLayer* add = net->AddAdditionLayer("add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(sub->GetInputSlot(0)); @@ -725,8 +725,8 @@ TEST_CASE("NeonImportEnabledFallbackToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = 
net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -878,8 +878,8 @@ TEST_CASE("NeonImportDisabledFallbackToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -1013,8 +1013,8 @@ TEST_CASE("NeonImportEnabledFallbackSubgraphToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); @@ -1177,8 +1177,8 @@ TEST_CASE("NeonImportDisableFallbackSubgraphToCl") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* 
add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); -- cgit v1.2.1