diff options
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- | src/backends/cl/ClBackend.cpp | 89 | ||||
-rw-r--r-- | src/backends/cl/ClLayerSupport.cpp | 52 | ||||
-rw-r--r-- | src/backends/cl/ClWorkloadFactory.cpp | 71 | ||||
-rw-r--r-- | src/backends/cl/test/ClCreateWorkloadTests.cpp | 46 | ||||
-rw-r--r-- | src/backends/cl/test/ClFallbackTests.cpp | 18 | ||||
-rw-r--r-- | src/backends/cl/test/Fp16SupportTest.cpp | 4 |
6 files changed, 232 insertions, 48 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index d2e8fbfe32..a10b6fbb43 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2022 Arm Ltd. All rights reserved. +// Copyright © 2017-2023 Arm Ltd. All rights reserved. // SPDX-License-Identifier: MIT // @@ -311,7 +311,8 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication - || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division) + || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division + || base.GetType() == LayerType::ElementwiseBinary) && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr)) { for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output) @@ -542,6 +543,90 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph, untouched.erase(activationLayer->GetGuid()); } } + else if (base.GetType() == LayerType::ElementwiseBinary) + { + ElementwiseBinaryLayer* baseLayer = PolymorphicDowncast<ElementwiseBinaryLayer*>(&base); + + if (baseLayer->GetParameters().m_Operation == BinaryOperation::Add) + { + arm_compute::Status status = ClAdditionValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Add, + name); + 
untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Div) + { + arm_compute::Status status = ClDivisionWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Div, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Mul) + { + arm_compute::Status status = ClMultiplicationWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Mul, + name); + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + else if (baseLayer->GetParameters().m_Operation == BinaryOperation::Sub) + { + arm_compute::Status status = ClSubtractionValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetInputSlot(1).GetConnectedOutputSlot()->GetTensorInfo(), + activationLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + &activationDesc); + + if (status) + { + FuseElementwiseBinaryLayer<ElementwiseBinaryLayer>(optimizationViews, + baseLayer, + activationLayer, + activationDesc, + BinaryOperation::Sub, + name); + // As in the Add/Div/Mul branches: the fused layers must no longer be tracked as untouched. + untouched.erase(baseLayer->GetGuid()); + untouched.erase(activationLayer->GetGuid()); + } + } + // No fusion available for other
BinaryOperations + } } } } diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp index cb2d756037..89bcf9bc01 100644 --- a/src/backends/cl/ClLayerSupport.cpp +++ b/src/backends/cl/ClLayerSupport.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -347,6 +347,56 @@ bool ClLayerSupport::IsLayerSupported(const LayerType& type, return IsDequantizeSupported(infos[0], infos[1], reasonIfUnsupported); case LayerType::Division: return IsDivisionSupported(infos[0], infos[1], infos[2], reasonIfUnsupported); + case LayerType::ElementwiseBinary: + { + auto desc = *(PolymorphicDowncast<const ElementwiseBinaryDescriptor *>(&descriptor)); + + switch (desc.m_Operation) + { + case BinaryOperation::Add: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClAdditionValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Div: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClDivisionWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Minimum: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMinimumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Maximum: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMaximumWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2]); + case BinaryOperation::Mul: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClMultiplicationWorkloadValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + case BinaryOperation::Sub: + FORWARD_WORKLOAD_VALIDATE_FUNC(ClSubtractionValidate, + reasonIfUnsupported, + infos[0], + infos[1], + infos[2], + nullptr); + default: + return false; + } + } case LayerType::ElementwiseUnary: return IsElementwiseUnarySupported(infos[0], infos[1], diff --git a/src/backends/cl/ClWorkloadFactory.cpp 
b/src/backends/cl/ClWorkloadFactory.cpp index 6bf510a2ef..022867710c 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "ClWorkloadFactory.hpp" @@ -405,6 +405,75 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateWorkload(LayerType type, auto divisionQueueDescriptor = PolymorphicDowncast<const DivisionQueueDescriptor*>(&descriptor); return std::make_unique<ClDivisionWorkload>(*divisionQueueDescriptor, info, m_CLCompileContext); } + case LayerType::ElementwiseBinary : + { + auto elementwiseBinaryQueueDescriptor + = PolymorphicDowncast<const ElementwiseBinaryQueueDescriptor*>(&descriptor); + + switch (elementwiseBinaryQueueDescriptor->m_Parameters.m_Operation) + { + case BinaryOperation::Add: + { + AdditionQueueDescriptor additionQueueDescriptor; + additionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + additionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + additionQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique<ClAdditionWorkload>(additionQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Div: + { + DivisionQueueDescriptor divisionQueueDescriptor; + divisionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + divisionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + divisionQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique<ClDivisionWorkload>(divisionQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Maximum: + { + MaximumQueueDescriptor maximumQueueDescriptor; + maximumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + maximumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + 
maximumQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique<ClMaximumWorkload>(maximumQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Minimum: + { + MinimumQueueDescriptor minimumQueueDescriptor; + minimumQueueDescriptor.m_Inputs = descriptor.m_Inputs; + minimumQueueDescriptor.m_Outputs = descriptor.m_Outputs; + minimumQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique<ClMinimumWorkload>(minimumQueueDescriptor, info, m_CLCompileContext); + } + case BinaryOperation::Mul: + { + MultiplicationQueueDescriptor multiplicationQueueDescriptor; + multiplicationQueueDescriptor.m_Inputs = descriptor.m_Inputs; + multiplicationQueueDescriptor.m_Outputs = descriptor.m_Outputs; + multiplicationQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique<ClMultiplicationWorkload>(multiplicationQueueDescriptor, + info, + m_CLCompileContext); + } + case BinaryOperation::Sub: + { + SubtractionQueueDescriptor subtractionQueueDescriptor; + subtractionQueueDescriptor.m_Inputs = descriptor.m_Inputs; + subtractionQueueDescriptor.m_Outputs = descriptor.m_Outputs; + subtractionQueueDescriptor.m_AdditionalInfoObject = + elementwiseBinaryQueueDescriptor->m_AdditionalInfoObject; + return std::make_unique<ClSubtractionWorkload>(subtractionQueueDescriptor, + info, + m_CLCompileContext); + } + default: + return nullptr; + } + } case LayerType::ElementwiseUnary : { auto elementwiseUnaryQueueDescriptor diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index adea733582..c49ca23266 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2017,2022 Arm Ltd and Contributors. All rights reserved. 
+// Copyright © 2017-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -66,19 +66,17 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "CreateActivationFloat16Workload") } template <typename WorkloadType, - typename DescriptorType, - typename LayerType, armnn::DataType DataType> -static void ClCreateElementwiseWorkloadTest() +static void ClCreateElementwiseWorkloadTest(BinaryOperation binaryOperator) { Graph graph; ClWorkloadFactory factory = ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager()); - auto workload = CreateElementwiseWorkloadTest<WorkloadType, DescriptorType, LayerType, DataType>(factory, graph); + auto workload = CreateElementwiseBinaryWorkloadTest<WorkloadType, DataType>(factory, graph, binaryOperator); // Checks that inputs/outputs are as we expect them (see definition of CreateElementwiseWorkloadTest). - DescriptorType queueDescriptor = workload->GetData(); + auto queueDescriptor = workload->GetData(); auto inputHandle1 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[0]); auto inputHandle2 = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Inputs[1]); auto outputHandle = PolymorphicDowncast<IClTensorHandle*>(queueDescriptor.m_Outputs[0]); @@ -93,73 +91,55 @@ static void ClCreateElementwiseWorkloadTest() TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloatWorkload") { ClCreateElementwiseWorkloadTest<ClAdditionWorkload, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::Float32>(); + armnn::DataType::Float32>(BinaryOperation::Add); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateAdditionFloat16Workload") { ClCreateElementwiseWorkloadTest<ClAdditionWorkload, - AdditionQueueDescriptor, - AdditionLayer, - armnn::DataType::Float16>(); + armnn::DataType::Float16>(BinaryOperation::Add); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloatWorkload") { ClCreateElementwiseWorkloadTest<ClSubtractionWorkload, - 
SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::Float32>(); + armnn::DataType::Float32>(BinaryOperation::Sub); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateSubtractionFloat16Workload") { ClCreateElementwiseWorkloadTest<ClSubtractionWorkload, - SubtractionQueueDescriptor, - SubtractionLayer, - armnn::DataType::Float16>(); + armnn::DataType::Float16>(BinaryOperation::Sub); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloatWorkloadTest") { ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::Float32>(); + armnn::DataType::Float32>(BinaryOperation::Mul); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationFloat16WorkloadTest") { ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::Float16>(); + armnn::DataType::Float16>(BinaryOperation::Mul); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateMultiplicationUint8WorkloadTest") { ClCreateElementwiseWorkloadTest<ClMultiplicationWorkload, - MultiplicationQueueDescriptor, - MultiplicationLayer, - armnn::DataType::QAsymmU8>(); + armnn::DataType::QAsymmU8>(BinaryOperation::Mul); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloatWorkloadTest") { ClCreateElementwiseWorkloadTest<ClDivisionWorkload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::Float32>(); + armnn::DataType::Float32>(BinaryOperation::Div); } TEST_CASE_FIXTURE(ClContextControlFixture, "CreateDivisionFloat16WorkloadTest") { ClCreateElementwiseWorkloadTest<ClDivisionWorkload, - DivisionQueueDescriptor, - DivisionLayer, - armnn::DataType::Float16>(); + armnn::DataType::Float16>(BinaryOperation::Div); } template <typename WorkloadType, diff --git a/src/backends/cl/test/ClFallbackTests.cpp b/src/backends/cl/test/ClFallbackTests.cpp index 51a983a681..9443116c92 100644 --- a/src/backends/cl/test/ClFallbackTests.cpp +++ 
b/src/backends/cl/test/ClFallbackTests.cpp @@ -1,5 +1,5 @@ // -// Copyright © 2020 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2020-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // @@ -24,8 +24,8 @@ TEST_CASE("ClImportEnabledFallbackToNeon") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -172,8 +172,8 @@ TEST_CASE("ClImportDisabledFallbackToNeon") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); input0->GetOutputSlot(0).Connect(add->GetInputSlot(0)); @@ -301,8 +301,8 @@ TEST_CASE("ClImportEnabledFallbackSubgraphToNeon") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = 
net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); @@ -460,8 +460,8 @@ TEST_CASE("ClImportDisableFallbackSubgraphToNeon") IConnectableLayer* input0 = net->AddInputLayer(0, "input0"); IConnectableLayer* input1 = net->AddInputLayer(1, "input1"); IConnectableLayer* input2 = net->AddInputLayer(2, "input2"); - IConnectableLayer* add = net->AddAdditionLayer("add"); - IConnectableLayer* sub = net->AddSubtractionLayer("sub"); + IConnectableLayer* add = net->AddElementwiseBinaryLayer(BinaryOperation::Add, "add"); + IConnectableLayer* sub = net->AddElementwiseBinaryLayer(BinaryOperation::Sub, "sub"); IConnectableLayer* pooling = net->AddPooling2dLayer(desc, "pooling"); IConnectableLayer* output = net->AddOutputLayer(0, "output"); diff --git a/src/backends/cl/test/Fp16SupportTest.cpp b/src/backends/cl/test/Fp16SupportTest.cpp index da6ea10926..28ae4795ab 100644 --- a/src/backends/cl/test/Fp16SupportTest.cpp +++ b/src/backends/cl/test/Fp16SupportTest.cpp @@ -28,7 +28,7 @@ TEST_CASE("Fp16DataTypeSupport") Layer* const inputLayer1 = graph.AddLayer<InputLayer>(1, "input1"); Layer* const inputLayer2 = graph.AddLayer<InputLayer>(2, "input2"); - Layer* const additionLayer = graph.AddLayer<AdditionLayer>("addition"); + Layer* const additionLayer = graph.AddLayer<ElementwiseBinaryLayer>(BinaryOperation::Add, "addition"); Layer* const outputLayer = graph.AddLayer<armnn::OutputLayer>(0, "output"); TensorInfo fp16TensorInfo({1, 2, 3, 5}, armnn::DataType::Float16); @@ -57,7 +57,7 @@ TEST_CASE("Fp16AdditionTest") IConnectableLayer* inputLayer1 = net->AddInputLayer(0); IConnectableLayer* inputLayer2 = net->AddInputLayer(1); - IConnectableLayer* additionLayer = net->AddAdditionLayer(); + IConnectableLayer* additionLayer = 
net->AddElementwiseBinaryLayer(BinaryOperation::Add); IConnectableLayer* outputLayer = net->AddOutputLayer(0); inputLayer1->GetOutputSlot(0).Connect(additionLayer->GetInputSlot(0)); |