From d905decd256558bbee165e636ce4242ac3b9c917 Mon Sep 17 00:00:00 2001 From: Matthew Sloyan Date: Mon, 3 May 2021 12:22:03 +0100 Subject: MLCE-418 Reduce layer does not support multiple axes * Added backend-specific optimization to chain new reduce layers for each axis to simulate behaviour of a layer with multiple axes. * Added function to calculate reduced output shape. * Added unit tests. Signed-off-by: Matthew Sloyan Change-Id: I180b0b111b7bcf3d0c283f1db0b82d5f17757682 --- src/backends/neon/NeonBackend.cpp | 24 +++++++++- src/backends/neon/workloads/NeonReduceWorkload.cpp | 53 +++++++++++++++++----- 2 files changed, 65 insertions(+), 12 deletions(-) (limited to 'src/backends/neon') diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index a1299fb458..6d5eab0ddf 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -29,6 +29,7 @@ #include "workloads/NeonDivisionWorkload.hpp" #include "workloads/NeonFullyConnectedWorkload.hpp" #include "workloads/NeonMultiplicationWorkload.hpp" +#include "workloads/NeonReduceWorkload.hpp" #include "workloads/NeonSubtractionWorkload.hpp" #include <Optimizer.hpp> @@ -164,7 +165,8 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication - || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division) + || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division + || base.GetType() == LayerType::Reduce) && (base.GetAdditionalInformation() == nullptr)) { for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output) @@ -389,6 +391,26 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const
SubgraphView& subgraph } } } + + // Separate check for Reduce as we aren't fusing with activation layer + if (base.GetType() == LayerType::Reduce) + { + ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base); + + // Get params from base layer + ReduceDescriptor reduceDescriptor = baseLayer->GetParameters(); + + arm_compute::Status status = NeonReduceWorkloadValidate( + baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(), + baseLayer->GetOutputSlot(0).GetTensorInfo(), + reduceDescriptor); + + if (status) + { + ChainReduceLayers(optimizationViews, baseLayer, reduceDescriptor); + untouched.erase(baseLayer->GetGuid()); + } + } } } } diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp index 0e1b46a3a1..6125f3609d 100644 --- a/src/backends/neon/workloads/NeonReduceWorkload.cpp +++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp @@ -21,22 +21,52 @@ arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input, const ReduceDescriptor& desc) { const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input); - const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); - if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1) - { - return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR, - "NeonReduceWorkload: Reduction is supported only on 1 axis."); - } arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(), input.GetNumDimensions(), desc.m_vAxis); - return arm_compute::NEReductionOperation::validate(&aclInputInfo, - &aclOutputInfo, - static_cast<unsigned int>(coords[0]), - ConvertReductionOperationToAcl(desc), - desc.m_KeepDims); + // As ACL only supports one axis, validate the layer for each axis if more than one is present.
+ if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1) + { + arm_compute::Status status; + + for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i) + { + TensorInfo inputToModify = input; + std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i]); + + // Calculate the output shape using the input shape for a single axis. + // Currently the output TensorInfo inferred will be reduced over multiple axes + // which will fail validation as only one axis is supported. + const TensorShape& reducedShape = ComputeReductionTensorShape(inputToModify, singleAxis, desc.m_KeepDims); + inputToModify.SetShape(reducedShape); + + const arm_compute::TensorInfo aclOutputInfoModified = + armcomputetensorutils::BuildArmComputeTensorInfo(inputToModify); + + status = arm_compute::NEReductionOperation::validate(&aclInputInfo, + &aclOutputInfoModified, + static_cast<unsigned int>(coords[i]), + ConvertReductionOperationToAcl(desc), + desc.m_KeepDims); + if (!status) + { + break; + } + } + return status; + } + else + { + const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output); + + return arm_compute::NEReductionOperation::validate(&aclInputInfo, + &aclOutputInfo, + static_cast<unsigned int>(coords[0]), + ConvertReductionOperationToAcl(desc), + desc.m_KeepDims); + } } NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info) @@ -50,6 +80,7 @@ NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(), info.m_InputTensorInfos[0].GetNumDimensions(), m_Data.m_Parameters.m_vAxis); + m_Layer.configure(&input, &output, static_cast<unsigned int>(coords[0]), -- cgit v1.2.1