Diffstat (limited to 'src/backends')
-rw-r--r--  src/backends/aclCommon/ArmComputeSubgraphUtils.hpp | 85
-rw-r--r--  src/backends/aclCommon/ArmComputeUtils.hpp         | 55
-rw-r--r--  src/backends/cl/ClBackend.cpp                      | 24
-rw-r--r--  src/backends/cl/workloads/ClReduceWorkload.cpp     | 51
-rw-r--r--  src/backends/neon/NeonBackend.cpp                  | 24
-rw-r--r--  src/backends/neon/workloads/NeonReduceWorkload.cpp | 53
6 files changed, 268 insertions, 24 deletions
diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
index a0fca46330..9439ddb61e 100644
--- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -6,6 +6,9 @@
 #pragma once
 
 #include <armnn/backends/OptimizationViews.hpp>
+#include <armnn/utility/Assert.hpp>
+
+#include <aclCommon/ArmComputeUtils.hpp>
 
 namespace armnn
 {
@@ -147,4 +150,86 @@ LayerType* FuseLayerWithWeightsAndBiases(OptimizationViews& optimizationViews,
     return replacementLayer;
 }
 
+//
+// If the reduce layer has multiple axes, add a new layer for each axis to simulate the
+// same behaviour, as currently only one axis is supported.
+//
+template<typename LayerType>
+void ChainReduceLayers(OptimizationViews& optimizationViews,
+                       LayerType* baseLayer,
+                       ReduceDescriptor& reduceDescriptor)
+{
+    // If the layer has a single axis, don't chain layers.
+    if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
+    {
+        // Save the base layer output shape to compare against the output of the final layer added.
+        const TensorInfo baseLayerInfo = baseLayer->GetOutputSlot(0).GetTensorInfo();
+
+        // Vector of new chained layers, used for substitution.
+        std::vector<Layer*> layers;
+
+        // Vector of axes so each layer is reshaped correctly.
+        std::vector<uint32_t> reduceAxis;
+        unsigned int recalculateAxis = 0;
+
+        for (unsigned int i = 0; i != reduceDescriptor.m_vAxis.size(); ++i)
+        {
+            // Get TensorInfo to populate subsequent layers with.
+            TensorInfo layerInfoToModify = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+
+            reduceAxis.emplace_back(reduceDescriptor.m_vAxis[i]);
+
+            // Calculate the new shape based on the axes reduced so far.
+            const TensorShape& reducedShape = ComputeReductionTensorShape(layerInfoToModify,
+                                                                          reduceAxis,
+                                                                          reduceDescriptor.m_KeepDims);
+            layerInfoToModify.SetShape(reducedShape);
+
+            // Create a vector for the single axis to be assigned to the descriptor.
+            // When keepDims is not set, shift the axis down to account for dimensions already removed.
+            std::vector<uint32_t> singleAxis(1, reduceDescriptor.m_vAxis[i] - recalculateAxis);
+
+            // Create a descriptor and assign the single axis.
+            ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters();
+            newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());
+
+            // Add the new layer to the graph.
+            std::string layerName = "reduce_layer_" + std::to_string(i);
+            Layer* replacementLayer = optimizationViews.GetGraph().AddLayer<LayerType>(newReduceDescriptor,
+                                                                                       layerName.c_str());
+
+            // Connect the previous layer with the new layer.
+            // The first and last layers will be connected when the subgraph is replaced.
+            if (!layers.empty())
+            {
+                layers[i - 1]->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0));
+            }
+
+            // Set the updated TensorInfo on the new layer.
+            replacementLayer->GetOutputSlot(0).SetTensorInfo(layerInfoToModify);
+
+            if (!reduceDescriptor.m_KeepDims)
+            {
+                recalculateAxis++;
+            }
+
+            layers.emplace_back(replacementLayer);
+        }
+
+        // Check that the TensorInfo from the last layer equals the inferred output from the original layer.
+        ARMNN_ASSERT(baseLayerInfo == layers.back()->GetOutputSlot().GetTensorInfo());
+
+        std::list<Layer*> replacementLayers(layers.begin(), layers.end());
+
+        // Substitute the new chained subgraph for the original reduce layer.
+        SubgraphView substitutionSubgraph(baseLayer);
+        SubgraphView replacementSubgraph(CreateInputsFrom({replacementLayers.front()}),
+                                         CreateOutputsFrom({replacementLayers.back()}),
+                                         std::move(replacementLayers));
+
+        optimizationViews.AddSubstitution({substitutionSubgraph, replacementSubgraph});
+
+    }
+}
+
 } // namespace armnn
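To make the chaining arithmetic concrete, here is a minimal standalone sketch (plain C++ with hypothetical example shapes, not part of this patch or of ArmNN) of how later axes shift when keepDims is false: reducing a {1, 3, 2, 4} tensor over axes {1, 3} removes axis 1 first, so the second chained layer has to target axis 2 rather than 3. This is what the recalculateAxis counter in ChainReduceLayers accounts for.

    // Standalone illustration of the axis shift handled by recalculateAxis.
    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        std::vector<uint32_t> shape = {1, 3, 2, 4}; // example input shape
        std::vector<uint32_t> axes  = {1, 3};       // axes to reduce, ascending
        const bool keepDims = false;

        uint32_t recalculateAxis = 0;
        for (uint32_t axis : axes)
        {
            // Earlier reductions removed dimensions, so this layer's axis shifts down.
            uint32_t shiftedAxis = axis - recalculateAxis;
            if (keepDims)
            {
                shape[shiftedAxis] = 1; // dimension kept with size 1, no shift needed
            }
            else
            {
                shape.erase(shape.begin() + shiftedAxis);
                ++recalculateAxis;
            }

            std::cout << "after reducing axis " << axis << ": {";
            for (uint32_t d : shape) { std::cout << ' ' << d; }
            std::cout << " }\n"; // prints { 1 2 4 }, then { 1 2 }
        }
        return 0;
    }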
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index d9efab288f..5bc5abcb05 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -7,6 +7,7 @@
 #include <armnn/Descriptors.hpp>
 #include <armnn/Tensor.hpp>
 #include <armnn/utility/Assert.hpp>
+#include <armnn/utility/NumericCast.hpp>
 #include <backendsCommon/WorkloadData.hpp>
 
 #include <arm_compute/core/Types.h>
@@ -267,4 +268,58 @@ inline arm_compute::ReductionOperation ConvertReductionOperationToAcl(const Redu
     }
 }
 
+/// Function to compute the output tensor shape based on the axes and whether keepDims is set.
+inline const TensorShape ComputeReductionTensorShape(const armnn::TensorInfo& input,
+                                                     const std::vector<uint32_t>& vAxis,
+                                                     const bool keepDims)
+{
+    unsigned int rank = input.GetNumDimensions();
+    unsigned int outputRank = 0;
+
+    // Calculate the output rank.
+    if (keepDims)
+    {
+        outputRank = rank;
+    }
+    else if (vAxis.empty())
+    {
+        outputRank = 1;
+    }
+    else if (vAxis.size() > input.GetNumDimensions())
+    {
+        throw LayerValidationException("ReduceLayer: Dimensions to reduce cannot be bigger than input dimensions");
+    }
+    else
+    {
+        outputRank = input.GetNumDimensions() - armnn::numeric_cast<unsigned int>(vAxis.size());
+        if (outputRank == 0)
+        {
+            outputRank = 1;
+        }
+    }
+
+    std::vector<unsigned int> dimSizes(outputRank, 1);
+    if (!vAxis.empty())
+    {
+        // Skip the dimensions that have been reduced unless keepDims is true.
+        unsigned int outputIndex = 0;
+        for (unsigned int i = 0; i < input.GetNumDimensions(); ++i)
+        {
+            if (std::find(vAxis.begin(), vAxis.end(), i) == vAxis.end())
+            {
+                dimSizes[outputIndex] = armnn::numeric_cast<unsigned int>(input.GetShape()[i]);
+                ++outputIndex;
+            }
+            else if (keepDims)
+            {
+                dimSizes[outputIndex] = 1;
+                ++outputIndex;
+            }
+        }
+    }
+
+    const TensorShape inferredShape = TensorShape(outputRank, dimSizes.data());
+    return inferredShape;
+}
+
 } // namespace armnn
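As a sanity check on the shape inference above, this self-contained sketch (plain std::vector in place of TensorShape, example shapes chosen for illustration, not ArmNN code) mirrors the dimension-collecting loop of ComputeReductionTensorShape, including the rank floor of 1 enforced by the outputRank == 0 guard:

    #include <algorithm>
    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Mirrors the dimension-collecting loop of ComputeReductionTensorShape.
    std::vector<uint32_t> ReducedShape(const std::vector<uint32_t>& in,
                                       const std::vector<uint32_t>& axes,
                                       bool keepDims)
    {
        std::vector<uint32_t> out;
        for (uint32_t i = 0; i < in.size(); ++i)
        {
            const bool reduced = std::find(axes.begin(), axes.end(), i) != axes.end();
            if (!reduced)
            {
                out.push_back(in[i]); // dimension survives untouched
            }
            else if (keepDims)
            {
                out.push_back(1);     // reduced dimension kept as size 1
            }
            // otherwise the reduced dimension is dropped entirely
        }
        if (out.empty())
        {
            out.push_back(1);         // rank never drops below 1
        }
        return out;
    }

    int main()
    {
        auto print = [](const std::vector<uint32_t>& s)
        {
            for (uint32_t d : s) { std::cout << d << ' '; }
            std::cout << '\n';
        };
        print(ReducedShape({2, 3, 4}, {1}, false));       // 2 4
        print(ReducedShape({2, 3, 4}, {1}, true));        // 2 1 4
        print(ReducedShape({2, 3, 4}, {0, 1, 2}, false)); // 1
        return 0;
    }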
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index f97cb4bba8..92a06aa8e1 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -29,6 +29,7 @@
 #include "workloads/ClDivisionWorkload.hpp"
 #include "workloads/ClFullyConnectedWorkload.hpp"
 #include "workloads/ClMultiplicationWorkload.hpp"
+#include "workloads/ClReduceWorkload.hpp"
 #include "workloads/ClSubtractionWorkload.hpp"
 
 #include <Optimizer.hpp>
@@ -188,7 +189,8 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
         if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
              || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
              || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
-             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+             || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
+             || base.GetType() == LayerType::Reduce)
             && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
         {
             for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
@@ -412,6 +414,26 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
                     }
                 }
             }
+
+            // Separate check for Reduce, as it is not fused with an activation layer.
+            if (base.GetType() == LayerType::Reduce)
+            {
+                ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
+
+                // Get params from the base layer.
+                ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
+
+                arm_compute::Status status = ClReduceWorkloadValidate(
+                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                    baseLayer->GetOutputSlot(0).GetTensorInfo(),
+                    reduceDescriptor);
+
+                if (status)
+                {
+                    ChainReduceLayers<ReduceLayer>(optimizationViews, baseLayer, reduceDescriptor);
+                    untouched.erase(baseLayer->GetGuid());
+                }
+            }
         }
     }
 }
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
index 6f594ff7a9..0ad6259cc2 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.cpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -20,23 +20,52 @@ arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
                                              const ReduceDescriptor& desc)
 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
-    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
-    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
-    {
-        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
-                                   "ClReduceWorkload: Reduction is supported only on 1 axis.");
-    }
 
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
                                                                           desc.m_vAxis);
 
+    // As ACL only supports one axis, validate the layer for each axis if more than one is present.
+    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+    {
+        arm_compute::Status status;
+
+        for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
+        {
+            TensorInfo inputToModify = input;
+            std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i]);
 
-    return arm_compute::CLReductionOperation::validate(&aclInputInfo,
-                                                       &aclOutputInfo,
-                                                       static_cast<unsigned int>(coords[0]),
-                                                       ConvertReductionOperationToAcl(desc),
-                                                       desc.m_KeepDims);
+            // Calculate the output shape using the input shape for a single axis.
+            // Otherwise the inferred output TensorInfo is reduced over all axes,
+            // which would fail validation as only one axis is supported.
+            const TensorShape& reducedShape = ComputeReductionTensorShape(inputToModify, singleAxis, desc.m_KeepDims);
+            inputToModify.SetShape(reducedShape);
+
+            const arm_compute::TensorInfo aclOutputInfoModified =
+                armcomputetensorutils::BuildArmComputeTensorInfo(inputToModify);
+
+            status = arm_compute::CLReductionOperation::validate(&aclInputInfo,
+                                                                 &aclOutputInfoModified,
+                                                                 static_cast<unsigned int>(coords[i]),
+                                                                 ConvertReductionOperationToAcl(desc),
+                                                                 desc.m_KeepDims);
+            if (!status)
+            {
+                break;
+            }
+        }
+        return status;
+    }
+    else
+    {
+        const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+        return arm_compute::CLReductionOperation::validate(&aclInputInfo,
+                                                           &aclOutputInfo,
+                                                           static_cast<unsigned int>(coords[0]),
+                                                           ConvertReductionOperationToAcl(desc),
+                                                           desc.m_KeepDims);
+    }
 }
 
 ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
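One subtlety in the per-axis validation above: each loop iteration copies the original input (TensorInfo inputToModify = input;), so every axis is validated independently against the original input shape rather than against the intermediate produced by the previous chained reduction. A small sketch of what each iteration checks (hypothetical shapes, not ArmNN code):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    int main()
    {
        const std::vector<uint32_t> input = {2, 3, 4}; // example input shape
        const std::vector<uint32_t> axes  = {0, 2};    // axes to validate
        const bool keepDims = false;

        for (uint32_t axis : axes)
        {
            std::vector<uint32_t> out = input; // fresh copy of the ORIGINAL input each iteration
            if (keepDims) { out[axis] = 1; }
            else          { out.erase(out.begin() + axis); }

            std::cout << "validate axis " << axis << " against output {";
            for (uint32_t d : out) { std::cout << ' ' << d; }
            std::cout << " }\n"; // { 3 4 }, then { 2 3 }
        }
        return 0;
    }

The Neon changes below follow the same pattern.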
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index a1299fb458..6d5eab0ddf 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -29,6 +29,7 @@
 #include "workloads/NeonDivisionWorkload.hpp"
 #include "workloads/NeonFullyConnectedWorkload.hpp"
 #include "workloads/NeonMultiplicationWorkload.hpp"
+#include "workloads/NeonReduceWorkload.hpp"
 #include "workloads/NeonSubtractionWorkload.hpp"
 
 #include <Optimizer.hpp>
@@ -164,7 +165,8 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
         if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
             || base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
             || base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
-            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+            || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
+            || base.GetType() == LayerType::Reduce)
             && (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
         {
             for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
@@ -389,6 +391,26 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
                     }
                 }
             }
+
+            // Separate check for Reduce, as it is not fused with an activation layer.
+            if (base.GetType() == LayerType::Reduce)
+            {
+                ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
+
+                // Get params from the base layer.
+                ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
+
+                arm_compute::Status status = NeonReduceWorkloadValidate(
+                    baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+                    baseLayer->GetOutputSlot(0).GetTensorInfo(),
+                    reduceDescriptor);
+
+                if (status)
+                {
+                    ChainReduceLayers<ReduceLayer>(optimizationViews, baseLayer, reduceDescriptor);
+                    untouched.erase(baseLayer->GetGuid());
+                }
+            }
         }
     }
 }
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
index 0e1b46a3a1..6125f3609d 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -21,22 +21,52 @@ arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
                                                const ReduceDescriptor& desc)
 {
     const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
-    const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
-    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
-    {
-        return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
-                                   "NeonReduceWorkload: Reduction is supported only on 1 axis.");
-    }
 
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
                                                                           input.GetNumDimensions(),
                                                                           desc.m_vAxis);
 
-    return arm_compute::NEReductionOperation::validate(&aclInputInfo,
-                                                       &aclOutputInfo,
-                                                       static_cast<unsigned int>(coords[0]),
-                                                       ConvertReductionOperationToAcl(desc),
-                                                       desc.m_KeepDims);
+    // As ACL only supports one axis, validate the layer for each axis if more than one is present.
+    if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+    {
+        arm_compute::Status status;
+
+        for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
+        {
+            TensorInfo inputToModify = input;
+            std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i]);
+
+            // Calculate the output shape using the input shape for a single axis.
+            // Otherwise the inferred output TensorInfo is reduced over all axes,
+            // which would fail validation as only one axis is supported.
+            const TensorShape& reducedShape = ComputeReductionTensorShape(inputToModify, singleAxis, desc.m_KeepDims);
+            inputToModify.SetShape(reducedShape);
+
+            const arm_compute::TensorInfo aclOutputInfoModified =
+                armcomputetensorutils::BuildArmComputeTensorInfo(inputToModify);
+
+            status = arm_compute::NEReductionOperation::validate(&aclInputInfo,
+                                                                 &aclOutputInfoModified,
+                                                                 static_cast<unsigned int>(coords[i]),
+                                                                 ConvertReductionOperationToAcl(desc),
+                                                                 desc.m_KeepDims);
+            if (!status)
+            {
+                break;
+            }
+        }
+        return status;
+    }
+    else
+    {
+        const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+        return arm_compute::NEReductionOperation::validate(&aclInputInfo,
+                                                           &aclOutputInfo,
+                                                           static_cast<unsigned int>(coords[0]),
+                                                           ConvertReductionOperationToAcl(desc),
+                                                           desc.m_KeepDims);
+    }
 }
 
 NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
@@ -50,6 +80,7 @@ NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor,
     arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
                                                                           info.m_InputTensorInfos[0].GetNumDimensions(),
                                                                           m_Data.m_Parameters.m_vAxis);
+
     m_Layer.configure(&input, &output, static_cast<unsigned int>(coords[0]),