aboutsummaryrefslogtreecommitdiff
path: root/src/backends
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends')
-rw-r--r--src/backends/aclCommon/ArmComputeSubgraphUtils.hpp84
-rw-r--r--src/backends/aclCommon/ArmComputeUtils.hpp94
-rw-r--r--src/backends/cl/ClBackend.cpp21
-rw-r--r--src/backends/cl/workloads/ClReduceWorkload.cpp34
-rw-r--r--src/backends/neon/NeonBackend.cpp21
-rw-r--r--src/backends/neon/workloads/NeonReduceWorkload.cpp34
6 files changed, 259 insertions, 29 deletions
diff --git a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
index a0fca46330..521c17cd62 100644
--- a/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeSubgraphUtils.hpp
@@ -6,6 +6,9 @@
#pragma once
#include <armnn/backends/OptimizationViews.hpp>
+#include <armnn/utility/Assert.hpp>
+
+#include <aclCommon/ArmComputeUtils.hpp>
namespace armnn
{
@@ -147,4 +150,85 @@ LayerType* FuseLayerWithWeightsAndBiases(OptimizationViews& optimizationViews,
return replacementLayer;
}
+//
+// Splits a Reduce layer that reduces over several axes into a chain of Reduce layers that each
+// reduce over exactly one axis, because only one axis is currently supported per layer.
+//
+// One layer is added to the graph of optimizationViews per entry of desc.m_vAxis, in that order.
+// Layer i receives baseLayer's parameters with m_vAxis replaced by a single axis, and its output
+// TensorInfo is the shape of baseLayer's input after reducing over the first i + 1 axes.
+//
+// When desc.m_KeepDims is false every earlier link removes one dimension, so the axis handed to
+// layer i is shifted down by the number of already-reduced axes that lie below it.
+//
+// Consecutive layers are connected to each other here. The input of the first layer and the output
+// of the last layer are left unconnected; they are connected when the subgraph is replaced.
+//
+// Returns the new layers in chain order. desc.m_vAxis must not be empty.
+//
+template<typename LayerType>
+std::vector<Layer*> ChainReduceLayers(OptimizationViews& optimizationViews,
+                                      LayerType* baseLayer,
+                                      ReduceDescriptor& desc)
+{
+    // Vector of new chained layers, used for substitution.
+    std::vector<Layer*> layers;
+
+    // Axes reduced so far, so the output shape of each link can be computed.
+    std::vector<uint32_t> axes;
+
+    // Every link's shape is derived from the tensor feeding the original layer, so query it once.
+    const TensorInfo layerInfo = baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo();
+
+    for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
+    {
+        const uint32_t originalAxis = desc.m_vAxis[i];
+
+        // Count the dimensions below originalAxis that earlier links have already removed.
+        // Counting them (instead of subtracting the loop index) keeps the unsigned subtraction
+        // below from wrapping when the axes are not listed in ascending order, e.g. {2, 0}.
+        uint32_t removedBelow = 0;
+        if (!desc.m_KeepDims)
+        {
+            for (const uint32_t reducedAxis : axes)
+            {
+                if (reducedAxis < originalAxis)
+                {
+                    ++removedBelow;
+                }
+            }
+        }
+
+        axes.emplace_back(originalAxis);
+
+        // Shape of the original input once all axes processed so far have been reduced.
+        const TensorInfo reducedTensorInfo = ComputeReductionTensorShape(layerInfo,
+                                                                         axes,
+                                                                         desc.m_KeepDims);
+
+        // Create a descriptor from the base layer's parameters and assign the single axis.
+        const std::vector<uint32_t> singleAxis(1, originalAxis - removedBelow);
+        ReduceDescriptor newReduceDescriptor = baseLayer->GetParameters();
+        newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());
+
+        // Add new layer to graph.
+        const std::string layerName = "reduce_layer_" + std::to_string(i);
+        Layer* replacementLayer = optimizationViews.GetGraph().AddLayer<LayerType>(newReduceDescriptor,
+                                                                                   layerName.c_str());
+
+        // Connect previous layer with new layer.
+        // The first and last layer will be connected when the subgraph is replaced.
+        if (!layers.empty())
+        {
+            layers.back()->GetOutputSlot(0).Connect(replacementLayer->GetInputSlot(0));
+        }
+
+        // Set updated tensorInfo for new layer.
+        replacementLayer->GetOutputSlot(0).SetTensorInfo(reducedTensorInfo);
+
+        layers.emplace_back(replacementLayer);
+    }
+
+    // layers.back() below is only valid when at least one axis was processed.
+    ARMNN_ASSERT(!layers.empty());
+
+    // Check if the TensorInfo from the last layer equals the inferred output from the original layer.
+    ARMNN_ASSERT(baseLayer->GetOutputSlot(0).GetTensorInfo() == layers.back()->GetOutputSlot().GetTensorInfo());
+
+    return layers;
+}
+
+//
+// Registers a substitution on optimizationViews that swaps baseLayer for the layers in `layers`.
+// The first element of `layers` provides the replacement subgraph's inputs and the last element
+// provides its outputs.
+//
+template<typename LayerType>
+void ReplaceLayers(OptimizationViews& optimizationViews,
+                   LayerType* baseLayer,
+                   std::vector<Layer*>& layers)
+{
+    // The replacement SubgraphView is built from a std::list, so copy the chain into one.
+    std::list<Layer*> chain;
+    chain.assign(layers.begin(), layers.end());
+
+    Layer* const chainHead = chain.front();
+    Layer* const chainTail = chain.back();
+
+    SubgraphView originalSubgraph(baseLayer);
+    SubgraphView chainSubgraph(CreateInputsFrom({chainHead}),
+                               CreateOutputsFrom({chainTail}),
+                               std::move(chain));
+
+    optimizationViews.AddSubstitution({originalSubgraph, chainSubgraph});
+}
+
} // namespace armnn
diff --git a/src/backends/aclCommon/ArmComputeUtils.hpp b/src/backends/aclCommon/ArmComputeUtils.hpp
index d9efab288f..624ce5df7a 100644
--- a/src/backends/aclCommon/ArmComputeUtils.hpp
+++ b/src/backends/aclCommon/ArmComputeUtils.hpp
@@ -7,10 +7,19 @@
#include <armnn/Descriptors.hpp>
#include <armnn/Tensor.hpp>
#include <armnn/utility/Assert.hpp>
+#include <armnn/utility/NumericCast.hpp>
#include <backendsCommon/WorkloadData.hpp>
#include <arm_compute/core/Types.h>
+#if defined(ARMCOMPUTENEON_ENABLED)
+#include "neon/workloads/NeonReduceWorkload.hpp"
+#endif
+
+#if defined(ARMCOMPUTECL_ENABLED)
+#include "cl/workloads/ClReduceWorkload.hpp"
+#endif
+
namespace armnn
{
@@ -267,4 +276,89 @@ inline arm_compute::ReductionOperation ConvertReductionOperationToAcl(const Redu
}
}
+/// Computes the TensorInfo produced by reducing `input` over the axes listed in `vAxis`.
+///
+/// The result is a copy of `input` with only its shape replaced:
+///  - dimensions listed in vAxis are dropped, or kept with size 1 when keepDims is true;
+///  - all other dimensions keep their size and relative order;
+///  - if vAxis is empty the shape is all ones, with the input's rank when keepDims is true and
+///    rank 1 otherwise;
+///  - if keepDims is false and no dimension survives, the shape is {1}.
+///
+/// Throws LayerValidationException when keepDims is false and vAxis has more entries than the
+/// input has dimensions.
+inline const TensorInfo ComputeReductionTensorShape(const armnn::TensorInfo& input,
+                                                    const std::vector<uint32_t>& vAxis,
+                                                    const bool keepDims)
+{
+    auto reducedTensorInfo = input;
+    const unsigned int rank = reducedTensorInfo.GetNumDimensions();
+
+    if (!keepDims && vAxis.size() > rank)
+    {
+        throw LayerValidationException("ReduceLayer: Dimensions to reduce can not be bigger than input dimensions");
+    }
+
+    std::vector<unsigned int> dimSizes;
+    if (vAxis.empty())
+    {
+        // No axes given: every dimension collapses to 1.
+        dimSizes.assign(keepDims ? rank : 1U, 1U);
+    }
+    else
+    {
+        // The output dimensions are collected as they are found rather than written into a buffer
+        // sized from vAxis.size(): duplicate or out-of-range axes would otherwise leave more
+        // surviving dimensions than the buffer has room for.
+        dimSizes.reserve(rank);
+        for (unsigned int i = 0; i < rank; ++i)
+        {
+            const bool isReduced = std::find(vAxis.begin(), vAxis.end(), i) != vAxis.end();
+            if (!isReduced)
+            {
+                dimSizes.push_back(armnn::numeric_cast<unsigned int>(reducedTensorInfo.GetShape()[i]));
+            }
+            else if (keepDims)
+            {
+                // Skip the dimension that has been reduced unless keepDims is true.
+                dimSizes.push_back(1U);
+            }
+        }
+
+        // Reducing away every dimension still yields a one-element, rank-1 tensor.
+        if (!keepDims && dimSizes.empty())
+        {
+            dimSizes.push_back(1U);
+        }
+    }
+
+    const TensorShape inferredShape = TensorShape(armnn::numeric_cast<unsigned int>(dimSizes.size()),
+                                                  dimSizes.data());
+    reducedTensorInfo.SetShape(inferredShape);
+    return reducedTensorInfo;
+}
+
+/// Checks whether a Reduce layer with several axes is supported by validating, one step at a time,
+/// the chain of single-axis reductions it is equivalent to.
+///
+/// func   - validation function, called as func(stepInput, stepOutput, singleAxisDescriptor);
+///          its result is assigned to status.
+/// input  - TensorInfo of the tensor being reduced.
+/// desc   - ReduceDescriptor whose m_vAxis lists the axes to reduce and whose m_KeepDims is honoured.
+/// status - lvalue receiving the result of the last step validated. Validation stops at the first
+///          step for which !status is true. status is left untouched when desc.m_vAxis is empty.
+///
+/// The expansion is a single statement (do { ... } while (false)), so its working variables stay
+/// private to the macro and it can be used more than once in the same scope. input and desc are
+/// evaluated more than once.
+///
+/// NOTE(review): when m_KeepDims is false, step i is given axis m_vAxis[i] - i. This assumes the
+/// axes are listed in ascending order; otherwise the unsigned subtraction wraps. TODO confirm that
+/// callers guarantee the ordering.
+#define IS_MULTI_AXES_REDUCE_SUPPORTED(func, input, desc, status)                                  \
+    do                                                                                             \
+    {                                                                                              \
+        armnn::TensorInfo inputTensorInfo = (input);                                               \
+        unsigned int recalculatedAxis = 0;                                                         \
+        std::vector<uint32_t> axes;                                                                \
+                                                                                                   \
+        for (unsigned int i = 0; i != (desc).m_vAxis.size(); ++i)                                  \
+        {                                                                                          \
+            axes.emplace_back((desc).m_vAxis[i]);                                                  \
+                                                                                                   \
+            const armnn::TensorInfo reducedTensorInfo =                                            \
+                ComputeReductionTensorShape((input), axes, (desc).m_KeepDims);                     \
+                                                                                                   \
+            const std::vector<uint32_t> singleAxis(1, (desc).m_vAxis[i] - recalculatedAxis);       \
+                                                                                                   \
+            armnn::ReduceDescriptor newReduceDescriptor = (desc);                                  \
+            newReduceDescriptor.m_vAxis.assign(singleAxis.begin(), singleAxis.end());              \
+                                                                                                   \
+            status = func(inputTensorInfo, reducedTensorInfo, newReduceDescriptor);                \
+            if (!status)                                                                           \
+            {                                                                                      \
+                break;                                                                             \
+            }                                                                                      \
+                                                                                                   \
+            if (!(desc).m_KeepDims)                                                                \
+            {                                                                                      \
+                recalculatedAxis++;                                                                \
+            }                                                                                      \
+                                                                                                   \
+            inputTensorInfo = reducedTensorInfo;                                                   \
+        }                                                                                          \
+    } while (false)
+
} // namespace armnn
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 35770d9219..a9ab237325 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -30,6 +30,7 @@
#include "workloads/ClDivisionWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
+#include "workloads/ClReduceWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"
#include <Optimizer.hpp>
@@ -220,6 +221,7 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
--it;
Layer& base = **it;
+ // Fuse activation into previous layer if supported by backend
if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
|| base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
|| base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
@@ -451,6 +453,25 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
}
}
}
+
+ // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
+ if (base.GetType() == LayerType::Reduce)
+ {
+ ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
+ ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
+
+ if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
+ {
+ // Add new layers to the graph and connect them.
+ std::vector<Layer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
+ baseLayer,
+ reduceDescriptor);
+
+ // Replace existing baselayer with new subgraph.
+ ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
+ untouched.erase(baseLayer->GetGuid());
+ }
+ }
}
if (optimizationViews.GetSubstitutions().empty())
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
index 6f594ff7a9..18415c4cba 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.cpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -19,24 +19,28 @@ arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const ReduceDescriptor& desc)
{
- const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
- const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
- if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+    // With zero or one axis the request is validated directly by CLReductionOperation.
+    if (desc.m_vAxis.size() == 1 || desc.m_vAxis.empty())
{
- return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
- "ClReduceWorkload: Reduction is supported only on 1 axis.");
- }
-
- arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
- input.GetNumDimensions(),
- desc.m_vAxis);
+        const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+        const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+        arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
+                                                                              input.GetNumDimensions(),
+                                                                              desc.m_vAxis);
- return arm_compute::CLReductionOperation::validate(&aclInputInfo,
- &aclOutputInfo,
- static_cast<unsigned int>(coords[0]),
- ConvertReductionOperationToAcl(desc),
- desc.m_KeepDims);
+        return arm_compute::CLReductionOperation::validate(&aclInputInfo,
+                                                           &aclOutputInfo,
+                                                           static_cast<unsigned int>(coords[0]),
+                                                           ConvertReductionOperationToAcl(desc),
+                                                           desc.m_KeepDims);
+    }
+    else
+    {
+        // Validate layer if there are multiple axes: each single-axis step of the equivalent
+        // chain is checked by calling this function again with a one-axis descriptor.
+        // The CL validator must be used here, not the Neon one, so that the steps are checked
+        // against CLReductionOperation and the file builds when only the CL backend is enabled.
+        arm_compute::Status status;
+        IS_MULTI_AXES_REDUCE_SUPPORTED(ClReduceWorkloadValidate, input, desc, status);
+        return status;
+    }
}
ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index a1299fb458..b496238cf3 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -29,6 +29,7 @@
#include "workloads/NeonDivisionWorkload.hpp"
#include "workloads/NeonFullyConnectedWorkload.hpp"
#include "workloads/NeonMultiplicationWorkload.hpp"
+#include "workloads/NeonReduceWorkload.hpp"
#include "workloads/NeonSubtractionWorkload.hpp"
#include <Optimizer.hpp>
@@ -161,6 +162,7 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
--it;
Layer& base = **it;
+ // Fuse activation into previous layer if supported by backend
if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
|| base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
|| base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
@@ -393,6 +395,25 @@ OptimizationViews NeonBackend::OptimizeSubgraphView(const SubgraphView& subgraph
}
}
}
+
+ // Separate reduce layer with multiple axes into multiple reduce layers with 1 axis.
+ if (base.GetType() == LayerType::Reduce)
+ {
+ ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
+ ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
+
+ if (!reduceDescriptor.m_vAxis.empty() && reduceDescriptor.m_vAxis.size() > 1)
+ {
+ // Add new layers to the graph and connect them.
+ std::vector<Layer*> layers = ChainReduceLayers<ReduceLayer>(optimizationViews,
+ baseLayer,
+ reduceDescriptor);
+
+ // Replace existing baselayer with new subgraph.
+ ReplaceLayers<ReduceLayer>(optimizationViews, baseLayer, layers);
+ untouched.erase(baseLayer->GetGuid());
+ }
+ }
}
if (optimizationViews.GetSubstitutions().empty())
diff --git a/src/backends/neon/workloads/NeonReduceWorkload.cpp b/src/backends/neon/workloads/NeonReduceWorkload.cpp
index 0e1b46a3a1..1436cd1192 100644
--- a/src/backends/neon/workloads/NeonReduceWorkload.cpp
+++ b/src/backends/neon/workloads/NeonReduceWorkload.cpp
@@ -20,23 +20,28 @@ arm_compute::Status NeonReduceWorkloadValidate(const TensorInfo& input,
const TensorInfo& output,
const ReduceDescriptor& desc)
{
- const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
- const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
- if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+ if ( desc.m_vAxis.size()==1 || desc.m_vAxis.empty()) // Zero or one axis: validated directly by NEReductionOperation.
{
- return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
- "NeonReduceWorkload: Reduction is supported only on 1 axis.");
- }
+ const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
+ const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
- arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
- input.GetNumDimensions(),
- desc.m_vAxis);
+ arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
+ input.GetNumDimensions(),
+ desc.m_vAxis);
- return arm_compute::NEReductionOperation::validate(&aclInputInfo,
- &aclOutputInfo,
- static_cast<unsigned int>(coords[0]),
- ConvertReductionOperationToAcl(desc),
- desc.m_KeepDims);
+ return arm_compute::NEReductionOperation::validate(&aclInputInfo,
+ &aclOutputInfo,
+ static_cast<unsigned int>(coords[0]), // Only the first coordinate is passed as the reduction axis.
+ ConvertReductionOperationToAcl(desc),
+ desc.m_KeepDims);
+ }
+ else // More than one axis.
+ {
+ // Validate layer if there are multiple axes.
+ arm_compute::Status status; // Receives the result of the last single-axis step validated.
+ IS_MULTI_AXES_REDUCE_SUPPORTED(NeonReduceWorkloadValidate, input, desc, status); // Calls this function again once per axis with a one-axis descriptor.
+ return status; // NOTE(review): `output` is not consulted on this path - confirm that is intended.
+ }
}
NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)
@@ -50,6 +55,7 @@ NeonReduceWorkload::NeonReduceWorkload(const ReduceQueueDescriptor& descriptor,
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(input.info()->num_dimensions(),
info.m_InputTensorInfos[0].GetNumDimensions(),
m_Data.m_Parameters.m_vAxis);
+
m_Layer.configure(&input,
&output,
static_cast<unsigned int>(coords[0]),