aboutsummaryrefslogtreecommitdiff
path: root/src/backends/cl
diff options
context:
space:
mode:
author Matthew Sloyan <matthew.sloyan@arm.com> 2021-05-03 12:22:03 +0100
committer Matthew Sloyan <matthew.sloyan@arm.com> 2021-05-06 17:58:26 +0000
commit d905decd256558bbee165e636ce4242ac3b9c917 (patch)
tree 86f51622399553d1741b66ff232a429de8fc43f8 /src/backends/cl
parent 1f58f03d82c482626b1b4673b6c0e25da4338fb5 (diff)
download armnn-d905decd256558bbee165e636ce4242ac3b9c917.tar.gz
MLCE-418 Reduce layer does not support multiple axes
* Added backend-specific optimization to chain new reduce layers for each axis to simulate behaviour of a layer with multiple axes.
* Added function to calculate reduced output shape.
* Added unit tests.

Signed-off-by: Matthew Sloyan <matthew.sloyan@arm.com>
Change-Id: I180b0b111b7bcf3d0c283f1db0b82d5f17757682
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- src/backends/cl/ClBackend.cpp 24
-rw-r--r-- src/backends/cl/workloads/ClReduceWorkload.cpp 51
2 files changed, 63 insertions, 12 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index f97cb4bba8..92a06aa8e1 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -29,6 +29,7 @@
#include "workloads/ClDivisionWorkload.hpp"
#include "workloads/ClFullyConnectedWorkload.hpp"
#include "workloads/ClMultiplicationWorkload.hpp"
+#include "workloads/ClReduceWorkload.hpp"
#include "workloads/ClSubtractionWorkload.hpp"
#include <Optimizer.hpp>
@@ -188,7 +189,8 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
if ((base.GetType() == LayerType::DepthwiseConvolution2d || base.GetType() == LayerType::Convolution2d
|| base.GetType() == LayerType::BatchNormalization || base.GetType() == LayerType::FullyConnected
|| base.GetType() == LayerType::Addition || base.GetType() == LayerType::Multiplication
- || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division)
+ || base.GetType() == LayerType::Subtraction || base.GetType() == LayerType::Division
+ || base.GetType() == LayerType::Reduce)
&& (base.GetAdditionalInformation<ActivationDescriptor>() == nullptr))
{
for (auto output = base.BeginOutputSlots(); output != base.EndOutputSlots(); ++output)
@@ -412,6 +414,26 @@ OptimizationViews ClBackend::OptimizeSubgraphView(const SubgraphView& subgraph,
}
}
}
+
+ // Separate check for Reduce as we aren't fusing with activation layer
+ if (base.GetType() == LayerType::Reduce)
+ {
+ ReduceLayer* baseLayer = PolymorphicDowncast<ReduceLayer*>(&base);
+
+ // Get params from base layer
+ ReduceDescriptor reduceDescriptor = baseLayer->GetParameters();
+
+ arm_compute::Status status = ClReduceWorkloadValidate(
+ baseLayer->GetInputSlot(0).GetConnectedOutputSlot()->GetTensorInfo(),
+ baseLayer->GetOutputSlot(0).GetTensorInfo(),
+ reduceDescriptor);
+
+ if (status)
+ {
+ ChainReduceLayers<ReduceLayer>(optimizationViews, baseLayer, reduceDescriptor);
+ untouched.erase(baseLayer->GetGuid());
+ }
+ }
}
}
}
diff --git a/src/backends/cl/workloads/ClReduceWorkload.cpp b/src/backends/cl/workloads/ClReduceWorkload.cpp
index 6f594ff7a9..0ad6259cc2 100644
--- a/src/backends/cl/workloads/ClReduceWorkload.cpp
+++ b/src/backends/cl/workloads/ClReduceWorkload.cpp
@@ -20,23 +20,52 @@ arm_compute::Status ClReduceWorkloadValidate(const TensorInfo& input,
const ReduceDescriptor& desc)
{
const arm_compute::TensorInfo aclInputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(input);
- const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
- if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
- {
- return arm_compute::Status(arm_compute::ErrorCode::RUNTIME_ERROR,
- "ClReduceWorkload: Reduction is supported only on 1 axis.");
- }
arm_compute::Coordinates coords = BuildArmComputeReductionCoordinates(aclInputInfo.num_dimensions(),
input.GetNumDimensions(),
desc.m_vAxis);
+ // As ACL only support one axis, validate the layer for each axis if more than one is present.
+ if (!desc.m_vAxis.empty() && desc.m_vAxis.size() > 1)
+ {
+ arm_compute::Status status;
+
+ for (unsigned int i = 0; i != desc.m_vAxis.size(); ++i)
+ {
+ TensorInfo inputToModify = input;
+ std::vector<uint32_t> singleAxis(1, desc.m_vAxis[i]);
- return arm_compute::CLReductionOperation::validate(&aclInputInfo,
- &aclOutputInfo,
- static_cast<unsigned int>(coords[0]),
- ConvertReductionOperationToAcl(desc),
- desc.m_KeepDims);
+ // Calculate the output shape using the input shape for a single axis.
+ // Currently the output TensorInfo inferred will be reduced upon multiple axis
+ // which will fail validation as only one axis is supported.
+ const TensorShape& reducedShape = ComputeReductionTensorShape(inputToModify, singleAxis, desc.m_KeepDims);
+ inputToModify.SetShape(reducedShape);
+
+ const arm_compute::TensorInfo aclOutputInfoModified =
+ armcomputetensorutils::BuildArmComputeTensorInfo(inputToModify);
+
+ status = arm_compute::CLReductionOperation::validate(&aclInputInfo,
+ &aclOutputInfoModified,
+ static_cast<unsigned int>(coords[i]),
+ ConvertReductionOperationToAcl(desc),
+ desc.m_KeepDims);
+ if (!status)
+ {
+ break;
+ }
+ }
+ return status;
+ }
+ else
+ {
+ const arm_compute::TensorInfo aclOutputInfo = armcomputetensorutils::BuildArmComputeTensorInfo(output);
+
+ return arm_compute::CLReductionOperation::validate(&aclInputInfo,
+ &aclOutputInfo,
+ static_cast<unsigned int>(coords[0]),
+ ConvertReductionOperationToAcl(desc),
+ desc.m_KeepDims);
+ }
}
ClReduceWorkload::ClReduceWorkload(const ReduceQueueDescriptor& descriptor, const WorkloadInfo& info)