Diffstat (limited to 'src/backends/neon/workloads/NeonMergerWorkload.cpp')
-rw-r--r--  src/backends/neon/workloads/NeonMergerWorkload.cpp | 52
1 file changed, 33 insertions(+), 19 deletions(-)
diff --git a/src/backends/neon/workloads/NeonMergerWorkload.cpp b/src/backends/neon/workloads/NeonMergerWorkload.cpp
index be096b4b25..64d4d93d97 100644
--- a/src/backends/neon/workloads/NeonMergerWorkload.cpp
+++ b/src/backends/neon/workloads/NeonMergerWorkload.cpp
@@ -11,12 +11,20 @@
#include <backendsCommon/CpuTensorHandle.hpp>
#include <neon/NeonTensorHandle.hpp>
-#include <arm_compute/runtime/NEON/functions/NEConcatenateLayer.h>
+
namespace armnn
{
using namespace armcomputetensorutils;
+namespace
+{
+size_t CalcAxis(const armnn::MergerDescriptor& desc)
+{
+ return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+}
+} // namespace
+
arm_compute::Status NeonMergerWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
const TensorInfo& output,
const MergerDescriptor& descriptor)
@@ -25,60 +33,66 @@ arm_compute::Status NeonMergerWorkloadValidate(const std::vector<const TensorInf
std::vector<arm_compute::TensorInfo> aclInputs;
for (const TensorInfo* input : inputs)
{
- arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
- aclInputs.emplace_back(aclInputInfo);
+ arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
+ aclInputs.emplace_back(aclInputInfo);
}
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
- arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
-
std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
for (arm_compute::ITensorInfo& input : aclInputs)
{
aclInputPtrs.emplace_back(&input);
}
+ size_t aclAxis = CalcAxis(descriptor);
return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
-
}
NeonMergerWorkload::NeonMergerWorkload(
const MergerQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<MergerQueueDescriptor>(descriptor, info)
{
- m_Execute = true;
+ bool allInputsAreSubtensors = true;
- unsigned int innerAxisOrder = descriptor.m_Parameters.GetNumDimensions() - descriptor.m_Parameters.GetConcatAxis();
+ // Check that all inputs are sub-tensors
+ for (auto input : descriptor.m_Inputs)
+ {
+ if (!input->GetParent())
+ {
+ // Non-sub-tensor input found, so we need to execute the merger function
+ allInputsAreSubtensors = false;
+ break;
+ }
+ }
- if (innerAxisOrder != 1)
+ if (allInputsAreSubtensors)
{
- m_Execute = false;
+ // Can skip configuring the merger function since it's not executed
return;
}
std::vector<arm_compute::ITensor *> aclInputs;
- arm_compute::DataLayout aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
for (auto input : m_Data.m_Inputs)
{
arm_compute::ITensor& aclInput = boost::polymorphic_pointer_downcast<INeonTensorHandle>(input)->GetTensor();
- aclInput.info()->set_data_layout(aclDataLayout);
aclInputs.emplace_back(&aclInput);
}
arm_compute::ITensor& output = boost::polymorphic_pointer_downcast<INeonTensorHandle>(
- m_Data.m_Outputs[0])->GetTensor();
- output.info()->set_data_layout(aclDataLayout);
+ m_Data.m_Outputs[0])->GetTensor();
- arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
+ // Create the layer function
+ m_Layer.reset(new arm_compute::NEConcatenateLayer());
- auto layer = std::make_unique<arm_compute::NEConcatenateLayer>();
- layer->configure(aclInputs, &output, aclAxis);
- m_Layer.reset(layer.release());
+ // Configure input and output tensors
+ size_t aclAxis = CalcAxis(descriptor.m_Parameters);
+ m_Layer->configure(aclInputs, &output, aclAxis);
+ // Prepare
m_Layer->prepare();
}
void NeonMergerWorkload::Execute() const
{
- if (m_Execute)
+ if (m_Layer)
{
ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerWorkload_Execute");
m_Layer->run();
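
A note on the axis arithmetic in this patch: the new CalcAxis() helper converts ArmNN's axis convention into Compute Library's. ArmNN numbers dimensions outermost-first (N = 0 in an NCHW tensor), while Compute Library numbers them innermost-first (W = 0), so the index has to be mirrored. Below is a minimal, self-contained sketch of the same arithmetic; FakeMergerDescriptor is a hypothetical stand-in for armnn::MergerDescriptor, reduced to the two getters the helper actually uses.

    #include <cassert>
    #include <cstddef>

    // Hypothetical stand-in for armnn::MergerDescriptor, exposing only the
    // two getters that CalcAxis() relies on.
    struct FakeMergerDescriptor
    {
        std::size_t m_NumDimensions;
        std::size_t m_ConcatAxis;
        std::size_t GetNumDimensions() const { return m_NumDimensions; }
        std::size_t GetConcatAxis() const { return m_ConcatAxis; }
    };

    // Same arithmetic as the CalcAxis() helper added by the patch: mirror the
    // axis index, because the two libraries count dimensions from opposite ends.
    std::size_t CalcAxis(const FakeMergerDescriptor& desc)
    {
        return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
    }

    int main()
    {
        // Concatenating a 4-D NCHW tensor along channels (ArmNN axis 1) maps
        // to Compute Library axis 2; along width (ArmNN axis 3) it maps to
        // axis 0, the innermost dimension.
        assert(CalcAxis({4, 1}) == 2);
        assert(CalcAxis({4, 3}) == 0);
        return 0;
    }

This mirroring is also consistent with the code being removed: the old version hard-coded arm_compute::DataLayoutDimension::WIDTH and bailed out whenever innerAxisOrder != 1, i.e. it only handled concatenation along the innermost axis, whereas the mirrored index supports any concatenation axis.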