about | summary | refs | log | tree | commit | diff
path: root/src/backends/cl/workloads/ClMergerWorkload.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/cl/workloads/ClMergerWorkload.cpp')
-rw-r--r-- src/backends/cl/workloads/ClMergerWorkload.cpp | 51
1 files changed, 33 insertions, 18 deletions
diff --git a/src/backends/cl/workloads/ClMergerWorkload.cpp b/src/backends/cl/workloads/ClMergerWorkload.cpp
index e06d8c51f5..610acb91fb 100644
--- a/src/backends/cl/workloads/ClMergerWorkload.cpp
+++ b/src/backends/cl/workloads/ClMergerWorkload.cpp
@@ -9,16 +9,25 @@
#include <cl/ClTensorHandle.hpp>
#include <cl/ClLayerSupport.hpp>
+#include <arm_compute/core/Types.h>
+
#include <boost/polymorphic_pointer_cast.hpp>
namespace armnn
{
using namespace armcomputetensorutils;
+namespace
+{
+size_t CalcAxis(const MergerDescriptor& desc)
+{
+ return (desc.GetNumDimensions() - desc.GetConcatAxis()) - 1;
+}
+} //namespace
+
arm_compute::Status ClMergerWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
const TensorInfo& output,
const MergerDescriptor& descriptor)
-
{
std::vector<arm_compute::TensorInfo> aclInputs;
for (const TensorInfo* input : inputs)
@@ -27,59 +36,65 @@ arm_compute::Status ClMergerWorkloadValidate(const std::vector<const TensorInfo*
aclInputs.emplace_back(aclInputInfo);
}
const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
- arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
-
std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
for (arm_compute::ITensorInfo& input : aclInputs)
{
aclInputPtrs.emplace_back(&input);
}
+ size_t aclAxis = CalcAxis(descriptor);
return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
-
}
ClMergerWorkload::ClMergerWorkload(const MergerQueueDescriptor& descriptor, const WorkloadInfo& info)
: BaseWorkload<MergerQueueDescriptor>(descriptor, info)
{
- m_Execute = true;
+ bool allInputsAreSubtensors = true;
- unsigned int innerAxisOrder = descriptor.m_Parameters.GetNumDimensions() - descriptor.m_Parameters.GetConcatAxis();
+ // Check that all inputs are sub-tensors
+ for (auto input : descriptor.m_Inputs)
+ {
+ if (!input->GetParent())
+ {
+ // Non sub-tensor input found so we need to execute the merger function
+ allInputsAreSubtensors = false;
+ break;
+ }
+ }
- if (innerAxisOrder != 1)
+ if (allInputsAreSubtensors)
{
- m_Execute = false;
+ // Can skip configuring the merger function since it's not executed
return;
}
std::vector<arm_compute::ICLTensor *> aclInputs;
- arm_compute::DataLayout aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
for (auto input : m_Data.m_Inputs)
{
arm_compute::ICLTensor& aclInput = boost::polymorphic_pointer_downcast<IClTensorHandle>(input)->GetTensor();
- aclInput.info()->set_data_layout(aclDataLayout);
aclInputs.emplace_back(&aclInput);
}
arm_compute::ICLTensor& output = boost::polymorphic_pointer_downcast<IClTensorHandle>(
m_Data.m_Outputs[0])->GetTensor();
- output.info()->set_data_layout(aclDataLayout);
-
- arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
- m_Layer.configure(aclInputs, &output, aclAxis);
+ // Create the layer function
+ m_Layer.reset(new arm_compute::CLConcatenateLayer());
- m_Layer.prepare();
+ // Configure input and output tensors
+ size_t aclAxis = CalcAxis(descriptor.m_Parameters);
+ m_Layer->configure(aclInputs, &output, aclAxis);
+ // Prepare
+ m_Layer->prepare();
}
void ClMergerWorkload::Execute() const
{
- if (m_Execute)
+ if (m_Layer)
{
ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerWorkload_Execute");
- m_Layer.run();
+ m_Layer->run();
}
-
}
} //namespace armnn
\ No newline at end of file