From 8599a415c159aa867db12853b3195f0f0a51ee6b Mon Sep 17 00:00:00 2001
From: Nikhil Raj
Date: Mon, 19 Nov 2018 14:51:07 +0000
Subject: IVGCVSW-2043 - Merger using ACL for innermost concat axis

* Add ClMergerWorkload and NeonMergerWorkload to call ACL for the
  innermost concat axis
* Modify layer support to call ClMergerWorkloadValidate and
  NeonMergerWorkloadValidate when the concat axis is innermost
* Add m_ConcatAxis to MergerDescriptor
* Modify MergerQueueDescriptor::Validate to check view origins only
  when the sub-tensor path is used

!android-nn-driver:166

Change-Id: I56676b43964c8d6d726387b41b3cc34a512c0f0a
---
 include/armnn/Descriptors.hpp                      | 10 ++-
 include/armnn/ILayerSupport.hpp                    |  1 +
 include/armnn/LayerSupport.hpp                     |  1 +
 src/armnn/Descriptors.cpp                          | 19 ++++-
 src/armnn/LayerSupport.cpp                         |  3 +-
 src/armnn/layers/MergerLayer.cpp                   |  5 +-
 src/backends/backendsCommon/ILayerSupport.cpp      |  1 +
 src/backends/backendsCommon/WorkloadData.cpp       | 10 +++
 src/backends/backendsCommon/WorkloadFactory.cpp    |  4 +-
 src/backends/backendsCommon/test/LayerTests.cpp    |  1 +
 src/backends/cl/ClLayerSupport.cpp                 | 22 ++++--
 src/backends/cl/ClLayerSupport.hpp                 |  1 +
 src/backends/cl/backend.mk                         |  1 +
 src/backends/cl/workloads/CMakeLists.txt           |  1 +
 src/backends/cl/workloads/ClMergerWorkload.cpp     | 85 ++++++++++++++++++++++
 src/backends/cl/workloads/ClMergerWorkload.hpp     | 17 +++--
 src/backends/neon/NeonLayerSupport.cpp             | 22 ++++--
 src/backends/neon/NeonLayerSupport.hpp             |  1 +
 src/backends/neon/backend.mk                       |  1 +
 src/backends/neon/workloads/CMakeLists.txt         |  1 +
 src/backends/neon/workloads/NeonMergerWorkload.cpp | 84 +++++++++++++++++++++
 src/backends/neon/workloads/NeonMergerWorkload.hpp | 16 +++-
 src/backends/reference/RefLayerSupport.cpp         |  2 +
 src/backends/reference/RefLayerSupport.hpp         |  1 +
 24 files changed, 282 insertions(+), 28 deletions(-)
 create mode 100644 src/backends/cl/workloads/ClMergerWorkload.cpp
 create mode 100644 src/backends/neon/workloads/NeonMergerWorkload.cpp

diff --git a/include/armnn/Descriptors.hpp b/include/armnn/Descriptors.hpp
index 2781786f10..0abc7583b9 100644
--- a/include/armnn/Descriptors.hpp
+++ b/include/armnn/Descriptors.hpp
@@ -63,11 +63,14 @@ struct OriginsDescriptor
     const uint32_t* GetViewOrigin(uint32_t idx) const;
     void ReorderOrigins(unsigned int* newOrdering, unsigned int numNewOrdering);
     friend void swap(OriginsDescriptor& first, OriginsDescriptor& second);
+    void SetConcatAxis(unsigned int concatAxis);
+    unsigned int GetConcatAxis() const;
 
 private:
-    uint32_t   m_NumViews;
-    uint32_t   m_NumDimensions;
-    uint32_t** m_ViewOrigins;
+    unsigned int m_ConcatAxis;
+    uint32_t     m_NumViews;
+    uint32_t     m_NumDimensions;
+    uint32_t**   m_ViewOrigins;
 };
 
 struct ViewsDescriptor
@@ -138,6 +141,7 @@ OriginsDescriptor CreateMergerDescriptorForConcatenation(TensorShapeIt first, Te
     }
 
     OriginsDescriptor viewsDescriptor(static_cast<uint32_t>(numInputs), numDimensions);
+    viewsDescriptor.SetConcatAxis(concatenationDimension);
 
     uint32_t viewIndex = 0u;
     uint32_t coordAlongConcatDim = 0u;
diff --git a/include/armnn/ILayerSupport.hpp b/include/armnn/ILayerSupport.hpp
index e679f84109..7677971c6b 100644
--- a/include/armnn/ILayerSupport.hpp
+++ b/include/armnn/ILayerSupport.hpp
@@ -132,6 +132,7 @@ public:
                                  Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
 
     virtual bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                                   const TensorInfo& output,
                                    const OriginsDescriptor& descriptor,
                                    Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const;
 
diff --git a/include/armnn/LayerSupport.hpp b/include/armnn/LayerSupport.hpp
index 7bed5779d3..83d79ec79e 100644
--- a/include/armnn/LayerSupport.hpp
+++ b/include/armnn/LayerSupport.hpp
@@ -147,6 +147,7 @@ bool IsLstmSupported(const BackendId& backend, const TensorInfo& input, const Te
 /// Deprecated in favor of IBackend and ILayerSupport interfaces
 bool IsMergerSupported(const BackendId& backend,
                        const std::vector<const TensorInfo*> inputs,
+                       const TensorInfo& output,
                        const OriginsDescriptor& descriptor,
                        char* reasonIfUnsupported = nullptr,
                        size_t reasonIfUnsupportedMaxLength = 1024);
diff --git a/src/armnn/Descriptors.cpp b/src/armnn/Descriptors.cpp
index a017806782..a200c6462c 100644
--- a/src/armnn/Descriptors.cpp
+++ b/src/armnn/Descriptors.cpp
@@ -72,13 +72,15 @@ PermutationVector::PermutationVector(std::initializer_list<ValueType> dimMapping
 }
 
 OriginsDescriptor::OriginsDescriptor()
-: m_NumViews(0)
+: m_ConcatAxis(1)
+, m_NumViews(0)
 , m_NumDimensions(0)
 , m_ViewOrigins(nullptr)
 {}
 
 OriginsDescriptor::OriginsDescriptor(uint32_t numViews, uint32_t numDimensions /*= 4*/)
-: m_NumViews(numViews)
+: m_ConcatAxis(1)
+, m_NumViews(numViews)
 , m_NumDimensions(numDimensions)
 , m_ViewOrigins(numViews && numDimensions > 0 ? new uint32_t *[numViews]() : nullptr)
 {
@@ -89,7 +91,8 @@ OriginsDescriptor::OriginsDescriptor(uint32_t numViews, uint32_t numDimensions /
 }
 
 OriginsDescriptor::OriginsDescriptor(const OriginsDescriptor& other)
-: m_NumViews(other.m_NumViews)
+: m_ConcatAxis(other.m_ConcatAxis)
+, m_NumViews(other.m_NumViews)
 , m_NumDimensions(other.m_NumDimensions)
 , m_ViewOrigins(other.m_NumViews && other.m_NumDimensions > 0 ? new uint32_t *[other.m_NumViews]() : nullptr)
 {
@@ -121,6 +124,15 @@ OriginsDescriptor& OriginsDescriptor::operator=(OriginsDescriptor rhs)
     return *this;
 }
 
+void OriginsDescriptor::SetConcatAxis(unsigned int concatAxis)
+{
+    m_ConcatAxis = concatAxis;
+}
+unsigned int OriginsDescriptor::GetConcatAxis() const
+{
+    return m_ConcatAxis;
+}
+
 Status OriginsDescriptor::SetViewOriginCoord(uint32_t view, uint32_t coord, uint32_t value)
 {
     if (view >= m_NumViews)
@@ -284,6 +296,7 @@ void swap(OriginsDescriptor& first, OriginsDescriptor& second)
     swap(first.m_NumViews, second.m_NumViews);
     swap(first.m_NumDimensions, second.m_NumDimensions);
     swap(first.m_ViewOrigins, second.m_ViewOrigins);
+    swap(first.m_ConcatAxis, second.m_ConcatAxis);
 }
 
 void swap(ViewsDescriptor& first, ViewsDescriptor& second)
diff --git a/src/armnn/LayerSupport.cpp b/src/armnn/LayerSupport.cpp
index 91aca4ea41..6489fe4f66 100644
--- a/src/armnn/LayerSupport.cpp
+++ b/src/armnn/LayerSupport.cpp
@@ -257,12 +257,13 @@ bool IsLstmSupported(const BackendId& backend, const TensorInfo& input, const Te
 }
 
 bool IsMergerSupported(const BackendId& backend,
                        std::vector<const TensorInfo*> inputs,
+                       const TensorInfo& output,
                        const OriginsDescriptor& descriptor,
                        char* reasonIfUnsupported,
                        size_t reasonIfUnsupportedMaxLength)
 {
     BOOST_ASSERT(inputs.size() > 0);
 
-    FORWARD_LAYER_SUPPORT_FUNC(backend, IsMergerSupported, inputs, descriptor);
+    FORWARD_LAYER_SUPPORT_FUNC(backend, IsMergerSupported, inputs, output, descriptor);
 }
 
 bool IsMultiplicationSupported(const BackendId& backend,
diff --git a/src/armnn/layers/MergerLayer.cpp b/src/armnn/layers/MergerLayer.cpp
index e80661a493..545c7e0e0e 100644
--- a/src/armnn/layers/MergerLayer.cpp
+++ b/src/armnn/layers/MergerLayer.cpp
@@ -40,7 +40,10 @@ void MergerLayer::CreateTensorHandles(Graph& graph, const IWorkloadFactory& fact
     //just needs to make sure that the outputs of the prev layer
     //are made subtensors of the output of the merger layer.
     m_OutputHandlers[0].CreateTensorHandles(factory);
-    if (factory.SupportsSubTensors())
+
+    signed long innerAxis = m_Param.GetNumDimensions() - m_Param.GetConcatAxis();
+
+    if (factory.SupportsSubTensors() && innerAxis != 1)
     {
         std::queue<MergerLayer*> m_MergerLayers;
 
diff --git a/src/backends/backendsCommon/ILayerSupport.cpp b/src/backends/backendsCommon/ILayerSupport.cpp
index dc106e344e..55dd447369 100644
--- a/src/backends/backendsCommon/ILayerSupport.cpp
+++ b/src/backends/backendsCommon/ILayerSupport.cpp
@@ -192,6 +192,7 @@ bool ILayerSupport::IsMeanSupported(const TensorInfo& input,
 }
 
 bool ILayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                                      const TensorInfo& output,
                                       const OriginsDescriptor& descriptor,
                                       Optional<std::string&> reasonIfUnsupported) const
 {
diff --git a/src/backends/backendsCommon/WorkloadData.cpp b/src/backends/backendsCommon/WorkloadData.cpp
index e1146543ff..18ab4a8709 100644
--- a/src/backends/backendsCommon/WorkloadData.cpp
+++ b/src/backends/backendsCommon/WorkloadData.cpp
@@ -390,6 +390,16 @@ void MergerQueueDescriptor::Validate(const WorkloadInfo& workloadInfo) const
         throw InvalidArgumentException("MergerQueueDescriptor: At least one TensorInfo output needs to be provided.");
     }
 
+    if(m_Parameters.GetConcatAxis() > workloadInfo.m_InputTensorInfos[0].GetShape().GetNumDimensions())
+    {
+        throw InvalidArgumentException("Invalid Concatenation Axis provided");
+    }
+
+    if (workloadInfo.m_InputTensorInfos[0].GetShape().GetNumDimensions() - m_Parameters.GetConcatAxis() == 1)
+    {
+        return;
+    }
+
     if (workloadInfo.m_InputTensorInfos.size() != m_ViewOrigins.size())
     {
         throw InvalidArgumentException(
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index dc38f1a721..2e2824e521 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -471,7 +471,9 @@ bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
             auto endPtr = boost::make_transform_iterator(inputs.end(), getTensorInfoPtr);
             std::vector<const TensorInfo*> inputPtrs(beginPtr, endPtr);
 
-            result = layerSupportObject->IsMergerSupported(inputPtrs, cLayer->GetParameters(), reason);
+            const TensorInfo& output = layer.GetOutputSlot(0).GetTensorInfo();
+
+            result = layerSupportObject->IsMergerSupported(inputPtrs, output, cLayer->GetParameters(), reason);
             break;
         }
         case LayerType::Multiplication:
diff --git a/src/backends/backendsCommon/test/LayerTests.cpp b/src/backends/backendsCommon/test/LayerTests.cpp
index bd8b38da01..f5689e74bf 100755
--- a/src/backends/backendsCommon/test/LayerTests.cpp
+++ b/src/backends/backendsCommon/test/LayerTests.cpp
@@ -2235,6 +2235,7 @@ void Concatenate(
     }
 
     armnn::OriginsDescriptor viewsDescriptor = CreateMergerDescriptorForConcatenation(inputTensorInfos, concatDim);
+    queueDescriptor.m_Parameters = viewsDescriptor;
     queueDescriptor.m_ViewOrigins.reserve(viewsDescriptor.GetNumViews());
 
     for (unsigned int i = 0; i < viewsDescriptor.GetNumViews(); ++i)
diff --git a/src/backends/cl/ClLayerSupport.cpp b/src/backends/cl/ClLayerSupport.cpp
index 039f1c24f0..a5030beb47 100644
--- a/src/backends/cl/ClLayerSupport.cpp
+++ b/src/backends/cl/ClLayerSupport.cpp
@@ -27,6 +27,7 @@
 #include "workloads/ClL2NormalizationFloatWorkload.hpp"
 #include "workloads/ClLstmFloatWorkload.hpp"
 #include "workloads/ClMeanWorkload.hpp"
+#include "workloads/ClMergerWorkload.hpp"
 #include "workloads/ClMultiplicationWorkload.hpp"
 #include "workloads/ClNormalizationFloatWorkload.hpp"
 #include "workloads/ClPadWorkload.hpp"
@@ -366,14 +367,25 @@ bool ClLayerSupport::IsMeanSupported(const TensorInfo& input,
 }
 
 bool ClLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                                       const TensorInfo& output,
                                        const OriginsDescriptor& descriptor,
                                        Optional<std::string&> reasonIfUnsupported) const
 {
-    ignore_unused(descriptor);
-    return IsSupportedForDataTypeCl(reasonIfUnsupported,
-                                    inputs[0]->GetDataType(),
-                                    &TrueFunc<>,
-                                    &FalseFuncU8<>);
+    if(descriptor.GetNumDimensions() - descriptor.GetConcatAxis() == 1)
+    {
+        FORWARD_WORKLOAD_VALIDATE_FUNC(ClMergerWorkloadValidate,
+                                       reasonIfUnsupported,
+                                       inputs,
+                                       output,
+                                       descriptor);
+    }
+    else
+    {
+        return IsSupportedForDataTypeCl(reasonIfUnsupported,
+                                        inputs[0]->GetDataType(),
+                                        &TrueFunc<>,
+                                        &FalseFuncU8<>);
+    }
 }
 
 bool ClLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
diff --git a/src/backends/cl/ClLayerSupport.hpp b/src/backends/cl/ClLayerSupport.hpp
index 6bdeb5a6f6..b4d278995d 100644
--- a/src/backends/cl/ClLayerSupport.hpp
+++ b/src/backends/cl/ClLayerSupport.hpp
@@ -117,6 +117,7 @@ public:
                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
     bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                           const TensorInfo& output,
                            const OriginsDescriptor& descriptor,
                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
diff --git a/src/backends/cl/backend.mk b/src/backends/cl/backend.mk
index fd7ea80f33..551e1a0e4b 100644
--- a/src/backends/cl/backend.mk
+++ b/src/backends/cl/backend.mk
@@ -28,6 +28,7 @@ BACKEND_SOURCES := \
         workloads/ClL2NormalizationFloatWorkload.cpp \
         workloads/ClLstmFloatWorkload.cpp \
         workloads/ClMeanWorkload.cpp \
+        workloads/ClMergerWorkload.cpp \
         workloads/ClMultiplicationWorkload.cpp \
         workloads/ClNormalizationFloatWorkload.cpp \
         workloads/ClPadWorkload.cpp \
diff --git a/src/backends/cl/workloads/CMakeLists.txt b/src/backends/cl/workloads/CMakeLists.txt
index 901dd748e4..736cf5c4e5 100644
--- a/src/backends/cl/workloads/CMakeLists.txt
+++ b/src/backends/cl/workloads/CMakeLists.txt
@@ -32,6 +32,7 @@ list(APPEND armnnClBackendWorkloads_sources
     ClLstmFloatWorkload.hpp
     ClMeanWorkload.cpp
     ClMeanWorkload.hpp
+    ClMergerWorkload.cpp
     ClMergerWorkload.hpp
     ClMultiplicationWorkload.cpp
     ClMultiplicationWorkload.hpp
diff --git a/src/backends/cl/workloads/ClMergerWorkload.cpp b/src/backends/cl/workloads/ClMergerWorkload.cpp
new file mode 100644
index 0000000000..e06d8c51f5
--- /dev/null
+++ b/src/backends/cl/workloads/ClMergerWorkload.cpp
@@ -0,0 +1,85 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+#include "ClMergerWorkload.hpp"
+#include "ClWorkloadUtils.hpp"
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <cl/ClTensorHandle.hpp>
+#include <cl/ClLayerSupport.hpp>
+
+#include <arm_compute/core/Types.h>
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status ClMergerWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
+                                             const TensorInfo& output,
+                                             const MergerDescriptor& descriptor)
+
+{
+    std::vector<arm_compute::TensorInfo> aclInputs;
+    for (const TensorInfo* input : inputs)
+    {
+        arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
+        aclInputs.emplace_back(aclInputInfo);
+    }
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+    arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
+
+    std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
+    for (arm_compute::ITensorInfo& input : aclInputs)
+    {
+        aclInputPtrs.emplace_back(&input);
+    }
+
+    return arm_compute::CLConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
+
+}
+
+ClMergerWorkload::ClMergerWorkload(const MergerQueueDescriptor& descriptor, const WorkloadInfo& info)
+: BaseWorkload<MergerQueueDescriptor>(descriptor, info)
+{
+    m_Execute = true;
+
+    unsigned int innerAxisOrder = descriptor.m_Parameters.GetNumDimensions() - descriptor.m_Parameters.GetConcatAxis();
+
+    if (innerAxisOrder != 1)
+    {
+        m_Execute = false;
+        return;
+    }
+
+    std::vector<arm_compute::ICLTensor*> aclInputs;
+    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
+    for (auto input : m_Data.m_Inputs)
+    {
+        arm_compute::ICLTensor& aclInput = boost::polymorphic_pointer_downcast<IClTensorHandle>(input)->GetTensor();
+        aclInput.info()->set_data_layout(aclDataLayout);
+        aclInputs.emplace_back(&aclInput);
+    }
+    arm_compute::ICLTensor& output = boost::polymorphic_pointer_downcast<IClTensorHandle>(
+        m_Data.m_Outputs[0])->GetTensor();
+    output.info()->set_data_layout(aclDataLayout);
+
+    arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
+
+    m_Layer.configure(aclInputs, &output, aclAxis);
+
+    m_Layer.prepare();
+
+}
+
+void ClMergerWorkload::Execute() const
+{
+    if (m_Execute)
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_CL("ClMergerWorkload_Execute");
+        m_Layer.run();
+    }
+
+}
+
+} //namespace armnn
\ No newline at end of file
diff --git a/src/backends/cl/workloads/ClMergerWorkload.hpp b/src/backends/cl/workloads/ClMergerWorkload.hpp
index 948fb58bd1..8189a1b24a 100644
--- a/src/backends/cl/workloads/ClMergerWorkload.hpp
+++ b/src/backends/cl/workloads/ClMergerWorkload.hpp
@@ -7,18 +7,25 @@
 
 #include <backendsCommon/Workload.hpp>
 
+#include <arm_compute/runtime/CL/functions/CLConcatenateLayer.h>
+
 namespace armnn
 {
 
+arm_compute::Status ClMergerWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
+                                             const TensorInfo& output,
+                                             const MergerDescriptor& descriptor);
+
 class ClMergerWorkload : public BaseWorkload<MergerQueueDescriptor>
 {
 public:
-    using BaseWorkload<MergerQueueDescriptor>::BaseWorkload;
+    ClMergerWorkload(const MergerQueueDescriptor& descriptor, const WorkloadInfo& info);
+
+    void Execute() const override;
 
-    void Execute() const override
-    {
-        // With subtensors, merger is a no-op.
-    }
+private:
+    mutable arm_compute::CLConcatenateLayer m_Layer;
+    bool m_Execute;
 };
 
 } //namespace armnn
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 28c4b75f2a..fd279e5d55 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -23,6 +23,7 @@
 #include "workloads/NeonConvolution2dWorkload.hpp"
 #include "workloads/NeonDepthwiseConvolutionWorkload.hpp"
 #include "workloads/NeonL2NormalizationFloatWorkload.hpp"
+#include "workloads/NeonMergerWorkload.hpp"
 #include "workloads/NeonMultiplicationFloatWorkload.hpp"
 #include "workloads/NeonNormalizationFloatWorkload.hpp"
 #include "workloads/NeonFullyConnectedWorkload.hpp"
@@ -334,14 +335,25 @@ bool NeonLayerSupport::IsMeanSupported(const TensorInfo& input,
 }
 
 bool NeonLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                                         const TensorInfo& output,
                                          const OriginsDescriptor& descriptor,
                                          Optional<std::string&> reasonIfUnsupported) const
 {
-    ignore_unused(descriptor);
-    return IsSupportedForDataTypeNeon(reasonIfUnsupported,
-                                      inputs[0]->GetDataType(),
-                                      &TrueFunc<>,
-                                      &TrueFunc<>);
+    if(descriptor.GetNumDimensions() - descriptor.GetConcatAxis() == 1)
+    {
+        FORWARD_WORKLOAD_VALIDATE_FUNC(NeonMergerWorkloadValidate,
+                                       reasonIfUnsupported,
+                                       inputs,
+                                       output,
+                                       descriptor);
+    }
+    else
+    {
+        return IsSupportedForDataTypeNeon(reasonIfUnsupported,
+                                          inputs[0]->GetDataType(),
+                                          &TrueFunc<>,
+                                          &TrueFunc<>);
+    }
 }
 
 bool NeonLayerSupport::IsMultiplicationSupported(const TensorInfo& input0,
diff --git a/src/backends/neon/NeonLayerSupport.hpp b/src/backends/neon/NeonLayerSupport.hpp
index fb1567c12d..e5cd3cc062 100644
--- a/src/backends/neon/NeonLayerSupport.hpp
+++ b/src/backends/neon/NeonLayerSupport.hpp
@@ -117,6 +117,7 @@ public:
                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
     bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                           const TensorInfo& output,
                            const OriginsDescriptor& descriptor,
                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
diff --git a/src/backends/neon/backend.mk b/src/backends/neon/backend.mk
index 8f7e72b17c..2f74ecd1ce 100644
--- a/src/backends/neon/backend.mk
+++ b/src/backends/neon/backend.mk
@@ -25,6 +25,7 @@ BACKEND_SOURCES := \
         workloads/NeonFullyConnectedWorkload.cpp \
         workloads/NeonL2NormalizationFloatWorkload.cpp \
         workloads/NeonLstmFloatWorkload.cpp \
+        workloads/NeonMergerWorkload.cpp \
        workloads/NeonMultiplicationFloatWorkload.cpp \
         workloads/NeonNormalizationFloatWorkload.cpp \
         workloads/NeonPermuteWorkload.cpp \
diff --git a/src/backends/neon/workloads/CMakeLists.txt b/src/backends/neon/workloads/CMakeLists.txt
index f6e8d1c04a..e383b04f25 100644
--- a/src/backends/neon/workloads/CMakeLists.txt
+++ b/src/backends/neon/workloads/CMakeLists.txt
@@ -28,6 +28,7 @@ list(APPEND armnnNeonBackendWorkloads_sources
     NeonL2NormalizationFloatWorkload.hpp
     NeonLstmFloatWorkload.cpp
     NeonLstmFloatWorkload.hpp
+    NeonMergerWorkload.cpp
     NeonMergerWorkload.hpp
     NeonMultiplicationFloatWorkload.cpp
     NeonMultiplicationFloatWorkload.hpp
diff --git a/src/backends/neon/workloads/NeonMergerWorkload.cpp b/src/backends/neon/workloads/NeonMergerWorkload.cpp
new file mode 100644
index 0000000000..f82e24453a
--- /dev/null
+++ b/src/backends/neon/workloads/NeonMergerWorkload.cpp
@@ -0,0 +1,84 @@
+//
+// Copyright © 2017 Arm Ltd. All rights reserved.
+// SPDX-License-Identifier: MIT
+//
+
+#include "NeonMergerWorkload.hpp"
+#include <armnn/ArmNN.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
+#include <backendsCommon/CpuTensorHandle.hpp>
+#include <neon/NeonTensorHandle.hpp>
+
+
+namespace armnn
+{
+using namespace armcomputetensorutils;
+
+arm_compute::Status NeonMergerWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
+                                               const TensorInfo& output,
+                                               const MergerDescriptor& descriptor)
+
+{
+    std::vector<arm_compute::TensorInfo> aclInputs;
+    for (const TensorInfo* input : inputs)
+    {
+        arm_compute::TensorInfo aclInputInfo = BuildArmComputeTensorInfo(*input, armnn::DataLayout::NCHW);
+        aclInputs.emplace_back(aclInputInfo);
+    }
+    const arm_compute::TensorInfo aclOutputInfo = BuildArmComputeTensorInfo(output);
+    arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
+
+    std::vector<arm_compute::ITensorInfo*> aclInputPtrs;
+    for (arm_compute::ITensorInfo& input : aclInputs)
+    {
+        aclInputPtrs.emplace_back(&input);
+    }
+
+    return arm_compute::NEConcatenateLayer::validate(aclInputPtrs, &aclOutputInfo, aclAxis);
+
+}
+
+NeonMergerWorkload::NeonMergerWorkload(
+const MergerQueueDescriptor& descriptor, const WorkloadInfo& info)
+    : BaseWorkload<MergerQueueDescriptor>(descriptor, info)
+{
+    m_Execute = true;
+
+    unsigned int innerAxisOrder = descriptor.m_Parameters.GetNumDimensions() - descriptor.m_Parameters.GetConcatAxis();
+
+    if (innerAxisOrder != 1)
+    {
+        m_Execute = false;
+        return;
+    }
+
+    std::vector<arm_compute::ITensor*> aclInputs;
+    arm_compute::DataLayout aclDataLayout = ConvertDataLayout(armnn::DataLayout::NCHW);
+    for (auto input : m_Data.m_Inputs)
+    {
+        arm_compute::ITensor& aclInput = boost::polymorphic_pointer_downcast<INeonTensorHandle>(input)->GetTensor();
+        aclInput.info()->set_data_layout(aclDataLayout);
+        aclInputs.emplace_back(&aclInput);
+    }
+    arm_compute::ITensor& output = boost::polymorphic_pointer_downcast<INeonTensorHandle>(
+        m_Data.m_Outputs[0])->GetTensor();
+    output.info()->set_data_layout(aclDataLayout);
+
+    arm_compute::DataLayoutDimension aclAxis = arm_compute::DataLayoutDimension::WIDTH;
+
+    m_Layer.configure(aclInputs, &output, aclAxis);
+
+    m_Layer.prepare();
+}
+
+void NeonMergerWorkload::Execute() const
+{
+    if (m_Execute)
+    {
+        ARMNN_SCOPED_PROFILING_EVENT_NEON("NeonMergerWorkload_Execute");
+        m_Layer.run();
+    }
+}
+
+} //namespace armnn
+
diff --git a/src/backends/neon/workloads/NeonMergerWorkload.hpp b/src/backends/neon/workloads/NeonMergerWorkload.hpp
index 7103d8a469..a4f36d18bc 100644
--- a/src/backends/neon/workloads/NeonMergerWorkload.hpp
+++ b/src/backends/neon/workloads/NeonMergerWorkload.hpp
@@ -6,18 +6,26 @@
 #pragma once
 
 #include <backendsCommon/Workload.hpp>
+#include <arm_compute/runtime/NEON/functions/NEConcatenateLayer.h>
 
 namespace armnn
 {
 
+arm_compute::Status NeonMergerWorkloadValidate(const std::vector<const TensorInfo*>& inputs,
+                                               const TensorInfo& output,
+                                               const MergerDescriptor& descriptor);
+
 class NeonMergerWorkload : public BaseWorkload<MergerQueueDescriptor>
 {
 public:
+    NeonMergerWorkload(const MergerQueueDescriptor& descriptor, const WorkloadInfo& info);
+
     using BaseWorkload<MergerQueueDescriptor>::BaseWorkload;
+    void Execute() const override;
+
+private:
+    mutable arm_compute::NEConcatenateLayer m_Layer;
+    bool m_Execute;
 
-    virtual void Execute() const override
-    {
-        // With subtensors, merger is a no-op.
-    }
 };
 
 } //namespace armnn
diff --git a/src/backends/reference/RefLayerSupport.cpp b/src/backends/reference/RefLayerSupport.cpp
index 167cba54e8..43a2fa2d07 100644
--- a/src/backends/reference/RefLayerSupport.cpp
+++ b/src/backends/reference/RefLayerSupport.cpp
@@ -322,10 +322,12 @@ bool RefLayerSupport::IsMeanSupported(const TensorInfo& input,
 }
 
 bool RefLayerSupport::IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                                        const TensorInfo& output,
                                         const OriginsDescriptor& descriptor,
                                         Optional<std::string&> reasonIfUnsupported) const
 {
     ignore_unused(descriptor);
+    ignore_unused(output);
     return IsSupportedForDataTypeRef(reasonIfUnsupported,
                                      inputs[0]->GetDataType(),
                                      &TrueFunc<>,
diff --git a/src/backends/reference/RefLayerSupport.hpp b/src/backends/reference/RefLayerSupport.hpp
index 2e86ecee29..a03c89c48c 100644
--- a/src/backends/reference/RefLayerSupport.hpp
+++ b/src/backends/reference/RefLayerSupport.hpp
@@ -122,6 +122,7 @@ public:
                          Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
     bool IsMergerSupported(const std::vector<const TensorInfo*> inputs,
+                           const TensorInfo& output,
                            const OriginsDescriptor& descriptor,
                            Optional<std::string&> reasonIfUnsupported = EmptyOptional()) const override;
 
-- 
cgit v1.2.1
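
A note on the axis arithmetic that gates the new ACL path: ArmNN counts the
concat axis from the outermost dimension, while ACL concatenates along a named
data-layout dimension, so the patch computes GetNumDimensions() - GetConcatAxis()
and hands the merger to ACL only when the result is 1 (the innermost axis,
which both workloads map to DataLayoutDimension::WIDTH under NCHW). The
standalone C++ sketch below illustrates that check; it is not part of the
patch, and InnerAxisOrder is a hypothetical helper name used only for
illustration.

// Illustrative sketch only, not ArmNN source. Mirrors the test
// descriptor.GetNumDimensions() - descriptor.GetConcatAxis() == 1
// used in ClLayerSupport/NeonLayerSupport above.
#include <cstdio>

// Distance of the concat axis from the innermost dimension, plus one.
unsigned int InnerAxisOrder(unsigned int numDimensions, unsigned int concatAxis)
{
    return numDimensions - concatAxis;
}

int main()
{
    // 4D NCHW tensor: axis 3 (W) is innermost, so the order is 1 and the
    // merger can be delegated to CLConcatenateLayer/NEConcatenateLayer.
    std::printf("concat on W: order %u -> ACL path\n", InnerAxisOrder(4, 3));

    // Axis 1 (C) gives order 3, so the layer keeps the existing
    // subtensor path instead.
    std::printf("concat on C: order %u -> subtensor path\n", InnerAxisOrder(4, 1));
    return 0;
}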