From c577f2c6a3b4ddb6ba87a882723c53a248afbeba Mon Sep 17 00:00:00 2001 From: telsoa01 Date: Fri, 31 Aug 2018 09:22:23 +0100 Subject: Release 18.08 --- src/armnn/backends/NeonWorkloadFactory.cpp | 110 +++++++++++++++++++++-------- 1 file changed, 81 insertions(+), 29 deletions(-) (limited to 'src/armnn/backends/NeonWorkloadFactory.cpp') diff --git a/src/armnn/backends/NeonWorkloadFactory.cpp b/src/armnn/backends/NeonWorkloadFactory.cpp index a17988de5a..6ea72f77cc 100644 --- a/src/armnn/backends/NeonWorkloadFactory.cpp +++ b/src/armnn/backends/NeonWorkloadFactory.cpp @@ -9,10 +9,13 @@ #ifdef ARMCOMPUTENEON_ENABLED #include "arm_compute/runtime/Allocator.h" + #include "MemCopyWorkload.hpp" #include "NeonTensorHandle.hpp" #include "NeonWorkloadUtils.hpp" #include "NeonWorkloads.hpp" + +#include "memory/IPoolManager.hpp" #endif #include "MakeWorkloadHelper.hpp" @@ -22,7 +25,8 @@ namespace armnn { -bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType, std::string& outReasonIfUnsupported) +bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, boost::optional dataType, + std::string& outReasonIfUnsupported) { return IWorkloadFactory::IsLayerSupported(Compute::CpuAcc, layer, dataType, outReasonIfUnsupported); } @@ -30,7 +34,7 @@ bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, DataType dataType #ifdef ARMCOMPUTENEON_ENABLED NeonWorkloadFactory::NeonWorkloadFactory() -: m_MemoryManager(std::make_unique()) + : m_MemoryManager(std::make_unique(), BaseMemoryManager::MemoryAffinity::Offset) { } @@ -46,30 +50,33 @@ std::unique_ptr NeonWorkloadFactory::CreateSubTensorHandle(ITenso coords.set_num_dimensions(subTensorShape.GetNumDimensions()); for (unsigned int i = 0; i < subTensorShape.GetNumDimensions(); i++) { - // arm compute indexes tensor coords in reverse order + // Arm compute indexes tensor coords in reverse order. unsigned int revertedIndex = subTensorShape.GetNumDimensions() - i - 1; coords.set(i, boost::numeric_cast(subTensorOrigin[revertedIndex])); } - return std::make_unique(boost::polymorphic_downcast(&parent)->GetTensor(), - shape, coords); + return std::make_unique( + boost::polymorphic_downcast(&parent), shape, coords); } std::unique_ptr NeonWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const { - return std::make_unique(tensorInfo); + auto tensorHandle = std::make_unique(tensorInfo); + tensorHandle->SetMemoryGroup(m_MemoryManager.GetInterLayerMemoryGroup()); + + return tensorHandle; } std::unique_ptr NeonWorkloadFactory::CreateInput(const InputQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info); + return MakeWorkload(descriptor, info); } std::unique_ptr NeonWorkloadFactory::CreateOutput(const OutputQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info); + return MakeWorkload(descriptor, info); } std::unique_ptr NeonWorkloadFactory::CreateActivation(const ActivationQueueDescriptor& descriptor, @@ -82,7 +89,7 @@ std::unique_ptr NeonWorkloadFactory::CreateSoftmax(const SoftmaxQueue const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, - m_MemoryManager.Get()); + m_MemoryManager.GetIntraLayerManager()); } std::unique_ptr NeonWorkloadFactory::CreateSplitter(const SplitterQueueDescriptor& descriptor, @@ -100,13 +107,14 @@ std::unique_ptr NeonWorkloadFactory::CreateMerger(const Merger std::unique_ptr NeonWorkloadFactory::CreateFullyConnected( const FullyConnectedQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info, m_MemoryManager.Get()); + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); } std::unique_ptr NeonWorkloadFactory::CreatePermute(const PermuteQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info); + return MakeWorkload(descriptor, info); } std::unique_ptr NeonWorkloadFactory::CreatePooling2d(const Pooling2dQueueDescriptor& descriptor, @@ -119,7 +127,7 @@ std::unique_ptr NeonWorkloadFactory::CreateConvolution2d( const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { return MakeWorkload(descriptor, info, - m_MemoryManager.Get()); + m_MemoryManager.GetIntraLayerManager()); } std::unique_ptr NeonWorkloadFactory::CreateDepthwiseConvolution2d( @@ -132,7 +140,8 @@ std::unique_ptr NeonWorkloadFactory::CreateDepthwiseConvolution2d( std::unique_ptr NeonWorkloadFactory::CreateNormalization( const NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info, m_MemoryManager.Get()); + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); } std::unique_ptr NeonWorkloadFactory::CreateAddition(const AdditionQueueDescriptor& descriptor, @@ -161,21 +170,7 @@ std::unique_ptr NeonWorkloadFactory::CreateMemCopy(const MemCo throw InvalidArgumentException("NeonWorkloadFactory: Invalid null input for MemCopy workload"); } - // Create a workload that will copy tensor data from the inputs, which can have a number of different formats, - // to Neon tensors. - switch (descriptor.m_Inputs[0]->GetType()) - { - case ITensorHandle::Cpu: - return MakeWorkload(descriptor, info); -#if ARMCOMPUTECL_ENABLED - case ITensorHandle::CL: - { - return MakeWorkload(descriptor, info); - } -#endif - default: - throw InvalidArgumentException("NeonWorkloadFactory: Destination type not supported for MemCopy Workload."); - } + return MakeWorkload(descriptor, info); } std::unique_ptr NeonWorkloadFactory::CreateResizeBilinear( @@ -195,7 +190,8 @@ std::unique_ptr NeonWorkloadFactory::CreateFakeQuantization( std::unique_ptr NeonWorkloadFactory::CreateL2Normalization(const L2NormalizationQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info, m_MemoryManager.Get()); + return MakeWorkload(descriptor, info, + m_MemoryManager.GetIntraLayerManager()); } std::unique_ptr NeonWorkloadFactory::CreateConstant(const ConstantQueueDescriptor& descriptor, @@ -216,11 +212,41 @@ std::unique_ptr NeonWorkloadFactory::CreateFloor(const FloorQueueDesc return MakeWorkload(descriptor, info); } +std::unique_ptr NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return MakeWorkload(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return std::make_unique(descriptor, info); +} + void NeonWorkloadFactory::Finalize() { m_MemoryManager.Finalize(); } +void NeonWorkloadFactory::Release() +{ + m_MemoryManager.Release(); +} + +void NeonWorkloadFactory::Acquire() +{ + m_MemoryManager.Acquire(); +} + #else // Compiled without ArmCompute libs NeonWorkloadFactory::NeonWorkloadFactory() @@ -371,9 +397,35 @@ std::unique_ptr NeonWorkloadFactory::CreateFloor(const FloorQueueDesc return nullptr; } +std::unique_ptr NeonWorkloadFactory::CreateLstm(const LstmQueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp16ToFp32( + const ConvertFp16ToFp32QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + +std::unique_ptr NeonWorkloadFactory::CreateConvertFp32ToFp16( + const ConvertFp32ToFp16QueueDescriptor& descriptor, + const WorkloadInfo& info) const +{ + return nullptr; +} + void NeonWorkloadFactory::Finalize() {} +void NeonWorkloadFactory::Release() +{} + +void NeonWorkloadFactory::Acquire() +{} + #endif } //namespace armnn -- cgit v1.2.1