diff options
Diffstat (limited to 'src/backends/neon')
-rw-r--r-- | src/backends/neon/NeonBackend.cpp | 20 | ||||
-rw-r--r-- | src/backends/neon/NeonBackend.hpp | 6 | ||||
-rw-r--r-- | src/backends/neon/NeonLayerSupport.cpp | 2 | ||||
-rw-r--r-- | src/backends/neon/NeonWorkloadFactory.cpp | 35 | ||||
-rw-r--r-- | src/backends/neon/NeonWorkloadFactory.hpp | 10 | ||||
-rw-r--r-- | src/backends/neon/test/NeonCreateWorkloadTests.cpp | 29 | ||||
-rw-r--r-- | src/backends/neon/test/NeonWorkloadFactoryHelper.hpp | 8 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonConvolution2dWorkload.cpp | 25 | ||||
-rw-r--r-- | src/backends/neon/workloads/NeonConvolution2dWorkload.hpp | 10 |
9 files changed, 133 insertions, 12 deletions
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp index 31e08ceaf5..d300960052 100644 --- a/src/backends/neon/NeonBackend.cpp +++ b/src/backends/neon/NeonBackend.cpp @@ -48,6 +48,13 @@ IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory( } IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory( + const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const +{ + return std::make_unique<NeonWorkloadFactory>( + PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions)); +} + +IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory( class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const { auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(), @@ -60,6 +67,19 @@ IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory( PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager)); } +IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory( + TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const +{ + auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(), + BaseMemoryManager::MemoryAffinity::Offset); + + tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager); + tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager)); + + return std::make_unique<NeonWorkloadFactory>( + PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions)); +} + IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const { return IBackendContextPtr{}; diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp index 6458eccb6b..42c6666930 100644 --- 
a/src/backends/neon/NeonBackend.hpp +++ b/src/backends/neon/NeonBackend.hpp @@ -26,6 +26,12 @@ public: IWorkloadFactoryPtr CreateWorkloadFactory( class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override; + IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager, + const ModelOptions& modelOptions) const override; + + IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, + const ModelOptions& modelOptions) const override; + IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext( const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override; diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp index 853a518b45..0084dbd03f 100644 --- a/src/backends/neon/NeonLayerSupport.cpp +++ b/src/backends/neon/NeonLayerSupport.cpp @@ -329,7 +329,7 @@ bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input, { if (m_ModelContextPtr.get() != nullptr) { - auto modelOptions = armnn::PolymorphicDowncast<NeonBackendModelContext*>(m_ModelContextPtr.get()); + auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get()); if (modelOptions) { isFastMathEnabled = modelOptions->IsFastMathEnabled(); diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp index 40010fe329..928989b1e4 100644 --- a/src/backends/neon/NeonWorkloadFactory.cpp +++ b/src/backends/neon/NeonWorkloadFactory.cpp @@ -4,6 +4,7 @@ // #include "NeonBackendId.hpp" +#include "NeonBackendModelContext.hpp" #include "NeonTensorHandle.hpp" #include "NeonWorkloadFactory.hpp" @@ -36,13 +37,27 @@ bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer, return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported); } +bool 
NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer, + Optional<DataType> dataType, + std::string& outReasonIfUnsupported, + const ModelOptions& modelOptions) +{ + return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions); +} + const BackendId& NeonWorkloadFactory::GetBackendId() const { return s_Id; } NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager) - : m_MemoryManager(memoryManager) + : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{}) +{ +} + +NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager, + const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr) + : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr) { } @@ -184,8 +199,22 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16( std::unique_ptr<armnn::IWorkload> NeonWorkloadFactory::CreateConvolution2d( const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return std::make_unique<NeonConvolution2dWorkload>(descriptor, info, - m_MemoryManager->GetIntraLayerManager()); + bool isFastMathEnabled = false; + if (m_ModelContextPtr) + { + if (m_ModelContextPtr.get() != nullptr) + { + auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get()); + if (modelOptions) + { + isFastMathEnabled = modelOptions->IsFastMathEnabled(); + } + } + } + return std::make_unique<NeonConvolution2dWorkload>(descriptor, + info, + m_MemoryManager->GetIntraLayerManager(), + isFastMathEnabled); } std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor, diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp index 09ae839e27..6a514e2812 100644 --- a/src/backends/neon/NeonWorkloadFactory.hpp +++ b/src/backends/neon/NeonWorkloadFactory.hpp @@ -5,6 
+5,7 @@ #pragma once #include <armnn/Optional.hpp> +#include <armnn/backends/IBackendInternal.hpp> #include <backendsCommon/WorkloadFactoryBase.hpp> #include <aclCommon/BaseMemoryManager.hpp> @@ -19,12 +20,20 @@ class NeonWorkloadFactory : public WorkloadFactoryBase public: NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager); + NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager, + const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr); + const BackendId& GetBackendId() const override; static bool IsLayerSupported(const Layer& layer, Optional<DataType> dataType, std::string& outReasonIfUnsupported); + static bool IsLayerSupported(const IConnectableLayer& layer, + Optional<DataType> dataType, + std::string& outReasonIfUnsupported, + const ModelOptions& modelOptions); + bool SupportsSubTensors() const override { return true; } ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead") @@ -238,6 +247,7 @@ public: private: mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager; + const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr; }; } // namespace armnn diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp index 37d026f107..99ff9ae8b8 100644 --- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp +++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp @@ -6,6 +6,8 @@ #include "NeonWorkloadFactoryHelper.hpp" #include <aclCommon/ArmComputeTensorUtils.hpp> +#include <armnn/utility/Assert.hpp> +#include <armnn/utility/IgnoreUnused.hpp> #include <armnn/utility/PolymorphicDowncast.hpp> #include <backendsCommon/MemCopyWorkload.hpp> @@ -276,6 +278,33 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload) NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC); } +BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload) +{ + Graph graph; + using ModelOptions = 
std::vector<BackendOptions>; + ModelOptions modelOptions = {}; + BackendOptions cpuAcc("CpuAcc", + { + { "FastMathEnabled", true } + }); + modelOptions.push_back(cpuAcc); + NeonWorkloadFactory factory = + NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions); + + auto workload = + CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory, + graph, + DataLayout::NCHW, + modelOptions); + + ARMNN_ASSERT(workload != nullptr); + auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get()); + IgnoreUnused(conv2dWorkload); + ARMNN_ASSERT(conv2dWorkload != nullptr); + // fast_math enabled but configuration does not match with WINOGRAD + ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM); +} + template <typename armnn::DataType DataType> static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout) { diff --git a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp index 87150115f8..89052d634e 100644 --- a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp +++ b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp @@ -27,10 +27,12 @@ struct WorkloadFactoryHelper<armnn::NeonWorkloadFactory> } static armnn::NeonWorkloadFactory GetFactory( - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ModelOptions& modelOptions = {}) { - return armnn::NeonWorkloadFactory( - armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager)); + armnn::NeonBackend backend; + return armnn::NeonWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager), + backend.CreateBackendSpecificModelContext(modelOptions)); } static armnn::NeonTensorHandleFactory GetTensorHandleFactory( diff --git 
a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp index 83f761158a..d35b9685be 100644 --- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp +++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp @@ -59,8 +59,10 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input, } NeonConvolution2dWorkload::NeonConvolution2dWorkload( - const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const bool isFastMathEnabled) : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info) { using arm_compute::NEDirectConvolutionLayer; @@ -97,7 +99,19 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload( &output, padStrideInfo, arm_compute::WeightsInfo(), - aclDilationInfo); + aclDilationInfo, + arm_compute::ActivationLayerInfo(), + isFastMathEnabled); + + m_ConvolutionMethod = + convolutionLayer->get_convolution_method(input.info(), + m_KernelTensor->info(), + output.info(), + padStrideInfo, + arm_compute::WeightsInfo(), + aclDilationInfo, + arm_compute::ActivationLayerInfo(), + isFastMathEnabled); m_ConvolutionLayer.reset(convolutionLayer.release()); @@ -120,6 +134,11 @@ void NeonConvolution2dWorkload::Execute() const m_ConvolutionLayer->run(); } +arm_compute::ConvolutionMethod NeonConvolution2dWorkload::GetConvolutionMethod() const +{ + return m_ConvolutionMethod; +} + void NeonConvolution2dWorkload::FreeUnusedTensors() { FreeTensorIfUnused(m_KernelTensor); diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp index 54e08a2042..860d78ba7e 100644 --- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp +++ 
b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp @@ -28,17 +28,23 @@ class NeonConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescript public: using BaseWorkload<Convolution2dQueueDescriptor>::m_Data; - NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const bool isFastMathEnabled = false); void Execute() const override; + arm_compute::ConvolutionMethod GetConvolutionMethod() const; + private: std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer; std::unique_ptr<arm_compute::Tensor> m_KernelTensor; std::unique_ptr<arm_compute::Tensor> m_BiasTensor; + arm_compute::ConvolutionMethod m_ConvolutionMethod; + void FreeUnusedTensors(); }; |