From 04a729708f986b1a69c1efc42d5cf18271cfae1e Mon Sep 17 00:00:00 2001
From: Sadik Armagan <sadik.armagan@arm.com>
Date: Mon, 14 Sep 2020 15:44:18 +0100
Subject: IVGCVSW-5157 'Pipe ModelOption through Network::LoadNetwork() to Workload factory'

* Pass ModelOptions to WorkloadFactory
* Updated signature of CL and NEON Convolution2d workloads, adding a
  FastMathEnabled param.

Signed-off-by: Sadik Armagan <sadik.armagan@arm.com>
Change-Id: I536178be8e4dd4083489e69febadaf0feeba46d2
---
 src/backends/backendsCommon/IBackendInternal.cpp   | 22 ++++++++++++++
 src/backends/backendsCommon/WorkloadFactory.cpp    | 13 ++++++++
 src/backends/backendsCommon/WorkloadFactory.hpp    |  6 ++++
 src/backends/cl/ClBackend.cpp                      | 19 ++++++++++++
 src/backends/cl/ClBackend.hpp                      |  6 ++++
 src/backends/cl/ClWorkloadFactory.cpp              | 34 +++++++++++++++++++--
 src/backends/cl/ClWorkloadFactory.hpp              | 11 +++++++
 src/backends/cl/test/ClCreateWorkloadTests.cpp     | 31 +++++++++++++++++++
 src/backends/cl/test/ClWorkloadFactoryHelper.hpp   |  7 +++--
 .../cl/workloads/ClConvolution2dWorkload.cpp       | 24 +++++++++++++--
 .../cl/workloads/ClConvolution2dWorkload.hpp       | 10 +++++--
 src/backends/neon/NeonBackend.cpp                  | 20 +++++++++++++
 src/backends/neon/NeonBackend.hpp                  |  6 ++++
 src/backends/neon/NeonLayerSupport.cpp             |  2 +-
 src/backends/neon/NeonWorkloadFactory.cpp          | 35 ++++++++++++++++++++--
 src/backends/neon/NeonWorkloadFactory.hpp          | 10 +++++++
 src/backends/neon/test/NeonCreateWorkloadTests.cpp | 29 ++++++++++++++++++
 .../neon/test/NeonWorkloadFactoryHelper.hpp        |  8 +++--
 .../neon/workloads/NeonConvolution2dWorkload.cpp   | 25 ++++++++++++++--
 .../neon/workloads/NeonConvolution2dWorkload.hpp   | 10 +++++--
 src/backends/reference/RefWorkloadFactory.cpp      |  8 +++++
 src/backends/reference/RefWorkloadFactory.hpp      |  5 ++++
 22 files changed, 321 insertions(+), 20 deletions(-)

(limited to 'src/backends')

diff --git a/src/backends/backendsCommon/IBackendInternal.cpp b/src/backends/backendsCommon/IBackendInternal.cpp
index 1cca61efa5..08060331e7 100644
--- a/src/backends/backendsCommon/IBackendInternal.cpp
+++ b/src/backends/backendsCommon/IBackendInternal.cpp
@@ -39,6 +39,28 @@ IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
     return IWorkloadFactoryPtr{};
 }
 
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+    const IMemoryManagerSharedPtr& memoryManager,
+    const ModelOptions& modelOptions) const
+{
+    if(modelOptions.empty())
+    {
+        return CreateWorkloadFactory(memoryManager);
+    }
+    return IWorkloadFactoryPtr{};
+}
+
+IBackendInternal::IWorkloadFactoryPtr IBackendInternal::CreateWorkloadFactory(
+    class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+    const ModelOptions& modelOptions) const
+{
+    if(modelOptions.empty())
+    {
+        return CreateWorkloadFactory(tensorHandleFactoryRegistry);
+    }
+    return IWorkloadFactoryPtr{};
+}
+
 IBackendInternal::IBackendContextPtr IBackendInternal::CreateBackendContext(const IRuntime::CreationOptions&) const
 {
     return IBackendContextPtr{};
diff --git a/src/backends/backendsCommon/WorkloadFactory.cpp b/src/backends/backendsCommon/WorkloadFactory.cpp
index 0bafda257c..54a4157fe3 100644
--- a/src/backends/backendsCommon/WorkloadFactory.cpp
+++ b/src/backends/backendsCommon/WorkloadFactory.cpp
@@ -1243,6 +1243,19 @@ bool IWorkloadFactory::IsLayerSupported(const IConnectableLayer& connectableLayer,
     modelOptions);
 }
 
+bool IWorkloadFactory::IsLayerSupported(const BackendId& backendId,
+                                        const IConnectableLayer& connectableLayer,
+                                        Optional<DataType> dataType,
+                                        std::string& outReasonIfUnsupported,
+                                        const ModelOptions& modelOptions)
+{
+    return IsLayerConfigurationSupported(backendId,
+                                         connectableLayer,
+                                         dataType,
+                                         outReasonIfUnsupported,
+                                         modelOptions);
+}
+
 // Default Implementations
 std::unique_ptr<IWorkload> IWorkloadFactory::CreateAbs(const AbsQueueDescriptor& /*descriptor*/,
                                                        const WorkloadInfo& /*info*/) const
diff --git a/src/backends/backendsCommon/WorkloadFactory.hpp b/src/backends/backendsCommon/WorkloadFactory.hpp
index 68f9da650e..5096c3ba51 100644
--- a/src/backends/backendsCommon/WorkloadFactory.hpp
+++ b/src/backends/backendsCommon/WorkloadFactory.hpp
@@ -39,6 +39,12 @@ public:
                                  std::string& outReasonIfUnsupported,
                                  const ModelOptions& modelOptions);
 
+    static bool IsLayerSupported(const BackendId& backendId,
+                                 const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     virtual bool SupportsSubTensors() const = 0;
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp
index 49636d9b08..6254b0a32a 100644
--- a/src/backends/cl/ClBackend.cpp
+++ b/src/backends/cl/ClBackend.cpp
@@ -45,6 +45,13 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
         PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
 }
 
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+    return std::make_unique<ClWorkloadFactory>(
+        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
 IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
     TensorHandleFactoryRegistry& registry) const
 {
@@ -57,6 +64,18 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
         PolymorphicPointerDowncast<ClMemoryManager>(memoryManager));
 }
 
+IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory(
+    TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const
+{
+    auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>());
+
+    registry.RegisterMemoryManager(memoryManager);
+    registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager));
+
+    return std::make_unique<ClWorkloadFactory>(
+        PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
 std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const
 {
     return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()};
diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp
index 108124cac9..af5534e0d0 100644
--- a/src/backends/cl/ClBackend.hpp
+++ b/src/backends/cl/ClBackend.hpp
@@ -26,6 +26,12 @@ public:
     IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
         TensorHandleFactoryRegistry& registry) const override;
 
+    IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
+                                              const ModelOptions& modelOptions) const override;
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+                                              const ModelOptions& modelOptions) const override;
+
     std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
 
     void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp
index 58e17df5b8..4acfa570f2 100644
--- a/src/backends/cl/ClWorkloadFactory.cpp
+++ b/src/backends/cl/ClWorkloadFactory.cpp
@@ -4,6 +4,7 @@
 //
 #include "ClWorkloadFactory.hpp"
 #include "ClBackendId.hpp"
+#include "ClBackendModelContext.hpp"
 
 #include <Layer.hpp>
 
@@ -42,6 +43,14 @@ bool ClWorkloadFactory::IsLayerSupported(const Layer& layer,
     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
 }
 
+bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+                                         Optional<DataType> dataType,
+                                         std::string& outReasonIfUnsupported,
+                                         const ModelOptions& modelOptions)
+{
+    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
 const BackendId& ClWorkloadFactory::GetBackendId() const
 {
     return s_Id;
@@ -78,7 +87,13 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptorType& descriptor,
 }
 
 ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager)
-    : m_MemoryManager(memoryManager)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+                                     const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
 {
 }
 
@@ -205,7 +220,22 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16(
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor,
                                                                   const WorkloadInfo& info) const
 {
-    return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager());
+    bool isFastMathEnabled = false;
+    if (m_ModelContextPtr)
+    {
+        if (m_ModelContextPtr.get() != nullptr)
+        {
+            auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get());
+            if (modelOptions)
+            {
+                isFastMathEnabled = modelOptions->IsFastMathEnabled();
+            }
+        }
+    }
+    return MakeWorkload<ClConvolution2dWorkload>(descriptor,
+                                                 info,
+                                                 m_MemoryManager->GetIntraLayerManager(),
+                                                 isFastMathEnabled);
 }
 
 std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp
index 80cd7c51f1..fad5dd04fa 100644
--- a/src/backends/cl/ClWorkloadFactory.hpp
+++ b/src/backends/cl/ClWorkloadFactory.hpp
@@ -7,6 +7,8 @@
 #include <armnn/IRuntime.hpp>
 #include <armnn/Optional.hpp>
 
+#include <armnn/backends/IBackendInternal.hpp>
+
 #include <aclCommon/BaseMemoryManager.hpp>
 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
 
@@ -19,12 +21,20 @@ class ClWorkloadFactory : public WorkloadFactoryBase
 {
 public:
     ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager);
 
+    ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager,
+                      const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
     const BackendId& GetBackendId() const override;
 
     static bool IsLayerSupported(const Layer& layer,
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
+    static bool IsLayerSupported(const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     bool SupportsSubTensors() const override { return true; }
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
@@ -242,6 +252,7 @@ private:
                                                   Args&&... args);
 
     mutable std::shared_ptr<ClMemoryManager> m_MemoryManager;
+    const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
 };
 
 } // namespace armnn
diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp
index 1dd0abeadd..fc5ccfe487 100644
--- a/src/backends/cl/test/ClCreateWorkloadTests.cpp
+++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp
@@ -6,6 +6,8 @@
 #include "ClContextControlFixture.hpp"
 #include "ClWorkloadFactoryHelper.hpp"
 
+#include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
 
@@ -304,6 +306,35 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload)
     ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC);
 }
 
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+    Graph graph;
+
+    using ModelOptions = std::vector<BackendOptions>;
+    ModelOptions modelOptions = {};
+    BackendOptions gpuAcc("GpuAcc",
+    {
+        { "FastMathEnabled", true }
+    });
+    modelOptions.push_back(gpuAcc);
+
+    ClWorkloadFactory factory =
+        ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+    auto workload =
+        CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory,
+                                                                                           graph,
+                                                                                           DataLayout::NCHW,
+                                                                                           modelOptions);
+
+    ARMNN_ASSERT(workload != nullptr);
+    auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get());
+    IgnoreUnused(conv2dWorkload);
+    ARMNN_ASSERT(conv2dWorkload != nullptr);
+    // fast_math enabled but configuration does not match with WINOGRAD
+    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
 template <typename DepthwiseConvolutionWorkloadType, armnn::DataType DataType>
 static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout)
 {
diff --git a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
index 6e3c6fc05a..f7f1629b27 100644
--- a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
+++ b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp
@@ -27,9 +27,12 @@ struct WorkloadFactoryHelper<armnn::ClWorkloadFactory>
     }
 
     static armnn::ClWorkloadFactory GetFactory(
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ModelOptions& modelOptions = {})
     {
-        return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager));
+        armnn::ClBackend backend;
+        return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager),
+                                        backend.CreateBackendSpecificModelContext(modelOptions));
     }
 
     static armnn::ClTensorHandleFactory GetTensorHandleFactory(
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
index 42c9903dc4..7b52f2784f 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp
@@ -59,7 +59,9 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
 }
 
 ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
-    const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+                                                 const WorkloadInfo& info,
+                                                 std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                                                 const bool isFastMathEnabled)
     : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
     , m_ConvolutionLayer(memoryManager)
 {
@@ -95,7 +97,20 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
                              &output,
                              padStrideInfo,
                              arm_compute::WeightsInfo(),
-                             aclDilationInfo);
+                             aclDilationInfo,
+                             arm_compute::ActivationLayerInfo(),
+                             isFastMathEnabled);
+
+    m_ConvolutionMethod =
+        m_ConvolutionLayer.get_convolution_method(input.info(),
+                                                  m_KernelTensor->info(),
+                                                  output.info(),
+                                                  padStrideInfo,
+                                                  arm_compute::WeightsInfo(),
+                                                  arm_compute::ActivationLayerInfo(),
+                                                  arm_compute::CLScheduler::get().target(),
+                                                  aclDilationInfo,
+                                                  isFastMathEnabled);
 
     InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight);
 
@@ -116,6 +131,11 @@ void ClConvolution2dWorkload::Execute() const
     RunClFunction(m_ConvolutionLayer, CHECK_LOCATION());
 }
 
+arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() const
+{
+    return m_ConvolutionMethod;
+}
+
 void ClConvolution2dWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
index 8b0afada36..f769422a0a 100644
--- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
+++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp
@@ -28,16 +28,22 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input,
 class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
 {
 public:
-    ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
-                            std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+                            const WorkloadInfo& info,
+                            std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                            const bool isFastMathEnabled = false);
     void Execute() const override;
 
+    arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
 private:
     mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer;
 
     std::unique_ptr<arm_compute::CLTensor> m_KernelTensor;
    std::unique_ptr<arm_compute::CLTensor> m_BiasTensor;
 
+    arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
     void FreeUnusedTensors();
 };
diff --git a/src/backends/neon/NeonBackend.cpp b/src/backends/neon/NeonBackend.cpp
index 31e08ceaf5..d300960052 100644
--- a/src/backends/neon/NeonBackend.cpp
+++ b/src/backends/neon/NeonBackend.cpp
@@ -47,6 +47,13 @@ IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
         PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
 }
 
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+    const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const
+{
+    return std::make_unique<NeonWorkloadFactory>(
+        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
 IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
     class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const
 {
@@ -60,6 +67,19 @@ IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
         PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager));
 }
 
+IBackendInternal::IWorkloadFactoryPtr NeonBackend::CreateWorkloadFactory(
+    TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, const ModelOptions& modelOptions) const
+{
+    auto memoryManager = std::make_shared<NeonMemoryManager>(std::make_unique<arm_compute::Allocator>(),
+                                                             BaseMemoryManager::MemoryAffinity::Offset);
+
+    tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager);
+    tensorHandleFactoryRegistry.RegisterFactory(std::make_unique<NeonTensorHandleFactory>(memoryManager));
+
+    return std::make_unique<NeonWorkloadFactory>(
+        PolymorphicPointerDowncast<NeonMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions));
+}
+
 IBackendInternal::IBackendContextPtr NeonBackend::CreateBackendContext(const IRuntime::CreationOptions&) const
 {
     return IBackendContextPtr{};
diff --git a/src/backends/neon/NeonBackend.hpp b/src/backends/neon/NeonBackend.hpp
index 6458eccb6b..42c6666930 100644
--- a/src/backends/neon/NeonBackend.hpp
+++ b/src/backends/neon/NeonBackend.hpp
@@ -26,6 +26,12 @@ public:
     IWorkloadFactoryPtr CreateWorkloadFactory(
         class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override;
 
+    IWorkloadFactoryPtr CreateWorkloadFactory(const IMemoryManagerSharedPtr& memoryManager,
+                                              const ModelOptions& modelOptions) const override;
+
+    IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+                                              const ModelOptions& modelOptions) const override;
+
     IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
 
     IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
         const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
diff --git a/src/backends/neon/NeonLayerSupport.cpp b/src/backends/neon/NeonLayerSupport.cpp
index 853a518b45..0084dbd03f 100644
--- a/src/backends/neon/NeonLayerSupport.cpp
+++ b/src/backends/neon/NeonLayerSupport.cpp
@@ -329,7 +329,7 @@ bool NeonLayerSupport::IsConvolution2dSupported(const TensorInfo& input,
     {
         if (m_ModelContextPtr.get() != nullptr)
         {
-            auto modelOptions = armnn::PolymorphicDowncast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+            auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
             if (modelOptions)
             {
                 isFastMathEnabled = modelOptions->IsFastMathEnabled();
diff --git a/src/backends/neon/NeonWorkloadFactory.cpp b/src/backends/neon/NeonWorkloadFactory.cpp
index 40010fe329..928989b1e4 100644
--- a/src/backends/neon/NeonWorkloadFactory.cpp
+++ b/src/backends/neon/NeonWorkloadFactory.cpp
@@ -4,6 +4,7 @@
 //
 
 #include "NeonBackendId.hpp"
+#include "NeonBackendModelContext.hpp"
 #include "NeonTensorHandle.hpp"
 #include "NeonWorkloadFactory.hpp"
 
@@ -36,13 +37,27 @@ bool NeonWorkloadFactory::IsLayerSupported(const Layer& layer,
     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
 }
 
+bool NeonWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+                                           Optional<DataType> dataType,
+                                           std::string& outReasonIfUnsupported,
+                                           const ModelOptions& modelOptions)
+{
+    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
 const BackendId& NeonWorkloadFactory::GetBackendId() const
 {
     return s_Id;
 }
 
 NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager)
-    : m_MemoryManager(memoryManager)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{})
+{
+}
+
+NeonWorkloadFactory::NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+                                         const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr)
+    : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr)
 {
 }
 
@@ -184,8 +199,22 @@ std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvertFp32ToFp16(
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateConvolution2d(
     const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const
 {
-    return std::make_unique<NeonConvolution2dWorkload>(descriptor, info,
-                                                       m_MemoryManager->GetIntraLayerManager());
+    bool isFastMathEnabled = false;
+    if (m_ModelContextPtr)
+    {
+        if (m_ModelContextPtr.get() != nullptr)
+        {
+            auto modelOptions = dynamic_cast<NeonBackendModelContext*>(m_ModelContextPtr.get());
+            if (modelOptions)
+            {
+                isFastMathEnabled = modelOptions->IsFastMathEnabled();
+            }
+        }
+    }
+    return std::make_unique<NeonConvolution2dWorkload>(descriptor,
+                                                       info,
+                                                       m_MemoryManager->GetIntraLayerManager(),
+                                                       isFastMathEnabled);
 }
 
 std::unique_ptr<IWorkload> NeonWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor,
diff --git a/src/backends/neon/NeonWorkloadFactory.hpp b/src/backends/neon/NeonWorkloadFactory.hpp
index 09ae839e27..6a514e2812 100644
--- a/src/backends/neon/NeonWorkloadFactory.hpp
+++ b/src/backends/neon/NeonWorkloadFactory.hpp
@@ -5,6 +5,7 @@
 #pragma once
 
 #include <armnn/Optional.hpp>
+#include <armnn/backends/IBackendInternal.hpp>
 #include <backendsCommon/WorkloadFactoryBase.hpp>
 #include <aclCommon/BaseMemoryManager.hpp>
 
@@ -19,12 +20,20 @@ class NeonWorkloadFactory : public WorkloadFactoryBase
 {
 public:
     NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager);
 
+    NeonWorkloadFactory(const std::shared_ptr<NeonMemoryManager>& memoryManager,
+                        const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr);
+
     const BackendId& GetBackendId() const override;
 
     static bool IsLayerSupported(const Layer& layer,
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
+    static bool IsLayerSupported(const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     bool SupportsSubTensors() const override { return true; }
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
@@ -238,6 +247,7 @@ public:
 
 private:
     mutable std::shared_ptr<NeonMemoryManager> m_MemoryManager;
+    const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr;
 };
 
 } // namespace armnn
diff --git a/src/backends/neon/test/NeonCreateWorkloadTests.cpp b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
index 37d026f107..99ff9ae8b8 100644
--- a/src/backends/neon/test/NeonCreateWorkloadTests.cpp
+++ b/src/backends/neon/test/NeonCreateWorkloadTests.cpp
@@ -6,6 +6,8 @@
 #include "NeonWorkloadFactoryHelper.hpp"
 
 #include <aclCommon/test/CreateWorkloadClNeon.hpp>
+#include <armnn/utility/IgnoreUnused.hpp>
+#include <armnn/utility/PolymorphicDowncast.hpp>
 #include <backendsCommon/MemCopyWorkload.hpp>
 #include <neon/NeonWorkloadFactory.hpp>
 
@@ -276,6 +278,33 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloatNhwcWorkload)
     NeonCreateConvolution2dWorkloadTest<DataType::Float32>(DataLayout::NHWC);
 }
 
+BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload)
+{
+    Graph graph;
+    using ModelOptions = std::vector<BackendOptions>;
+    ModelOptions modelOptions = {};
+    BackendOptions cpuAcc("CpuAcc",
+    {
+        { "FastMathEnabled", true }
+    });
+    modelOptions.push_back(cpuAcc);
+    NeonWorkloadFactory factory =
+        NeonWorkloadFactoryHelper::GetFactory(NeonWorkloadFactoryHelper::GetMemoryManager(), modelOptions);
+
+    auto workload =
+        CreateConvolution2dWorkloadTest<NeonConvolution2dWorkload, armnn::DataType::Float32>(factory,
+                                                                                             graph,
+                                                                                             DataLayout::NCHW,
+                                                                                             modelOptions);
+
+    ARMNN_ASSERT(workload != nullptr);
+    auto conv2dWorkload = PolymorphicDowncast<NeonConvolution2dWorkload*>(workload.get());
+    IgnoreUnused(conv2dWorkload);
+    ARMNN_ASSERT(conv2dWorkload != nullptr);
+    // fast_math enabled but configuration does not match with WINOGRAD
+    ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM);
+}
+
 template <armnn::DataType DataType>
 static void NeonCreateDepthWiseConvolutionWorkloadTest(DataLayout dataLayout)
 {
diff --git a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
index 87150115f8..89052d634e 100644
--- a/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
+++ b/src/backends/neon/test/NeonWorkloadFactoryHelper.hpp
@@ -27,10 +27,12 @@ struct WorkloadFactoryHelper<armnn::NeonWorkloadFactory>
     }
 
     static armnn::NeonWorkloadFactory GetFactory(
-        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager)
+        const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager,
+        const armnn::ModelOptions& modelOptions = {})
     {
-        return armnn::NeonWorkloadFactory(
-            armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager));
+        armnn::NeonBackend backend;
+        return armnn::NeonWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::NeonMemoryManager>(memoryManager),
+                                          backend.CreateBackendSpecificModelContext(modelOptions));
     }
 
     static armnn::NeonTensorHandleFactory GetTensorHandleFactory(
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
index 83f761158a..d35b9685be 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.cpp
@@ -59,8 +59,10 @@ arm_compute::Status NeonConvolution2dWorkloadValidate(const TensorInfo& input,
 }
 
 NeonConvolution2dWorkload::NeonConvolution2dWorkload(
-    const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
-    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager)
+    const Convolution2dQueueDescriptor& descriptor,
+    const WorkloadInfo& info,
+    std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+    const bool isFastMathEnabled)
     : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info)
 {
     using arm_compute::NEDirectConvolutionLayer;
@@ -97,7 +99,19 @@ NeonConvolution2dWorkload::NeonConvolution2dWorkload(
                                 &output,
                                 padStrideInfo,
                                 arm_compute::WeightsInfo(),
-                                aclDilationInfo);
+                                aclDilationInfo,
+                                arm_compute::ActivationLayerInfo(),
+                                isFastMathEnabled);
+
+    m_ConvolutionMethod =
+        convolutionLayer->get_convolution_method(input.info(),
+                                                 m_KernelTensor->info(),
+                                                 output.info(),
+                                                 padStrideInfo,
+                                                 arm_compute::WeightsInfo(),
+                                                 aclDilationInfo,
+                                                 arm_compute::ActivationLayerInfo(),
+                                                 isFastMathEnabled);
 
     m_ConvolutionLayer.reset(convolutionLayer.release());
 
@@ -120,6 +134,11 @@ void NeonConvolution2dWorkload::Execute() const
     m_ConvolutionLayer->run();
 }
 
+arm_compute::ConvolutionMethod NeonConvolution2dWorkload::GetConvolutionMethod() const
+{
+    return m_ConvolutionMethod;
+}
+
 void NeonConvolution2dWorkload::FreeUnusedTensors()
 {
     FreeTensorIfUnused(m_KernelTensor);
diff --git a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
index 54e08a2042..860d78ba7e 100644
--- a/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
+++ b/src/backends/neon/workloads/NeonConvolution2dWorkload.hpp
@@ -28,17 +28,23 @@ class NeonConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor>
 {
 public:
     using BaseWorkload<Convolution2dQueueDescriptor>::m_Data;
 
-    NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info,
-                              std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager);
+    NeonConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor,
+                              const WorkloadInfo& info,
+                              std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager,
+                              const bool isFastMathEnabled = false);
 
     void Execute() const override;
 
+    arm_compute::ConvolutionMethod GetConvolutionMethod() const;
+
 private:
     std::unique_ptr<arm_compute::IFunction> m_ConvolutionLayer;
 
     std::unique_ptr<arm_compute::Tensor> m_KernelTensor;
     std::unique_ptr<arm_compute::Tensor> m_BiasTensor;
 
+    arm_compute::ConvolutionMethod m_ConvolutionMethod;
+
     void FreeUnusedTensors();
 };
diff --git a/src/backends/reference/RefWorkloadFactory.cpp b/src/backends/reference/RefWorkloadFactory.cpp
index 4ab1701391..e7e57b15d1 100644
--- a/src/backends/reference/RefWorkloadFactory.cpp
+++ b/src/backends/reference/RefWorkloadFactory.cpp
@@ -103,6 +103,14 @@ bool RefWorkloadFactory::IsLayerSupported(const Layer& layer,
     return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported);
 }
 
+bool RefWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer,
+                                          Optional<DataType> dataType,
+                                          std::string& outReasonIfUnsupported,
+                                          const ModelOptions& modelOptions)
+{
+    return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions);
+}
+
 std::unique_ptr<ITensorHandle> RefWorkloadFactory::CreateTensorHandle(const TensorInfo& tensorInfo,
                                                                       const bool isMemoryManaged) const
 {
diff --git a/src/backends/reference/RefWorkloadFactory.hpp b/src/backends/reference/RefWorkloadFactory.hpp
index 3a09588363..5f22c9eac2 100644
--- a/src/backends/reference/RefWorkloadFactory.hpp
+++ b/src/backends/reference/RefWorkloadFactory.hpp
@@ -41,6 +41,11 @@ public:
                                  Optional<DataType> dataType,
                                  std::string& outReasonIfUnsupported);
 
+    static bool IsLayerSupported(const IConnectableLayer& layer,
+                                 Optional<DataType> dataType,
+                                 std::string& outReasonIfUnsupported,
+                                 const ModelOptions& modelOptions);
+
     bool SupportsSubTensors() const override { return false; }
 
     ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead")
--
cgit v1.2.1
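
Usage sketch: with this patch applied, FastMath is requested per backend through
ModelOptions at optimization time; LoadNetwork() then pipes the options down to the
backend's CreateWorkloadFactory(), which is how CreateConvolution2d() above receives
isFastMathEnabled. The snippet below assumes the contemporaneous public API
(armnn::Optimize() and OptimizerOptions::m_ModelOptions, introduced alongside this
change); OptimizeWithFastMath is an illustrative helper name, not ArmNN API.

    #include <armnn/ArmNN.hpp>
    #include <armnn/BackendOptions.hpp>

    // Request FastMath for the GPU backend, mirroring the BackendOptions used
    // in the new CreateConvolution2dFastMathEnabledWorkload tests above.
    armnn::IOptimizedNetworkPtr OptimizeWithFastMath(const armnn::INetwork& network,
                                                     armnn::IRuntime& runtime)
    {
        // ModelOptions is std::vector<BackendOptions>; "FastMathEnabled" is the
        // key read by ClBackendModelContext/NeonBackendModelContext.
        armnn::BackendOptions gpuFastMath("GpuAcc", {{"FastMathEnabled", true}});

        armnn::OptimizerOptions optimizerOptions;
        optimizerOptions.m_ModelOptions.push_back(gpuFastMath);

        // The returned IOptimizedNetwork carries the ModelOptions; a subsequent
        // IRuntime::LoadNetwork() hands them to ClBackend::CreateWorkloadFactory().
        return armnn::Optimize(network,
                               {armnn::Compute::GpuAcc},
                               runtime.GetDeviceSpec(),
                               optimizerOptions);
    }

Note that even with FastMathEnabled set, the CL/NEON workload may still report
ConvolutionMethod::GEMM rather than WINOGRAD when the layer configuration does not
suit Winograd, which is exactly what the new unit tests assert.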