From 04a729708f986b1a69c1efc42d5cf18271cfae1e Mon Sep 17 00:00:00 2001 From: Sadik Armagan Date: Mon, 14 Sep 2020 15:44:18 +0100 Subject: IVGCVSW-5157 'Pipe ModelOption through Network::LoadNetwork() to Workload factory' * Pass ModelOptions to WorkloadFactory * Updated signature of CL and NEON Convolution2d workloads added FastMathEnabled param. Signed-off-by: Sadik Armagan Change-Id: I536178be8e4dd4083489e69febadaf0feeba46d2 --- src/backends/cl/ClBackend.cpp | 19 ++++++++++++ src/backends/cl/ClBackend.hpp | 6 ++++ src/backends/cl/ClWorkloadFactory.cpp | 34 ++++++++++++++++++++-- src/backends/cl/ClWorkloadFactory.hpp | 11 +++++++ src/backends/cl/test/ClCreateWorkloadTests.cpp | 31 ++++++++++++++++++++ src/backends/cl/test/ClWorkloadFactoryHelper.hpp | 7 +++-- .../cl/workloads/ClConvolution2dWorkload.cpp | 24 +++++++++++++-- .../cl/workloads/ClConvolution2dWorkload.hpp | 10 +++++-- 8 files changed, 134 insertions(+), 8 deletions(-) (limited to 'src/backends/cl') diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index 49636d9b08..6254b0a32a 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -45,6 +45,13 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( PolymorphicPointerDowncast(memoryManager)); } +IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( + const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const +{ + return std::make_unique( + PolymorphicPointerDowncast(memoryManager), CreateBackendSpecificModelContext(modelOptions)); +} + IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( TensorHandleFactoryRegistry& registry) const { @@ -57,6 +64,18 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( PolymorphicPointerDowncast(memoryManager)); } +IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( + TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const +{ + auto memoryManager = std::make_shared(std::make_unique()); + + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::make_unique(memoryManager)); + + return std::make_unique( + PolymorphicPointerDowncast(memoryManager), CreateBackendSpecificModelContext(modelOptions)); +} + std::vector ClBackend::GetHandleFactoryPreferences() const { return std::vector {ClTensorHandleFactory::GetIdStatic()}; diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp index 108124cac9..af5534e0d0 100644 --- a/src/backends/cl/ClBackend.hpp +++ b/src/backends/cl/ClBackend.hpp @@ -26,6 +26,12 @@ public: IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( TensorHandleFactoryRegistry& registry) const override; + IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager, + const ModelOptions& modelOptions) const override; + + IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, + const ModelOptions& modelOptions) const override; + std::vector GetHandleFactoryPreferences() const override; void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override; diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 58e17df5b8..4acfa570f2 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -4,6 +4,7 @@ // #include "ClWorkloadFactory.hpp" #include "ClBackendId.hpp" +#include "ClBackendModelContext.hpp" #include @@ -42,6 +43,14 @@ bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported); } +bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer, + Optional dataType, + std::string& outReasonIfUnsupported, + const ModelOptions& modelOptions) +{ + return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions); +} + const BackendId& ClWorkloadFactory::GetBackendId() const { return s_Id; @@ -78,7 +87,13 @@ std::unique_ptr ClWorkloadFactory::MakeWorkload(const QueueDescriptor } ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr& memoryManager) - : m_MemoryManager(memoryManager) + : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{}) +{ +} + +ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr& memoryManager, + const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr) + : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr) { } @@ -205,7 +220,22 @@ std::unique_ptr ClWorkloadFactory::CreateConvertFp32ToFp16( std::unique_ptr ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload(descriptor, info, m_MemoryManager->GetIntraLayerManager()); + bool isFastMathEnabled = false; + if (m_ModelContextPtr) + { + if (m_ModelContextPtr.get() != nullptr) + { + auto modelOptions = dynamic_cast(m_ModelContextPtr.get()); + if (modelOptions) + { + isFastMathEnabled = modelOptions->IsFastMathEnabled(); + } + } + } + return MakeWorkload(descriptor, + info, + m_MemoryManager->GetIntraLayerManager(), + isFastMathEnabled); } std::unique_ptr ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor, diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp index 80cd7c51f1..fad5dd04fa 100644 --- a/src/backends/cl/ClWorkloadFactory.hpp +++ b/src/backends/cl/ClWorkloadFactory.hpp @@ -7,6 +7,8 @@ #include #include +#include + #include #include @@ -19,12 +21,20 @@ class ClWorkloadFactory : public WorkloadFactoryBase public: ClWorkloadFactory(const std::shared_ptr& memoryManager); + ClWorkloadFactory(const std::shared_ptr& memoryManager, + const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr); + const BackendId& GetBackendId() const override; static bool IsLayerSupported(const Layer& layer, Optional dataType, std::string& outReasonIfUnsupported); + static bool IsLayerSupported(const IConnectableLayer& layer, + Optional dataType, + std::string& outReasonIfUnsupported, + const ModelOptions& modelOptions); + bool SupportsSubTensors() const override { return true; } ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead") @@ -242,6 +252,7 @@ private: Args&&... args); mutable std::shared_ptr m_MemoryManager; + const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr; }; } // namespace armnn diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index 1dd0abeadd..fc5ccfe487 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -6,6 +6,8 @@ #include "ClContextControlFixture.hpp" #include "ClWorkloadFactoryHelper.hpp" +#include +#include #include #include @@ -304,6 +306,35 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload) ClConvolution2dWorkloadTest(DataLayout::NHWC); } +BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload) +{ + Graph graph; + + using ModelOptions = std::vector; + ModelOptions modelOptions = {}; + BackendOptions gpuAcc("GpuAcc", + { + { "FastMathEnabled", true } + }); + modelOptions.push_back(gpuAcc); + + ClWorkloadFactory factory = + ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions); + + auto workload = + CreateConvolution2dWorkloadTest(factory, + graph, + DataLayout::NCHW, + modelOptions); + + ARMNN_ASSERT(workload != nullptr); + auto conv2dWorkload = PolymorphicDowncast(workload.get()); + IgnoreUnused(conv2dWorkload); + ARMNN_ASSERT(conv2dWorkload != nullptr); + // fast_math enabled but configuration does not match with WINOGRAD + ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM); +} + template static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout) { diff --git a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp index 6e3c6fc05a..f7f1629b27 100644 --- a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp +++ b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp @@ -27,9 +27,12 @@ struct WorkloadFactoryHelper } static armnn::ClWorkloadFactory GetFactory( - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ModelOptions& modelOptions = {}) { - return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast(memoryManager)); + armnn::ClBackend backend; + return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast(memoryManager), + backend.CreateBackendSpecificModelContext(modelOptions)); } static armnn::ClTensorHandleFactory GetTensorHandleFactory( diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp index 42c9903dc4..7b52f2784f 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp @@ -59,7 +59,9 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, } ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr& memoryManager) + const WorkloadInfo& info, + std::shared_ptr& memoryManager, + const bool isFastMathEnabled) : BaseWorkload(descriptor, info) , m_ConvolutionLayer(memoryManager) { @@ -95,7 +97,20 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip &output, padStrideInfo, arm_compute::WeightsInfo(), - aclDilationInfo); + aclDilationInfo, + arm_compute::ActivationLayerInfo(), + isFastMathEnabled); + + m_ConvolutionMethod = + m_ConvolutionLayer.get_convolution_method(input.info(), + m_KernelTensor->info(), + output.info(), + padStrideInfo, + arm_compute::WeightsInfo(), + arm_compute::ActivationLayerInfo(), + arm_compute::CLScheduler::get().target(), + aclDilationInfo, + isFastMathEnabled); InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); @@ -116,6 +131,11 @@ void ClConvolution2dWorkload::Execute() const RunClFunction(m_ConvolutionLayer, CHECK_LOCATION()); } +arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() const +{ + return m_ConvolutionMethod; +} + void ClConvolution2dWorkload::FreeUnusedTensors() { FreeTensorIfUnused(m_KernelTensor); diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp index 8b0afada36..f769422a0a 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp @@ -28,16 +28,22 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, class ClConvolution2dWorkload : public BaseWorkload { public: - ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr& memoryManager); + ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr& memoryManager, + const bool isFastMathEnabled = false); void Execute() const override; + arm_compute::ConvolutionMethod GetConvolutionMethod() const; + private: mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; std::unique_ptr m_KernelTensor; std::unique_ptr m_BiasTensor; + arm_compute::ConvolutionMethod m_ConvolutionMethod; + void FreeUnusedTensors(); }; -- cgit v1.2.1