diff options
Diffstat (limited to 'src/backends/cl')
-rw-r--r-- | src/backends/cl/ClBackend.cpp | 19 | ||||
-rw-r--r-- | src/backends/cl/ClBackend.hpp | 6 | ||||
-rw-r--r-- | src/backends/cl/ClWorkloadFactory.cpp | 34 | ||||
-rw-r--r-- | src/backends/cl/ClWorkloadFactory.hpp | 11 | ||||
-rw-r--r-- | src/backends/cl/test/ClCreateWorkloadTests.cpp | 31 | ||||
-rw-r--r-- | src/backends/cl/test/ClWorkloadFactoryHelper.hpp | 7 | ||||
-rw-r--r-- | src/backends/cl/workloads/ClConvolution2dWorkload.cpp | 24 | ||||
-rw-r--r-- | src/backends/cl/workloads/ClConvolution2dWorkload.hpp | 10 |
8 files changed, 134 insertions, 8 deletions
diff --git a/src/backends/cl/ClBackend.cpp b/src/backends/cl/ClBackend.cpp index 49636d9b08..6254b0a32a 100644 --- a/src/backends/cl/ClBackend.cpp +++ b/src/backends/cl/ClBackend.cpp @@ -46,6 +46,13 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( } IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( + const IBackendInternal::IMemoryManagerSharedPtr& memoryManager, const ModelOptions& modelOptions) const +{ + return std::make_unique<ClWorkloadFactory>( + PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions)); +} + +IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( TensorHandleFactoryRegistry& registry) const { auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); @@ -57,6 +64,18 @@ IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( PolymorphicPointerDowncast<ClMemoryManager>(memoryManager)); } +IBackendInternal::IWorkloadFactoryPtr ClBackend::CreateWorkloadFactory( + TensorHandleFactoryRegistry& registry, const ModelOptions& modelOptions) const +{ + auto memoryManager = std::make_shared<ClMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::make_unique<ClTensorHandleFactory>(memoryManager)); + + return std::make_unique<ClWorkloadFactory>( + PolymorphicPointerDowncast<ClMemoryManager>(memoryManager), CreateBackendSpecificModelContext(modelOptions)); +} + std::vector<ITensorHandleFactory::FactoryId> ClBackend::GetHandleFactoryPreferences() const { return std::vector<ITensorHandleFactory::FactoryId> {ClTensorHandleFactory::GetIdStatic()}; diff --git a/src/backends/cl/ClBackend.hpp b/src/backends/cl/ClBackend.hpp index 108124cac9..af5534e0d0 100644 --- a/src/backends/cl/ClBackend.hpp +++ b/src/backends/cl/ClBackend.hpp @@ -26,6 +26,12 @@ public: IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( TensorHandleFactoryRegistry& registry) const override; + IWorkloadFactoryPtr CreateWorkloadFactory( const IMemoryManagerSharedPtr& memoryManager, + const ModelOptions& modelOptions) const override; + + IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, + const ModelOptions& modelOptions) const override; + std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override; void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override; diff --git a/src/backends/cl/ClWorkloadFactory.cpp b/src/backends/cl/ClWorkloadFactory.cpp index 58e17df5b8..4acfa570f2 100644 --- a/src/backends/cl/ClWorkloadFactory.cpp +++ b/src/backends/cl/ClWorkloadFactory.cpp @@ -4,6 +4,7 @@ // #include "ClWorkloadFactory.hpp" #include "ClBackendId.hpp" +#include "ClBackendModelContext.hpp" #include <Layer.hpp> @@ -42,6 +43,14 @@ bool ClWorkloadFactory::IsLayerSupported(const Layer& layer, return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported); } +bool ClWorkloadFactory::IsLayerSupported(const IConnectableLayer& layer, + Optional<DataType> dataType, + std::string& outReasonIfUnsupported, + const ModelOptions& modelOptions) +{ + return IWorkloadFactory::IsLayerSupported(s_Id, layer, dataType, outReasonIfUnsupported, modelOptions); +} + const BackendId& ClWorkloadFactory::GetBackendId() const { return s_Id; @@ -78,7 +87,13 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::MakeWorkload(const QueueDescriptor } ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager) - : m_MemoryManager(memoryManager) + : m_MemoryManager(memoryManager), m_ModelContextPtr(IBackendInternal::IBackendSpecificModelContextPtr{}) +{ +} + +ClWorkloadFactory::ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager, + const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr) + : m_MemoryManager(memoryManager), m_ModelContextPtr(modelContextPtr) { } @@ -205,7 +220,22 @@ std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvertFp32ToFp16( std::unique_ptr<IWorkload> ClWorkloadFactory::CreateConvolution2d(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info) const { - return MakeWorkload<ClConvolution2dWorkload>(descriptor, info, m_MemoryManager->GetIntraLayerManager()); + bool isFastMathEnabled = false; + if (m_ModelContextPtr) + { + if (m_ModelContextPtr.get() != nullptr) + { + auto modelOptions = dynamic_cast<ClBackendModelContext*>(m_ModelContextPtr.get()); + if (modelOptions) + { + isFastMathEnabled = modelOptions->IsFastMathEnabled(); + } + } + } + return MakeWorkload<ClConvolution2dWorkload>(descriptor, + info, + m_MemoryManager->GetIntraLayerManager(), + isFastMathEnabled); } std::unique_ptr<IWorkload> ClWorkloadFactory::CreateDebug(const DebugQueueDescriptor& descriptor, diff --git a/src/backends/cl/ClWorkloadFactory.hpp b/src/backends/cl/ClWorkloadFactory.hpp index 80cd7c51f1..fad5dd04fa 100644 --- a/src/backends/cl/ClWorkloadFactory.hpp +++ b/src/backends/cl/ClWorkloadFactory.hpp @@ -7,6 +7,8 @@ #include <armnn/IRuntime.hpp> #include <armnn/Optional.hpp> +#include <armnn/backends/IBackendInternal.hpp> + #include <backendsCommon/WorkloadFactoryBase.hpp> #include <aclCommon/BaseMemoryManager.hpp> @@ -19,12 +21,20 @@ class ClWorkloadFactory : public WorkloadFactoryBase public: ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager); + ClWorkloadFactory(const std::shared_ptr<ClMemoryManager>& memoryManager, + const IBackendInternal::IBackendSpecificModelContextPtr& modelContextPtr); + const BackendId& GetBackendId() const override; static bool IsLayerSupported(const Layer& layer, Optional<DataType> dataType, std::string& outReasonIfUnsupported); + static bool IsLayerSupported(const IConnectableLayer& layer, + Optional<DataType> dataType, + std::string& outReasonIfUnsupported, + const ModelOptions& modelOptions); + bool SupportsSubTensors() const override { return true; } ARMNN_DEPRECATED_MSG("Use ITensorHandleFactory::CreateSubTensorHandle instead") @@ -242,6 +252,7 @@ private: Args&&... args); mutable std::shared_ptr<ClMemoryManager> m_MemoryManager; + const IBackendInternal::IBackendSpecificModelContextPtr m_ModelContextPtr; }; } // namespace armnn diff --git a/src/backends/cl/test/ClCreateWorkloadTests.cpp b/src/backends/cl/test/ClCreateWorkloadTests.cpp index 1dd0abeadd..fc5ccfe487 100644 --- a/src/backends/cl/test/ClCreateWorkloadTests.cpp +++ b/src/backends/cl/test/ClCreateWorkloadTests.cpp @@ -6,6 +6,8 @@ #include "ClContextControlFixture.hpp" #include "ClWorkloadFactoryHelper.hpp" +#include <armnn/utility/Assert.hpp> +#include <armnn/utility/IgnoreUnused.hpp> #include <armnn/utility/PolymorphicDowncast.hpp> #include <backendsCommon/MemCopyWorkload.hpp> @@ -304,6 +306,35 @@ BOOST_AUTO_TEST_CASE(CreateConvolution2dFloat16NhwcWorkload) ClConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float16>(DataLayout::NHWC); } +BOOST_AUTO_TEST_CASE(CreateConvolution2dFastMathEnabledWorkload) +{ + Graph graph; + + using ModelOptions = std::vector<BackendOptions>; + ModelOptions modelOptions = {}; + BackendOptions gpuAcc("GpuAcc", + { + { "FastMathEnabled", true } + }); + modelOptions.push_back(gpuAcc); + + ClWorkloadFactory factory = + ClWorkloadFactoryHelper::GetFactory(ClWorkloadFactoryHelper::GetMemoryManager(), modelOptions); + + auto workload = + CreateConvolution2dWorkloadTest<ClConvolution2dWorkload, armnn::DataType::Float32>(factory, + graph, + DataLayout::NCHW, + modelOptions); + + ARMNN_ASSERT(workload != nullptr); + auto conv2dWorkload = PolymorphicDowncast<ClConvolution2dWorkload*>(workload.get()); + IgnoreUnused(conv2dWorkload); + ARMNN_ASSERT(conv2dWorkload != nullptr); + // fast_math enabled but configuration does not match with WINOGRAD + ARMNN_ASSERT(conv2dWorkload->GetConvolutionMethod() == arm_compute::ConvolutionMethod::GEMM); +} + template <typename DepthwiseConvolutionWorkloadType, typename armnn::DataType DataType> static void ClDepthwiseConvolutionWorkloadTest(DataLayout dataLayout) { diff --git a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp index 6e3c6fc05a..f7f1629b27 100644 --- a/src/backends/cl/test/ClWorkloadFactoryHelper.hpp +++ b/src/backends/cl/test/ClWorkloadFactoryHelper.hpp @@ -27,9 +27,12 @@ struct WorkloadFactoryHelper<armnn::ClWorkloadFactory> } static armnn::ClWorkloadFactory GetFactory( - const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager) + const armnn::IBackendInternal::IMemoryManagerSharedPtr& memoryManager, + const armnn::ModelOptions& modelOptions = {}) { - return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager)); + armnn::ClBackend backend; + return armnn::ClWorkloadFactory(armnn::PolymorphicPointerDowncast<armnn::ClMemoryManager>(memoryManager), + backend.CreateBackendSpecificModelContext(modelOptions)); } static armnn::ClTensorHandleFactory GetTensorHandleFactory( diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp index 42c9903dc4..7b52f2784f 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.cpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.cpp @@ -59,7 +59,9 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, } ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, - const WorkloadInfo& info, std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager) + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const bool isFastMathEnabled) : BaseWorkload<Convolution2dQueueDescriptor>(descriptor, info) , m_ConvolutionLayer(memoryManager) { @@ -95,7 +97,20 @@ ClConvolution2dWorkload::ClConvolution2dWorkload(const Convolution2dQueueDescrip &output, padStrideInfo, arm_compute::WeightsInfo(), - aclDilationInfo); + aclDilationInfo, + arm_compute::ActivationLayerInfo(), + isFastMathEnabled); + + m_ConvolutionMethod = + m_ConvolutionLayer.get_convolution_method(input.info(), + m_KernelTensor->info(), + output.info(), + padStrideInfo, + arm_compute::WeightsInfo(), + arm_compute::ActivationLayerInfo(), + arm_compute::CLScheduler::get().target(), + aclDilationInfo, + isFastMathEnabled); InitializeArmComputeClTensorData(*m_KernelTensor, m_Data.m_Weight); @@ -116,6 +131,11 @@ void ClConvolution2dWorkload::Execute() const RunClFunction(m_ConvolutionLayer, CHECK_LOCATION()); } +arm_compute::ConvolutionMethod ClConvolution2dWorkload::GetConvolutionMethod() const +{ + return m_ConvolutionMethod; +} + void ClConvolution2dWorkload::FreeUnusedTensors() { FreeTensorIfUnused(m_KernelTensor); diff --git a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp index 8b0afada36..f769422a0a 100644 --- a/src/backends/cl/workloads/ClConvolution2dWorkload.hpp +++ b/src/backends/cl/workloads/ClConvolution2dWorkload.hpp @@ -28,16 +28,22 @@ arm_compute::Status ClConvolution2dWorkloadValidate(const TensorInfo& input, class ClConvolution2dWorkload : public BaseWorkload<Convolution2dQueueDescriptor> { public: - ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, const WorkloadInfo& info, - std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager); + ClConvolution2dWorkload(const Convolution2dQueueDescriptor& descriptor, + const WorkloadInfo& info, + std::shared_ptr<arm_compute::MemoryManagerOnDemand>& memoryManager, + const bool isFastMathEnabled = false); void Execute() const override; + arm_compute::ConvolutionMethod GetConvolutionMethod() const; + private: mutable arm_compute::CLConvolutionLayer m_ConvolutionLayer; std::unique_ptr<arm_compute::CLTensor> m_KernelTensor; std::unique_ptr<arm_compute::CLTensor> m_BiasTensor; + arm_compute::ConvolutionMethod m_ConvolutionMethod; + void FreeUnusedTensors(); }; |