diff options
author | Cathal Corbett <cathal.corbett@arm.com> | 2023-01-09 12:47:48 +0000 |
---|---|---|
committer | Cathal Corbett <cathal.corbett@arm.com> | 2023-01-12 11:58:50 +0000 |
commit | 2b32a69f3aac5496d0a966d9740cb4854504f3d9 (patch) | |
tree | 0ffc0710d5dd0feb9aa35be3defc8111d1c035b8 /src/backends/gpuFsa/GpuFsaBackend.cpp | |
parent | d69c1c595375b904a7f19f562ac1d54098184b4e (diff) | |
download | armnn-2b32a69f3aac5496d0a966d9740cb4854504f3d9.tar.gz |
IVGCVSW-7380 Update the GpuFsa Skeleton to build and load ACL
* Reuse cl backend to be able to create ClRuntime, ClContexts etc. for the new GpuFsa backend.
* Can access code defined in the experimental interface dynamic_fusion.
* No BackendModelContext as model/backend options not required for now.
* All of the serializer and deserializer code is omitted, as context caching is not required.
* No ImportTensorHandle and ImportTensorHandleFactory for now.
* Moved tuning and IClTensorHandle code to aclCommon as it is accessed by both cl and gpuFsa.
* Small code refactor of cl backend.
* Added DefaultAllocatorTests to GpuFsa backend.
Signed-off-by: Cathal Corbett <cathal.corbett@arm.com>
Change-Id: I6ae591360e9d2a783aafd06e2d7bf8e0b3e623ee
Diffstat (limited to 'src/backends/gpuFsa/GpuFsaBackend.cpp')
-rw-r--r-- | src/backends/gpuFsa/GpuFsaBackend.cpp | 172 |
1 files changed, 125 insertions, 47 deletions
diff --git a/src/backends/gpuFsa/GpuFsaBackend.cpp b/src/backends/gpuFsa/GpuFsaBackend.cpp index 9c2f4a0df6..ae7ff0c243 100644 --- a/src/backends/gpuFsa/GpuFsaBackend.cpp +++ b/src/backends/gpuFsa/GpuFsaBackend.cpp @@ -1,23 +1,24 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #include "GpuFsaBackend.hpp" +#include "GpuFsaBackendContext.hpp" +#include "GpuFsaBackendDefaultAllocator.hpp" #include "GpuFsaBackendId.hpp" -#include "GpuFsaWorkloadFactory.hpp" #include "GpuFsaLayerSupport.hpp" #include "GpuFsaTensorHandleFactory.hpp" +#include "GpuFsaWorkloadFactory.hpp" -#include <armnn/BackendRegistry.hpp> #include <armnn/backends/IBackendContext.hpp> #include <armnn/backends/IMemoryManager.hpp> -#include <armnn/utility/PolymorphicDowncast.hpp> -#include <backendsCommon/DefaultAllocator.hpp> -#include <backendsCommon/SubgraphUtils.hpp> - #include <Optimizer.hpp> +#include <aclCommon/BaseMemoryManager.hpp> + +#include <arm_compute/runtime/CL/CLBufferAllocator.h> + namespace armnn { @@ -27,6 +28,15 @@ const BackendId& GpuFsaBackend::GetIdStatic() return s_Id; } +IBackendInternal::IMemoryManagerUniquePtr GpuFsaBackend::CreateMemoryManager() const +{ + if (m_UsingCustomAllocator) + { + return std::make_unique<GpuFsaMemoryManager>(m_CustomAllocator); + } + return std::make_unique<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); +} + IBackendInternal::IWorkloadFactoryPtr GpuFsaBackend::CreateWorkloadFactory( const IBackendInternal::IMemoryManagerSharedPtr& memoryManager) const { @@ -34,74 +44,142 @@ IBackendInternal::IWorkloadFactoryPtr GpuFsaBackend::CreateWorkloadFactory( } IBackendInternal::IWorkloadFactoryPtr GpuFsaBackend::CreateWorkloadFactory( - class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const + TensorHandleFactoryRegistry& registry) const { - auto memoryManager = 
std::make_shared<GpuFsaMemoryManager>(); - - tensorHandleFactoryRegistry.RegisterMemoryManager(memoryManager); + std::shared_ptr<GpuFsaMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } + + std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager); - auto factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager); - // Register copy and import factory pair - tensorHandleFactoryRegistry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId()); - // Register the factory - tensorHandleFactoryRegistry.RegisterFactory(std::move(factory)); + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::move(factory)); return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager)); } -IBackendInternal::IBackendContextPtr GpuFsaBackend::CreateBackendContext(const IRuntime::CreationOptions&) const +IBackendInternal::IWorkloadFactoryPtr GpuFsaBackend::CreateWorkloadFactory( + TensorHandleFactoryRegistry& registry, + const ModelOptions& modelOptions, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) const { - return IBackendContextPtr{}; + IgnoreUnused(modelOptions); + + // To allow force import if inputFlags/outputFlags are Undefined, set it as Malloc + if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined)) + { + inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc); + } + if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined)) + { + outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc); + } + + std::shared_ptr<GpuFsaMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = 
std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } + + std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager); + + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::move(factory)); + + return std::make_unique<GpuFsaWorkloadFactory>(PolymorphicPointerDowncast<GpuFsaMemoryManager>(memoryManager)); } -IBackendInternal::IBackendProfilingContextPtr GpuFsaBackend::CreateBackendProfilingContext( - const IRuntime::CreationOptions&, IBackendProfilingPtr&) +std::vector<ITensorHandleFactory::FactoryId> GpuFsaBackend::GetHandleFactoryPreferences() const { - return IBackendProfilingContextPtr{}; + return std::vector<ITensorHandleFactory::FactoryId> { GpuFsaTensorHandleFactory::GetIdStatic() }; } -IBackendInternal::IMemoryManagerUniquePtr GpuFsaBackend::CreateMemoryManager() const +void GpuFsaBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) { - return std::make_unique<GpuFsaMemoryManager>(); + std::shared_ptr<GpuFsaMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } + + std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager); + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::move(factory)); + } -IBackendInternal::ILayerSupportSharedPtr GpuFsaBackend::GetLayerSupport() const +void GpuFsaBackend::RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) { - static ILayerSupportSharedPtr layerSupport{new GpuFsaLayerSupport}; - return layerSupport; + // To allow force import 
if inputFlags/outputFlags are Undefined, set it as Malloc + if (inputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined)) + { + inputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc); + } + if (outputFlags == static_cast<MemorySourceFlags>(MemorySource::Undefined)) + { + outputFlags = static_cast<MemorySourceFlags>(MemorySource::Malloc); + } + + std::shared_ptr<GpuFsaMemoryManager> memoryManager; + if (m_UsingCustomAllocator) + { + memoryManager = std::make_shared<GpuFsaMemoryManager>(m_CustomAllocator); + } + else + { + memoryManager = std::make_shared<GpuFsaMemoryManager>(std::make_unique<arm_compute::CLBufferAllocator>()); + } + + std::unique_ptr<ITensorHandleFactory> factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager); + registry.RegisterMemoryManager(memoryManager); + registry.RegisterFactory(std::move(factory)); } -OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgraph, - const ModelOptions& modelOptions) const +IBackendInternal::IBackendContextPtr GpuFsaBackend::CreateBackendContext(const IRuntime::CreationOptions& options) const { - OptimizationViews optimizationViews(modelOptions); - optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); - - return optimizationViews; + return IBackendContextPtr{new GpuFsaBackendContext{options}}; } -std::vector<ITensorHandleFactory::FactoryId> GpuFsaBackend::GetHandleFactoryPreferences() const +IBackendInternal::IBackendProfilingContextPtr GpuFsaBackend::CreateBackendProfilingContext( + const IRuntime::CreationOptions&, IBackendProfilingPtr&) { - return std::vector<ITensorHandleFactory::FactoryId> { GpuFsaTensorHandleFactory::GetIdStatic() }; + return IBackendProfilingContextPtr{}; } -void GpuFsaBackend::RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) +IBackendInternal::ILayerSupportSharedPtr GpuFsaBackend::GetLayerSupport() const { - auto memoryManager = std::make_shared<GpuFsaMemoryManager>(); - - 
registry.RegisterMemoryManager(memoryManager); - - auto factory = std::make_unique<GpuFsaTensorHandleFactory>(memoryManager); - - // Register copy and import factory pair - registry.RegisterCopyAndImportFactoryPair(factory->GetId(), factory->GetId()); - // Register the factory - registry.RegisterFactory(std::move(factory)); + static ILayerSupportSharedPtr layerSupport{new GpuFsaLayerSupport}; + return layerSupport; } std::unique_ptr<ICustomAllocator> GpuFsaBackend::GetDefaultAllocator() const { - return std::make_unique<DefaultAllocator>(); + return std::make_unique<GpuFsaBackendDefaultAllocator>(); +} + +OptimizationViews GpuFsaBackend::OptimizeSubgraphView(const SubgraphView& subgraph, + const ModelOptions& modelOptions) const +{ + OptimizationViews optimizationViews(modelOptions); + optimizationViews.AddUntouchedSubgraph(SubgraphView(subgraph)); + return optimizationViews; } -} // namespace armnn
\ No newline at end of file +} // namespace armnn |