diff options
Diffstat (limited to 'src/backends/gpuFsa/GpuFsaBackend.hpp')
-rw-r--r-- | src/backends/gpuFsa/GpuFsaBackend.hpp | 271 |
1 files changed, 251 insertions, 20 deletions
diff --git a/src/backends/gpuFsa/GpuFsaBackend.hpp b/src/backends/gpuFsa/GpuFsaBackend.hpp index 803c6a4c66..6d886a12b1 100644 --- a/src/backends/gpuFsa/GpuFsaBackend.hpp +++ b/src/backends/gpuFsa/GpuFsaBackend.hpp @@ -1,56 +1,287 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include <armnn/backends/IBackendInternal.hpp> +#include <aclCommon/BaseMemoryManager.hpp> + +#include <arm_compute/runtime/CL/CLBufferAllocator.h> +#include <arm_compute/runtime/CL/CLMemoryRegion.h> +#include <arm_compute/core/CL/CLKernelLibrary.h> +#include <CL/cl_ext.h> + +// System includes for mapping and unmapping memory +#include <sys/mman.h> namespace armnn { +// add new capabilities here.. +const BackendCapabilities gpuFsaCapabilities("GpuFsa", + { + {"NonConstWeights", false}, + {"AsyncExecution", false}, + {"ProtectedContentAllocation", true}, + {"ConstantTensorsAsInputs", true}, + {"PreImportIOTensors", false}, + {"ExternallyManagedMemory", true}, + {"MultiAxisPacking", false}, + {"SingleAxisPacking", true} + }); + class GpuFsaBackend : public IBackendInternal { public: - GpuFsaBackend() = default; + GpuFsaBackend() : m_CustomAllocator(nullptr) {}; + GpuFsaBackend(std::shared_ptr<ICustomAllocator> allocator) + { + std::string err; + UseCustomMemoryAllocator(allocator, err); + } ~GpuFsaBackend() = default; static const BackendId& GetIdStatic(); - const BackendId& GetId() const override - { - return GetIdStatic(); - } + const BackendId& GetId() const override { return GetIdStatic(); } IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override; IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( - const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override; + const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override; - IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory( - class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override; + IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(TensorHandleFactoryRegistry& registry) const override; - IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; + IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry, + const ModelOptions& modelOptions, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) const override; + + std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override; + + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override; - IBackendInternal::IBackendProfilingContextPtr - CreateBackendProfilingContext(const IRuntime::CreationOptions& creationOptions, - IBackendProfilingPtr& backendProfiling) override; + void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry, + MemorySourceFlags inputFlags, + MemorySourceFlags outputFlags) override; + + IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override; + IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext( + const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override; IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override; OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph, const ModelOptions& modelOptions) const override; - std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override; + std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override; - void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) override; + BackendCapabilities GetCapabilities() const override + { + return gpuFsaCapabilities; + }; - std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override; + virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator, + armnn::Optional<std::string&> errMsg) override + { + IgnoreUnused(errMsg); + ARMNN_LOG(info) << "Using Custom Allocator for GpuFsaBackend"; + + // Set flag to signal the backend to use a custom memory allocator + m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator)); + m_UsingCustomAllocator = true; + return m_UsingCustomAllocator; + } + + // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this + class GpuFsaBackendCustomAllocatorWrapper : public arm_compute::IAllocator + { + public: + GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc) + {} + // Inherited methods overridden: + void* allocate(size_t size, size_t alignment) override + { + auto alloc = m_CustomAllocator->allocate(size, alignment); + return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType()); + } + void free(void* ptr) override + { + auto hostMemPtr = m_AllocatedBufferMappings[ptr]; + clReleaseMemObject(static_cast<cl_mem>(ptr)); + m_CustomAllocator->free(hostMemPtr); + } + std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override + { + auto hostMemPtr = m_CustomAllocator->allocate(size, alignment); + cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType()); + + return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer), + hostMemPtr, + m_CustomAllocator->GetMemorySourceType()); + } + private: + cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source) + { + // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE + auto cachelineAlignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>(); + auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment); + + if (source == MemorySource::Malloc) + { + const cl_import_properties_arm importProperties[] = + { + CL_IMPORT_TYPE_ARM, + CL_IMPORT_TYPE_HOST_ARM, + 0 + }; + cl_int error = CL_SUCCESS; + cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), + CL_MEM_READ_WRITE, + importProperties, + memory, + roundedSize, + &error); + if (error == CL_SUCCESS) + { + m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); + return buffer; + } + throw armnn::Exception( + "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error)); + } + else if (source == MemorySource::DmaBuf) + { + const cl_import_properties_arm importProperties[] = + { + CL_IMPORT_TYPE_ARM, + CL_IMPORT_TYPE_DMA_BUF_ARM, + CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM, + CL_TRUE, + 0 + }; + cl_int error = CL_SUCCESS; + cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), + CL_MEM_READ_WRITE, + importProperties, + memory, + roundedSize, + &error); + if (error == CL_SUCCESS) + { + m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); + return buffer; + } + throw armnn::Exception( + "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + + std::to_string(error)); + } + else if (source == MemorySource::DmaBufProtected) + { + const cl_import_properties_arm importProperties[] = + { + CL_IMPORT_TYPE_ARM, + CL_IMPORT_TYPE_DMA_BUF_ARM, + CL_IMPORT_TYPE_PROTECTED_ARM, + CL_TRUE, + 0 + }; + cl_int error = CL_SUCCESS; + cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), + CL_MEM_READ_WRITE, + importProperties, + memory, + roundedSize, + &error); + if (error == CL_SUCCESS) + { + m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory)); + return buffer; + } + throw armnn::Exception( + "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + + std::to_string(error)); + } + throw armnn::Exception( + "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator"); + } + std::shared_ptr<ICustomAllocator> m_CustomAllocator; + std::map<void*, void*> m_AllocatedBufferMappings; + }; + + class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion + { + public: + // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access + ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source) + : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>()) + { + _mem = buffer; + m_HostMemPtr = hostMemPtr; + m_MemorySource = source; + } + + // Inherited methods overridden : + void* ptr() override + { + return nullptr; + } + + void* map(cl::CommandQueue &q, bool blocking) override + { + armnn::IgnoreUnused(q, blocking); + if (m_HostMemPtr == nullptr) + { + throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr"); + } + if (_mapping != nullptr) + { + throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped"); + } + switch (m_MemorySource) + { + case armnn::MemorySource::Malloc: + _mapping = m_HostMemPtr; + return _mapping; + break; + case armnn::MemorySource::DmaBuf: + case armnn::MemorySource::DmaBufProtected: + // If the source is a Dmabuf then the memory ptr should be pointing to an integer value for the fd + _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0); + return _mapping; + break; + default: + throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source"); + break; + } + } -private: - // Private members + void unmap(cl::CommandQueue &q) override + { + armnn::IgnoreUnused(q); + switch (m_MemorySource) + { + case armnn::MemorySource::Malloc: + _mapping = nullptr; + break; + case armnn::MemorySource::DmaBuf: + case armnn::MemorySource::DmaBufProtected: + munmap(_mapping, _size); + _mapping = nullptr; + break; + default: + throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source"); + break; + } + } + private: + void* m_HostMemPtr = nullptr; + armnn::MemorySource m_MemorySource; + }; -protected: - // Protected members + std::shared_ptr<GpuFsaBackendCustomAllocatorWrapper> m_CustomAllocator; + bool m_UsingCustomAllocator = false; }; } // namespace armnn |