about | summary | refs | log | tree | commit | diff
path: root/src/backends/gpuFsa/GpuFsaBackend.hpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/backends/gpuFsa/GpuFsaBackend.hpp')
-rw-r--r-- src/backends/gpuFsa/GpuFsaBackend.hpp 271
1 files changed, 251 insertions, 20 deletions
diff --git a/src/backends/gpuFsa/GpuFsaBackend.hpp b/src/backends/gpuFsa/GpuFsaBackend.hpp
index 803c6a4c66..6d886a12b1 100644
--- a/src/backends/gpuFsa/GpuFsaBackend.hpp
+++ b/src/backends/gpuFsa/GpuFsaBackend.hpp
@@ -1,56 +1,287 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
#include <armnn/backends/IBackendInternal.hpp>
+#include <aclCommon/BaseMemoryManager.hpp>
+
+#include <arm_compute/runtime/CL/CLBufferAllocator.h>
+#include <arm_compute/runtime/CL/CLMemoryRegion.h>
+#include <arm_compute/core/CL/CLKernelLibrary.h>
+#include <CL/cl_ext.h>
+
+// System includes for mapping and unmapping memory
+#include <sys/mman.h>
namespace armnn
{
+// Add new capabilities here.
+const BackendCapabilities gpuFsaCapabilities("GpuFsa",
+ {
+ {"NonConstWeights", false},
+ {"AsyncExecution", false},
+ {"ProtectedContentAllocation", true},
+ {"ConstantTensorsAsInputs", true},
+ {"PreImportIOTensors", false},
+ {"ExternallyManagedMemory", true},
+ {"MultiAxisPacking", false},
+ {"SingleAxisPacking", true}
+ });
+
class GpuFsaBackend : public IBackendInternal
{
public:
- GpuFsaBackend() = default;
+ GpuFsaBackend() : m_CustomAllocator(nullptr) {};
+ GpuFsaBackend(std::shared_ptr<ICustomAllocator> allocator)
+ {
+ std::string err;
+ UseCustomMemoryAllocator(allocator, err);
+ }
~GpuFsaBackend() = default;
static const BackendId& GetIdStatic();
- const BackendId& GetId() const override
- {
- return GetIdStatic();
- }
+ const BackendId& GetId() const override { return GetIdStatic(); }
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override;
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
- const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
+ const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
- IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(
- class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry) const override;
+ IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(TensorHandleFactoryRegistry& registry) const override;
- IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
+ IWorkloadFactoryPtr CreateWorkloadFactory(class TensorHandleFactoryRegistry& tensorHandleFactoryRegistry,
+ const ModelOptions& modelOptions,
+ MemorySourceFlags inputFlags,
+ MemorySourceFlags outputFlags) const override;
+
+ std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
+
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry) override;
- IBackendInternal::IBackendProfilingContextPtr
- CreateBackendProfilingContext(const IRuntime::CreationOptions& creationOptions,
- IBackendProfilingPtr& backendProfiling) override;
+ void RegisterTensorHandleFactories(TensorHandleFactoryRegistry& registry,
+ MemorySourceFlags inputFlags,
+ MemorySourceFlags outputFlags) override;
+
+ IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions&) const override;
+ IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(
+ const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override;
OptimizationViews OptimizeSubgraphView(const SubgraphView& subgraph,
const ModelOptions& modelOptions) const override;
- std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
+ std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
- void RegisterTensorHandleFactories(class TensorHandleFactoryRegistry& registry) override;
+ BackendCapabilities GetCapabilities() const override
+ {
+ return gpuFsaCapabilities;
+ };
- std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
+ virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
+ armnn::Optional<std::string&> errMsg) override
+ {
+ IgnoreUnused(errMsg);
+ ARMNN_LOG(info) << "Using Custom Allocator for GpuFsaBackend";
+
+ // Set flag to signal the backend to use a custom memory allocator
+ m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator));
+ m_UsingCustomAllocator = true;
+ return m_UsingCustomAllocator;
+ }
+
+ // CL requires an arm_compute::IAllocator, so we wrap the Arm NN ICustomAllocator to provide one.
+ class GpuFsaBackendCustomAllocatorWrapper : public arm_compute::IAllocator
+ {
+ public:
+ GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc)
+ {}
+ // Inherited methods overridden:
+ void* allocate(size_t size, size_t alignment) override
+ {
+ auto alloc = m_CustomAllocator->allocate(size, alignment);
+ return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
+ }
+ void free(void* ptr) override
+ {
+ auto hostMemPtr = m_AllocatedBufferMappings[ptr];
+ clReleaseMemObject(static_cast<cl_mem>(ptr));
+ m_CustomAllocator->free(hostMemPtr);
+ }
+ std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
+ {
+ auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
+ cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
+
+ return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer),
+ hostMemPtr,
+ m_CustomAllocator->GetMemorySourceType());
+ }
+ private:
+ cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source)
+ {
+ // Round the buffer size up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE (an already aligned size still grows by one cache line)
+ auto cachelineAlignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
+
+ if (source == MemorySource::Malloc)
+ {
+ const cl_import_properties_arm importProperties[] =
+ {
+ CL_IMPORT_TYPE_ARM,
+ CL_IMPORT_TYPE_HOST_ARM,
+ 0
+ };
+ cl_int error = CL_SUCCESS;
+ cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
+ CL_MEM_READ_WRITE,
+ importProperties,
+ memory,
+ roundedSize,
+ &error);
+ if (error == CL_SUCCESS)
+ {
+ m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
+ return buffer;
+ }
+ throw armnn::Exception(
+ "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
+ }
+ else if (source == MemorySource::DmaBuf)
+ {
+ const cl_import_properties_arm importProperties[] =
+ {
+ CL_IMPORT_TYPE_ARM,
+ CL_IMPORT_TYPE_DMA_BUF_ARM,
+ CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
+ CL_TRUE,
+ 0
+ };
+ cl_int error = CL_SUCCESS;
+ cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
+ CL_MEM_READ_WRITE,
+ importProperties,
+ memory,
+ roundedSize,
+ &error);
+ if (error == CL_SUCCESS)
+ {
+ m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
+ return buffer;
+ }
+ throw armnn::Exception(
+ "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
+ + std::to_string(error));
+ }
+ else if (source == MemorySource::DmaBufProtected)
+ {
+ const cl_import_properties_arm importProperties[] =
+ {
+ CL_IMPORT_TYPE_ARM,
+ CL_IMPORT_TYPE_DMA_BUF_ARM,
+ CL_IMPORT_TYPE_PROTECTED_ARM,
+ CL_TRUE,
+ 0
+ };
+ cl_int error = CL_SUCCESS;
+ cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
+ CL_MEM_READ_WRITE,
+ importProperties,
+ memory,
+ roundedSize,
+ &error);
+ if (error == CL_SUCCESS)
+ {
+ m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
+ return buffer;
+ }
+ throw armnn::Exception(
+ "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
+ + std::to_string(error));
+ }
+ throw armnn::Exception(
+ "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
+ }
+ std::shared_ptr<ICustomAllocator> m_CustomAllocator;
+ std::map<void*, void*> m_AllocatedBufferMappings;
+ };
+
+ class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion
+ {
+ public:
+ // A custom ICLMemoryRegion that additionally holds the host memory pointer, to allow CPU copy access.
+ ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source)
+ : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
+ {
+ _mem = buffer;
+ m_HostMemPtr = hostMemPtr;
+ m_MemorySource = source;
+ }
+
+ // Inherited methods overridden :
+ void* ptr() override
+ {
+ return nullptr;
+ }
+
+ void* map(cl::CommandQueue &q, bool blocking) override
+ {
+ armnn::IgnoreUnused(q, blocking);
+ if (m_HostMemPtr == nullptr)
+ {
+ throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr");
+ }
+ if (_mapping != nullptr)
+ {
+ throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped");
+ }
+ switch (m_MemorySource)
+ {
+ case armnn::MemorySource::Malloc:
+ _mapping = m_HostMemPtr;
+ return _mapping;
+ break;
+ case armnn::MemorySource::DmaBuf:
+ case armnn::MemorySource::DmaBufProtected:
+ // If the source is a DmaBuf, the memory pointer should point to an integer holding the file descriptor.
+ _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0);
+ return _mapping;
+ break;
+ default:
+ throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source");
+ break;
+ }
+ }
-private:
- // Private members
+ void unmap(cl::CommandQueue &q) override
+ {
+ armnn::IgnoreUnused(q);
+ switch (m_MemorySource)
+ {
+ case armnn::MemorySource::Malloc:
+ _mapping = nullptr;
+ break;
+ case armnn::MemorySource::DmaBuf:
+ case armnn::MemorySource::DmaBufProtected:
+ munmap(_mapping, _size);
+ _mapping = nullptr;
+ break;
+ default:
+ throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source");
+ break;
+ }
+ }
+ private:
+ void* m_HostMemPtr = nullptr;
+ armnn::MemorySource m_MemorySource;
+ };
-protected:
- // Protected members
+ std::shared_ptr<GpuFsaBackendCustomAllocatorWrapper> m_CustomAllocator;
+ bool m_UsingCustomAllocator = false;
};
} // namespace armnn