ArmNN
 24.05
GpuFsaBackend.hpp
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
5 #pragma once
6 
9 
10 #include <arm_compute/runtime/CL/CLBufferAllocator.h>
11 #include <arm_compute/runtime/CL/CLMemoryRegion.h>
12 #include <arm_compute/core/CL/CLKernelLibrary.h>
13 #include <CL/cl_ext.h>
14 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
15 #include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>
16 
17 // System includes for mapping and unmapping memory
18 #include <sys/mman.h>
19 
20 namespace armnn
21 {
22 
23 /**
24  * A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend
25  *
26  * @param[in, out] sketch A unique pointer to the sketch containing the operators which have been fused.
27  * @param[in, out] workloadContext A shared pointer to a GpuWorkloadContext which creates + stores TensorInfos
28  * @param[in, out] inputTensorInfos A unique pointer to a vector of inputTensorInfos used by the sketch
29  * @param[in, out] outputTensorInfos A unique pointer to a vector of outputTensorInfos used by the sketch
30  *
31  */
33 {
34  std::unique_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch> sketch = nullptr;
35  std::shared_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadContext> workloadContext = nullptr;
36 
37  std::unique_ptr<std::vector<arm_compute::ITensorInfo*>> inputTensorInfos = nullptr;
38  std::unique_ptr<std::vector<arm_compute::ITensorInfo*>> outputTensorInfos = nullptr;
39 };
40 
41 // add new capabilities here..
43  {
44  {"NonConstWeights", false},
45  {"AsyncExecution", false},
46  {"ProtectedContentAllocation", false},
47  {"ConstantTensorsAsInputs", true},
48  {"PreImportIOTensors", false},
49  {"ExternallyManagedMemory", false},
50  {"MultiAxisPacking", false},
51  {"SingleAxisPacking", false}
52  });
55 {
56 public:
57  ARMNN_DEPRECATED_MSG_REMOVAL_DATE("The GpuFsa backend will be removed from Arm NN in 24.08", "24.08")
59  ARMNN_DEPRECATED_MSG_REMOVAL_DATE("The GpuFsa backend will be removed from Arm NN in 24.08", "24.08")
60  GpuFsaBackend(std::shared_ptr<ICustomAllocator> allocator)
61  {
63  }
64  ~GpuFsaBackend() = default;
65 
66  static const BackendId& GetIdStatic();
67  const BackendId& GetId() const override { return GetIdStatic(); }
68 
70 
72  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
73 
75 
77  const ModelOptions& modelOptions,
78  MemorySourceFlags inputFlags,
79  MemorySourceFlags outputFlags) const override;
80 
81  std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
82 
84 
86  MemorySourceFlags inputFlags,
87  MemorySourceFlags outputFlags) override;
88 
91  const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
92 
94 
96  const ModelOptions& modelOptions) const override;
97 
98  std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
99 
101  {
102  return gpuFsaCapabilities;
103  };
104 
105  virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
107  {
108  ARMNN_LOG(info) << "Using Custom Allocator for GpuFsaBackend";
109 
110  // Set flag to signal the backend to use a custom memory allocator
111  m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator));
112  m_UsingCustomAllocator = true;
113  return m_UsingCustomAllocator;
114  }
115 
116  // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this
117  class GpuFsaBackendCustomAllocatorWrapper : public arm_compute::IAllocator
118  {
119  public:
120  GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc)
121  {}
122  // Inherited methods overridden:
123  void* allocate(size_t size, size_t alignment) override
124  {
125  auto alloc = m_CustomAllocator->allocate(size, alignment);
126  return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
127  }
128  void free(void* ptr) override
129  {
130  auto hostMemPtr = m_AllocatedBufferMappings[ptr];
131  clReleaseMemObject(static_cast<cl_mem>(ptr));
132  m_CustomAllocator->free(hostMemPtr);
133  }
134  std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
135  {
136  auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
137  cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
138 
139  return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer),
140  hostMemPtr,
141  m_CustomAllocator->GetMemorySourceType());
142  }
143  private:
144  cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source)
145  {
146  // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
147  auto cachelineAlignment =
148  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
149  auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
150 
151  if (source == MemorySource::Malloc)
152  {
153  const cl_import_properties_arm importProperties[] =
154  {
155  CL_IMPORT_TYPE_ARM,
156  CL_IMPORT_TYPE_HOST_ARM,
157  0
158  };
159  cl_int error = CL_SUCCESS;
160  cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
161  CL_MEM_READ_WRITE,
162  importProperties,
163  memory,
164  roundedSize,
165  &error);
166  if (error == CL_SUCCESS)
167  {
168  m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
169  return buffer;
170  }
171  throw armnn::Exception(
172  "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
173  }
174  else if (source == MemorySource::DmaBuf)
175  {
176  const cl_import_properties_arm importProperties[] =
177  {
178  CL_IMPORT_TYPE_ARM,
179  CL_IMPORT_TYPE_DMA_BUF_ARM,
180  CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
181  CL_TRUE,
182  0
183  };
184  cl_int error = CL_SUCCESS;
185  cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
186  CL_MEM_READ_WRITE,
187  importProperties,
188  memory,
189  roundedSize,
190  &error);
191  if (error == CL_SUCCESS)
192  {
193  m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
194  return buffer;
195  }
196  throw armnn::Exception(
197  "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
198  + std::to_string(error));
199  }
200  else if (source == MemorySource::DmaBufProtected)
201  {
202  const cl_import_properties_arm importProperties[] =
203  {
204  CL_IMPORT_TYPE_ARM,
205  CL_IMPORT_TYPE_DMA_BUF_ARM,
206  CL_IMPORT_TYPE_PROTECTED_ARM,
207  CL_TRUE,
208  0
209  };
210  cl_int error = CL_SUCCESS;
211  cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
212  CL_MEM_READ_WRITE,
213  importProperties,
214  memory,
215  roundedSize,
216  &error);
217  if (error == CL_SUCCESS)
218  {
219  m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
220  return buffer;
221  }
222  throw armnn::Exception(
223  "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
224  + std::to_string(error));
225  }
226  throw armnn::Exception(
227  "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
228  }
229  std::shared_ptr<ICustomAllocator> m_CustomAllocator;
230  std::map<void*, void*> m_AllocatedBufferMappings;
231  };
232 
233  class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion
234  {
235  public:
236  // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access
237  ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source)
238  : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
239  {
240  _mem = buffer;
241  m_HostMemPtr = hostMemPtr;
242  m_MemorySource = source;
243  }
244 
245  // Inherited methods overridden :
246  void* ptr() override
247  {
     // Always reports nullptr; within this class a host pointer is only handed out by map().
     // NOTE(review): presumably intentional for imported memory - confirm against the ICLMemoryRegion contract.
248  return nullptr;
249  }
250 
251  void* map(cl::CommandQueue &q, bool blocking) override
252  {
253  armnn::IgnoreUnused(q, blocking);
254  if (m_HostMemPtr == nullptr)
255  {
256  throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr");
257  }
258  if (_mapping != nullptr)
259  {
260  throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped");
261  }
262  switch (m_MemorySource)
263  {
265  _mapping = m_HostMemPtr;
266  return _mapping;
267  break;
270  // If the source is a Dmabuf then the memory ptr should be pointing to an integer value for the fd
271  _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0);
272  return _mapping;
273  break;
274  default:
275  throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source");
276  break;
277  }
278  }
279 
280  void unmap(cl::CommandQueue &q) override
281  {
283  switch (m_MemorySource)
284  {
286  _mapping = nullptr;
287  break;
290  munmap(_mapping, _size);
291  _mapping = nullptr;
292  break;
293  default:
294  throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source");
295  break;
296  }
297  }
298  private:
299  void* m_HostMemPtr = nullptr;
300  armnn::MemorySource m_MemorySource;
301  };
302 
303  std::shared_ptr<GpuFsaBackendCustomAllocatorWrapper> m_CustomAllocator;
305 };
307 
308 } // namespace armnn
armnn::MemorySource::Malloc
@ Malloc
armnn::Optional
Definition: Optional.hpp:270
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::GpuFsaBackendCustomAllocatorWrapper
GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr< ICustomAllocator > alloc)
Definition: GpuFsaBackend.hpp:120
armnn::GpuFsaBackend::RegisterTensorHandleFactories
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
Definition: GpuFsaBackend.cpp:151
armnn::GpuFsaBackend::GetIdStatic
static const BackendId & GetIdStatic()
Definition: GpuFsaBackend.cpp:69
armnn::GpuFsaBackend::m_UsingCustomAllocator
bool m_UsingCustomAllocator
Definition: GpuFsaBackend.hpp:304
armnn::IBackendInternal::IMemoryManagerSharedPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
Definition: IBackendInternal.hpp:99
armnn::TensorHandleFactoryRegistry
Definition: TensorHandleFactoryRegistry.hpp:23
armnn::GpuFsaBackend::GetLayerSupport
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Definition: GpuFsaBackend.cpp:209
armnn::ICustomAllocator
Custom Allocator interface.
Definition: ICustomAllocator.hpp:16
armnn::MemorySource::DmaBufProtected
@ DmaBufProtected
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
BaseMemoryManager.hpp
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::free
void free(void *ptr) override
Definition: GpuFsaBackend.hpp:128
ARMNN_NO_DEPRECATE_WARN_BEGIN
#define ARMNN_NO_DEPRECATE_WARN_BEGIN
Definition: Deprecated.hpp:33
armnn::GpuFsaBackend::OptimizeSubgraphView
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
Definition: GpuFsaBackend.cpp:220
armnn::IBackendInternal
Definition: IBackendInternal.hpp:77
armnn::IBackendInternal::IBackendContextPtr
std::unique_ptr< IBackendContext > IBackendContextPtr
Definition: IBackendInternal.hpp:90
armnn::GpuFsaBackend::GetDefaultAllocator
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
Definition: GpuFsaBackend.cpp:215
armnn::BoostLogSeverityMapping::error
@ error
armnn::GpuFsaBackend::GetHandleFactoryPreferences
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Definition: GpuFsaBackend.cpp:146
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
armnn::GpuFsaPreCompiledBlob::inputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > inputTensorInfos
Definition: GpuFsaBackend.hpp:37
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::map
void * map(cl::CommandQueue &q, bool blocking) override
Definition: GpuFsaBackend.hpp:251
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::allocate
void * allocate(size_t size, size_t alignment) override
Definition: GpuFsaBackend.hpp:123
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper
Definition: GpuFsaBackend.hpp:117
armnn::GpuFsaBackend::CreateMemoryManager
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: GpuFsaBackend.cpp:75
armnn::GpuFsaBackend::CreateBackendContext
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
Definition: GpuFsaBackend.cpp:198
armnn::MemorySource::DmaBuf
@ DmaBuf
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::make_region
std::unique_ptr< arm_compute::IMemoryRegion > make_region(size_t size, size_t alignment) override
Definition: GpuFsaBackend.hpp:134
armnn::EmptyOptional
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
IBackendInternal.hpp
armnn::GpuFsaPreCompiledBlob::sketch
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
Definition: GpuFsaBackend.hpp:34
armnn::SubgraphView
The SubgraphView class represents a subgraph of a Graph.
Definition: SubgraphView.hpp:31
armnn::OptimizationViews
Definition: OptimizationViews.hpp:17
armnn::GpuFsaBackend::CreateBackendProfilingContext
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
Definition: GpuFsaBackend.cpp:203
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::unmap
void unmap(cl::CommandQueue &q) override
Definition: GpuFsaBackend.hpp:280
armnn::GpuFsaBackend::CreateWorkloadFactory
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: GpuFsaBackend.cpp:84
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::GpuFsaBackend::UseCustomMemoryAllocator
virtual bool UseCustomMemoryAllocator(std::shared_ptr< ICustomAllocator > allocator, armnn::Optional< std::string & >) override
Signals the backend to use a custom memory allocator provided by the user.
Definition: GpuFsaBackend.hpp:105
armnn::GpuFsaBackend::GetCapabilities
BackendCapabilities GetCapabilities() const override
Returns a BackendCapability if the backend lists the capability The BackendCapability must then be in...
Definition: GpuFsaBackend.hpp:100
armnn::BoostLogSeverityMapping::info
@ info
armnn::GpuFsaBackend::m_CustomAllocator
std::shared_ptr< GpuFsaBackendCustomAllocatorWrapper > m_CustomAllocator
Definition: GpuFsaBackend.hpp:303
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion
Definition: GpuFsaBackend.hpp:233
armnn::IBackendInternal::IBackendProfilingContextPtr
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace.
Definition: IBackendInternal.hpp:92
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
armnn::IBackendInternal::IBackendProfilingPtr
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
Definition: IBackendInternal.hpp:93
armnn::GpuFsaBackend
Definition: GpuFsaBackend.hpp:54
armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:78
ARMNN_NO_DEPRECATE_WARN_END
#define ARMNN_NO_DEPRECATE_WARN_END
Definition: Deprecated.hpp:34
armnn::GpuFsaBackend::GetId
const BackendId & GetId() const override
Definition: GpuFsaBackend.hpp:67
armnn::GpuFsaBackend::~GpuFsaBackend
~GpuFsaBackend()=default
armnn::IBackendInternal::IMemoryManagerUniquePtr
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
Definition: IBackendInternal.hpp:98
armnn::gpuFsaCapabilities
const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })
std
Definition: BackendId.hpp:149
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
ARMNN_DEPRECATED_MSG_REMOVAL_DATE
#define ARMNN_DEPRECATED_MSG_REMOVAL_DATE(message, removed_in_release)
Definition: Deprecated.hpp:44
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:244
armnn::BackendId
Definition: BackendId.hpp:75
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::IBackendInternal::ILayerSupportSharedPtr
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
Definition: IBackendInternal.hpp:94
armnn::GpuFsaPreCompiledBlob::workloadContext
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
Definition: GpuFsaBackend.hpp:35
armnn::ModelOptions
std::vector< BackendOptions > ModelOptions
Definition: BackendOptions.hpp:18
armnn::GpuFsaPreCompiledBlob
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
Definition: GpuFsaBackend.hpp:32
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::ClBackendCustomAllocatorMemoryRegion
ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void *hostMemPtr, armnn::MemorySource source)
Definition: GpuFsaBackend.hpp:237
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::ptr
void * ptr() override
Definition: GpuFsaBackend.hpp:246
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
armnn::GpuFsaPreCompiledBlob::outputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > outputTensorInfos
Definition: GpuFsaBackend.hpp:38