Arm NN 24.02 — GpuFsaBackend.hpp (Doxygen source listing).
Go to the documentation of this file.
1 //
2 // Copyright © 2022-2024 Arm Ltd and Contributors. All rights reserved.
3 // SPDX-License-Identifier: MIT
4 //
#pragma once

#include <armnn/backends/IBackendInternal.hpp>
#include <aclCommon/BaseMemoryManager.hpp>

#include <arm_compute/runtime/CL/CLBufferAllocator.h>
#include <arm_compute/runtime/CL/CLMemoryRegion.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>
#include <CL/cl_ext.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h>
#include <arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h>

// System includes for mapping and unmapping memory
#include <sys/mman.h>
20 namespace armnn
21 {
22 
23 /**
24  * A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend
25  *
26  * @param[in, out] sketch A unique pointer to the sketch containing the operators which have been fused.
27  * @param[in, out] TensorInfos A shared pointer to a GpuWorkloadContext which creates + stores TensorInfos
28  * @param[in, out] inputTensorInfos A unique pointer to a vector of inputTensorInfos used by the sketch
29  * @param[in, out] outputTensorInfos A unique pointer to a vector of outputTensorInfos used by the sketch
30  *
31  */
33 {
34  std::unique_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch> sketch = nullptr;
35  std::shared_ptr<arm_compute::experimental::dynamic_fusion::GpuWorkloadContext> workloadContext = nullptr;
36 
37  std::unique_ptr<std::vector<arm_compute::ITensorInfo*>> inputTensorInfos = nullptr;
38  std::unique_ptr<std::vector<arm_compute::ITensorInfo*>> outputTensorInfos = nullptr;
39 };
40 
41 // add new capabilities here..
43  {
44  {"NonConstWeights", false},
45  {"AsyncExecution", false},
46  {"ProtectedContentAllocation", false},
47  {"ConstantTensorsAsInputs", true},
48  {"PreImportIOTensors", false},
49  {"ExternallyManagedMemory", false},
50  {"MultiAxisPacking", false},
51  {"SingleAxisPacking", false}
52  });
53 
55 {
56 public:
58  GpuFsaBackend(std::shared_ptr<ICustomAllocator> allocator)
59  {
61  }
62  ~GpuFsaBackend() = default;
63 
64  static const BackendId& GetIdStatic();
65  const BackendId& GetId() const override { return GetIdStatic(); }
66 
68 
70  const IBackendInternal::IMemoryManagerSharedPtr& memoryManager = nullptr) const override;
71 
73 
75  const ModelOptions& modelOptions,
76  MemorySourceFlags inputFlags,
77  MemorySourceFlags outputFlags) const override;
78 
79  std::vector<ITensorHandleFactory::FactoryId> GetHandleFactoryPreferences() const override;
80 
82 
84  MemorySourceFlags inputFlags,
85  MemorySourceFlags outputFlags) override;
86 
89  const IRuntime::CreationOptions&, IBackendProfilingPtr& backendProfiling) override;
90 
92 
94  const ModelOptions& modelOptions) const override;
95 
96  std::unique_ptr<ICustomAllocator> GetDefaultAllocator() const override;
97 
99  {
100  return gpuFsaCapabilities;
101  };
102 
103  virtual bool UseCustomMemoryAllocator(std::shared_ptr<ICustomAllocator> allocator,
105  {
106  ARMNN_LOG(info) << "Using Custom Allocator for GpuFsaBackend";
107 
108  // Set flag to signal the backend to use a custom memory allocator
109  m_CustomAllocator = std::make_shared<GpuFsaBackendCustomAllocatorWrapper>(std::move(allocator));
110  m_UsingCustomAllocator = true;
111  return m_UsingCustomAllocator;
112  }
113 
114  // Cl requires a arm_compute::IAllocator we wrap the Arm NN ICustomAllocator to achieve this
115  class GpuFsaBackendCustomAllocatorWrapper : public arm_compute::IAllocator
116  {
117  public:
118  GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr<ICustomAllocator> alloc) : m_CustomAllocator(alloc)
119  {}
120  // Inherited methods overridden:
121  void* allocate(size_t size, size_t alignment) override
122  {
123  auto alloc = m_CustomAllocator->allocate(size, alignment);
124  return MapAllocatedMemory(alloc, size, m_CustomAllocator->GetMemorySourceType());
125  }
126  void free(void* ptr) override
127  {
128  auto hostMemPtr = m_AllocatedBufferMappings[ptr];
129  clReleaseMemObject(static_cast<cl_mem>(ptr));
130  m_CustomAllocator->free(hostMemPtr);
131  }
132  std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
133  {
134  auto hostMemPtr = m_CustomAllocator->allocate(size, alignment);
135  cl_mem buffer = MapAllocatedMemory(hostMemPtr, size, m_CustomAllocator->GetMemorySourceType());
136 
137  return std::make_unique<ClBackendCustomAllocatorMemoryRegion>(cl::Buffer(buffer),
138  hostMemPtr,
139  m_CustomAllocator->GetMemorySourceType());
140  }
141  private:
142  cl_mem MapAllocatedMemory(void* memory, size_t size, MemorySource source)
143  {
144  // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
145  auto cachelineAlignment =
146  arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
147  auto roundedSize = cachelineAlignment + size - (size % cachelineAlignment);
148 
149  if (source == MemorySource::Malloc)
150  {
151  const cl_import_properties_arm importProperties[] =
152  {
153  CL_IMPORT_TYPE_ARM,
154  CL_IMPORT_TYPE_HOST_ARM,
155  0
156  };
157  cl_int error = CL_SUCCESS;
158  cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
159  CL_MEM_READ_WRITE,
160  importProperties,
161  memory,
162  roundedSize,
163  &error);
164  if (error == CL_SUCCESS)
165  {
166  m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
167  return buffer;
168  }
169  throw armnn::Exception(
170  "Mapping allocated memory from CustomMemoryAllocator failed, errcode: " + std::to_string(error));
171  }
172  else if (source == MemorySource::DmaBuf)
173  {
174  const cl_import_properties_arm importProperties[] =
175  {
176  CL_IMPORT_TYPE_ARM,
177  CL_IMPORT_TYPE_DMA_BUF_ARM,
178  CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
179  CL_TRUE,
180  0
181  };
182  cl_int error = CL_SUCCESS;
183  cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
184  CL_MEM_READ_WRITE,
185  importProperties,
186  memory,
187  roundedSize,
188  &error);
189  if (error == CL_SUCCESS)
190  {
191  m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
192  return buffer;
193  }
194  throw armnn::Exception(
195  "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
196  + std::to_string(error));
197  }
198  else if (source == MemorySource::DmaBufProtected)
199  {
200  const cl_import_properties_arm importProperties[] =
201  {
202  CL_IMPORT_TYPE_ARM,
203  CL_IMPORT_TYPE_DMA_BUF_ARM,
204  CL_IMPORT_TYPE_PROTECTED_ARM,
205  CL_TRUE,
206  0
207  };
208  cl_int error = CL_SUCCESS;
209  cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
210  CL_MEM_READ_WRITE,
211  importProperties,
212  memory,
213  roundedSize,
214  &error);
215  if (error == CL_SUCCESS)
216  {
217  m_AllocatedBufferMappings.insert(std::make_pair(static_cast<void *>(buffer), memory));
218  return buffer;
219  }
220  throw armnn::Exception(
221  "Mapping allocated memory from CustomMemoryAllocator failed, errcode: "
222  + std::to_string(error));
223  }
224  throw armnn::Exception(
225  "Attempting to allocate memory with unsupported MemorySource type in CustomAllocator");
226  }
227  std::shared_ptr<ICustomAllocator> m_CustomAllocator;
228  std::map<void*, void*> m_AllocatedBufferMappings;
229  };
230 
231  class ClBackendCustomAllocatorMemoryRegion : public arm_compute::ICLMemoryRegion
232  {
233  public:
234  // We need to have a new version of ICLMemoryRegion which holds a hostMemPtr to allow for cpu copy access
235  ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void* hostMemPtr, armnn::MemorySource source)
236  : ICLMemoryRegion(buffer.getInfo<CL_MEM_SIZE>())
237  {
238  _mem = buffer;
239  m_HostMemPtr = hostMemPtr;
240  m_MemorySource = source;
241  }
242 
243  // Inherited methods overridden :
244  void* ptr() override
245  {
246  return nullptr;
247  }
248 
249  void* map(cl::CommandQueue &q, bool blocking) override
250  {
251  armnn::IgnoreUnused(q, blocking);
252  if (m_HostMemPtr == nullptr)
253  {
254  throw armnn::Exception("ClBackend: Attempting to map memory with an invalid host ptr");
255  }
256  if (_mapping != nullptr)
257  {
258  throw armnn::Exception("ClBackend: Attempting to map memory which has not yet been unmapped");
259  }
260  switch (m_MemorySource)
261  {
263  _mapping = m_HostMemPtr;
264  return _mapping;
265  break;
268  // If the source is a Dmabuf then the memory ptr should be pointing to an integer value for the fd
269  _mapping = mmap(NULL, _size, PROT_WRITE, MAP_SHARED, *(reinterpret_cast<int*>(m_HostMemPtr)), 0);
270  return _mapping;
271  break;
272  default:
273  throw armnn::Exception("ClBackend: Attempting to map imported memory without a valid source");
274  break;
275  }
276  }
277 
278  void unmap(cl::CommandQueue &q) override
279  {
281  switch (m_MemorySource)
282  {
284  _mapping = nullptr;
285  break;
288  munmap(_mapping, _size);
289  _mapping = nullptr;
290  break;
291  default:
292  throw armnn::Exception("ClBackend: Attempting to unmap imported memory without a valid source");
293  break;
294  }
295  }
296  private:
297  void* m_HostMemPtr = nullptr;
298  armnn::MemorySource m_MemorySource;
299  };
300 
301  std::shared_ptr<GpuFsaBackendCustomAllocatorWrapper> m_CustomAllocator;
303 };
304 
305 } // namespace armnn
armnn::MemorySource::Malloc
@ Malloc
armnn::Optional
Definition: Optional.hpp:270
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::GpuFsaBackendCustomAllocatorWrapper
GpuFsaBackendCustomAllocatorWrapper(std::shared_ptr< ICustomAllocator > alloc)
Definition: GpuFsaBackend.hpp:118
armnn::GpuFsaBackend::RegisterTensorHandleFactories
void RegisterTensorHandleFactories(TensorHandleFactoryRegistry &registry) override
(Optional) Register TensorHandleFactories Either this method or CreateMemoryManager() and IWorkloadFa...
Definition: GpuFsaBackend.cpp:151
armnn::GpuFsaBackend::GetIdStatic
static const BackendId & GetIdStatic()
Definition: GpuFsaBackend.cpp:69
armnn::GpuFsaBackend::m_UsingCustomAllocator
bool m_UsingCustomAllocator
Definition: GpuFsaBackend.hpp:302
armnn::IBackendInternal::IMemoryManagerSharedPtr
std::shared_ptr< IMemoryManager > IMemoryManagerSharedPtr
Definition: IBackendInternal.hpp:99
armnn::TensorHandleFactoryRegistry
Definition: TensorHandleFactoryRegistry.hpp:23
armnn::GpuFsaBackend::GetLayerSupport
IBackendInternal::ILayerSupportSharedPtr GetLayerSupport() const override
Definition: GpuFsaBackend.cpp:209
armnn::MemorySource::DmaBufProtected
@ DmaBufProtected
armnn::MemorySourceFlags
unsigned int MemorySourceFlags
Definition: MemorySources.hpp:15
BaseMemoryManager.hpp
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::free
void free(void *ptr) override
Definition: GpuFsaBackend.hpp:126
armnn::GpuFsaBackend::GpuFsaBackend
GpuFsaBackend(std::shared_ptr< ICustomAllocator > allocator)
Definition: GpuFsaBackend.hpp:58
armnn::GpuFsaBackend::OptimizeSubgraphView
OptimizationViews OptimizeSubgraphView(const SubgraphView &subgraph, const ModelOptions &modelOptions) const override
Definition: GpuFsaBackend.cpp:220
armnn::IBackendInternal
Definition: IBackendInternal.hpp:77
armnn::IBackendInternal::IBackendContextPtr
std::unique_ptr< IBackendContext > IBackendContextPtr
Definition: IBackendInternal.hpp:90
armnn::GpuFsaBackend::GetDefaultAllocator
std::unique_ptr< ICustomAllocator > GetDefaultAllocator() const override
Returns the default memory allocator for the backend.
Definition: GpuFsaBackend.cpp:215
armnn::BoostLogSeverityMapping::error
@ error
armnn::GpuFsaBackend::GetHandleFactoryPreferences
std::vector< ITensorHandleFactory::FactoryId > GetHandleFactoryPreferences() const override
(Optional) Returns a vector of supported TensorHandleFactory ids in preference order.
Definition: GpuFsaBackend.cpp:146
ARMNN_LOG
#define ARMNN_LOG(severity)
Definition: Logging.hpp:212
armnn::GpuFsaPreCompiledBlob::inputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > inputTensorInfos
Definition: GpuFsaBackend.hpp:37
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::map
void * map(cl::CommandQueue &q, bool blocking) override
Definition: GpuFsaBackend.hpp:249
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::allocate
void * allocate(size_t size, size_t alignment) override
Definition: GpuFsaBackend.hpp:121
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper
Definition: GpuFsaBackend.hpp:115
armnn::GpuFsaBackend::CreateMemoryManager
IBackendInternal::IMemoryManagerUniquePtr CreateMemoryManager() const override
Definition: GpuFsaBackend.cpp:75
armnn::GpuFsaBackend::CreateBackendContext
IBackendInternal::IBackendContextPtr CreateBackendContext(const IRuntime::CreationOptions &) const override
Create the runtime context of the backend.
Definition: GpuFsaBackend.cpp:198
armnn::MemorySource::DmaBuf
@ DmaBuf
armnn::GpuFsaBackend::GpuFsaBackendCustomAllocatorWrapper::make_region
std::unique_ptr< arm_compute::IMemoryRegion > make_region(size_t size, size_t alignment) override
Definition: GpuFsaBackend.hpp:132
armnn::EmptyOptional
EmptyOptional is used to initialize the Optional class in case we want to have default value for an O...
Definition: Optional.hpp:32
IBackendInternal.hpp
armnn::GpuFsaPreCompiledBlob::sketch
std::unique_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadSketch > sketch
Definition: GpuFsaBackend.hpp:34
armnn::SubgraphView
The SubgraphView class represents a subgraph of a Graph.
Definition: SubgraphView.hpp:31
armnn::OptimizationViews
Definition: OptimizationViews.hpp:17
armnn::GpuFsaBackend::CreateBackendProfilingContext
IBackendInternal::IBackendProfilingContextPtr CreateBackendProfilingContext(const IRuntime::CreationOptions &, IBackendProfilingPtr &backendProfiling) override
Create context specifically used for profiling interaction from backends.
Definition: GpuFsaBackend.cpp:203
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::unmap
void unmap(cl::CommandQueue &q) override
Definition: GpuFsaBackend.hpp:278
armnn::GpuFsaBackend::CreateWorkloadFactory
IBackendInternal::IWorkloadFactoryPtr CreateWorkloadFactory(const IBackendInternal::IMemoryManagerSharedPtr &memoryManager=nullptr) const override
Definition: GpuFsaBackend.cpp:84
armnn::Exception
Base class for all ArmNN exceptions so that users can filter to just those.
Definition: Exceptions.hpp:46
armnn::GpuFsaBackend::UseCustomMemoryAllocator
virtual bool UseCustomMemoryAllocator(std::shared_ptr< ICustomAllocator > allocator, armnn::Optional< std::string & >) override
Signals the backend to use a custom memory allocator provided by the user.
Definition: GpuFsaBackend.hpp:103
armnn::GpuFsaBackend::GetCapabilities
BackendCapabilities GetCapabilities() const override
Returns a BackendCapability if the backend lists the capability The BackendCapability must then be in...
Definition: GpuFsaBackend.hpp:98
armnn::BoostLogSeverityMapping::info
@ info
armnn::GpuFsaBackend::m_CustomAllocator
std::shared_ptr< GpuFsaBackendCustomAllocatorWrapper > m_CustomAllocator
Definition: GpuFsaBackend.hpp:301
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion
Definition: GpuFsaBackend.hpp:231
armnn::IBackendInternal::IBackendProfilingContextPtr
std::shared_ptr< arm::pipe::IBackendProfilingContext > IBackendProfilingContextPtr
This is the bridge between backend and backend profiling we'll keep it in the backend namespace.
Definition: IBackendInternal.hpp:92
armnn::BackendOptions
Struct for the users to pass backend specific options.
Definition: BackendOptions.hpp:22
armnn::IBackendInternal::IBackendProfilingPtr
std::unique_ptr< arm::pipe::IBackendProfiling > IBackendProfilingPtr
Definition: IBackendInternal.hpp:93
armnn::GpuFsaBackend
Definition: GpuFsaBackend.hpp:54
armnn::IRuntime::CreationOptions
Definition: IRuntime.hpp:78
armnn::GpuFsaBackend::GetId
const BackendId & GetId() const override
Definition: GpuFsaBackend.hpp:65
armnn::GpuFsaBackend::~GpuFsaBackend
~GpuFsaBackend()=default
armnn::IBackendInternal::IMemoryManagerUniquePtr
std::unique_ptr< IMemoryManager > IMemoryManagerUniquePtr
Definition: IBackendInternal.hpp:98
armnn::gpuFsaCapabilities
const BackendCapabilities gpuFsaCapabilities("GpuFsa", { {"NonConstWeights", false}, {"AsyncExecution", false}, {"ProtectedContentAllocation", false}, {"ConstantTensorsAsInputs", true}, {"PreImportIOTensors", false}, {"ExternallyManagedMemory", false}, {"MultiAxisPacking", false}, {"SingleAxisPacking", false} })
armnn::IgnoreUnused
void IgnoreUnused(Ts &&...)
Definition: IgnoreUnused.hpp:14
armnn::MemorySource
MemorySource
Define the Memory Source to reduce copies.
Definition: Types.hpp:244
armnn::BackendId
Definition: BackendId.hpp:75
armnn
Copyright (c) 2021 ARM Limited and Contributors.
Definition: 01_00_quick_start.dox:6
armnn::IBackendInternal::ILayerSupportSharedPtr
std::shared_ptr< ILayerSupport > ILayerSupportSharedPtr
Definition: IBackendInternal.hpp:94
armnn::GpuFsaPreCompiledBlob::workloadContext
std::shared_ptr< arm_compute::experimental::dynamic_fusion::GpuWorkloadContext > workloadContext
Definition: GpuFsaBackend.hpp:35
armnn::ModelOptions
std::vector< BackendOptions > ModelOptions
Definition: BackendOptions.hpp:18
armnn::GpuFsaPreCompiledBlob
A structure which contains all the elements needed to execute a fused workload in the GpuFsa Backend.
Definition: GpuFsaBackend.hpp:32
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::ClBackendCustomAllocatorMemoryRegion
ClBackendCustomAllocatorMemoryRegion(const cl::Buffer &buffer, void *hostMemPtr, armnn::MemorySource source)
Definition: GpuFsaBackend.hpp:235
armnn::GpuFsaBackend::ClBackendCustomAllocatorMemoryRegion::ptr
void * ptr() override
Definition: GpuFsaBackend.hpp:244
armnn::IBackendInternal::IWorkloadFactoryPtr
std::unique_ptr< IWorkloadFactory > IWorkloadFactoryPtr
Definition: IBackendInternal.hpp:89
armnn::GpuFsaPreCompiledBlob::outputTensorInfos
std::unique_ptr< std::vector< arm_compute::ITensorInfo * > > outputTensorInfos
Definition: GpuFsaBackend.hpp:38
armnn::GpuFsaBackend::GpuFsaBackend
GpuFsaBackend()
Definition: GpuFsaBackend.hpp:57