ArmNN 22.02
ClImportTensorHandle.hpp
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <cl/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>

namespace armnn
{

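// ClImportTensorHandle wraps memory that was allocated outside ArmNN (host malloc,
// dma_buf, protected dma_buf, or an existing cl_mem from gralloc) and imports it into
// an arm_compute::CLTensor, so the tensor works on the caller's memory instead of
// allocating its own.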
class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

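    // Import externally allocated memory into this handle. Which sources are accepted
    // (Malloc, DmaBuf, DmaBufProtected, Gralloc) is controlled by m_ImportFlags.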
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBuf)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory, true);
            }
            // Case for importing memory allocated by OpenCl externally directly into the tensor
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor not yet allocated
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Importing memory allocated by OpenCl into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }

                // m_Tensor.buffer() initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }

                // m_Tensor.buffer() previously imported.
                else if (m_Imported)
                {
                    // Importing memory allocated by OpenCl into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

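    // Check whether 'memory' could be imported for 'source' without keeping the mapping:
    // for host (Malloc) memory it is trial-mapped with clImportMemoryARM and released again.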
    virtual bool CanBeImported(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                size_t totalBytes = m_Tensor.info()->total_size();

                // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
                // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
                // We do this to match the behaviour of the Import function later on.
                auto cachelineAlignment =
                    arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
                auto roundedSize = totalBytes;
                if (totalBytes % cachelineAlignment != 0)
                {
                    roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
                }
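                // e.g. with a 64-byte cache line and totalBytes == 100: 100 % 64 == 36,
                // so roundedSize == 64 + 100 - 36 == 128, the next multiple of 64.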

                cl_int error = CL_SUCCESS;
                cl_mem buffer;
                buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                           CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);

                // If we fail to map we know the import will not succeed and can return false.
                // There is no memory to be released if error is not CL_SUCCESS
                if (error != CL_SUCCESS)
                {
                    return false;
                }
                else
                {
                    // If import was successful we can release the mapping knowing import will succeed at workload
                    // execution and return true
                    error = clReleaseMemObject(buffer);
                    if (error == CL_SUCCESS)
                    {
                        return true;
                    }
                    else
                    {
                        // If we couldn't release the mapping this constitutes a memory leak and throw an exception
                        throw MemoryImportException("ClImportTensorHandle::Failed to unmap cl_mem buffer: "
                                                    + std::to_string(error));
                    }
                }
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
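    // Maps 'memory' into the CL context with clImportMemoryARM (CL_MEM_HOST_NO_ACCESS for
    // protected content, CL_MEM_READ_WRITE otherwise) and hands the resulting cl::Buffer
    // to the tensor's allocator. The mapping size is rounded up to the device cache-line size.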
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory" + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Check the returned Status for success; if not, throw an exception
        // with the Status error message
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }

        ARMNN_ASSERT(!m_Tensor.info()->is_resizable());
        return imported;
    }

    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};

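// ClImportSubTensorHandle exposes a region of a parent IClTensorHandle as an
// arm_compute::CLSubTensor. It owns no memory of its own: Allocate() and Manage() are
// no-ops and GetParent() returns the wrapping handle.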
class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
        : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
            }
        }
        this->Unmap();
    }

    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};

} // namespace armnn
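
Usage sketch (not part of the original header): a minimal illustration of the zero-copy path, assuming the CL backend's context has already been initialised. The tensor shape, buffer and function name below are invented for the example, and the Arm import extension may additionally require the host pointer to be suitably aligned.

#include <cl/ClImportTensorHandle.hpp>

#include <vector>

void WrapHostBufferExample()
{
    using namespace armnn;

    // Describe the tensor that will live in the caller-owned buffer.
    TensorInfo info({ 1, 2, 2, 4 }, DataType::Float32);

    // The caller owns this memory; the handle only wraps it, it never copies or frees it.
    std::vector<float> hostBuffer(info.GetNumElements(), 0.0f);

    // Declare that this handle is allowed to import host (malloc) memory.
    ClImportTensorHandle handle(info, static_cast<MemorySourceFlags>(MemorySource::Malloc));

    // Optional pre-flight check, then the actual import via clImportMemoryARM.
    if (handle.CanBeImported(hostBuffer.data(), MemorySource::Malloc))
    {
        handle.Import(hostBuffer.data(), MemorySource::Malloc);
    }
}

In practice ArmNN usually creates these handles through the CL backend's import tensor handle factory rather than by hand; the direct construction above only illustrates the CanBeImported/Import contract.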