ClImportTensorHandle.hpp

//
// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <armnn/Exceptions.hpp>
#include <armnn/utility/Assert.hpp>
#include <armnn/utility/IgnoreUnused.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <cl/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>

namespace armnn
{

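// Tensor handle backed by an arm_compute::CLTensor whose storage is imported from
// an external source (host malloc, dma_buf, protected dma_buf or gralloc) rather
// than allocated by the runtime.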
class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

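    // Import() validates the requested source against m_ImportFlags, builds the
    // matching zero-terminated cl_import_properties_arm list and hands it to
    // ClImport(), which calls the clImportMemoryARM() extension. Gralloc buffers
    // arrive as cl_mem objects and are imported into the tensor allocator directly.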
    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };
                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBuf)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };
                return ClImport(importProperties, memory, true);
            }
            // Case for importing memory allocated externally by OpenCL directly into the tensor.
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor not yet allocated.
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Import the externally allocated OpenCL memory directly into the tensor.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                // m_Tensor.buffer() initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }
                // m_Tensor.buffer() previously imported.
                else if (m_Imported)
                {
                    // Re-import the externally allocated OpenCL memory into the tensor.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

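    // Cheap feasibility check used before attempting Import(); only host (Malloc)
    // memory is pre-approved here, and the final decision is still made by
    // ClImport() at import time.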
    virtual bool CanBeImported(void* /*memory*/, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                // Return true: ClImport() will decide whether the memory can actually be imported.
                return true;
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
        // This does not change the size of the buffer, only the size of the mapping created over it.
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }
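        // Example: with totalBytes = 1000 and a 64-byte cacheline,
        // roundedSize = 64 + 1000 - (1000 % 64) = 64 + 1000 - 40 = 1024.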

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Check the Status error code; if the import did not succeed, throw an
        // exception carrying the Status error message.
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }

        ARMNN_ASSERT(!m_Tensor.info()->is_resizable());
        return imported;
    }

    // Only used for testing.
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing.
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};

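// Sub-tensor view over a ClImportTensorHandle: wraps an arm_compute::CLSubTensor
// that aliases a region of the parent tensor's (imported) storage, so it neither
// allocates nor imports memory itself.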
class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
        : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing.
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing.
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};

} // namespace armnn
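
A minimal sketch of how a handle like this is typically driven. It assumes an
OpenCL context has already been set up for Arm Compute Library (for example via
armnn::ClContextControl, as the CL backend does internally), that the driver
exposes the cl_arm_import_memory extension, and that the buffer name
`alignedBuffer` is an illustrative choice, not part of this API:

    #include <cstdlib>

    // Describe a 1x16 FP32 tensor and create a handle that accepts host imports.
    armnn::TensorInfo info({ 1, 16 }, armnn::DataType::Float32);
    armnn::ClImportTensorHandle handle(
        info, static_cast<armnn::MemorySourceFlags>(armnn::MemorySource::Malloc));

    // The imported pointer should be suitably aligned; page alignment is a safe
    // over-approximation of the device cacheline requirement.
    void* alignedBuffer = nullptr;
    if (posix_memalign(&alignedBuffer, 4096, info.GetNumBytes()) == 0)
    {
        // Zero-copy: the CLTensor now maps the host allocation instead of memory
        // owned by the runtime. Throws MemoryImportException on failure.
        bool ok = handle.Import(alignedBuffer, armnn::MemorySource::Malloc);
        (void)ok;
    }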