From f4019872c1134c6fcc1d6993e5746f55c1e79208 Mon Sep 17 00:00:00 2001
From: Nikhil Raj
Date: Tue, 8 Mar 2022 20:01:38 +0000
Subject: IVGCVSW-6819 Fix the directory structure and broken link to latest docu

Signed-off-by: Nikhil Raj
Change-Id: I05b559d15faf92c76ff536719693b361316be4f3
---
 22.02/_cl_import_tensor_handle_8hpp_source.xhtml | 169 +++++++++++++++++++++++
 1 file changed, 169 insertions(+)
 create mode 100644 22.02/_cl_import_tensor_handle_8hpp_source.xhtml

(limited to '22.02/_cl_import_tensor_handle_8hpp_source.xhtml')

diff --git a/22.02/_cl_import_tensor_handle_8hpp_source.xhtml b/22.02/_cl_import_tensor_handle_8hpp_source.xhtml
new file mode 100644
index 0000000000..19fa23108e
--- /dev/null
+++ b/22.02/_cl_import_tensor_handle_8hpp_source.xhtml
@@ -0,0 +1,169 @@

ArmNN: src/backends/cl/ClImportTensorHandle.hpp Source File
ClImportTensorHandle.hpp
//
// Copyright © 2021 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//

#pragma once

#include <aclCommon/ArmComputeTensorHandle.hpp>
#include <aclCommon/ArmComputeTensorUtils.hpp>

#include <Half.hpp>

#include <armnn/utility/Assert.hpp>

#include <arm_compute/runtime/CL/CLTensor.h>
#include <arm_compute/runtime/CL/CLSubTensor.h>
#include <arm_compute/runtime/IMemoryGroup.h>
#include <arm_compute/runtime/MemoryGroup.h>
#include <arm_compute/core/TensorShape.h>
#include <arm_compute/core/Coordinates.h>

#include <cl/IClTensorHandle.hpp>

#include <CL/cl_ext.h>
#include <arm_compute/core/CL/CLKernelLibrary.h>

namespace armnn
{

class ClImportTensorHandle : public IClTensorHandle
{
public:
    ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
    }

    ClImportTensorHandle(const TensorInfo& tensorInfo,
                         DataLayout dataLayout,
                         MemorySourceFlags importFlags)
        : m_ImportFlags(importFlags), m_Imported(false)
    {
        armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
    }

    arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }

    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return nullptr; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

    void SetImportFlags(MemorySourceFlags importFlags)
    {
        m_ImportFlags = importFlags;
    }

    MemorySourceFlags GetImportFlags() const override
    {
        return m_ImportFlags;
    }

    virtual bool Import(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBuf)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory);
            }
            if (source == MemorySource::DmaBufProtected)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_DMA_BUF_ARM,
                    CL_IMPORT_TYPE_PROTECTED_ARM,
                    CL_TRUE,
                    0
                };

                return ClImport(importProperties, memory, true);
            }
            // Case for importing memory allocated externally by OpenCL directly into the tensor.
            else if (source == MemorySource::Gralloc)
            {
                // m_Tensor not yet allocated.
                if (!m_Imported && !m_Tensor.buffer())
                {
                    // Importing memory allocated by OpenCL into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }

                // m_Tensor.buffer() initially allocated with Allocate().
                else if (!m_Imported && m_Tensor.buffer())
                {
                    throw MemoryImportException(
                        "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
                }

                // m_Tensor.buffer() previously imported.
                else if (m_Imported)
                {
                    // Importing memory allocated by OpenCL into the tensor directly.
                    arm_compute::Status status =
                        m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
                    m_Imported = bool(status);
                    if (!m_Imported)
                    {
                        throw MemoryImportException(status.error_description());
                    }
                    return m_Imported;
                }
                else
                {
                    throw MemoryImportException("ClImportTensorHandle::Failed to Import Gralloc Memory");
                }
            }
            else
            {
                throw MemoryImportException("ClImportTensorHandle::Import flag is not supported");
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
    }

    virtual bool CanBeImported(void* memory, MemorySource source) override
    {
        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
        {
            if (source == MemorySource::Malloc)
            {
                const cl_import_properties_arm importProperties[] =
                {
                    CL_IMPORT_TYPE_ARM,
                    CL_IMPORT_TYPE_HOST_ARM,
                    0
                };

                size_t totalBytes = m_Tensor.info()->total_size();

                // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
                // This does not change the size of the buffer, only the size of the mapping created over it.
                // We do this to match the behaviour of the Import function later on.
                auto cachelineAlignment =
                    arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
                auto roundedSize = totalBytes;
                if (totalBytes % cachelineAlignment != 0)
                {
                    roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
                }
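                // Worked example with illustrative numbers (not from the original source):
                // if totalBytes = 100 and cachelineAlignment = 64, then 100 % 64 == 36, so
                // roundedSize = 64 + 100 - 36 = 128, the next 64-byte multiple above 100.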

                cl_int error = CL_SUCCESS;
                cl_mem buffer;
                buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                           CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);

                // If we fail to map, we know the import will not succeed and can return false.
                // There is no memory to be released if error is not CL_SUCCESS.
                if (error != CL_SUCCESS)
                {
                    return false;
                }
                else
                {
                    // If the import was successful, we can release the mapping knowing the import
                    // will succeed at workload execution, and return true.
                    error = clReleaseMemObject(buffer);
                    if (error == CL_SUCCESS)
                    {
                        return true;
                    }
                    else
                    {
                        // If we couldn't release the mapping, this constitutes a memory leak, so throw an exception.
                        throw MemoryImportException("ClImportTensorHandle::Failed to unmap cl_mem buffer: "
                                                    + std::to_string(error));
                    }
                }
            }
        }
        else
        {
            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
        }
        return false;
    }

private:
    bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
    {
        size_t totalBytes = m_Tensor.info()->total_size();

        // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
        // This does not change the size of the buffer, only the size of the mapping created over it.
        auto cachelineAlignment =
            arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
        auto roundedSize = totalBytes;
        if (totalBytes % cachelineAlignment != 0)
        {
            roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
        }

        cl_int error = CL_SUCCESS;
        cl_mem buffer;
        if (isProtected)
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
        }
        else
        {
            buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
                                       CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
        }

        if (error != CL_SUCCESS)
        {
            throw MemoryImportException("ClImportTensorHandle::Invalid imported memory: " + std::to_string(error));
        }

        cl::Buffer wrappedBuffer(buffer);
        arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer);

        // Use the overloaded bool operator of Status to check if it is success; if not, throw an exception
        // with the Status error message.
        bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
        if (!imported)
        {
            throw MemoryImportException(status.error_description());
        }

        ARMNN_ASSERT(!m_Tensor.info()->is_resizable());
        return imported;
    }
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<armnn::ClImportTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<armnn::ClImportTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    arm_compute::CLTensor m_Tensor;
    MemorySourceFlags m_ImportFlags;
    bool m_Imported;
};

class ClImportSubTensorHandle : public IClTensorHandle
{
public:
    ClImportSubTensorHandle(IClTensorHandle* parent,
                            const arm_compute::TensorShape& shape,
                            const arm_compute::Coordinates& coords)
        : m_Tensor(&parent->GetTensor(), shape, coords)
    {
        parentHandle = parent;
    }

    arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
    arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }

    virtual void Allocate() override {}
    virtual void Manage() override {}

    virtual const void* Map(bool blocking = true) const override
    {
        IgnoreUnused(blocking);
        return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
    }
    virtual void Unmap() const override {}

    virtual ITensorHandle* GetParent() const override { return parentHandle; }

    virtual arm_compute::DataType GetDataType() const override
    {
        return m_Tensor.info()->data_type();
    }

    virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
    {
        IgnoreUnused(memoryGroup);
    }

    TensorShape GetStrides() const override
    {
        return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
    }

    TensorShape GetShape() const override
    {
        return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
    }

private:
    // Only used for testing
    void CopyOutTo(void* memory) const override
    {
        const_cast<ClImportSubTensorHandle*>(this)->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<float*>(memory));
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<uint8_t*>(memory));
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<armnn::Half*>(memory));
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int8_t*>(memory));
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int16_t*>(memory));
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
                                                                 static_cast<int32_t*>(memory));
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        const_cast<ClImportSubTensorHandle*>(this)->Unmap();
    }

    // Only used for testing
    void CopyInFrom(const void* memory) override
    {
        this->Map(true);
        switch(this->GetDataType())
        {
            case arm_compute::DataType::F32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::U8:
            case arm_compute::DataType::QASYMM8:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::F16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
            case arm_compute::DataType::QASYMM8_SIGNED:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S16:
            case arm_compute::DataType::QSYMM16:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
                                                                 this->GetTensor());
                break;
            case arm_compute::DataType::S32:
                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
                                                                 this->GetTensor());
                break;
            default:
            {
                throw armnn::UnimplementedException();
            }
        }
        this->Unmap();
    }

    mutable arm_compute::CLSubTensor m_Tensor;
    ITensorHandle* parentHandle = nullptr;
};

} // namespace armnn
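
Below is a minimal usage sketch of the import path above: wrapping a host-allocated
buffer in a ClImportTensorHandle with zero copies. It assumes an OpenCL context has
already been initialised (ClImport relies on CLKernelLibrary and the
cl_arm_import_memory extension), and every name outside the ArmNN and ACL types
(ImportHostBufferExample, the tensor shape, the 64-byte alignment) is an
illustrative assumption, not part of the file above.

// Sketch only: runtime setup omitted; Import() throws MemoryImportException on failure.
#include <armnn/Tensor.hpp>
#include <armnn/Types.hpp>
#include <cl/ClImportTensorHandle.hpp> // internal header; assumes src/backends is on the include path

#include <cstdlib>

void ImportHostBufferExample()
{
    using namespace armnn;

    // 1x2x2x8 FP32 tensor: 32 elements = 128 bytes.
    TensorInfo info(TensorShape({ 1, 2, 2, 8 }), DataType::Float32);

    // Only allow imports from host (malloc-style) memory.
    ClImportTensorHandle handle(info, static_cast<MemorySourceFlags>(MemorySource::Malloc));

    // The import path maps whole cachelines, so allocate a cacheline-aligned,
    // cacheline-rounded buffer; 64 bytes is an assumed typical cacheline size.
    constexpr size_t alignment = 64;
    size_t sizeBytes   = info.GetNumBytes();
    size_t alignedSize = ((sizeBytes + alignment - 1) / alignment) * alignment;
    void* buffer = std::aligned_alloc(alignment, alignedSize);

    // CanBeImported() performs a trial clImportMemoryARM mapping and releases it again.
    if (handle.CanBeImported(buffer, MemorySource::Malloc))
    {
        // Zero-copy: the CL tensor is now backed directly by `buffer`.
        handle.Import(buffer, MemorySource::Malloc);
    }

    // ... enqueue workloads that read or write the tensor ...

    std::free(buffer);
}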