// NOTE(review): this region is a mangled line-numbered extraction of ArmNN's
// ClImportTensorHandle.hpp. The embedded numbers (15, 16, ...) are original line
// numbers fused into the text and many original lines are missing; code tokens are
// kept byte-identical below and only annotated.
// Fragment: Arm Compute Library runtime/core includes plus <CL/cl_ext.h> (which
// declares the cl_arm_import_memory entry points used further down), followed by
// the tail of the first ClImportTensorHandle constructor's initialiser list.
15 #include <arm_compute/runtime/CL/CLTensor.h> 16 #include <arm_compute/runtime/CL/CLSubTensor.h> 17 #include <arm_compute/runtime/IMemoryGroup.h> 18 #include <arm_compute/runtime/MemoryGroup.h> 19 #include <arm_compute/core/TensorShape.h> 20 #include <arm_compute/core/Coordinates.h> 24 #include <CL/cl_ext.h> 25 #include <arm_compute/core/CL/CLKernelLibrary.h> 34 : m_ImportFlags(importFlags)
// First constructor body: build the ACL tensor metadata from the armnn TensorInfo.
36 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
// Second constructor (TensorInfo + DataLayout overload): handle starts un-imported.
42 : m_ImportFlags(importFlags), m_Imported(false)
// Body: same metadata build, forwarding the caller-supplied data layout.
44 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
// Mutable accessor for the wrapped ACL tensor (IClTensorHandle interface).
47 arm_compute::CLTensor&
GetTensor()
override {
return m_Tensor; }
// Const overload of the same accessor.
48 arm_compute::CLTensor
const&
GetTensor()
const override {
return m_Tensor; }
// Map(): returns a pointer to the first element — the ACL buffer base plus the
// info()'s first-element byte offset. The `blocking` flag is not used in the
// visible fragment; presumably imported memory is always host-visible — TODO confirm.
52 virtual const void*
Map(
bool blocking =
true)
const override 55 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
// Unmap(): intentionally a no-op for imported memory.
58 virtual void Unmap()
const override {}
// GetDataType(): forwards the ACL element data type.
64 return m_Tensor.info()->data_type();
// SetMemoryGroup(): body is in missing lines; the parameter is ignored for
// imported memory — presumably via IgnoreUnused — TODO confirm.
67 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override 74 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
// GetShape(): converts the ACL tensor shape into an armnn TensorShape.
79 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
// SetImportFlags(): replaces the advertised set of importable memory sources.
84 m_ImportFlags = importFlags;
// Import(void* memory, MemorySource source): only proceed when `source` is in the
// advertised import-flag set.
94 if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
// Property list for a plain host pointer import (presumably MemorySource::Malloc —
// the case labels are in missing lines).
98 const cl_import_properties_arm importProperties[] =
101 CL_IMPORT_TYPE_HOST_ARM,
104 return ClImport(importProperties, memory);
// Property list for a dma_buf import with host data consistency.
108 const cl_import_properties_arm importProperties[] =
111 CL_IMPORT_TYPE_DMA_BUF_ARM,
112 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
117 return ClImport(importProperties, memory);
// Property list for a protected dma_buf import; note the extra `true`
// (isProtected) argument to ClImport.
122 const cl_import_properties_arm importProperties[] =
125 CL_IMPORT_TYPE_DMA_BUF_ARM,
126 CL_IMPORT_TYPE_PROTECTED_ARM,
131 return ClImport(importProperties, memory,
true);
// Separate path (enclosing condition is in missing lines): import an already
// existing cl_mem handle directly into the ACL allocator, but only if nothing has
// been imported yet and the tensor is still unallocated.
138 if (!m_Imported && !m_Tensor.buffer())
142 m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
// `status` is produced by a missing line — presumably the arm_compute::Status
// returned by import_memory — TODO confirm.
143 m_Imported = bool(status);
// Importing over an already allocated (non-imported) tensor is an error.
152 else if (!m_Imported && m_Tensor.buffer())
155 "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
// Re-import path (tensor previously imported): replace the underlying cl_mem.
163 m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
164 m_Imported = bool(status);
// CanBeImported() fragment: same flag test as Import() — a source is importable
// iff its bit is set in m_ImportFlags (remaining body is in missing lines).
189 if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
// Private helper wrapping clImportMemoryARM (cl_arm_import_memory extension).
// Imports `memory` with the given properties; `isProtected` selects the
// protected-content path.
205 bool ClImport(
const cl_import_properties_arm* importProperties,
void* memory,
bool isProtected =
false)
207 size_t totalBytes = m_Tensor.info()->total_size();
// Round the import size up to the next multiple of the device's global memory
// cacheline size — presumably to satisfy the import extension's size/alignment
// requirements; verify against the cl_arm_import_memory spec.
211 auto cachelineAlignment =
212 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
213 auto roundedSize = totalBytes;
214 if (totalBytes % cachelineAlignment != 0)
216 roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
219 cl_int
error = CL_SUCCESS;
// Protected import: host must not access the buffer (the isProtected branch
// itself is in missing lines).
223 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().
get(),
224 CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
// Normal import: read/write access.
228 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().
get(),
229 CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
232 if (error != CL_SUCCESS)
// NOTE(review): message concatenates the error code with no separator
// ("...memory123"); consider "...memory: " — not changed in this doc-only pass.
234 throw MemoryImportException(
"ClImportTensorHandle::Invalid imported memory" + std::to_string(error));
// Wrap the raw cl_mem and (in missing lines) hand it to the ACL allocator;
// `status` presumably comes from that import_memory call — TODO confirm.
237 cl::Buffer wrappedBuffer(buffer);
242 bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
// CopyOutTo: copy the tensor contents to `memory`, dispatching on the ACL data
// type to pick the destination element type (the switch header, breaks and
// default clause are in missing lines).
252 void CopyOutTo(
void* memory)
const override 257 case arm_compute::DataType::F32:
258 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
259 static_cast<float*>(memory));
// Unsigned 8-bit (raw and asymmetric-quantised) share one copy.
261 case arm_compute::DataType::U8:
262 case arm_compute::DataType::QASYMM8:
263 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
264 static_cast<uint8_t*>(memory));
// Signed 8-bit quantised types.
266 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
267 case arm_compute::DataType::QASYMM8_SIGNED:
268 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
269 static_cast<int8_t*>(memory));
271 case arm_compute::DataType::F16:
272 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
273 static_cast<armnn::Half*>(memory));
// 16-bit signed (raw and symmetric-quantised).
275 case arm_compute::DataType::S16:
276 case arm_compute::DataType::QSYMM16:
277 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
278 static_cast<int16_t*>(memory));
280 case arm_compute::DataType::S32:
281 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
282 static_cast<int32_t*>(memory));
// CopyInFrom: copy `memory` into the tensor, dispatching on the ACL data type
// (switch header, breaks and default clause are in missing lines).
293 void CopyInFrom(
const void* memory)
override 298 case arm_compute::DataType::F32:
299 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
302 case arm_compute::DataType::U8:
303 case arm_compute::DataType::QASYMM8:
304 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
307 case arm_compute::DataType::F16:
308 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
// NOTE(review): S16 falls through to the int8_t copy here (original lines 311-314
// are consecutive). CopyOutTo above and the subtensor's CopyInFrom both group S16
// with QSYMM16 as int16_t — this looks like a bug; confirm against upstream.
311 case arm_compute::DataType::S16:
312 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
313 case arm_compute::DataType::QASYMM8_SIGNED:
314 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
317 case arm_compute::DataType::QSYMM16:
318 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
321 case arm_compute::DataType::S32:
322 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
// The owned ACL tensor whose storage is backed by imported memory.
333 arm_compute::CLTensor m_Tensor;
// ClImportSubTensorHandle constructor fragment: builds a CLSubTensor that views a
// region (shape at coords) of the parent handle's tensor, and records the parent
// so GetParent() can return it.
342 const arm_compute::TensorShape& shape,
344 : m_Tensor(&parent->
GetTensor(), shape, coords)
346 parentHandle = parent;
// Mutable accessor for the wrapped ACL sub-tensor.
349 arm_compute::CLSubTensor&
GetTensor()
override {
return m_Tensor; }
// Const overload of the same accessor.
350 arm_compute::CLSubTensor
const&
GetTensor()
const override {
return m_Tensor; }
// Sub-tensor Map(): same scheme as the parent handle — buffer base plus
// first-element offset; `blocking` unused in the visible fragment.
355 virtual const void*
Map(
bool blocking =
true)
const override 358 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
// Unmap(): intentionally a no-op.
360 virtual void Unmap()
const override {}
// GetDataType(): forwards the ACL element data type.
366 return m_Tensor.info()->data_type();
// SetMemoryGroup(): body in missing lines; sub-tensors don't own memory, so the
// parameter is presumably ignored — TODO confirm.
369 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override 376 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
// GetShape(): converts the ACL sub-tensor shape into an armnn TensorShape.
381 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
// Sub-tensor CopyOutTo: per-data-type copy to `memory` (switch header, breaks and
// default clause are in missing lines).
386 void CopyOutTo(
void* memory)
const override 391 case arm_compute::DataType::F32:
392 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
393 static_cast<float*>(memory));
395 case arm_compute::DataType::U8:
396 case arm_compute::DataType::QASYMM8:
397 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
398 static_cast<uint8_t*>(memory));
400 case arm_compute::DataType::F16:
401 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
402 static_cast<armnn::Half*>(memory));
404 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
405 case arm_compute::DataType::QASYMM8_SIGNED:
406 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
407 static_cast<int8_t*>(memory));
408 409 case arm_compute::DataType::S16: (see next line)
410 case arm_compute::DataType::QSYMM16:
411 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
412 static_cast<int16_t*>(memory));
414 case arm_compute::DataType::S32:
415 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
416 static_cast<int32_t*>(memory));
// Sub-tensor CopyInFrom: per-data-type copy from `memory` into the sub-tensor.
// Note S16 is correctly grouped with QSYMM16 (int16_t) here, unlike the parent
// handle's CopyInFrom above.
427 void CopyInFrom(
const void* memory)
override 432 case arm_compute::DataType::F32:
433 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
436 case arm_compute::DataType::U8:
437 case arm_compute::DataType::QASYMM8:
438 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
441 case arm_compute::DataType::F16:
442 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
445 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
446 case arm_compute::DataType::QASYMM8_SIGNED:
447 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
450 case arm_compute::DataType::S16:
451 case arm_compute::DataType::QSYMM16:
452 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
455 case arm_compute::DataType::S32:
456 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
// The wrapped ACL sub-tensor; `mutable` presumably so the const Map() path can
// touch it — TODO confirm against the missing surrounding code.
467 mutable arm_compute::CLSubTensor m_Tensor;
ClImportTensorHandle(const TensorInfo &tensorInfo, MemorySourceFlags importFlags)
ClImportTensorHandle(const TensorInfo &tensorInfo, DataLayout dataLayout, MemorySourceFlags importFlags)
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
arm_compute::CLTensor const & GetTensor() const override
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
virtual arm_compute::DataType GetDataType() const override
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
virtual void Manage() override
Indicate to the memory manager that this resource is active.
virtual void Unmap() const override
Unmap the tensor data.
arm_compute::CLTensor & GetTensor() override
unsigned int MemorySourceFlags
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
virtual void Manage() override
Indicate to the memory manager that this resource is active.
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
void SetImportFlags(MemorySourceFlags importFlags)
MemorySourceFlags GetImportFlags() const override
Get flags describing supported import sources.
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
virtual bool CanBeImported(void *, MemorySource source) override
Implementations must determine if this memory block can be imported.
virtual bool Import(void *memory, MemorySource source) override
Import externally allocated memory.
#define ARMNN_ASSERT(COND)
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
ClImportSubTensorHandle(IClTensorHandle *parent, const arm_compute::TensorShape &shape, const arm_compute::Coordinates &coords)
MemorySource
Define the Memory Source to reduce copies.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
arm_compute::CLSubTensor & GetTensor() override
arm_compute::CLSubTensor const & GetTensor() const override
virtual void Unmap() const override
Unmap the tensor data.
virtual arm_compute::DataType GetDataType() const override
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.