15 #include <arm_compute/runtime/CL/CLTensor.h> 16 #include <arm_compute/runtime/CL/CLSubTensor.h> 17 #include <arm_compute/runtime/IMemoryGroup.h> 18 #include <arm_compute/runtime/MemoryGroup.h> 19 #include <arm_compute/core/TensorShape.h> 20 #include <arm_compute/core/Coordinates.h> 24 #include <CL/cl_ext.h> 25 #include <arm_compute/core/CL/CLKernelLibrary.h> 34 : m_ImportFlags(importFlags)
36 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
42 : m_ImportFlags(importFlags), m_Imported(false)
44 armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
// Mutable accessor for the wrapped Arm Compute CLTensor.
47 arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
// Read-only accessor for the wrapped Arm Compute CLTensor.
48 arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
52 virtual const void*
Map(
bool blocking =
true)
const override 55 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
// No-op: Map() hands out the tensor's buffer pointer directly, so there is
// nothing to release here.
58 virtual void Unmap() const override {}
64 return m_Tensor.info()->data_type();
67 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override 74 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
79 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
84 m_ImportFlags = importFlags;
94 if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
98 const cl_import_properties_arm importProperties[] =
101 CL_IMPORT_TYPE_HOST_ARM,
105 return ClImport(importProperties, memory);
109 const cl_import_properties_arm importProperties[] =
112 CL_IMPORT_TYPE_DMA_BUF_ARM,
113 CL_IMPORT_DMA_BUF_DATA_CONSISTENCY_WITH_HOST_ARM,
118 return ClImport(importProperties, memory);
123 const cl_import_properties_arm importProperties[] =
126 CL_IMPORT_TYPE_DMA_BUF_ARM,
127 CL_IMPORT_TYPE_PROTECTED_ARM,
132 return ClImport(importProperties, memory,
true);
139 if (!m_Imported && !m_Tensor.buffer())
143 m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
144 m_Imported = bool(status);
153 else if (!m_Imported && m_Tensor.buffer())
156 "ClImportTensorHandle::Import Attempting to import on an already allocated tensor");
164 m_Tensor.allocator()->import_memory(cl::Buffer(static_cast<cl_mem>(memory)));
165 m_Imported = bool(status);
190 if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
194 const cl_import_properties_arm importProperties[] =
197 CL_IMPORT_TYPE_HOST_ARM,
201 size_t totalBytes = m_Tensor.info()->total_size();
206 auto cachelineAlignment =
207 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
208 auto roundedSize = totalBytes;
209 if (totalBytes % cachelineAlignment != 0)
211 roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
214 cl_int
error = CL_SUCCESS;
216 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().
get(),
217 CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
221 if (error != CL_SUCCESS)
229 error = clReleaseMemObject(buffer);
230 if (error == CL_SUCCESS)
238 + std::to_string(error));
251 bool ClImport(
const cl_import_properties_arm* importProperties,
void* memory,
bool isProtected =
false)
253 size_t totalBytes = m_Tensor.info()->total_size();
257 auto cachelineAlignment =
258 arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
259 auto roundedSize = totalBytes;
260 if (totalBytes % cachelineAlignment != 0)
262 roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
265 cl_int
error = CL_SUCCESS;
269 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().
get(),
270 CL_MEM_HOST_NO_ACCESS, importProperties, memory, roundedSize, &error);
274 buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().
get(),
275 CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
278 if (error != CL_SUCCESS)
280 throw MemoryImportException(
"ClImportTensorHandle::Invalid imported memory" + std::to_string(error));
283 cl::Buffer wrappedBuffer(buffer);
288 bool imported = (status.error_code() == arm_compute::ErrorCode::OK);
298 void CopyOutTo(
void* memory)
const override 303 case arm_compute::DataType::F32:
304 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
305 static_cast<float*>(memory));
307 case arm_compute::DataType::U8:
308 case arm_compute::DataType::QASYMM8:
309 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
310 static_cast<uint8_t*>(memory));
312 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
313 case arm_compute::DataType::QASYMM8_SIGNED:
314 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
315 static_cast<int8_t*>(memory));
317 case arm_compute::DataType::F16:
318 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
319 static_cast<armnn::Half*>(memory));
321 case arm_compute::DataType::S16:
322 case arm_compute::DataType::QSYMM16:
323 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
324 static_cast<int16_t*>(memory));
326 case arm_compute::DataType::S32:
327 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
328 static_cast<int32_t*>(memory));
339 void CopyInFrom(
const void* memory)
override 344 case arm_compute::DataType::F32:
345 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
348 case arm_compute::DataType::U8:
349 case arm_compute::DataType::QASYMM8:
350 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
353 case arm_compute::DataType::F16:
354 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
357 case arm_compute::DataType::S16:
358 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
359 case arm_compute::DataType::QASYMM8_SIGNED:
360 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
363 case arm_compute::DataType::QSYMM16:
364 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
367 case arm_compute::DataType::S32:
368 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
379 arm_compute::CLTensor m_Tensor;
388 const arm_compute::TensorShape& shape,
390 : m_Tensor(&parent->
GetTensor(), shape, coords)
392 parentHandle = parent;
// Mutable accessor for the wrapped Arm Compute CLSubTensor.
// (m_Tensor is declared mutable so the const overload below can also return it.)
395 arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
// Read-only accessor for the wrapped Arm Compute CLSubTensor.
396 arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }
401 virtual const void*
Map(
bool blocking =
true)
const override 404 return static_cast<const void*
>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
// No-op: Map() hands out the sub-tensor's buffer pointer directly, so there is
// nothing to release here.
406 virtual void Unmap() const override {}
412 return m_Tensor.info()->data_type();
415 virtual void SetMemoryGroup(
const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup)
override 422 return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
427 return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
432 void CopyOutTo(
void* memory)
const override 437 case arm_compute::DataType::F32:
438 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
439 static_cast<float*>(memory));
441 case arm_compute::DataType::U8:
442 case arm_compute::DataType::QASYMM8:
443 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
444 static_cast<uint8_t*>(memory));
446 case arm_compute::DataType::F16:
447 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
448 static_cast<armnn::Half*>(memory));
450 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
451 case arm_compute::DataType::QASYMM8_SIGNED:
452 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
453 static_cast<int8_t*>(memory));
455 case arm_compute::DataType::S16:
456 case arm_compute::DataType::QSYMM16:
457 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
458 static_cast<int16_t*>(memory));
460 case arm_compute::DataType::S32:
461 armcomputetensorutils::CopyArmComputeITensorData(this->
GetTensor(),
462 static_cast<int32_t*>(memory));
473 void CopyInFrom(
const void* memory)
override 478 case arm_compute::DataType::F32:
479 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
482 case arm_compute::DataType::U8:
483 case arm_compute::DataType::QASYMM8:
484 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
487 case arm_compute::DataType::F16:
488 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
491 case arm_compute::DataType::QSYMM8_PER_CHANNEL:
492 case arm_compute::DataType::QASYMM8_SIGNED:
493 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
496 case arm_compute::DataType::S16:
497 case arm_compute::DataType::QSYMM16:
498 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
501 case arm_compute::DataType::S32:
502 armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
513 mutable arm_compute::CLSubTensor m_Tensor;
ClImportTensorHandle(const TensorInfo &tensorInfo, MemorySourceFlags importFlags)
ClImportTensorHandle(const TensorInfo &tensorInfo, DataLayout dataLayout, MemorySourceFlags importFlags)
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
arm_compute::CLTensor const & GetTensor() const override
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
virtual bool CanBeImported(void *memory, MemorySource source) override
Implementations must determine if this memory block can be imported.
virtual arm_compute::DataType GetDataType() const override
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
std::array< unsigned int, MaxNumOfTensorDimensions > Coordinates
virtual void Manage() override
Indicate to the memory manager that this resource is active.
virtual void Unmap() const override
Unmap the tensor data.
arm_compute::CLTensor & GetTensor() override
unsigned int MemorySourceFlags
Copyright (c) 2021 ARM Limited and Contributors.
void IgnoreUnused(Ts &&...)
virtual void Manage() override
Indicate to the memory manager that this resource is active.
virtual void SetMemoryGroup(const std::shared_ptr< arm_compute::IMemoryGroup > &memoryGroup) override
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.
void SetImportFlags(MemorySourceFlags importFlags)
MemorySourceFlags GetImportFlags() const override
Get flags describing supported import sources.
virtual const void * Map(bool blocking=true) const override
Map the tensor data for access.
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
virtual bool Import(void *memory, MemorySource source) override
Import externally allocated memory.
#define ARMNN_ASSERT(COND)
TensorShape GetShape() const override
Get the number of elements for each dimension ordered from slowest iterating dimension to fastest iterating dimension.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
ClImportSubTensorHandle(IClTensorHandle *parent, const arm_compute::TensorShape &shape, const arm_compute::Coordinates &coords)
MemorySource
Define the Memory Source to reduce copies.
TensorShape GetStrides() const override
Get the strides for each dimension ordered from largest to smallest where the smallest value is the same as the size of a single element in the tensor.
virtual ITensorHandle * GetParent() const override
Get the parent tensor if this is a subtensor.
arm_compute::CLSubTensor & GetTensor() override
arm_compute::CLSubTensor const & GetTensor() const override
virtual void Unmap() const override
Unmap the tensor data.
virtual arm_compute::DataType GetDataType() const override
virtual void Allocate() override
Indicate to the memory manager that this resource is no longer active.