// // Copyright © 2017-2024 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once #include #include #include #include #include #include #include #include #include #include #include #include #include "armnn/TypesUtils.hpp" namespace armnn { class NeonTensorHandleDecorator; class NeonTensorHandle : public IAclTensorHandle { public: NeonTensorHandle(const TensorInfo& tensorInfo) : m_ImportFlags(static_cast(MemorySource::Malloc)), m_Imported(false), m_IsImportEnabled(false), m_TypeAlignment(GetDataTypeSize(tensorInfo.GetDataType())) { armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); } NeonTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout, MemorySourceFlags importFlags = static_cast(MemorySource::Malloc)) : m_ImportFlags(importFlags), m_Imported(false), m_IsImportEnabled(false), m_TypeAlignment(GetDataTypeSize(tensorInfo.GetDataType())) { armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); } arm_compute::ITensor& GetTensor() override { return m_Tensor; } arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } virtual void Allocate() override { // If we have enabled Importing, don't Allocate the tensor if (!m_IsImportEnabled) { armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); } }; virtual void Manage() override { // If we have enabled Importing, don't manage the tensor if (!m_IsImportEnabled) { ARMNN_THROW_INVALIDARG_MSG_IF_FALSE(m_MemoryGroup, "arm_compute::MemoryGroup is null."); m_MemoryGroup->manage(&m_Tensor); } } virtual ITensorHandle* GetParent() const override { return nullptr; } virtual arm_compute::DataType GetDataType() const override { return m_Tensor.info()->data_type(); } virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) override { m_MemoryGroup = PolymorphicPointerDowncast(memoryGroup); } virtual const void* Map(bool /* blocking = true */) const override { return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); } virtual void Unmap() const override {} TensorShape GetStrides() const override { return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); } TensorShape GetShape() const override { return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); } void SetImportFlags(MemorySourceFlags importFlags) { m_ImportFlags = importFlags; } MemorySourceFlags GetImportFlags() const override { return m_ImportFlags; } void SetImportEnabledFlag(bool importEnabledFlag) { m_IsImportEnabled = importEnabledFlag; } bool CanBeImported(void* memory, MemorySource source) override { if (source != MemorySource::Malloc || reinterpret_cast(memory) % m_TypeAlignment) { return false; } return true; } virtual bool Import(void* memory, MemorySource source) override { if (m_ImportFlags& static_cast(source)) { if (source == MemorySource::Malloc && m_IsImportEnabled) { if (!CanBeImported(memory, source)) { throw MemoryImportException("NeonTensorHandle::Import Attempting to import unaligned memory"); } // m_Tensor not yet Allocated if (!m_Imported && !m_Tensor.buffer()) { arm_compute::Status status = m_Tensor.allocator()->import_memory(memory); // Use the overloaded bool operator of Status to check if it worked, if not throw an exception // with the Status error message m_Imported = bool(status); if (!m_Imported) { throw MemoryImportException(status.error_description()); } return m_Imported; } // m_Tensor.buffer() initially allocated with Allocate(). if (!m_Imported && m_Tensor.buffer()) { throw MemoryImportException( "NeonTensorHandle::Import Attempting to import on an already allocated tensor"); } // m_Tensor.buffer() previously imported. if (m_Imported) { arm_compute::Status status = m_Tensor.allocator()->import_memory(memory); // Use the overloaded bool operator of Status to check if it worked, if not throw an exception // with the Status error message m_Imported = bool(status); if (!m_Imported) { throw MemoryImportException(status.error_description()); } return m_Imported; } } else { throw MemoryImportException("NeonTensorHandle::Import is disabled"); } } else { throw MemoryImportException("NeonTensorHandle::Incorrect import flag"); } return false; } virtual std::shared_ptr DecorateTensorHandle(const TensorInfo& tensorInfo) override; private: // Only used for testing void CopyOutTo(void* memory) const override { switch (this->GetDataType()) { case arm_compute::DataType::F32: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::U8: case arm_compute::DataType::QASYMM8: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::QSYMM8: case arm_compute::DataType::QASYMM8_SIGNED: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::BFLOAT16: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::F16: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::S16: case arm_compute::DataType::QSYMM16: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::S32: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; default: { throw armnn::UnimplementedException(); } } } // Only used for testing void CopyInFrom(const void* memory) override { switch (this->GetDataType()) { case arm_compute::DataType::F32: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::U8: case arm_compute::DataType::QASYMM8: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::QSYMM8: case arm_compute::DataType::QASYMM8_SIGNED: case arm_compute::DataType::QSYMM8_PER_CHANNEL: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::BFLOAT16: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::F16: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::S16: case arm_compute::DataType::QSYMM16: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::S32: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; default: { throw armnn::UnimplementedException(); } } } arm_compute::Tensor m_Tensor; std::shared_ptr m_MemoryGroup; MemorySourceFlags m_ImportFlags; bool m_Imported; bool m_IsImportEnabled; const uintptr_t m_TypeAlignment; std::vector> m_Decorated; }; class NeonSubTensorHandle : public IAclTensorHandle { public: NeonSubTensorHandle(IAclTensorHandle* parent, const arm_compute::TensorShape& shape, const arm_compute::Coordinates& coords) : m_Tensor(&parent->GetTensor(), shape, coords, true) { parentHandle = parent; } arm_compute::ITensor& GetTensor() override { return m_Tensor; } arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } virtual void Allocate() override {} virtual void Manage() override {} virtual ITensorHandle* GetParent() const override { return parentHandle; } virtual arm_compute::DataType GetDataType() const override { return m_Tensor.info()->data_type(); } virtual void SetMemoryGroup(const std::shared_ptr&) override {} virtual const void* Map(bool /* blocking = true */) const override { return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); } virtual void Unmap() const override {} TensorShape GetStrides() const override { return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); } TensorShape GetShape() const override { return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); } virtual std::shared_ptr DecorateTensorHandle(const TensorInfo&) override { return nullptr; }; private: // Only used for testing void CopyOutTo(void* memory) const override { switch (this->GetDataType()) { case arm_compute::DataType::F32: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::U8: case arm_compute::DataType::QASYMM8: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::QSYMM8: case arm_compute::DataType::QASYMM8_SIGNED: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::S16: case arm_compute::DataType::QSYMM16: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::S32: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; default: { throw armnn::UnimplementedException(); } } } // Only used for testing void CopyInFrom(const void* memory) override { switch (this->GetDataType()) { case arm_compute::DataType::F32: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::U8: case arm_compute::DataType::QASYMM8: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::QSYMM8: case arm_compute::DataType::QASYMM8_SIGNED: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::S16: case arm_compute::DataType::QSYMM16: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::S32: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; default: { throw armnn::UnimplementedException(); } } } arm_compute::SubTensor m_Tensor; ITensorHandle* parentHandle = nullptr; }; /// NeonTensorDecorator wraps an existing Neon tensor allowing us to override the TensorInfo for it class NeonTensorDecorator : public arm_compute::ITensor { public: NeonTensorDecorator(); NeonTensorDecorator(arm_compute::ITensor* original, const TensorInfo& info); ~NeonTensorDecorator() = default; NeonTensorDecorator(const NeonTensorDecorator&) = delete; NeonTensorDecorator& operator=(const NeonTensorDecorator&) = delete; NeonTensorDecorator(NeonTensorDecorator&&) = default; NeonTensorDecorator& operator=(NeonTensorDecorator&&) = default; // Inherited methods overridden: arm_compute::ITensorInfo* info() const override; arm_compute::ITensorInfo* info() override; uint8_t* buffer() const override; private: arm_compute::ITensor* m_Original; mutable arm_compute::TensorInfo m_TensorInfo; }; class NeonTensorHandleDecorator : public IAclTensorHandle { public: NeonTensorHandleDecorator(IAclTensorHandle* parent, const TensorInfo& info) : m_Tensor(&parent->GetTensor(), info) { parentHandle = parent; } arm_compute::ITensor& GetTensor() override { return m_Tensor; } arm_compute::ITensor const& GetTensor() const override { return m_Tensor; } virtual void Allocate() override {} virtual void Manage() override {} virtual ITensorHandle* GetParent() const override { return nullptr; } virtual arm_compute::DataType GetDataType() const override { return m_Tensor.info()->data_type(); } virtual void SetMemoryGroup(const std::shared_ptr&) override {} virtual const void* Map(bool /* blocking = true */) const override { return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); } virtual void Unmap() const override {} TensorShape GetStrides() const override { return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); } TensorShape GetShape() const override { return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); } virtual std::shared_ptr DecorateTensorHandle(const TensorInfo&) override { return nullptr; }; private: // Only used for testing void CopyOutTo(void* memory) const override { switch (this->GetDataType()) { case arm_compute::DataType::F32: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::U8: case arm_compute::DataType::QASYMM8: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::QSYMM8: case arm_compute::DataType::QASYMM8_SIGNED: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::S16: case arm_compute::DataType::QSYMM16: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; case arm_compute::DataType::S32: armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), static_cast(memory)); break; default: { throw armnn::UnimplementedException(); } } } // Only used for testing void CopyInFrom(const void* memory) override { switch (this->GetDataType()) { case arm_compute::DataType::F32: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::U8: case arm_compute::DataType::QASYMM8: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::QSYMM8: case arm_compute::DataType::QASYMM8_SIGNED: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::S16: case arm_compute::DataType::QSYMM16: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; case arm_compute::DataType::S32: armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), this->GetTensor()); break; default: { throw armnn::UnimplementedException(); } } } NeonTensorDecorator m_Tensor; ITensorHandle* parentHandle = nullptr; }; } // namespace armnn