diff options
Diffstat (limited to 'src/backends/gpuFsa/GpuFsaTensorHandle.hpp')
-rw-r--r-- | src/backends/gpuFsa/GpuFsaTensorHandle.hpp | 350 |
1 file changed, 314 insertions, 36 deletions
diff --git a/src/backends/gpuFsa/GpuFsaTensorHandle.hpp b/src/backends/gpuFsa/GpuFsaTensorHandle.hpp index b2da50a467..d6901d1225 100644 --- a/src/backends/gpuFsa/GpuFsaTensorHandle.hpp +++ b/src/backends/gpuFsa/GpuFsaTensorHandle.hpp @@ -1,83 +1,361 @@ // -// Copyright © 2022 Arm Ltd and Contributors. All rights reserved. +// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved. // SPDX-License-Identifier: MIT // #pragma once -#include <armnn/backends/TensorHandle.hpp> +#include <aclCommon/ArmComputeTensorHandle.hpp> +#include <aclCommon/ArmComputeTensorUtils.hpp> -#include "GpuFsaMemoryManager.hpp" +#include <armnn/utility/PolymorphicDowncast.hpp> +#include <Half.hpp> + +#include <arm_compute/runtime/CL/CLTensor.h> +#include <arm_compute/runtime/CL/CLSubTensor.h> +#include <arm_compute/runtime/IMemoryGroup.h> +#include <arm_compute/runtime/MemoryGroup.h> +#include <arm_compute/core/TensorShape.h> +#include <arm_compute/core/Coordinates.h> + +#include <aclCommon/IClTensorHandle.hpp> namespace armnn { -// An implementation of ITensorHandle with simple "bump the pointer" memory-management behaviour -// Will be refactored to look more like ClTensorHandle.hpp and use ClMemoryManager instead of GpuFsaMemoryManager -class GpuFsaTensorHandle : public ITensorHandle +class GpuFsaTensorHandle : public IClTensorHandle { public: - GpuFsaTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<GpuFsaMemoryManager>& memoryManager); + GpuFsaTensorHandle(const TensorInfo& tensorInfo) + : m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)), + m_Imported(false), + m_IsImportEnabled(false) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } - GpuFsaTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags); + GpuFsaTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout, + MemorySourceFlags importFlags = static_cast<MemorySourceFlags>(MemorySource::Undefined)) + : 
m_ImportFlags(importFlags), + m_Imported(false), + m_IsImportEnabled(false) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); + } - ~GpuFsaTensorHandle(); + arm_compute::CLTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override + { + // If we have enabled Importing, don't allocate the tensor + if (m_IsImportEnabled) + { + throw MemoryImportException("GpuFsaTensorHandle::Attempting to allocate memory when importing"); + } + else + { + armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); + } - virtual void Manage() override; + } - virtual void Allocate() override; + virtual void Manage() override + { + // If we have enabled Importing, don't manage the tensor + if (m_IsImportEnabled) + { + throw MemoryImportException("GpuFsaTensorHandle::Attempting to manage memory when importing"); + } + else + { + assert(m_MemoryGroup != nullptr); + m_MemoryGroup->manage(&m_Tensor); + } + } - virtual ITensorHandle* GetParent() const override + virtual const void* Map(bool blocking = true) const override { - return nullptr; + const_cast<arm_compute::CLTensor*>(&m_Tensor)->map(blocking); + return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); } - virtual const void* Map(bool /* blocking = true */) const override; - using ITensorHandle::Map; + virtual void Unmap() const override { const_cast<arm_compute::CLTensor*>(&m_Tensor)->unmap(); } + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } - virtual void Unmap() const override - {} + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override + { + m_MemoryGroup = PolymorphicPointerDowncast<arm_compute::MemoryGroup>(memoryGroup); + } TensorShape 
GetStrides() const override { - return GetUnpaddedTensorStrides(m_TensorInfo); + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); } TensorShape GetShape() const override { - return m_TensorInfo.GetShape(); + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); } - const TensorInfo& GetTensorInfo() const + void SetImportFlags(MemorySourceFlags importFlags) { - return m_TensorInfo; + m_ImportFlags = importFlags; } - virtual MemorySourceFlags GetImportFlags() const override + MemorySourceFlags GetImportFlags() const override { return m_ImportFlags; } - virtual bool Import(void* memory, MemorySource source) override; - virtual bool CanBeImported(void* memory, MemorySource source) override; + void SetImportEnabledFlag(bool importEnabledFlag) + { + m_IsImportEnabled = importEnabledFlag; + } -private: - // Only used for testing - void CopyOutTo(void*) const override; - void CopyInFrom(const void*) override; + virtual bool Import(void* /*memory*/, MemorySource source) override + { + if (m_ImportFlags & static_cast<MemorySourceFlags>(source)) + { + throw MemoryImportException("GpuFsaTensorHandle::Incorrect import flag"); + } + m_Imported = false; + return false; + } - void* GetPointer() const; + virtual bool CanBeImported(void* /*memory*/, MemorySource /*source*/) override + { + // This TensorHandle can never import. 
+ return false; + } - GpuFsaTensorHandle(const GpuFsaTensorHandle& other) = delete; // noncopyable - GpuFsaTensorHandle& operator=(const GpuFsaTensorHandle& other) = delete; //noncopyable +private: + // Only used for testing + void CopyOutTo(void* memory) const override + { + const_cast<armnn::GpuFsaTensorHandle*>(this)->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<float*>(memory)); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<uint8_t*>(memory)); + break; + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int8_t*>(memory)); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<armnn::Half*>(memory)); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int16_t*>(memory)); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int32_t*>(memory)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + const_cast<armnn::GpuFsaTensorHandle*>(this)->Unmap(); + } - TensorInfo m_TensorInfo; + // Only used for testing + void CopyInFrom(const void* memory) override + { + this->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + 
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory), + this->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + this->Unmap(); + } - std::shared_ptr<GpuFsaMemoryManager> m_MemoryManager; - GpuFsaMemoryManager::Pool* m_Pool; - mutable void* m_UnmanagedMemory; + arm_compute::CLTensor m_Tensor; + std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup; MemorySourceFlags m_ImportFlags; bool m_Imported; bool m_IsImportEnabled; }; -}
\ No newline at end of file +class GpuFsaSubTensorHandle : public IClTensorHandle +{ +public: + GpuFsaSubTensorHandle(IClTensorHandle* parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent->GetTensor(), shape, coords) + { + parentHandle = parent; + } + + arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual const void* Map(bool blocking = true) const override + { + const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->map(blocking); + return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override { const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->unmap(); } + + virtual ITensorHandle* GetParent() const override { return parentHandle; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {} + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + // Only used for testing + void CopyOutTo(void* memory) const override + { + const_cast<GpuFsaSubTensorHandle*>(this)->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<float*>(memory)); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<uint8_t*>(memory)); + break; + case arm_compute::DataType::F16: + 
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<armnn::Half*>(memory)); + break; + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int8_t*>(memory)); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int16_t*>(memory)); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast<int32_t*>(memory)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + const_cast<GpuFsaSubTensorHandle*>(this)->Unmap(); + } + + // Only used for testing + void CopyInFrom(const void* memory) override + { + this->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::QSYMM8: + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S32: + 
armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory), + this->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + this->Unmap(); + } + + mutable arm_compute::CLSubTensor m_Tensor; + ITensorHandle* parentHandle = nullptr; +}; + +} // namespace armnn |