Diffstat (limited to 'src/backends/gpuFsa/GpuFsaTensorHandle.hpp')
-rw-r--r--  src/backends/gpuFsa/GpuFsaTensorHandle.hpp  350
1 file changed, 314 insertions(+), 36 deletions(-)
diff --git a/src/backends/gpuFsa/GpuFsaTensorHandle.hpp b/src/backends/gpuFsa/GpuFsaTensorHandle.hpp
index b2da50a467..d6901d1225 100644
--- a/src/backends/gpuFsa/GpuFsaTensorHandle.hpp
+++ b/src/backends/gpuFsa/GpuFsaTensorHandle.hpp
@@ -1,83 +1,361 @@
//
-// Copyright © 2022 Arm Ltd and Contributors. All rights reserved.
+// Copyright © 2022-2023 Arm Ltd and Contributors. All rights reserved.
// SPDX-License-Identifier: MIT
//
#pragma once
-#include <armnn/backends/TensorHandle.hpp>
+#include <aclCommon/ArmComputeTensorHandle.hpp>
+#include <aclCommon/ArmComputeTensorUtils.hpp>
-#include "GpuFsaMemoryManager.hpp"
+#include <armnn/utility/PolymorphicDowncast.hpp>
+#include <Half.hpp>
+
+#include <arm_compute/runtime/CL/CLTensor.h>
+#include <arm_compute/runtime/CL/CLSubTensor.h>
+#include <arm_compute/runtime/IMemoryGroup.h>
+#include <arm_compute/runtime/MemoryGroup.h>
+#include <arm_compute/core/TensorShape.h>
+#include <arm_compute/core/Coordinates.h>
+
+#include <aclCommon/IClTensorHandle.hpp>
namespace armnn
{
-// An implementation of ITensorHandle with simple "bump the pointer" memory-management behaviour
-// Will be refactored to look more like ClTensorHandle.hpp and use ClMemoryManager instead of GpuFsaMemoryManager
-class GpuFsaTensorHandle : public ITensorHandle
+class GpuFsaTensorHandle : public IClTensorHandle
{
public:
- GpuFsaTensorHandle(const TensorInfo& tensorInfo, std::shared_ptr<GpuFsaMemoryManager>& memoryManager);
+ GpuFsaTensorHandle(const TensorInfo& tensorInfo)
+ : m_ImportFlags(static_cast<MemorySourceFlags>(MemorySource::Undefined)),
+ m_Imported(false),
+ m_IsImportEnabled(false)
+ {
+ armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo);
+ }
- GpuFsaTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags);
+ GpuFsaTensorHandle(const TensorInfo& tensorInfo,
+ DataLayout dataLayout,
+ MemorySourceFlags importFlags = static_cast<MemorySourceFlags>(MemorySource::Undefined))
+ : m_ImportFlags(importFlags),
+ m_Imported(false),
+ m_IsImportEnabled(false)
+ {
+ armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout);
+ }
- ~GpuFsaTensorHandle();
+ arm_compute::CLTensor& GetTensor() override { return m_Tensor; }
+ arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; }
+ virtual void Allocate() override
+ {
+        // If import is enabled, don't allocate the tensor
+ if (m_IsImportEnabled)
+ {
+ throw MemoryImportException("GpuFsaTensorHandle::Attempting to allocate memory when importing");
+ }
+ else
+ {
+ armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor);
+ }
- virtual void Manage() override;
+ }
- virtual void Allocate() override;
+ virtual void Manage() override
+ {
+        // If import is enabled, don't manage the tensor
+ if (m_IsImportEnabled)
+ {
+ throw MemoryImportException("GpuFsaTensorHandle::Attempting to manage memory when importing");
+ }
+ else
+ {
+ assert(m_MemoryGroup != nullptr);
+ m_MemoryGroup->manage(&m_Tensor);
+ }
+ }
- virtual ITensorHandle* GetParent() const override
+ virtual const void* Map(bool blocking = true) const override
{
- return nullptr;
+ const_cast<arm_compute::CLTensor*>(&m_Tensor)->map(blocking);
+ return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
}
- virtual const void* Map(bool /* blocking = true */) const override;
- using ITensorHandle::Map;
+ virtual void Unmap() const override { const_cast<arm_compute::CLTensor*>(&m_Tensor)->unmap(); }
+
+ virtual ITensorHandle* GetParent() const override { return nullptr; }
+
+ virtual arm_compute::DataType GetDataType() const override
+ {
+ return m_Tensor.info()->data_type();
+ }
- virtual void Unmap() const override
- {}
+ virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>& memoryGroup) override
+ {
+ m_MemoryGroup = PolymorphicPointerDowncast<arm_compute::MemoryGroup>(memoryGroup);
+ }
TensorShape GetStrides() const override
{
- return GetUnpaddedTensorStrides(m_TensorInfo);
+ return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
}
TensorShape GetShape() const override
{
- return m_TensorInfo.GetShape();
+ return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
}
- const TensorInfo& GetTensorInfo() const
+ void SetImportFlags(MemorySourceFlags importFlags)
{
- return m_TensorInfo;
+ m_ImportFlags = importFlags;
}
- virtual MemorySourceFlags GetImportFlags() const override
+ MemorySourceFlags GetImportFlags() const override
{
return m_ImportFlags;
}
- virtual bool Import(void* memory, MemorySource source) override;
- virtual bool CanBeImported(void* memory, MemorySource source) override;
+ void SetImportEnabledFlag(bool importEnabledFlag)
+ {
+ m_IsImportEnabled = importEnabledFlag;
+ }
-private:
- // Only used for testing
- void CopyOutTo(void*) const override;
- void CopyInFrom(const void*) override;
+ virtual bool Import(void* /*memory*/, MemorySource source) override
+ {
+ if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
+ {
+ throw MemoryImportException("GpuFsaTensorHandle::Incorrect import flag");
+ }
+ m_Imported = false;
+ return false;
+ }
- void* GetPointer() const;
+ virtual bool CanBeImported(void* /*memory*/, MemorySource /*source*/) override
+ {
+ // This TensorHandle can never import.
+ return false;
+ }
- GpuFsaTensorHandle(const GpuFsaTensorHandle& other) = delete; // noncopyable
- GpuFsaTensorHandle& operator=(const GpuFsaTensorHandle& other) = delete; //noncopyable
+private:
+ // Only used for testing
+ void CopyOutTo(void* memory) const override
+ {
+ const_cast<armnn::GpuFsaTensorHandle*>(this)->Map(true);
+ switch(this->GetDataType())
+ {
+ case arm_compute::DataType::F32:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<float*>(memory));
+ break;
+ case arm_compute::DataType::U8:
+ case arm_compute::DataType::QASYMM8:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<uint8_t*>(memory));
+ break;
+ case arm_compute::DataType::QSYMM8:
+ case arm_compute::DataType::QSYMM8_PER_CHANNEL:
+ case arm_compute::DataType::QASYMM8_SIGNED:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<int8_t*>(memory));
+ break;
+ case arm_compute::DataType::F16:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<armnn::Half*>(memory));
+ break;
+ case arm_compute::DataType::S16:
+ case arm_compute::DataType::QSYMM16:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<int16_t*>(memory));
+ break;
+ case arm_compute::DataType::S32:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<int32_t*>(memory));
+ break;
+ default:
+ {
+ throw armnn::UnimplementedException();
+ }
+ }
+ const_cast<armnn::GpuFsaTensorHandle*>(this)->Unmap();
+ }
- TensorInfo m_TensorInfo;
+ // Only used for testing
+ void CopyInFrom(const void* memory) override
+ {
+ this->Map(true);
+ switch(this->GetDataType())
+ {
+ case arm_compute::DataType::F32:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
+ this->GetTensor());
+ break;
+ case arm_compute::DataType::U8:
+ case arm_compute::DataType::QASYMM8:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
+ this->GetTensor());
+ break;
+ case arm_compute::DataType::F16:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
+ this->GetTensor());
+ break;
+            case arm_compute::DataType::QSYMM8:
+            case arm_compute::DataType::QSYMM8_PER_CHANNEL:
+            case arm_compute::DataType::QASYMM8_SIGNED:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
+                                                                 this->GetTensor());
+                break;
+            case arm_compute::DataType::S16:
+            case arm_compute::DataType::QSYMM16:
+                armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
+                                                                 this->GetTensor());
+                break;
+ case arm_compute::DataType::S32:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
+ this->GetTensor());
+ break;
+ default:
+ {
+ throw armnn::UnimplementedException();
+ }
+ }
+ this->Unmap();
+ }
- std::shared_ptr<GpuFsaMemoryManager> m_MemoryManager;
- GpuFsaMemoryManager::Pool* m_Pool;
- mutable void* m_UnmanagedMemory;
+ arm_compute::CLTensor m_Tensor;
+ std::shared_ptr<arm_compute::MemoryGroup> m_MemoryGroup;
MemorySourceFlags m_ImportFlags;
bool m_Imported;
bool m_IsImportEnabled;
};
-}
\ No newline at end of file
+class GpuFsaSubTensorHandle : public IClTensorHandle
+{
+public:
+ GpuFsaSubTensorHandle(IClTensorHandle* parent,
+ const arm_compute::TensorShape& shape,
+ const arm_compute::Coordinates& coords)
+ : m_Tensor(&parent->GetTensor(), shape, coords)
+ {
+ parentHandle = parent;
+ }
+
+ arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; }
+ arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; }
+
+ virtual void Allocate() override {}
+ virtual void Manage() override {}
+
+ virtual const void* Map(bool blocking = true) const override
+ {
+ const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->map(blocking);
+ return static_cast<const void*>(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes());
+ }
+ virtual void Unmap() const override { const_cast<arm_compute::CLSubTensor*>(&m_Tensor)->unmap(); }
+
+ virtual ITensorHandle* GetParent() const override { return parentHandle; }
+
+ virtual arm_compute::DataType GetDataType() const override
+ {
+ return m_Tensor.info()->data_type();
+ }
+
+ virtual void SetMemoryGroup(const std::shared_ptr<arm_compute::IMemoryGroup>&) override {}
+
+ TensorShape GetStrides() const override
+ {
+ return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes());
+ }
+
+ TensorShape GetShape() const override
+ {
+ return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape());
+ }
+
+private:
+ // Only used for testing
+ void CopyOutTo(void* memory) const override
+ {
+ const_cast<GpuFsaSubTensorHandle*>(this)->Map(true);
+ switch(this->GetDataType())
+ {
+ case arm_compute::DataType::F32:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<float*>(memory));
+ break;
+ case arm_compute::DataType::U8:
+ case arm_compute::DataType::QASYMM8:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<uint8_t*>(memory));
+ break;
+ case arm_compute::DataType::F16:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<armnn::Half*>(memory));
+ break;
+ case arm_compute::DataType::QSYMM8:
+ case arm_compute::DataType::QSYMM8_PER_CHANNEL:
+ case arm_compute::DataType::QASYMM8_SIGNED:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<int8_t*>(memory));
+ break;
+ case arm_compute::DataType::S16:
+ case arm_compute::DataType::QSYMM16:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<int16_t*>(memory));
+ break;
+ case arm_compute::DataType::S32:
+ armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(),
+ static_cast<int32_t*>(memory));
+ break;
+ default:
+ {
+ throw armnn::UnimplementedException();
+ }
+ }
+ const_cast<GpuFsaSubTensorHandle*>(this)->Unmap();
+ }
+
+ // Only used for testing
+ void CopyInFrom(const void* memory) override
+ {
+ this->Map(true);
+ switch(this->GetDataType())
+ {
+ case arm_compute::DataType::F32:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const float*>(memory),
+ this->GetTensor());
+ break;
+ case arm_compute::DataType::U8:
+ case arm_compute::DataType::QASYMM8:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const uint8_t*>(memory),
+ this->GetTensor());
+ break;
+ case arm_compute::DataType::F16:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const armnn::Half*>(memory),
+ this->GetTensor());
+ break;
+ case arm_compute::DataType::QSYMM8:
+ case arm_compute::DataType::QSYMM8_PER_CHANNEL:
+ case arm_compute::DataType::QASYMM8_SIGNED:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int8_t*>(memory),
+ this->GetTensor());
+ break;
+ case arm_compute::DataType::S16:
+ case arm_compute::DataType::QSYMM16:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int16_t*>(memory),
+ this->GetTensor());
+ break;
+ case arm_compute::DataType::S32:
+ armcomputetensorutils::CopyArmComputeITensorData(static_cast<const int32_t*>(memory),
+ this->GetTensor());
+ break;
+ default:
+ {
+ throw armnn::UnimplementedException();
+ }
+ }
+ this->Unmap();
+ }
+
+ mutable arm_compute::CLSubTensor m_Tensor;
+ ITensorHandle* parentHandle = nullptr;
+};
+
+} // namespace armnn
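
For reference, here is a minimal usage sketch of the new handles (not part of the change). The function name and tensor dimensions are illustrative; it assumes the Compute Library's OpenCL runtime has been initialised via CLScheduler and that the Arm NN and Compute Library headers are on the include path.

#include <arm_compute/runtime/CL/CLScheduler.h>
#include <armnn/Tensor.hpp>
#include <cassert>
#include "GpuFsaTensorHandle.hpp"

void SketchGpuFsaHandleUsage()
{
    // CLTensor needs a live OpenCL context and queue; default_init() provides both.
    arm_compute::CLScheduler::get().default_init();

    // Describe a 1x16 FP32 tensor and wrap it in the new handle type.
    armnn::TensorInfo info({ 1, 16 }, armnn::DataType::Float32);
    armnn::GpuFsaTensorHandle handle(info);

    // Import is disabled by default, so Allocate() initialises the underlying
    // CLTensor rather than throwing MemoryImportException.
    handle.Allocate();

    // Map() blocks until the buffer is host-visible; Unmap() releases it.
    auto* data = static_cast<float*>(const_cast<void*>(handle.Map(true)));
    data[0] = 1.0f;
    handle.Unmap();

    // A sub-tensor view over the first eight elements. Compute Library orders
    // dimensions innermost-first, so Arm NN's {1, 16} is the ACL shape (16, 1).
    arm_compute::TensorShape subShape(8u, 1u);
    arm_compute::Coordinates coords(0, 0);
    armnn::GpuFsaSubTensorHandle subHandle(&handle, subShape, coords);

    // The sub-tensor shares the parent's buffer (Allocate/Manage are no-ops)
    // and reports the parent handle.
    assert(subHandle.GetParent() == &handle);
}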