From e4a41dc5d5fc0f283c01b3260affdfdf6cfc1895 Mon Sep 17 00:00:00 2001 From: David Monahan Date: Wed, 14 Apr 2021 16:55:36 +0100 Subject: IVGCVSW-5727 Implement Import function of ClImportTensorHandle * Split ClImportTensorHandle out from ClTensorHandle * Added implementation of Import function * Added Unit Tests Signed-off-by: David Monahan Signed-off-by: Narumol Prangnawarat Change-Id: I07de2ca5bebf19dfb9a8dddea4b18340ffc31fad --- src/backends/cl/CMakeLists.txt | 1 + src/backends/cl/ClImportTensorHandle.hpp | 368 +++++++++++++++++++++ src/backends/cl/ClImportTensorHandleFactory.cpp | 27 +- src/backends/cl/ClImportTensorHandleFactory.hpp | 2 +- src/backends/cl/ClTensorHandle.hpp | 12 +- src/backends/cl/test/CMakeLists.txt | 1 + src/backends/cl/test/ClImportTensorHandleTests.cpp | 111 +++++++ 7 files changed, 504 insertions(+), 18 deletions(-) create mode 100644 src/backends/cl/ClImportTensorHandle.hpp create mode 100644 src/backends/cl/test/ClImportTensorHandleTests.cpp diff --git a/src/backends/cl/CMakeLists.txt b/src/backends/cl/CMakeLists.txt index 4c0fe1f549..0005c8178b 100644 --- a/src/backends/cl/CMakeLists.txt +++ b/src/backends/cl/CMakeLists.txt @@ -32,6 +32,7 @@ if(ARMCOMPUTECL) ClContextDeserializer.cpp ClContextSerializer.hpp ClContextSerializer.cpp + ClImportTensorHandle.hpp ClImportTensorHandleFactory.cpp ClImportTensorHandleFactory.hpp ClLayerSupport.cpp diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp new file mode 100644 index 0000000000..d3c53fa46a --- /dev/null +++ b/src/backends/cl/ClImportTensorHandle.hpp @@ -0,0 +1,368 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#pragma once + +#include +#include + +#include + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace armnn +{ + +class IClImportTensorHandle : public IAclTensorHandle +{ +public: + virtual arm_compute::ICLTensor& GetTensor() = 0; + virtual arm_compute::ICLTensor const& GetTensor() const = 0; + virtual arm_compute::DataType GetDataType() const = 0; + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) = 0; +}; + +class ClImportTensorHandle : public IClImportTensorHandle +{ +public: + ClImportTensorHandle(const TensorInfo& tensorInfo, MemorySourceFlags importFlags) + : m_ImportFlags(importFlags) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo); + } + + ClImportTensorHandle(const TensorInfo& tensorInfo, + DataLayout dataLayout, + MemorySourceFlags importFlags) + : m_ImportFlags(importFlags) + { + armnn::armcomputetensorutils::BuildArmComputeTensor(m_Tensor, tensorInfo, dataLayout); + } + + arm_compute::CLTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLTensor const& GetTensor() const override { return m_Tensor; } + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual const void* Map(bool blocking = true) const override + { + IgnoreUnused(blocking); + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + + virtual void Unmap() const override {} + + virtual ITensorHandle* GetParent() const override { return nullptr; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) override + { + IgnoreUnused(memoryGroup); + } + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return 
armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + + void SetImportFlags(MemorySourceFlags importFlags) + { + m_ImportFlags = importFlags; + } + + MemorySourceFlags GetImportFlags() const override + { + return m_ImportFlags; + } + + virtual bool Import(void* memory, MemorySource source) override + { + if (m_ImportFlags & static_cast(source)) + { + if (source == MemorySource::Malloc) + { + const size_t totalBytes = m_Tensor.info()->total_size(); + + const cl_import_properties_arm importProperties[] = + { + CL_IMPORT_TYPE_ARM, + CL_IMPORT_TYPE_HOST_ARM, + 0 + }; + + cl_int error = CL_SUCCESS; + cl_mem buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(), + CL_MEM_READ_WRITE, importProperties, memory, totalBytes, &error); + if (error != CL_SUCCESS) + { + throw MemoryImportException( + "ClImportTensorHandle::Invalid imported memory:" + std::to_string(error)); + } + + cl::Buffer wrappedBuffer(buffer); + arm_compute::Status status = m_Tensor.allocator()->import_memory(wrappedBuffer); + + // Compare the Status error code against ErrorCode::OK to check if it worked, if not throw an exception + // with the Status error message + bool imported = (status.error_code() == arm_compute::ErrorCode::OK); + if (!imported) + { + throw MemoryImportException(status.error_description()); + } + ARMNN_ASSERT(!m_Tensor.info()->is_resizable()); + return imported; + } + else + { + throw MemoryImportException("ClImportTensorHandle::Import flag is not supported"); + } + } + else + { + throw MemoryImportException("ClImportTensorHandle::Incorrect import flag"); + } + return false; + } + +private: + // Only used for testing + void CopyOutTo(void* memory) const override + { + const_cast(this)->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + 
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + const_cast(this)->Unmap(); + } + + // Only used for testing: copies from 'memory' into the tensor, dispatching on the tensor's data type + void CopyInFrom(const void* memory) override + { + this->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + default: + { + throw 
armnn::UnimplementedException(); + } + } + this->Unmap(); + } + + arm_compute::CLTensor m_Tensor; + MemorySourceFlags m_ImportFlags; +}; + +class ClImportSubTensorHandle : public IClImportTensorHandle +{ +public: + ClImportSubTensorHandle(IClImportTensorHandle* parent, + const arm_compute::TensorShape& shape, + const arm_compute::Coordinates& coords) + : m_Tensor(&parent->GetTensor(), shape, coords) + { + parentHandle = parent; + } + + arm_compute::CLSubTensor& GetTensor() override { return m_Tensor; } + arm_compute::CLSubTensor const& GetTensor() const override { return m_Tensor; } + + virtual void Allocate() override {} + virtual void Manage() override {} + + virtual const void* Map(bool blocking = true) const override + { + IgnoreUnused(blocking); + return static_cast(m_Tensor.buffer() + m_Tensor.info()->offset_first_element_in_bytes()); + } + virtual void Unmap() const override {} + + virtual ITensorHandle* GetParent() const override { return parentHandle; } + + virtual arm_compute::DataType GetDataType() const override + { + return m_Tensor.info()->data_type(); + } + + virtual void SetMemoryGroup(const std::shared_ptr& memoryGroup) override + { + IgnoreUnused(memoryGroup); + } + + TensorShape GetStrides() const override + { + return armcomputetensorutils::GetStrides(m_Tensor.info()->strides_in_bytes()); + } + + TensorShape GetShape() const override + { + return armcomputetensorutils::GetShape(m_Tensor.info()->tensor_shape()); + } + +private: + // Only used for testing + void CopyOutTo(void* memory) const override + { + const_cast(this)->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::F16: + 
armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(this->GetTensor(), + static_cast(memory)); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + const_cast(this)->Unmap(); + } + + // Only used for testing + void CopyInFrom(const void* memory) override + { + this->Map(true); + switch(this->GetDataType()) + { + case arm_compute::DataType::F32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::U8: + case arm_compute::DataType::QASYMM8: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::F16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::QSYMM8_PER_CHANNEL: + case arm_compute::DataType::QASYMM8_SIGNED: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S16: + case arm_compute::DataType::QSYMM16: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + case arm_compute::DataType::S32: + armcomputetensorutils::CopyArmComputeITensorData(static_cast(memory), + this->GetTensor()); + break; + default: + { + throw armnn::UnimplementedException(); + } + } + this->Unmap(); + } + + mutable arm_compute::CLSubTensor m_Tensor; + ITensorHandle* parentHandle = nullptr; +}; + +} // 
namespace armnn diff --git a/src/backends/cl/ClImportTensorHandleFactory.cpp b/src/backends/cl/ClImportTensorHandleFactory.cpp index 1812034814..594e05423e 100644 --- a/src/backends/cl/ClImportTensorHandleFactory.cpp +++ b/src/backends/cl/ClImportTensorHandleFactory.cpp @@ -4,7 +4,7 @@ // #include "ClImportTensorHandleFactory.hpp" -#include "ClTensorHandle.hpp" +#include "ClImportTensorHandle.hpp" #include #include @@ -49,48 +49,45 @@ std::unique_ptr ClImportTensorHandleFactory::CreateSubTensorHandl return nullptr; } - return std::make_unique(PolymorphicDowncast(&parent), shape, coords); + return std::make_unique( + PolymorphicDowncast(&parent), shape, coords); } std::unique_ptr ClImportTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo) const { - return ClImportTensorHandleFactory::CreateTensorHandle(tensorInfo, false); + std::unique_ptr tensorHandle = std::make_unique(tensorInfo, + GetImportFlags()); + return tensorHandle; } std::unique_ptr ClImportTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout) const { - return ClImportTensorHandleFactory::CreateTensorHandle(tensorInfo, dataLayout, false); + std::unique_ptr tensorHandle = std::make_unique(tensorInfo, + dataLayout, + GetImportFlags()); + return tensorHandle; } std::unique_ptr ClImportTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, const bool IsMemoryManaged) const { - // If IsMemoryManaged is true then throw an exception. 
if (IsMemoryManaged) { throw InvalidArgumentException("ClImportTensorHandleFactory does not support memory managed tensors."); } - std::unique_ptr tensorHandle = std::make_unique(tensorInfo); - tensorHandle->SetImportEnabledFlag(true); - tensorHandle->SetImportFlags(GetImportFlags()); - return tensorHandle; + return CreateTensorHandle(tensorInfo); } std::unique_ptr ClImportTensorHandleFactory::CreateTensorHandle(const TensorInfo& tensorInfo, DataLayout dataLayout, const bool IsMemoryManaged) const { - // If IsMemoryManaged is true then throw an exception. if (IsMemoryManaged) { throw InvalidArgumentException("ClImportTensorHandleFactory does not support memory managed tensors."); } - std::unique_ptr tensorHandle = std::make_unique(tensorInfo, dataLayout); - // If we are not Managing the Memory then we must be importing - tensorHandle->SetImportEnabledFlag(true); - tensorHandle->SetImportFlags(GetImportFlags()); - return tensorHandle; + return CreateTensorHandle(tensorInfo, dataLayout); } const FactoryId& ClImportTensorHandleFactory::GetIdStatic() diff --git a/src/backends/cl/ClImportTensorHandleFactory.hpp b/src/backends/cl/ClImportTensorHandleFactory.hpp index d6550dbeef..ee2f84efda 100644 --- a/src/backends/cl/ClImportTensorHandleFactory.hpp +++ b/src/backends/cl/ClImportTensorHandleFactory.hpp @@ -18,7 +18,7 @@ constexpr const char* ClImportTensorHandleFactoryId() } /** - * This factory creates ClTensorHandles that refer to imported memory tensors. + * This factory creates ClImportTensorHandles that refer to imported memory tensors. 
*/ class ClImportTensorHandleFactory : public ITensorHandleFactory { diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp index 061117e9a6..5720d2cf11 100644 --- a/src/backends/cl/ClTensorHandle.hpp +++ b/src/backends/cl/ClTensorHandle.hpp @@ -57,7 +57,11 @@ public: virtual void Allocate() override { // If we have enabled Importing, don't allocate the tensor - if (!m_IsImportEnabled) + if (m_IsImportEnabled) + { + throw MemoryImportException("ClTensorHandle::Attempting to allocate memory when importing"); + } + else { armnn::armcomputetensorutils::InitialiseArmComputeTensorEmpty(m_Tensor); } @@ -67,7 +71,11 @@ public: virtual void Manage() override { // If we have enabled Importing, don't manage the tensor - if (!m_IsImportEnabled) + if (m_IsImportEnabled) + { + throw MemoryImportException("ClTensorHandle::Attempting to manage memory when importing"); + } + else { assert(m_MemoryGroup != nullptr); m_MemoryGroup->manage(&m_Tensor); diff --git a/src/backends/cl/test/CMakeLists.txt b/src/backends/cl/test/CMakeLists.txt index 422c0a56e4..6662a1e659 100644 --- a/src/backends/cl/test/CMakeLists.txt +++ b/src/backends/cl/test/CMakeLists.txt @@ -9,6 +9,7 @@ list(APPEND armnnClBackendUnitTests_sources ClCreateWorkloadTests.cpp ClEndToEndTests.cpp ClImportTensorHandleFactoryTests.cpp + ClImportTensorHandleTests.cpp ClJsonPrinterTests.cpp ClLayerSupportTests.cpp ClLayerTests.cpp diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp new file mode 100644 index 0000000000..bfb74af801 --- /dev/null +++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp @@ -0,0 +1,111 @@ +// +// Copyright © 2021 Arm Ltd and Contributors. All rights reserved. 
+// SPDX-License-Identifier: MIT +// + +#include + +#include +#include +#include + +#include + +using namespace armnn; + +BOOST_AUTO_TEST_SUITE(ClImportTensorHandleTests) + +BOOST_FIXTURE_TEST_CASE(ClMallocImport, ClContextControlFixture) +{ + ClImportTensorHandleFactory handleFactory(static_cast(MemorySource::Malloc), + static_cast(MemorySource::Malloc)); + + TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32); + unsigned int numElements = info.GetNumElements(); + + // Create TensorHandle for memory import + auto handle = handleFactory.CreateTensorHandle(info); + + // Get the underlying CLTensor + arm_compute::CLTensor& tensor = PolymorphicDowncast(handle.get())->GetTensor(); + + // Create and configure activation function + const arm_compute::ActivationLayerInfo act_info(arm_compute::ActivationLayerInfo::ActivationFunction::RELU); + arm_compute::CLActivationLayer act_func; + act_func.configure(&tensor, nullptr, act_info); + + // Allocate user memory + const size_t totalBytes = tensor.info()->total_size(); + const size_t alignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo(); + size_t space = totalBytes + alignment; + auto testData = std::make_unique(space); + void* alignedPtr = testData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space)); + + // Import memory + BOOST_CHECK(handle->Import(alignedPtr, armnn::MemorySource::Malloc)); + + // Input with negative values + auto* typedPtr = reinterpret_cast(alignedPtr); + std::fill_n(typedPtr, numElements, -5.0f); + + // Execute function and sync + act_func.run(); + arm_compute::CLScheduler::get().sync(); + + // Validate that the output has no negative values (BOOST_CHECK also runs in NDEBUG builds) + for(unsigned int i = 0; i < numElements; ++i) + { + BOOST_CHECK(typedPtr[i] >= 0); + } +} + +BOOST_FIXTURE_TEST_CASE(ClIncorrectMemorySourceImport, ClContextControlFixture) +{ + ClImportTensorHandleFactory handleFactory(static_cast(MemorySource::Malloc), + static_cast(MemorySource::Malloc)); + + TensorInfo info({ 1, 
24, 16, 3 }, DataType::Float32); + + // Create TensorHandle for memory import + auto handle = handleFactory.CreateTensorHandle(info); + + // Get the underlying CLTensor + arm_compute::CLTensor& tensor = PolymorphicDowncast(handle.get())->GetTensor(); + + // Allocate user memory + const size_t totalBytes = tensor.info()->total_size(); + const size_t alignment = + arm_compute::CLKernelLibrary::get().get_device().getInfo(); + size_t space = totalBytes + alignment; + auto testData = std::make_unique(space); + void* alignedPtr = testData.get(); + BOOST_CHECK(std::align(alignment, totalBytes, alignedPtr, space)); + + // Import with a memory source the handle was not created for; this must throw + BOOST_CHECK_THROW(handle->Import(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException); +} + +BOOST_FIXTURE_TEST_CASE(ClInvalidMemorySourceImport, ClContextControlFixture) +{ + MemorySource invalidMemSource = static_cast(256); + ClImportTensorHandleFactory handleFactory(static_cast(invalidMemSource), + static_cast(invalidMemSource)); + + TensorInfo info({ 1, 2, 2, 1 }, DataType::Float32); + + // Create TensorHandle for memory import + auto handle = handleFactory.CreateTensorHandle(info); + + // Allocate user memory + std::vector inputData + { + 1.0f, 2.0f, 3.0f, 4.0f + }; + + // Import from an unsupported memory source; this must throw + BOOST_CHECK_THROW(handle->Import(inputData.data(), invalidMemSource), MemoryImportException); +} + +BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file -- cgit v1.2.1