From 60ab9765329b1449b509b32b07f0b0abb3b532f2 Mon Sep 17 00:00:00 2001
From: Nikhil Raj
Date: Thu, 13 Jan 2022 09:34:44 +0000
Subject: IVGCVSW-6673 Implement CanBeImported function to ClTensorHandle

 * Added Unittests

Signed-off-by: Nikhil Raj
Signed-off-by: David Monahan
Change-Id: If7c0add39583a7e47b43fd79f93c620f86f80fc1
---
 src/backends/cl/ClImportTensorHandle.hpp           | 61 +++++++++++++++++++++-
 src/backends/cl/ClTensorHandle.hpp                 |  7 +++
 src/backends/cl/test/ClImportTensorHandleTests.cpp | 57 ++++++++++++++++++++
 3 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index a24ab5656e..18cd1ffeb4 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -192,12 +192,71 @@ public:
         }
     }
 
+    virtual bool CanBeImported(void* memory, MemorySource source) override
+    {
+        if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
+        {
+            if (source == MemorySource::Malloc)
+            {
+                const cl_import_properties_arm importProperties[] =
+                {
+                    CL_IMPORT_TYPE_ARM,
+                    CL_IMPORT_TYPE_HOST_ARM,
+                    0
+                };
+
+                size_t totalBytes = m_Tensor.info()->total_size();
+
+                // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
+                // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to.
+                // We do this to match the behaviour of the Import function later on.
+                auto cachelineAlignment =
+                    arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+                auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+
+                cl_int error = CL_SUCCESS;
+                cl_mem buffer;
+                buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
+                                           CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
+
+                // If we fail to map, we know the import will not succeed and can return false.
+                // There is no memory to be released if error is not CL_SUCCESS.
+                if (error != CL_SUCCESS)
+                {
+                    return false;
+                }
+                else
+                {
+                    // If the import was successful, we can release the mapping knowing the import will succeed
+                    // at workload execution, and return true.
+                    error = clReleaseMemObject(buffer);
+                    if (error == CL_SUCCESS)
+                    {
+                        return true;
+                    }
+                    else
+                    {
+                        // If we couldn't release the mapping, this constitutes a memory leak, so throw an exception.
+                        throw MemoryImportException("ClImportTensorHandle::Failed to unmap cl_mem buffer: "
+                                                    + std::to_string(error));
+                    }
+                }
+            }
+        }
+        else
+        {
+            throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
+        }
+        return false;
+    }
+
 private:
     bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
     {
         size_t totalBytes = m_Tensor.info()->total_size();
 
-        // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+        // Round the size of the mapping to match the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
+        // This does not change the size of the buffer, only the size of the mapping the buffer is mapped to.
         auto cachelineAlignment =
             arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
         auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);

diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp
index 5720d2cf11..6fccb8d395 100644
--- a/src/backends/cl/ClTensorHandle.hpp
+++ b/src/backends/cl/ClTensorHandle.hpp
@@ -138,6 +138,13 @@ public:
         return false;
     }
 
+    virtual bool CanBeImported(void* memory, MemorySource source) override
+    {
+        // This TensorHandle can never import.
+        armnn::IgnoreUnused(memory, source);
+        return false;
+    }
+
 private:
     // Only used for testing
     void CopyOutTo(void* memory) const override

diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp
index 0403d5379e..3d702642aa 100644
--- a/src/backends/cl/test/ClImportTensorHandleTests.cpp
+++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp
@@ -217,4 +217,61 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClImportEndToEnd")
     }
 }
 
+TEST_CASE_FIXTURE(ClContextControlFixture, "ClCanBeImported")
+{
+    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
+                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));
+
+    TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
+
+    // Create a TensorHandle for memory import.
+    auto handle = handleFactory.CreateTensorHandle(info);
+
+    // Get the CLTensor.
+    arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
+
+    // Allocate user memory.
+    const size_t totalBytes = tensor.info()->total_size();
+    const size_t alignment =
+        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+    size_t space = totalBytes + alignment + alignment;
+    auto testData = std::make_unique<uint8_t[]>(space);
+    void* alignedPtr = testData.get();
+    CHECK(std::align(alignment, totalBytes, alignedPtr, space));
+
+    // Check that probing with an unsupported memory source throws.
+    CHECK_THROWS_AS(handle->CanBeImported(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
+}
+
+TEST_CASE("ClCanBeImportedAlignedMemory")
+{
+    ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
+                                              static_cast<MemorySourceFlags>(MemorySource::Malloc));
+
+    TensorInfo info({ 1, 1, 1, 1 }, DataType::Float32);
+
+    // Create a TensorHandle (Memory Managed status is irrelevant).
+    auto handle = handleFactory.CreateTensorHandle(info);
+    // Get the CLTensor.
+    arm_compute::CLTensor& tensor =
+        PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
+
+    // Create an aligned buffer.
+    const size_t totalBytes = tensor.info()->total_size();
+    const size_t alignment =
+        arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+    size_t space = totalBytes + alignment + alignment;
+    auto testData = std::make_unique<uint8_t[]>(space);
+    void* alignedPtr = testData.get();
+    CHECK(std::align(alignment, totalBytes, alignedPtr, space));
+
+    // Check that aligned buffers return true.
+    CHECK(handle->CanBeImported(alignedPtr, MemorySource::Malloc) == true);
+
+    // Due to the nature of how GPU memory is mapped, it is entirely possible for memory which is misaligned on
+    // the CPU to be successfully imported on the GPU. As such, there is no way to create a misaligned pointer
+    // that will always fail: it will succeed on some devices and fail on others. As long as a correctly aligned
+    // buffer returns true, we can be confident that it will be successfully imported. All other cases will need
+    // to be handled by the user.
+}
+
 }
-- 
cgit v1.2.1
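
A minimal sketch of how a caller might use the probe added by this patch, assuming
ArmNN's public ITensorHandle interface (CanBeImported, Import, Map, Unmap). The
header paths, the ImportOrCopy name, and the fall-back-to-copy policy are
illustrative assumptions, not part of this patch:

    #include <armnn/backends/ITensorHandle.hpp>
    #include <armnn/MemorySources.hpp>
    #include <cstddef>
    #include <cstring>

    // Probe a user buffer with CanBeImported() before committing to a
    // zero-copy import; fall back to copying into the handle's own
    // allocation when the buffer cannot be imported.
    bool ImportOrCopy(armnn::ITensorHandle& handle, void* userBuffer, std::size_t numBytes)
    {
        using armnn::MemorySource;
        if (handle.CanBeImported(userBuffer, MemorySource::Malloc))
        {
            // The probe mapping succeeded, so Import() is expected to
            // succeed at workload execution time.
            return handle.Import(userBuffer, MemorySource::Malloc);
        }
        // Copy path: map the handle's own allocation and copy the data in.
        if (void* dst = handle.Map(/*blocking=*/true))
        {
            std::memcpy(dst, userBuffer, numBytes);
            handle.Unmap();
        }
        return false;
    }

Note that CanBeImported() performs a trial clImportMemoryARM() mapping and releases
it immediately, so a true result means the later Import() is expected to succeed,
at the cost of one extra map/unmap per probe.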