author     Nikhil Raj <nikhil.raj@arm.com>  2022-01-13 09:34:44 +0000
committer  Narumol Prangnawarat <narumol.prangnawarat@arm.com>  2022-01-25 15:38:16 +0000
commit     60ab9765329b1449b509b32b07f0b0abb3b532f2 (patch)
tree       a208267d4fcb05da3e22969d084b59d446bcf6af
parent     e4378da833bcba7b0cb7a5dabc63bf54e08bb08c (diff)
download   armnn-60ab9765329b1449b509b32b07f0b0abb3b532f2.tar.gz
IVGCVSW-6673 Implement CanBeImported function to ClTensorHandle
* Added Unittests

Signed-off-by: Nikhil Raj <nikhil.raj@arm.com>
Signed-off-by: David Monahan <David.Monahan@arm.com>
Change-Id: If7c0add39583a7e47b43fd79f93c620f86f80fc1
-rw-r--r--  src/backends/cl/ClImportTensorHandle.hpp            | 61
-rw-r--r--  src/backends/cl/ClTensorHandle.hpp                   |  7
-rw-r--r--  src/backends/cl/test/ClImportTensorHandleTests.cpp   | 57
3 files changed, 124 insertions(+), 1 deletion(-)
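
The new CanBeImported entry point lets a caller probe whether a user buffer can be zero-copy imported (via a trial clImportMemoryARM mapping that is released straight away) before committing to it. A minimal caller-side sketch, not part of this patch; the helper name, header paths and fallback policy are illustrative assumptions:

#include <armnn/MemorySources.hpp>
#include <armnn/backends/ITensorHandle.hpp>

// Hypothetical helper: only attempt a zero-copy import when the handle
// reports that the buffer is importable; otherwise signal the caller to
// fall back to copying the data into the tensor.
bool TryZeroCopyImport(armnn::ITensorHandle* handle, void* userBuffer)
{
    using armnn::MemorySource;
    if (handle->CanBeImported(userBuffer, MemorySource::Malloc))
    {
        // CanBeImported has already performed and released a trial mapping,
        // so the real Import at workload execution is expected to succeed.
        return handle->Import(userBuffer, MemorySource::Malloc);
    }
    return false; // caller copies instead of importing
}
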
diff --git a/src/backends/cl/ClImportTensorHandle.hpp b/src/backends/cl/ClImportTensorHandle.hpp
index a24ab5656e..18cd1ffeb4 100644
--- a/src/backends/cl/ClImportTensorHandle.hpp
+++ b/src/backends/cl/ClImportTensorHandle.hpp
@@ -192,12 +192,71 @@ public:
}
}
+ virtual bool CanBeImported(void* memory, MemorySource source) override
+ {
+ if (m_ImportFlags & static_cast<MemorySourceFlags>(source))
+ {
+ if (source == MemorySource::Malloc)
+ {
+ const cl_import_properties_arm importProperties[] =
+ {
+ CL_IMPORT_TYPE_ARM,
+ CL_IMPORT_TYPE_HOST_ARM,
+ 0
+ };
+
+ size_t totalBytes = m_Tensor.info()->total_size();
+
+ // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
+ // This does not change the size of the buffer, only the size of the mapping it is mapped with.
+ // We do this to match the behaviour of the Import function later on.
+ auto cachelineAlignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
+
+ cl_int error = CL_SUCCESS;
+ cl_mem buffer;
+ buffer = clImportMemoryARM(arm_compute::CLKernelLibrary::get().context().get(),
+ CL_MEM_READ_WRITE, importProperties, memory, roundedSize, &error);
+
+ // If we fail to map, we know the import will not succeed and can return false.
+ // There is no memory to release when the error is not CL_SUCCESS.
+ if (error != CL_SUCCESS)
+ {
+ return false;
+ }
+ else
+ {
+ // If the import was successful, we can release the mapping knowing that the import will succeed
+ // at workload execution, and return true.
+ error = clReleaseMemObject(buffer);
+ if (error == CL_SUCCESS)
+ {
+ return true;
+ }
+ else
+ {
+ // If we couldn't release the mapping, this constitutes a memory leak, so throw an exception.
+ throw MemoryImportException("ClImportTensorHandle::Failed to unmap cl_mem buffer: "
+ + std::to_string(error));
+ }
+ }
+ }
+ }
+ else
+ {
+ throw MemoryImportException("ClImportTensorHandle::Incorrect import flag");
+ }
+ return false;
+ }
+
private:
bool ClImport(const cl_import_properties_arm* importProperties, void* memory, bool isProtected = false)
{
size_t totalBytes = m_Tensor.info()->total_size();
- // Round the size of the buffer to a multiple of the CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE
+ // Round the size of the mapping up to a multiple of CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE.
+ // This does not change the size of the buffer, only the size of the mapping it is mapped with.
auto cachelineAlignment =
arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
auto roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);
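
The rounding expression used in both CanBeImported and ClImport pads the mapped size up past the tensor size so that whole cachelines are covered. A standalone sketch of the same arithmetic, with example values assumed for illustration (a 64-byte cacheline and a 100-byte tensor), not taken from the patch:

#include <cstddef>
#include <iostream>

int main()
{
    // Assumed values for illustration only.
    const std::size_t cachelineAlignment = 64;   // CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE on the device
    const std::size_t totalBytes         = 100;  // m_Tensor.info()->total_size()

    // Same expression as the handle: 64 + 100 - (100 % 64) = 128, i.e. two whole cachelines.
    // Note that an exact multiple (e.g. 128 bytes) still gains a full extra cacheline (192).
    const std::size_t roundedSize = cachelineAlignment + totalBytes - (totalBytes % cachelineAlignment);

    std::cout << "mapping size: " << roundedSize << " bytes" << std::endl;
    return 0;
}
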
diff --git a/src/backends/cl/ClTensorHandle.hpp b/src/backends/cl/ClTensorHandle.hpp
index 5720d2cf11..6fccb8d395 100644
--- a/src/backends/cl/ClTensorHandle.hpp
+++ b/src/backends/cl/ClTensorHandle.hpp
@@ -138,6 +138,13 @@ public:
return false;
}
+ virtual bool CanBeImported(void* memory, MemorySource source) override
+ {
+ // This TensorHandle can never import.
+ armnn::IgnoreUnused(memory, source);
+ return false;
+ }
+
private:
// Only used for testing
void CopyOutTo(void* memory) const override
diff --git a/src/backends/cl/test/ClImportTensorHandleTests.cpp b/src/backends/cl/test/ClImportTensorHandleTests.cpp
index 0403d5379e..3d702642aa 100644
--- a/src/backends/cl/test/ClImportTensorHandleTests.cpp
+++ b/src/backends/cl/test/ClImportTensorHandleTests.cpp
@@ -217,4 +217,61 @@ TEST_CASE_FIXTURE(ClContextControlFixture, "ClImportEndToEnd")
}
}
+TEST_CASE_FIXTURE(ClContextControlFixture, "ClCanBeImported")
+{
+ ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
+ static_cast<MemorySourceFlags>(MemorySource::Malloc));
+
+ TensorInfo info({ 1, 24, 16, 3 }, DataType::Float32);
+
+ // Create a TensorHandle for memory import
+ auto handle = handleFactory.CreateTensorHandle(info);
+
+ // Get the underlying CLTensor
+ arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
+
+ // Allocate user memory
+ const size_t totalBytes = tensor.info()->total_size();
+ const size_t alignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ size_t space = totalBytes + alignment + alignment;
+ auto testData = std::make_unique<uint8_t[]>(space);
+ void* alignedPtr = testData.get();
+ CHECK(std::align(alignment, totalBytes, alignedPtr, space));
+
+ // Check that CanBeImported throws for an unsupported MemorySource (Undefined)
+ CHECK_THROWS_AS(handle->CanBeImported(alignedPtr, armnn::MemorySource::Undefined), MemoryImportException);
+
+}
+
+TEST_CASE("ClCanBeImportedAlignedMemory")
+{
+ ClImportTensorHandleFactory handleFactory(static_cast<MemorySourceFlags>(MemorySource::Malloc),
+ static_cast<MemorySourceFlags>(MemorySource::Malloc));
+
+ TensorInfo info({ 1, 1, 1, 1 }, DataType::Float32);
+
+ // Create a TensorHandle (memory managed status is irrelevant)
+ auto handle = handleFactory.CreateTensorHandle(info);
+ // Get the underlying CLTensor
+ arm_compute::CLTensor& tensor = PolymorphicDowncast<ClImportTensorHandle*>(handle.get())->GetTensor();
+
+ // Create an aligned buffer
+ const size_t totalBytes = tensor.info()->total_size();
+ const size_t alignment =
+ arm_compute::CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+ size_t space = totalBytes + alignment + alignment;
+ auto testData = std::make_unique<uint8_t[]>(space);
+ void* alignedPtr = testData.get();
+ CHECK(std::align(alignment, totalBytes, alignedPtr, space));
+
+ // Check aligned buffers return true
+ CHECK(handle->CanBeImported(alignedPtr, MemorySource::Malloc) == true);
+
+ // Due to the nature of how GPU memory is mapped, it is entirely possible for memory which is misaligned on the
+ // CPU to be successfully imported on the GPU. As such, there is no way to create a misaligned pointer that will
+ // always fail. Rather, it will succeed on some devices and fail on others. As long as a correctly aligned buffer
+ // returns true, we can be confident that it will be successfully imported. All other cases will need to be
+ // handled by the user (see the sketch after this diff).
+}
+
}
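
As the comment in ClCanBeImportedAlignedMemory notes, only a correctly aligned buffer is guaranteed to be importable; anything else is the caller's responsibility. One possible caller-side fallback, sketched under assumptions (the helper, its name and the use of std::aligned_alloc are not part of this patch), is to stage the data in a cacheline-aligned copy before offering it for import:

#include <cstddef>
#include <cstdlib>
#include <cstring>

// Hypothetical helper: copy 'src' into a freshly allocated buffer aligned to
// 'alignment' bytes (normally CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE), so the
// aligned copy can be passed to CanBeImported/Import instead of the original,
// possibly misaligned, pointer. The caller owns the result and must std::free it.
void* MakeAlignedCopy(const void* src, std::size_t bytes, std::size_t alignment)
{
    // std::aligned_alloc requires the size to be a multiple of the alignment.
    const std::size_t paddedBytes = ((bytes + alignment - 1) / alignment) * alignment;
    void* dst = std::aligned_alloc(alignment, paddedBytes);
    if (dst != nullptr)
    {
        std::memcpy(dst, src, bytes);
    }
    return dst;
}
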