From 4d0351cf322df51baa5a445f637008992aa37809 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Wed, 3 Apr 2019 15:11:16 +0100
Subject: COMPMID-2057: Implement and test import memory interfaces.

Change-Id: I1559bea47ae6403177d248e2f7be47d5f1a6513f
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/c/956
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Michalis Spyrou
---
 arm_compute/core/CL/OpenCL.h                   |   5 +-
 arm_compute/runtime/CL/CLTensorAllocator.h     |  14 ++-
 arm_compute/runtime/TensorAllocator.h          |  13 ++-
 src/core/CL/OpenCL.cpp                         |  29 +++++-
 src/runtime/CL/CLTensorAllocator.cpp           |   6 +-
 src/runtime/TensorAllocator.cpp                |   6 +-
 tests/validation/CL/UNIT/TensorAllocator.cpp   | 121 ++++++++++++++++++++++---
 tests/validation/NEON/UNIT/TensorAllocator.cpp |  77 ++++++++++++++--
 8 files changed, 231 insertions(+), 40 deletions(-)

diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 65cbb3282b..fc7083d276 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -131,6 +131,9 @@ public:
     DECLARE_FUNCTION_PTR(clEnqueueMarker);
     DECLARE_FUNCTION_PTR(clWaitForEvents);
 
+    // Third-party extensions
+    DECLARE_FUNCTION_PTR(clImportMemoryARM);
+
 #undef DECLARE_FUNCTION_PTR
 
 private:
diff --git a/arm_compute/runtime/CL/CLTensorAllocator.h b/arm_compute/runtime/CL/CLTensorAllocator.h
index de5f482d05..302bd6d52a 100644
--- a/arm_compute/runtime/CL/CLTensorAllocator.h
+++ b/arm_compute/runtime/CL/CLTensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -103,13 +103,17 @@ public:
     void free() override;
     /** Import an existing memory as a tensor's backing memory
      *
-     * @warning ownership of memory is not transferred
+     * @warning memory should have been created under the same context that ACL uses.
+     * @warning memory is expected to be aligned with the device requirements.
+     * @warning tensor shouldn't be memory managed.
+     * @warning ownership of memory is not transferred.
+     * @note buffer size will be checked to be compliant with total_size reported by ITensorInfo.
      *
-     * @param[in] buffer Buffer to import
+     * @param[in] buffer Buffer to be used as backing memory
      *
-     * @return error status
+     * @return An error status
      */
-    arm_compute::Status import_memory(cl::Buffer buffer);
+    Status import_memory(cl::Buffer buffer);
     /** Associates the tensor with a memory group
      *
      * @param[in] associated_memory_group Memory group to associate the tensor with
diff --git a/arm_compute/runtime/TensorAllocator.h b/arm_compute/runtime/TensorAllocator.h
index ba9e5163ab..f296bc2e3d 100644
--- a/arm_compute/runtime/TensorAllocator.h
+++ b/arm_compute/runtime/TensorAllocator.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -95,11 +95,16 @@ public:
     void free() override;
     /** Import an existing memory as a tensor's backing memory
      *
-     * @warning ownership of memory is not transferred
+     * @warning size is expected to be compliant with total_size reported by ITensorInfo.
+     * @warning ownership of memory is not transferred.
+     * @warning tensor shouldn't be memory managed.
+     * @note buffer alignment will be checked to be compliant with alignment reported by ITensorInfo.
      *
-     * @return error status
+     * @param[in] memory Raw memory pointer to be used as backing memory
+     *
+     * @return An error status
      */
-    arm_compute::Status import_memory(void *memory, size_t size);
+    Status import_memory(void *memory);
     /** Associates the tensor with a memory group
      *
      * @param[in] associated_memory_group Memory group to associate the tensor with
diff --git a/src/core/CL/OpenCL.cpp b/src/core/CL/OpenCL.cpp
index 6725f36a5d..ef03a5a302 100644
--- a/src/core/CL/OpenCL.cpp
+++ b/src/core/CL/OpenCL.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -120,6 +120,9 @@ bool CLSymbols::load(const std::string &library)
     LOAD_FUNCTION_PTR(clEnqueueMarker, handle);
     LOAD_FUNCTION_PTR(clWaitForEvents, handle);
 
+    // Third-party extensions
+    LOAD_FUNCTION_PTR(clImportMemoryARM, handle);
+
 #undef LOAD_FUNCTION_PTR
 
     //Don't call dlclose(handle) or all the symbols will be unloaded !
@@ -919,3 +922,27 @@ clGetEventProfilingInfo(cl_event event,
         return CL_OUT_OF_RESOURCES;
     }
 }
+
+cl_mem
+clImportMemoryARM(cl_context context,
+                  cl_mem_flags flags,
+                  const cl_import_properties_arm *properties,
+                  void *memory,
+                  size_t size,
+                  cl_int *errcode_ret)
+{
+    arm_compute::CLSymbols::get().load_default();
+    auto func = arm_compute::CLSymbols::get().clImportMemoryARM_ptr;
+    if(func != nullptr)
+    {
+        return func(context, flags, properties, memory, size, errcode_ret);
+    }
+    else
+    {
+        if(errcode_ret != nullptr)
+        {
+            *errcode_ret = CL_OUT_OF_RESOURCES;
+        }
+        return nullptr;
+    }
+}
\ No newline at end of file
diff --git a/src/runtime/CL/CLTensorAllocator.cpp b/src/runtime/CL/CLTensorAllocator.cpp
index 0307498335..2ce64551ae 100644
--- a/src/runtime/CL/CLTensorAllocator.cpp
+++ b/src/runtime/CL/CLTensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -101,10 +101,10 @@ void CLTensorAllocator::free()
     info().set_is_resizable(true);
 }
 
-arm_compute::Status CLTensorAllocator::import_memory(cl::Buffer buffer)
+Status CLTensorAllocator::import_memory(cl::Buffer buffer)
 {
     ARM_COMPUTE_RETURN_ERROR_ON(buffer.get() == nullptr);
-    ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_SIZE>() == 0);
+    ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_SIZE>() < info().total_size());
     ARM_COMPUTE_RETURN_ERROR_ON(buffer.getInfo<CL_MEM_CONTEXT>().get() != CLScheduler::get().context().get());
     ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);
 
diff --git a/src/runtime/TensorAllocator.cpp b/src/runtime/TensorAllocator.cpp
index 38edb8ba03..0612d751f0 100644
--- a/src/runtime/TensorAllocator.cpp
+++ b/src/runtime/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -149,11 +149,11 @@ void TensorAllocator::free()
     info().set_is_resizable(true);
 }
 
-arm_compute::Status TensorAllocator::import_memory(void *memory, size_t size)
+Status TensorAllocator::import_memory(void *memory)
 {
     ARM_COMPUTE_RETURN_ERROR_ON(memory == nullptr);
-    ARM_COMPUTE_RETURN_ERROR_ON(size == 0);
     ARM_COMPUTE_RETURN_ERROR_ON(_associated_memory_group != nullptr);
+    ARM_COMPUTE_RETURN_ERROR_ON(alignment() != 0 && !arm_compute::utility::check_aligned(memory, alignment()));
 
     _memory.set_owned_region(support::cpp14::make_unique<MemoryRegion>(memory, info().total_size()));
     info().set_is_resizable(false);
 
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index 849eee84d0..abe06c544b 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018 ARM Limited.
+ * Copyright (c) 2018-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -25,10 +25,16 @@
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ActivationLayer.h"
 
 #include <memory>
+#include <random>
 
 namespace arm_compute
 {
@@ -36,14 +42,33 @@ namespace test
 {
 namespace validation
 {
+namespace
+{
+cl_mem import_malloc_memory_helper(void *ptr, size_t size)
+{
+    const cl_import_properties_arm import_properties[] =
+    {
+        CL_IMPORT_TYPE_ARM,
+        CL_IMPORT_TYPE_HOST_ARM,
+        0
+    };
+
+    cl_int err = CL_SUCCESS;
+    cl_mem buf = clImportMemoryARM(CLKernelLibrary::get().context().get(), CL_MEM_READ_WRITE, import_properties, ptr, size, &err);
+    ARM_COMPUTE_EXPECT(err == CL_SUCCESS, framework::LogLevel::ERRORS);
+
+    return buf;
+}
+} // namespace
+
 TEST_SUITE(CL)
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorAllocator)
 
-TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
+TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
 {
     // Init tensor info
-    TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
+    const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);
 
     // Allocate memory buffer
     const size_t total_size = info.total_size();
@@ -62,20 +87,88 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
     ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
 
-    // Positive case : Set raw pointer
-    CLTensor t3;
-    t3.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!t3.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
-    t3.allocator()->free();
+    // Negative case : Invalid buffer size
+    CLTensor         t3;
+    const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
+    t3.allocator()->init(info_neg);
+    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
+
+    // Positive case : Set raw pointer
+    CLTensor t4;
+    t4.allocator()->init(info);
+    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
+    t4.allocator()->free();
+    ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
+{
+    // Check if import extension is supported
+    if(!device_supports_extension(CLKernelLibrary::get().get_device(), "cl_arm_import_memory"))
+    {
+        return;
+    }
+    else
+    {
+        const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
+        const TensorShape         shape     = TensorShape(24U, 16U, 3U);
+        const DataType            data_type = DataType::F32;
+
+        // Create tensor
+        const TensorInfo info(shape, 1, data_type);
+        CLTensor         tensor;
+        tensor.allocator()->init(info);
+
+        // Create and configure activation function
+        CLActivationLayer act_func;
+        act_func.configure(&tensor, nullptr, act_info);
+
+        // Allocate and import tensor
+        const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
+        const size_t total_size_in_bytes = tensor.info()->total_size();
+        const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
+        size_t       space               = total_size_in_bytes + alignment;
+        auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+
+        void *aligned_ptr = raw_data.get();
+        support::cpp11::align(alignment, total_size_in_bytes, aligned_ptr, space);
+
+        cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
+        ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+        // Fill tensor
+        std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+        std::mt19937                          gen(library->seed());
+        auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
+        for(unsigned int i = 0; i < total_size_in_elems; ++i)
+        {
+            typed_ptr[i] = distribution(gen);
+        }
+
+        // Execute function and sync
+        act_func.run();
+        CLScheduler::get().sync();
+
+        // Validate result by checking that the input has no negative values
+        for(unsigned int i = 0; i < total_size_in_elems; ++i)
+        {
+            ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
+        }
+
+        // Release resources
+        tensor.allocator()->free();
+        ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+    }
 }
 
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // TensorAllocator
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/NEON/UNIT/TensorAllocator.cpp b/tests/validation/NEON/UNIT/TensorAllocator.cpp
index 384a00855b..7ba83c11b3 100644
--- a/tests/validation/NEON/UNIT/TensorAllocator.cpp
+++ b/tests/validation/NEON/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -26,12 +26,19 @@
 #include "arm_compute/core/utils/misc/Utility.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/MemoryRegion.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "support/ToolchainSupport.h"
+#include "tests/Globals.h"
 #include "tests/Utils.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+#include <memory>
+#include <random>
 
 namespace arm_compute
 {
@@ -52,29 +59,30 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
     const size_t total_size = info.total_size();
     auto         data       = support::cpp14::make_unique<uint8_t[]>(total_size);
 
-    // Negative case : Import pointer with zero size
+    // Negative case : Import nullptr
     Tensor t1;
     t1.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(data.get(), 0)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(nullptr)), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS);
 
-    // Negative case : Import nullptr
-    Tensor t2;
-    t2.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(nullptr, total_size)), framework::LogLevel::ERRORS);
+    // Negative case : Import misaligned pointer
+    Tensor       t2;
+    const size_t required_alignment = 339;
+    t2.allocator()->init(info, required_alignment);
+    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Negative case : Import memory to a tensor that is memory managed
     Tensor      t3;
     MemoryGroup mg;
     t3.allocator()->set_associated_memory_group(&mg);
-    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(data.get(), total_size)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
 
     // Positive case : Set raw pointer
     Tensor t4;
     t4.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(data.get(), total_size)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(data.get())), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
     ARM_COMPUTE_EXPECT(t4.buffer() == reinterpret_cast<uint8_t *>(data.get()), framework::LogLevel::ERRORS);
     t4.allocator()->free();
@@ -82,6 +90,57 @@ TEST_CASE(ImportMemory, framework::DatasetMode::ALL)
     ARM_COMPUTE_EXPECT(t4.buffer() == nullptr, framework::LogLevel::ERRORS);
 }
 
+TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
+{
+    const ActivationLayerInfo act_info(ActivationLayerInfo::ActivationFunction::RELU);
+    const TensorShape         shape     = TensorShape(24U, 16U, 3U);
+    const DataType            data_type = DataType::F32;
+
+    // Create tensor
+    const TensorInfo info(shape, 1, data_type);
+    const size_t     required_alignment = 64;
+    Tensor           tensor;
+    tensor.allocator()->init(info, required_alignment);
+
+    // Create and configure activation function
+    NEActivationLayer act_func;
+    act_func.configure(&tensor, nullptr, act_info);
+
+    // Allocate and import tensor
+    const size_t total_size_in_elems = tensor.info()->tensor_shape().total_size();
+    const size_t total_size_in_bytes = tensor.info()->total_size();
+    size_t       space               = total_size_in_bytes + required_alignment;
+    auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+
+    void *aligned_ptr = raw_data.get();
+    support::cpp11::align(required_alignment, total_size_in_bytes, aligned_ptr, space);
+
+    ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(aligned_ptr)), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+
+    // Fill tensor
+    std::uniform_real_distribution<float> distribution(-5.f, 5.f);
+    std::mt19937                          gen(library->seed());
+    auto                                 *typed_ptr = reinterpret_cast<float *>(aligned_ptr);
+    for(unsigned int i = 0; i < total_size_in_elems; ++i)
+    {
+        typed_ptr[i] = distribution(gen);
+    }
+
+    // Execute function and sync
+    act_func.run();
+
+    // Validate result by checking that the input has no negative values
+    for(unsigned int i = 0; i < total_size_in_elems; ++i)
+    {
+        ARM_COMPUTE_EXPECT(typed_ptr[i] >= 0, framework::LogLevel::ERRORS);
+    }
+
+    // Release resources
+    tensor.allocator()->free();
+    ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+}
+
 TEST_CASE(AlignedAlloc, framework::DatasetMode::ALL)
 {
     // Init tensor info
-- 
cgit v1.2.1
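
Usage sketch (not part of the patch): the snippet below shows how a caller is expected to drive the reworked NEON import_memory() interface, assuming only the post-patch API above. The size argument is gone because the required size now comes from the tensor's ITensorInfo, so size/shape mismatches are caught inside the allocator; ownership of the buffer stays with the caller throughout. The shapes and the RELU function mirror the tests; everything else (main(), error codes) is illustrative only.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <cstdint>
    #include <memory>

    using namespace arm_compute;

    int main()
    {
        // Describe a 24x16x3 F32 tensor, as in the tests above.
        const TensorInfo info(TensorShape(24U, 16U, 3U), 1, DataType::F32);

        Tensor tensor;
        tensor.allocator()->init(info); // default alignment of 0 skips the alignment check

        // Caller-owned backing memory; must be at least info.total_size() bytes.
        auto buffer = std::make_unique<uint8_t[]>(info.total_size());

        // Bind the caller's memory to the tensor. Ownership is not transferred;
        // per the patch, the call fails on nullptr, on a memory-managed tensor,
        // or on a misaligned pointer when a non-zero alignment was requested.
        if(!bool(tensor.allocator()->import_memory(buffer.get())))
        {
            return 1;
        }

        // Run any configured NEON function on the tensor, e.g. an in-place RELU.
        NEActivationLayer act;
        act.configure(&tensor, nullptr, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
        act.run();

        // Detach the imported region before the buffer goes away; free() does
        // not deallocate imported memory, it only unbinds it from the tensor.
        tensor.allocator()->free();
        return 0;
    }

The CL path is analogous: wrap the cl_mem returned by clImportMemoryARM() in a cl::Buffer (as the test helper import_malloc_memory_helper() does) and hand it to CLTensorAllocator::import_memory(), which additionally verifies the buffer's size and context against the tensor info and the scheduler's context.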