5 files changed, 232 insertions, 110 deletions
diff --git a/tests/validation/CL/UNIT/DynamicTensor.cpp b/tests/validation/CL/UNIT/DynamicTensor.cpp
index 833256039e..ac433721d8 100644
--- a/tests/validation/CL/UNIT/DynamicTensor.cpp
+++ b/tests/validation/CL/UNIT/DynamicTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,11 +29,8 @@
 #include "arm_compute/runtime/MemoryManagerOnDemand.h"
 #include "arm_compute/runtime/PoolManager.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLIm2ColKernel.h"
 #include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
 #include "src/core/CL/kernels/CLReductionOperationKernel.h"
-#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
diff --git a/tests/validation/CL/UNIT/Multithreaded.cpp b/tests/validation/CL/UNIT/Multithreaded.cpp
new file mode 100644
index 0000000000..5c75df709d
--- /dev/null
+++ b/tests/validation/CL/UNIT/Multithreaded.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "arm_compute/runtime/RuntimeContext.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/ParametersLibrary.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+#include <array>
+#include <thread>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(RuntimeContext)
+// This test schedules work concurrently from num_threads independent threads, each using its own functions and tensors
+TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL)
+{
+    constexpr auto num_threads(16u);
+    std::array<CLActivationLayer, num_threads>         func{}; // One activation function per thread
+    std::array<CLPixelWiseMultiplication, num_threads> pmul{}; // One multiplication function per thread
+    std::array<CLTensor, num_threads>                  s0{};   // First multiplication input
+    std::array<CLTensor, num_threads>                  s1{};   // Second multiplication input
+
+    std::array<CLTensor, num_threads> st{}; // Multiplication output, consumed by the activation
+    std::array<CLTensor, num_threads> dt{}; // Activation output, validated at the end of the test
+
+    const TensorShape         tensor_shape(128u, 4u, 5u);
+    const ActivationLayerInfo ainfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.5f, 1.f);
+    std::array<std::thread, num_threads> threads;
+    auto ctx = parameters->get_ctx<CLTensor>(); // Context handed to every CLActivationLayer below
+
+    for(auto i = 0u; i < num_threads; ++i)
+    {
+        s0[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        s1[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        st[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        dt[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        func[i] = CLActivationLayer(ctx);
+        pmul[i] = CLPixelWiseMultiplication();
+        threads[i] =
+            std::thread([&,i] // The arrays are captured by reference, the slot index i by value
+        {
+            auto &s  = st[i];
+            auto &t  = dt[i];
+            auto &p0 = s0[i];
+            auto &p1 = s1[i];
+            pmul[i].configure(&p0, &p1, &s, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP);
+            func[i].configure(&s, &t, ainfo);
+            s.allocator()->allocate(); // Tensors are allocated only after both functions have been configured
+            t.allocator()->allocate();
+            p0.allocator()->allocate();
+            p1.allocator()->allocate();
+            library->fill_tensor_uniform(CLAccessor(p0), 0, -1.f, 1.f); // Same fill arguments in every thread
+            library->fill_tensor_uniform(CLAccessor(p1), 0, -1.f, 1.f);
+            pmul[i].run();
+            func[i].run(); // Consumes s, the output of the multiplication
+        });
+    }
+
+    for(auto &t : threads) // Wait for every worker before reading any result
+    {
+        t.join();
+    }
+    // The reference inputs use the same fill arguments as the device inputs; one golden result is compared against every output
+    SimpleTensor<float> rs{ tensor_shape, DataType::F32, 1 };
+    SimpleTensor<float> ra{ tensor_shape, DataType::F32, 1 };
+    SimpleTensor<float> rb{ tensor_shape, DataType::F32, 1 };
+    library->fill_tensor_uniform(ra, 0, -1.f, 1.f);
+    library->fill_tensor_uniform(rb, 0, -1.f, 1.f);
+    const auto mul    = reference::pixel_wise_multiplication<float, float, float>(ra, rb, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP, DataType::F32);
+    const auto golden = reference::activation_layer<float>(mul, ainfo);
+    for(auto &d : dt)
+    {
+        validate(CLAccessor(d), golden);
+    }
+}
+
+TEST_SUITE_END() // RuntimeContext
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index 3ccdd99fe3..559f47e16c 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,9 +24,14 @@
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 
 #include "arm_compute/core/utils/misc/MMappedFile.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/CL/CLBufferAllocator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
@@ -60,12 +65,108 @@ cl_mem import_malloc_memory_helper(void *ptr, size_t size)
 
     return buf;
 }
+/** Test allocator that forwards every request to a CLBufferAllocator and counts allocate() and make_region() calls */
+class DummyAllocator final : public IAllocator
+{
+public:
+    DummyAllocator() = default;
+
+    void *allocate(size_t size, size_t alignment) override
+    {
+        ++_n_calls; // Count the request before delegating it
+        return _backend_allocator.allocate(size, alignment);
+    }
+    void free(void *ptr) override
+    {
+        return _backend_allocator.free(ptr); // Frees are forwarded but not counted
+    }
+    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
+    {
+        // This override is required because it is the entry point used internally by the CLTensorAllocator
+        ++_n_calls;
+        return _backend_allocator.make_region(size, alignment);
+    }
+    int get_n_calls() const // Number of allocate() and make_region() calls made so far
+    {
+        return _n_calls;
+    }
+
+private:
+    int               _n_calls{}; // Incremented by allocate() and make_region()
+    CLBufferAllocator _backend_allocator{}; // Performs the actual allocations
+};
+/** Configures, allocates and runs a CLGEMMConvolutionLayer on fixed F32/NHWC tensors; a non-null mm is populated through mm_allocator before the run */
+void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
+{
+    // Create tensors (only their metadata is initialised here; memory is allocated further below)
+    CLTensor src, weights, bias, dst;
+    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
+    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
+    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+
+    // Create and configure function (mm may be nullptr, in which case no memory manager is used)
+    CLGEMMConvolutionLayer conv(mm);
+    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));
+
+    // Allocate tensors (four explicit allocations: src, weights, bias and dst)
+    src.allocator()->allocate();
+    weights.allocator()->allocate();
+    bias.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    // Finalize memory manager, if one was supplied: populate a single pool through mm_allocator and check its state
+    if(mm != nullptr)
+    {
+        mm->populate(mm_allocator, 1 /* num_pools */);
+        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
+    }
+
+    conv.run();
+}
 } // namespace
 
 TEST_SUITE(CL)
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorAllocator)
 
+/** Validates that an external global allocator can be used for all internal allocations */
+TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
+{
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution without a memory manager
+    run_conv2d(nullptr /* mm */, global_tensor_alloc);
+
+    // Check that the allocator has been called more than 4 times (run_conv2d itself allocates 4 tensors explicitly)
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Reset the global allocator: the registered pointer refers to a local that is destroyed when this test returns
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
+/** Validates that an external global allocator can be used for the pool manager */
+TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
+{
+    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+    auto pool_mgr     = std::make_shared<PoolManager>();
+    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution with the memory manager; run_conv2d populates its pool through the same allocator
+    run_conv2d(mm, global_tensor_alloc);
+
+    // Check that the allocator has been called more than 4 times (run_conv2d itself allocates 4 tensors explicitly)
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Reset the global allocator: the registered pointer refers to a local that is destroyed when this test returns
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
 /** Validates import memory interface when importing cl buffer objects */
 TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
 {
@@ -79,31 +180,31 @@ TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
     // Negative case : Import nullptr
     CLTensor t1;
     t1.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(cl::Buffer())), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer())));
+    ARM_COMPUTE_ASSERT(t1.info()->is_resizable());
 
     // Negative case : Import memory to a tensor that is memory managed
     CLTensor    t2;
     MemoryGroup mg;
     t2.allocator()->set_associated_memory_group(&mg);
-    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(t2.info()->is_resizable());
 
     // Negative case : Invalid buffer size
     CLTensor         t3;
     const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
     t3.allocator()->init(info_neg);
-    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(t3.info()->is_resizable());
 
     // Positive case : Set raw pointer
     CLTensor t4;
     t4.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(!t4.info()->is_resizable());
     ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
     t4.allocator()->free();
-    ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(t4.info()->is_resizable());
     ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
 }
 
@@ -141,8 +242,8 @@ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
         std::align(alignment, total_size_in_bytes, aligned_ptr, space);
 
         cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
-        ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
+        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
 
         // Fill tensor
         std::uniform_real_distribution<float> distribution(-5.f, 5.f);
@@ -205,12 +306,12 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
 
         // Map file
         utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /** Whole file */, 0);
-        ARM_COMPUTE_EXPECT(mmapped_file.is_mapped(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(mmapped_file.is_mapped());
         unsigned char *data = mmapped_file.data();
 
         cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
-        ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
+        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
 
         // Fill tensor
         std::uniform_real_distribution<float> distribution(-5.f, 5.f);
@@ -233,7 +334,7 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
 
         // Release resources
         tensor.allocator()->free();
-        ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
     }
 }
 #endif // !defined(BARE_METAL)
diff --git a/tests/validation/CL/UNIT/Tuner.cpp b/tests/validation/CL/UNIT/Tuner.cpp
deleted file mode 100644
index cf2513bf2c..0000000000
--- a/tests/validation/CL/UNIT/Tuner.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/tuners/BifrostTuner.h"
-#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "tests/Utils.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(UNIT)
-TEST_SUITE(Tuner)
-
-/** Validates static tuning of Bifrost tuner */
-TEST_CASE(BifrostTunerSimple, framework::DatasetMode::ALL)
-{
-    // Create tuner
-    tuners::BifrostTuner tuner;
-
-    // Create tensors
-    auto src     = create_tensor<CLTensor>(TensorShape(13U, 13U, 16U), DataType::F32);
-    auto weights = create_tensor<CLTensor>(TensorShape(3U, 3U, 16U, 3U), DataType::F32);
-    auto bias    = create_tensor<CLTensor>(TensorShape(3U), DataType::F32);
-    auto dst     = create_tensor<CLTensor>(TensorShape(13U, 13U, 3U), DataType::F32);
-
-    // Create kernel
-    cl::NDRange                    fake_lws(2000);
-    CLDirectConvolutionLayerKernel conv;
-    conv.set_target(GPUTarget::G72);
-
-    // Configure
-    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1));
-
-    // Hard-wire lws to kernel and validate lws
-    conv.set_lws_hint(fake_lws);
-    ARM_COMPUTE_EXPECT(conv.lws_hint()[0] == 2000, framework::LogLevel::ERRORS);
-
-    // Tune kernel and validate
-    tuner.tune_kernel_static(conv);
-    ARM_COMPUTE_EXPECT(conv.lws_hint()[0] != 2000, framework::LogLevel::ERRORS);
-
-    // Clear tuner
-    CLScheduler::get().default_init();
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/WeightsRetention.cpp b/tests/validation/CL/UNIT/WeightsRetention.cpp
index acf795e48b..357c88af10 100644
--- a/tests/validation/CL/UNIT/WeightsRetention.cpp
+++ b/tests/validation/CL/UNIT/WeightsRetention.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,18 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"