Diffstat (limited to 'tests/validation/CL/UNIT')
-rw-r--r--  tests/validation/CL/UNIT/CompileContext.cpp   |   3
-rw-r--r--  tests/validation/CL/UNIT/DynamicTensor.cpp    |   5
-rw-r--r--  tests/validation/CL/UNIT/Extensions.cpp       |  49
-rw-r--r--  tests/validation/CL/UNIT/MLGOHeuristics.cpp   | 473
-rw-r--r--  tests/validation/CL/UNIT/MemoryManager.cpp    |   2
-rw-r--r--  tests/validation/CL/UNIT/Multithreaded.cpp    | 113
-rw-r--r--  tests/validation/CL/UNIT/TensorAllocator.cpp  | 137
-rw-r--r--  tests/validation/CL/UNIT/Tuner.cpp            |  78
-rw-r--r--  tests/validation/CL/UNIT/WeightsRetention.cpp |   3
9 files changed, 763 insertions, 100 deletions
diff --git a/tests/validation/CL/UNIT/CompileContext.cpp b/tests/validation/CL/UNIT/CompileContext.cpp
index 5245044323..a0541b6a99 100644
--- a/tests/validation/CL/UNIT/CompileContext.cpp
+++ b/tests/validation/CL/UNIT/CompileContext.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 ARM Limited.
+ * Copyright (c) 2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,6 +56,7 @@ TEST_CASE(CompileContextCache, framework::DatasetMode::ALL)
     std::set<std::string> build_opts;
     build_opts.emplace("-DDATA_TYPE=float");
     build_opts.emplace("-DVEC_SIZE=16");
+    build_opts.emplace("-DVEC_SIZE_LEFTOVER=0");
     compile_context.create_kernel(kernel_name, program_name, kernel_src.first, kernel_path, build_opts, kernel_src.second);
 
     // Check if the program is stored in the cache
diff --git a/tests/validation/CL/UNIT/DynamicTensor.cpp b/tests/validation/CL/UNIT/DynamicTensor.cpp
index f5cc1f7c9d..ac433721d8 100644
--- a/tests/validation/CL/UNIT/DynamicTensor.cpp
+++ b/tests/validation/CL/UNIT/DynamicTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 ARM Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,6 +28,9 @@
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/MemoryManagerOnDemand.h"
 #include "arm_compute/runtime/PoolManager.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
+#include "src/core/CL/kernels/CLReductionOperationKernel.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
diff --git a/tests/validation/CL/UNIT/Extensions.cpp b/tests/validation/CL/UNIT/Extensions.cpp
new file mode 100644
index 0000000000..8119290d4b
--- /dev/null
+++ b/tests/validation/CL/UNIT/Extensions.cpp
@@ -0,0 +1,49 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/AclVersion.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_CASE(GetClContext, framework::DatasetMode::ALL)
+{
+    const auto ver = AclVersionInfo();
+    ARM_COMPUTE_EXPECT(ver->major == ARM_COMPUTE_LIBRARY_VERSION_MAJOR, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(ver->minor == ARM_COMPUTE_LIBRARY_VERSION_MINOR, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(ver->patch == ARM_COMPUTE_LIBRARY_VERSION_PATCH, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!std::string(ver->build_info).empty(), framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
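For context on the Extensions.cpp test above: AclVersionInfo() is the C API entry point it validates. As a minimal sketch (assuming only the AclVersion fields the test itself touches: major, minor, patch, build_info), an application could use the same call to guard against a header/library version mismatch:

#include "arm_compute/AclVersion.h"

#include <cstdio>

int main()
{
    // Same fields as exercised by the test above
    const AclVersion *ver = AclVersionInfo();
    if(ver->major != ARM_COMPUTE_LIBRARY_VERSION_MAJOR || ver->minor != ARM_COMPUTE_LIBRARY_VERSION_MINOR)
    {
        std::fprintf(stderr, "Header/library version mismatch\n");
        return 1;
    }
    std::printf("Arm Compute Library %d.%d.%d (%s)\n", ver->major, ver->minor, ver->patch, ver->build_info);
    return 0;
}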
diff --git a/tests/validation/CL/UNIT/MLGOHeuristics.cpp b/tests/validation/CL/UNIT/MLGOHeuristics.cpp
new file mode 100644
index 0000000000..e26464f9f7
--- /dev/null
+++ b/tests/validation/CL/UNIT/MLGOHeuristics.cpp
@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/CL/mlgo/MLGOHeuristics.h"
+#include "src/runtime/CL/mlgo/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+
+using namespace arm_compute::mlgo;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(MLGOHeuristics)
+TEST_CASE(CorrectDotMLGOShouldLoadCorrectly, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+
+        <header>
+
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]
+
+        1, g71 , 8, f16, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]
+        2, g76 , 8, f16, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b , 0, var, m, ==, num, 10., 1, 2
+        l , 1, gemm-type, reshaped
+        b , 2, var, r_mn, >=, num, 2., 3, 6
+
+        b , 3, var, n, >=, num, 200., 4, 5
+        l, 4, gemm-type, reshaped-only-rhs
+        l , 5, gemm-type, reshaped
+        l , 6, gemm-type, reshaped-only-rhs
+        </heuristic>
+        <heuristic, 1>
+        b ,0,var, n, >, num, 100., 1, 4
+        b ,1,var, r_mnk, <=, num, 20., 2, 3
+
+
+        l ,2,gemm-config-reshaped-only-rhs, [4, 4,4,2,1,0,1]
+        l ,3,gemm-config-reshaped-only-rhs,[ 2, 2,4,2,1,1, 1 ]
+        b ,4,var, n, >=, num, 199.12, 5, 6
+        l ,5,gemm-config-reshaped-only-rhs, [1, 4,3,4,0,0,0]
+        l ,6,gemm-config-reshaped-only-rhs, [5, 4,4,5,1,1,0]
+        </heuristic>
+
+        <heuristic, 2>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+
+        </heuristic>
+
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    heuristics.reload_from_stream(ss);
+
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 10, 1024, 20, 1 }).second == GEMMType::RESHAPED, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 400, 201, 5, 1 }).second == GEMMType::RESHAPED_ONLY_RHS, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 400, 200, 199, 16 }).second == GEMMType::RESHAPED_ONLY_RHS, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 400, 199, 512, 4 }).second == GEMMType::RESHAPED, framework::LogLevel::ERRORS);
+
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 100, 1024, 20, 32 }).second == GEMMConfigReshapedOnlyRHS{ 4, 4, 4, 2, true, false, true }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 100, 1024, 20, 32 }).second == GEMMConfigReshapedOnlyRHS{ 4, 4, 4, 2, true, false, true }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 128, 101, 20, 1 }).second == GEMMConfigReshapedOnlyRHS{ 2, 2, 4, 2, true, true, true }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 400, 100, 512, 1 }).second == GEMMConfigReshapedOnlyRHS{ 5, 4, 4, 5, true, true, false }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 400, 100, 512, 1 }).second == GEMMConfigReshapedOnlyRHS{ 5, 4, 4, 5, true, true, false }),
+                       framework::LogLevel::ERRORS);
+
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped(Query{ "g76", DataType::F16, 100, 100, 20, 32 }).second == GEMMConfigReshaped{ 4, 2, 4, 2, 8, true, false, true, false }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped(Query{ "g76", DataType::F16, 128, 512, 1024, 1 }).second == GEMMConfigReshaped{ 4, 2, 4, 2, 8, true, false, true, false }),
+                       framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(InvalidDotmlgoSyntaxShouldReturnInvalidStatus, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,pu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+
+        </heurist
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_SUITE(InvalidDotmlgoSemanticsShouldReturnInvalidStatus)
+// If the semantic errors are local to some trees instead of the entire heuristics, an alternative is to simply
+// ignore/remove those invalid trees. However, the reason we choose to throw, thus invalidating the entire
+// heuristics, is that if there are some invalid trees, the quality of the dotmlgo is called into question even if
+// the rest of the trees are semantically valid, and they could severely degrade the performance of GEMM. Therefore
+// this "all or nothing" approach to dotmlgo correctness is safer and more defensive.
+
+// Also note that the semantic errors of a tree only refer to those that obstruct its evaluation and thus its query
+// (e.g. invalid tree structure, unsupported features, etc.) instead of those affecting the desired outcome
+// (usually in terms of final GEMM performance, e.g. the effectiveness of the decision tree).
+
+// In the future we might want to check the content of the exceptions as well. But right now it suffices to only
+// know that it throws exactly when it needs to.
+TEST_CASE(MismatchesBetweenHeuristicsTableEntriesAndHeuristicTrees, framework::DatasetMode::ALL)
+{
+    {
+        // Mismatching number of entries 1
+        std::string mlgo_str = R"_(
+            <header>
+            gemm-version, [1,2,1]
+            ip-type,gpu
+            </header>
+            <heuristics-table>
+
+            0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+
+            </heuristics-table>
+        )_";
+        std::stringstream ss(mlgo_str);
+        MLGOHeuristics heuristics;
+        // NOTE: This case might throw an internal error as the tree inserted by the heuristics-table cannot be checked
+        ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+    }
+
+    {
+        // Mismatching number of entries 2
+        std::string mlgo_str = R"_(
+            <header>
+            gemm-version, [1,2,1]
+            ip-type,gpu
+            </header>
+            <heuristics-table>
+            </heuristics-table>
+            <heuristic, 1>
+            l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+            </heuristic>
+        )_";
+        std::stringstream ss(mlgo_str);
+        MLGOHeuristics heuristics;
+        ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+    }
+
+    {
+        // Mismatching info
+        std::string mlgo_str = R"_(
+            <header>
+            gemm-version, [1,2,1]
+            ip-type,gpu
+            </header>
+            <heuristics-table>
+            0, g76 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]
+            </heuristics-table>
+            <heuristic, 0>
+            l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+            </heuristic>
+        )_";
+        std::stringstream ss(mlgo_str);
+        MLGOHeuristics heuristics;
+        ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_CASE(RepeatedHeuristicsTableEntriesId, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        0, g71 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+        <heuristic, 1>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(RepeatedHeuristicsTableEntriesIndex, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        1, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+        <heuristic, 1>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(RepeatedHeuristicTreesId, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        1, g71 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(EmptyTree, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(InvalidTreeMissingRoot, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,2, var, m, ==, num, 10., 3, 4
+        l ,3,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,4,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeMissingNodes, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeRepeatedNodeIds, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,1,gemm-config-reshaped,[1,2,4,2,8,1,0,1,0]
+        l ,2,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeDisjointNodes, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,2,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+
+        b ,4, var, n, ==, num, 10., 5, 6
+        l ,5,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,6,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+
+        l ,7,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeLoop, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 0, 1
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeCycle, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 5
+        b ,1, var, n, ==, num, 10., 2, 3
+        l ,2,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        b ,3, var, k, ==, num, 10., 0, 4
+        l ,4,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,5,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeInvalidFeatures, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, magic_feature, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,2,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // InvalidDotmlgoSemanticsShouldReturnInvalidStatus
+
+TEST_CASE(InvalidUsageOfHeuristicsShouldReturnInvalidStatus, framework::DatasetMode::ALL)
+{
+    std::string mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b , 0, var, m, ==, num, 10., 1, 2
+        l , 1, gemm-type, reshaped
+        b , 2, var, r_mn, >=, num, 2., 3, 6
+        b , 3, var, n, >=, num, 200., 4, 5
+        l , 4, gemm-type, reshaped-only-rhs
+        l , 5, gemm-type, reshaped
+        l , 6, gemm-type, reshaped-only-rhs
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics heuristics;
+    ARM_COMPUTE_EXPECT(heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+
+    // Querying unavailable heuristic type should return invalid Status
+    ARM_COMPUTE_EXPECT(!heuristics.query_gemm_config_reshaped(Query{ "g76", DataType::F32, 1024, 1024, 100, 3 }).first, framework::LogLevel::ERRORS);
+    // Querying unavailable ip target should return invalid Status
+    ARM_COMPUTE_EXPECT(!heuristics.query_gemm_type(Query{ "g77", DataType::F32, 1024, 1024, 100, 3 }).first, framework::LogLevel::ERRORS);
+    // Querying unavailable data type should return invalid Status
+    ARM_COMPUTE_EXPECT(!heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g76", DataType::QASYMM8, 1024, 1024, 100, 3 }).first, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // MLGOHeuristics
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
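Pulling the pieces the tests above exercise into one place, loading and querying a .mlgo file looks roughly as follows. This is a minimal sketch, not part of the change: MLGOHeuristics lives under src/ and is an internal rather than public interface, the file name is hypothetical, and the Query field order { ip_target, data_type, m, n, k, b } is inferred from the test invocations.

#include "src/runtime/CL/mlgo/MLGOHeuristics.h" // internal header, as included by the tests

#include <fstream>
#include <string>

// Load a dotmlgo file and pick a GEMM type for one workload shape.
bool select_gemm_type(const std::string &mlgo_path, arm_compute::mlgo::GEMMType &type)
{
    std::ifstream fs(mlgo_path);
    arm_compute::mlgo::MLGOHeuristics heuristics;
    // All-or-nothing: one malformed or semantically invalid tree rejects the whole file
    if(!fs || !heuristics.reload_from_stream(fs))
    {
        return false;
    }
    // Field order inferred from the tests: { ip_target, data_type, m, n, k, b }
    const auto res = heuristics.query_gemm_type(
        arm_compute::mlgo::Query{ "g76", arm_compute::DataType::F32, 1024, 1024, 64, 1 });
    if(!res.first) // no heuristic registered for this target/data type combination
    {
        return false;
    }
    type = res.second;
    return true;
}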
diff --git a/tests/validation/CL/UNIT/MemoryManager.cpp b/tests/validation/CL/UNIT/MemoryManager.cpp
index 8167026ec3..4db062ef81 100644
--- a/tests/validation/CL/UNIT/MemoryManager.cpp
+++ b/tests/validation/CL/UNIT/MemoryManager.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
diff --git a/tests/validation/CL/UNIT/Multithreaded.cpp b/tests/validation/CL/UNIT/Multithreaded.cpp
new file mode 100644
index 0000000000..5c75df709d
--- /dev/null
+++ b/tests/validation/CL/UNIT/Multithreaded.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/RuntimeContext.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/ParametersLibrary.h"
+#include "tests/validation/Validation.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+#include <thread>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(RuntimeContext)
+// This test tries scheduling work concurrently from multiple independent threads
+TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL)
+{
+    constexpr auto num_threads(16u);
+    std::array<CLActivationLayer, num_threads>         func{};
+    std::array<CLPixelWiseMultiplication, num_threads> pmul{};
+    std::array<CLTensor, num_threads>                  s0{};
+    std::array<CLTensor, num_threads>                  s1{};
+
+    std::array<CLTensor, num_threads> st{};
+    std::array<CLTensor, num_threads> dt{};
+
+    const TensorShape         tensor_shape(128u, 4u, 5u);
+    const ActivationLayerInfo ainfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.5f, 1.f);
+    std::array<std::thread, num_threads> threads;
+    auto ctx = parameters->get_ctx<CLTensor>();
+
+    for(auto i = 0u; i < num_threads; ++i)
+    {
+        s0[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        s1[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        st[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        dt[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        func[i] = CLActivationLayer(ctx);
+        pmul[i] = CLPixelWiseMultiplication();
+        threads[i] =
+            std::thread([&, i]
+        {
+            auto &s  = st[i];
+            auto &t  = dt[i];
+            auto &p0 = s0[i];
+            auto &p1 = s1[i];
+            pmul[i].configure(&p0, &p1, &s, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP);
+            func[i].configure(&s, &t, ainfo);
+            s.allocator()->allocate();
+            t.allocator()->allocate();
+            p0.allocator()->allocate();
+            p1.allocator()->allocate();
+            library->fill_tensor_uniform(CLAccessor(p0), 0, -1.f, 1.f);
+            library->fill_tensor_uniform(CLAccessor(p1), 0, -1.f, 1.f);
+            pmul[i].run();
+            func[i].run();
+        });
+    }
+
+    for(auto &t : threads)
+    {
+        t.join();
+    }
+
+    SimpleTensor<float> rs{ tensor_shape, DataType::F32, 1 };
+    SimpleTensor<float> ra{ tensor_shape, DataType::F32, 1 };
+    SimpleTensor<float> rb{ tensor_shape, DataType::F32, 1 };
+    library->fill_tensor_uniform(ra, 0, -1.f, 1.f);
+    library->fill_tensor_uniform(rb, 0, -1.f, 1.f);
+    const auto mul    = reference::pixel_wise_multiplication<float, float, float>(ra, rb, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP, DataType::F32);
+    const auto golden = reference::activation_layer<float>(mul, ainfo);
+    for(auto &d : dt)
+    {
+        validate(CLAccessor(d), golden);
+    }
+}
+
+TEST_SUITE_END() // RuntimeContext
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index a3aabf9bc4..559f47e16c 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,9 +24,14 @@
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 
 #include "arm_compute/core/utils/misc/MMappedFile.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/CL/CLBufferAllocator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
@@ -60,12 +65,108 @@ cl_mem import_malloc_memory_helper(void *ptr, size_t size)
 
     return buf;
 }
+
+class DummyAllocator final : public IAllocator
+{
+public:
+    DummyAllocator() = default;
+
+    void *allocate(size_t size, size_t alignment) override
+    {
+        ++_n_calls;
+        return _backend_allocator.allocate(size, alignment);
+    }
+    void free(void *ptr) override
+    {
+        return _backend_allocator.free(ptr);
+    }
+    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
+    {
+        // Needs to be implemented as it is the method used internally by CLTensorAllocator
+        ++_n_calls;
+        return _backend_allocator.make_region(size, alignment);
+    }
+    int get_n_calls() const
+    {
+        return _n_calls;
+    }
+
+private:
+    int               _n_calls{};
+    CLBufferAllocator _backend_allocator{};
+};
+
+void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
+{
+    // Create tensors
+    CLTensor src, weights, bias, dst;
+    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
+    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
+    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+
+    // Create and configure function
+    CLGEMMConvolutionLayer conv(mm);
+    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));
+
+    // Allocate tensors
+    src.allocator()->allocate();
+    weights.allocator()->allocate();
+    bias.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    // Finalize memory manager
+    if(mm != nullptr)
+    {
+        mm->populate(mm_allocator, 1 /* num_pools */);
+        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
+    }
+
+    conv.run();
+}
 } // namespace
 
 TEST_SUITE(CL)
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorAllocator)
 
+/* Validate that an external global allocator can be used for all internal allocations */
+TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
+{
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution
+    run_conv2d(nullptr /* mm */, global_tensor_alloc);
+
+    // Check that the allocator has been called more than 4 times
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Nullify global allocator
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
+/* Validate that an external global allocator can be used for the pool manager */
+TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
+{
+    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+    auto pool_mgr     = std::make_shared<PoolManager>();
+    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution
+    run_conv2d(mm, global_tensor_alloc);
+
+    // Check that the allocator has been called more than 4 times
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Nullify global allocator
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
 /** Validates import memory interface when importing cl buffer objects */
 TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
 {
@@ -79,31 +180,31 @@ TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
     // Negative case : Import nullptr
     CLTensor t1;
     t1.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(cl::Buffer())), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer())));
+    ARM_COMPUTE_ASSERT(t1.info()->is_resizable());
 
     // Negative case : Import memory to a tensor that is memory managed
     CLTensor    t2;
     MemoryGroup mg;
     t2.allocator()->set_associated_memory_group(&mg);
-    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(t2.info()->is_resizable());
 
     // Negative case : Invalid buffer size
     CLTensor         t3;
     const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
     t3.allocator()->init(info_neg);
-    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(t3.info()->is_resizable());
 
     // Positive case : Set raw pointer
     CLTensor t4;
     t4.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(!t4.info()->is_resizable());
     ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
     t4.allocator()->free();
-    ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(t4.info()->is_resizable());
     ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
 }
 
@@ -135,14 +236,14 @@ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
     const size_t total_size_in_bytes = tensor.info()->total_size();
     const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
     size_t       space               = total_size_in_bytes + alignment;
-    auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+    auto         raw_data            = std::make_unique<uint8_t[]>(space);
 
     void *aligned_ptr = raw_data.get();
-    support::cpp11::align(alignment, total_size_in_bytes, aligned_ptr, space);
+    std::align(alignment, total_size_in_bytes, aligned_ptr, space);
 
     cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
-    ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
+    ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
 
     // Fill tensor
     std::uniform_real_distribution<float> distribution(-5.f, 5.f);
@@ -205,12 +306,12 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
 
     // Map file
     utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /** Whole file */, 0);
-    ARM_COMPUTE_EXPECT(mmapped_file.is_mapped(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(mmapped_file.is_mapped());
 
     unsigned char *data = mmapped_file.data();
     cl::Buffer     wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
-    ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
+    ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
 
     // Fill tensor
     std::uniform_real_distribution<float> distribution(-5.f, 5.f);
@@ -233,7 +334,7 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
 
     // Release resources
     tensor.allocator()->free();
-    ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
 }
 }
 #endif // !defined(BARE_METAL)
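The DummyAllocator above is the pattern an application would follow to route every internal CL allocation through its own allocator. As a minimal usage sketch, assuming only the IAllocator interface and CLTensorAllocator::set_global_allocator shown in the diff (TrackingAllocator is a hypothetical name):

#include "arm_compute/runtime/CL/CLBufferAllocator.h"
#include "arm_compute/runtime/CL/CLTensorAllocator.h"
#include "arm_compute/runtime/IAllocator.h"

#include <cstddef>
#include <memory>

class TrackingAllocator final : public arm_compute::IAllocator
{
public:
    void *allocate(size_t size, size_t alignment) override
    {
        _bytes += size; // simple byte accounting on top of the CL backend
        return _backend.allocate(size, alignment);
    }
    void free(void *ptr) override
    {
        _backend.free(ptr);
    }
    std::unique_ptr<arm_compute::IMemoryRegion> make_region(size_t size, size_t alignment) override
    {
        _bytes += size; // path used by CLTensorAllocator, per the comment in DummyAllocator
        return _backend.make_region(size, alignment);
    }
    size_t bytes_requested() const
    {
        return _bytes;
    }

private:
    size_t                         _bytes{ 0 };
    arm_compute::CLBufferAllocator _backend{};
};

// Install before configuring/allocating any function, reset when done,
// mirroring the ExternalGlobalAllocator test:
//     TrackingAllocator alloc;
//     arm_compute::CLTensorAllocator::set_global_allocator(&alloc);
//     ... configure, allocate and run the workload ...
//     arm_compute::CLTensorAllocator::set_global_allocator(nullptr);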
diff --git a/tests/validation/CL/UNIT/Tuner.cpp b/tests/validation/CL/UNIT/Tuner.cpp
deleted file mode 100644
index 6345b7cd11..0000000000
--- a/tests/validation/CL/UNIT/Tuner.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018-2020 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/tuners/BifrostTuner.h"
-#include "tests/Utils.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(UNIT)
-TEST_SUITE(Tuner)
-
-/** Validates static tuning of Bifrost tuner */
-TEST_CASE(BifrostTunerSimple, framework::DatasetMode::ALL)
-{
-    // Create tuner
-    tuners::BifrostTuner tuner;
-
-    // Create tensors
-    auto src     = create_tensor<CLTensor>(TensorShape(13U, 13U, 16U), DataType::F32);
-    auto weights = create_tensor<CLTensor>(TensorShape(3U, 3U, 16U, 3U), DataType::F32);
-    auto bias    = create_tensor<CLTensor>(TensorShape(3U), DataType::F32);
-    auto dst     = create_tensor<CLTensor>(TensorShape(13U, 13U, 3U), DataType::F32);
-
-    // Create kernel
-    cl::NDRange fake_lws(2000);
-    CLDirectConvolutionLayerKernel conv;
-    conv.set_target(GPUTarget::G72);
-
-    // Configure
-    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1));
-
-    // Hard-wire lws to kernel and validate lws
-    conv.set_lws_hint(fake_lws);
-    ARM_COMPUTE_EXPECT(conv.lws_hint()[0] == 2000, framework::LogLevel::ERRORS);
-
-    // Tune kernel and validate
-    tuner.tune_kernel_static(conv);
-    ARM_COMPUTE_EXPECT(conv.lws_hint()[0] != 2000, framework::LogLevel::ERRORS);
-
-    // Clear tuner
-    CLScheduler::get().default_init();
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/WeightsRetention.cpp b/tests/validation/CL/UNIT/WeightsRetention.cpp
index 9a6fddcc80..357c88af10 100644
--- a/tests/validation/CL/UNIT/WeightsRetention.cpp
+++ b/tests/validation/CL/UNIT/WeightsRetention.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 ARM Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
+#include "src/core/CL/kernels/CLFillBorderKernel.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"