123 files changed, 9251 insertions, 6959 deletions
diff --git a/tests/validation/CL/AbsLayer.cpp b/tests/validation/CL/AbsLayer.cpp
index e6ba14b50e..0bad8f9b68 100644
--- a/tests/validation/CL/AbsLayer.cpp
+++ b/tests/validation/CL/AbsLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/CL/CLAccessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ElementWiseUnaryFixture.h"
+#include "tests/validation/fixtures/ElementwiseUnaryFixture.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/CL/AbsoluteDifference.cpp b/tests/validation/CL/AbsoluteDifference.cpp
deleted file mode 100644
index f3eb129118..0000000000
--- a/tests/validation/CL/AbsoluteDifference.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLAbsoluteDifference.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/AbsoluteDifferenceFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Input data sets **/
-const auto AbsoluteDifferenceU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
-                                                 DataType::U8));
-const auto AbsoluteDifferenceS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
-                                                  framework::dataset::make("DataType", DataType::S16));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(AbsoluteDifference)
-
-template <typename T>
-using CLAbsoluteDifferenceFixture = AbsoluteDifferenceValidationFixture<CLTensor, CLAccessor, CLAbsoluteDifference, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AbsoluteDifferenceU8Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAbsoluteDifferenceFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceU8Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AbsoluteDifferenceS16Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAbsoluteDifferenceFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AbsoluteDifferenceS16Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // S16
-
-TEST_SUITE_END() // AbsoluteDifference
-TEST_SUITE_END() // CL
-
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Accumulate.cpp b/tests/validation/CL/Accumulate.cpp
deleted file mode 100644
index 8f5c6d5deb..0000000000
--- a/tests/validation/CL/Accumulate.cpp
+++ /dev/null
@@ -1,116 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLAccumulate.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/AccumulateFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Tolerance value for comparing reference's output against implementation's output for floating point data types */
-constexpr AbsoluteTolerance<float> tolerance(1.0f);
-/** Input data sets **/
-const auto AccumulateU8Dataset  = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8));
-const auto AccumulateS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
-} // namespace
-TEST_SUITE(CL)
-TEST_SUITE(Accumulate)
-
-TEST_SUITE(U8)
-template <typename T1>
-using CLAccumulateFixture = AccumulateValidationFixture<CLTensor, CLAccessor, CLAccumulate, T1, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateS16Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(AccumulateWeighted)
-
-TEST_SUITE(U8)
-template <typename T1>
-using CLAccumulateWeightedFixture = AccumulateWeightedValidationFixture<CLTensor, CLAccessor, CLAccumulateWeighted, T1, uint8_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateU8Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateWeightedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateU8Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(AccumulateSquared)
-
-TEST_SUITE(U8)
-template <typename T1>
-using CLAccumulateSquaredFixture = AccumulateSquaredValidationFixture<CLTensor, CLAccessor, CLAccumulateSquared, T1, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), AccumulateS16Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLAccumulateSquaredFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), AccumulateS16Dataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/ActivationLayer.cpp b/tests/validation/CL/ActivationLayer.cpp
index 9b725a44e7..133b39d154 100644
--- a/tests/validation/CL/ActivationLayer.cpp
+++ b/tests/validation/CL/ActivationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -70,6 +70,7 @@ AbsoluteTolerance<float> tolerance(ActivationLayerInfo::ActivationFunction activ
         case ActivationLayerInfo::ActivationFunction::SOFT_RELU:
         case ActivationLayerInfo::ActivationFunction::ELU:
         case ActivationLayerInfo::ActivationFunction::SQRT:
+        case ActivationLayerInfo::ActivationFunction::GELU:
             return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.01f : 0.00001f);
         case ActivationLayerInfo::ActivationFunction::TANH:
             return AbsoluteTolerance<float>(data_type == DataType::F16 ? 0.001f : 0.00001f);
@@ -168,8 +169,10 @@ template <typename T>
 using CLActivationLayerQuantizedFixture = ActivationValidationQuantizedFixture<CLTensor, CLAccessor, CLActivationLayer, T>;
 
 const auto QuantizedActivationDataset8 = combine(combine(framework::dataset::make("InPlace", { false }),
-                                                         concat(datasets::ActivationFunctionsQuantized(), framework::dataset::make("ActivationFunction", ActivationLayerInfo::ActivationFunction::HARD_SWISH))),
-                                                 framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+                                                         concat(datasets::ActivationFunctionsQuantized(),
+                                                                framework::dataset::make("ActivationFunction",
+{ ActivationLayerInfo::ActivationFunction::HARD_SWISH, ActivationLayerInfo::ActivationFunction::LEAKY_RELU }))),
+framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
 
 const auto QuantizedActivationDataset16 = combine(combine(framework::dataset::make("InPlace", { false }),
                                                           datasets::ActivationFunctionsQuantized()),
diff --git a/tests/validation/CL/ArgMinMax.cpp b/tests/validation/CL/ArgMinMax.cpp
index 2508c63524..8566972f81 100644
--- a/tests/validation/CL/ArgMinMax.cpp
+++ b/tests/validation/CL/ArgMinMax.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,15 +22,11 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 #include "arm_compute/runtime/CL/functions/CLArgMinMaxLayer.h"
-#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/datasets/ShapeDatasets.h"
-#include "tests/datasets/SplitDataset.h"
-#include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/ArgMinMaxFixture.h"
@@ -45,7 +41,9 @@ namespace
 {
 const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape",
 {
-    TensorShape{ 2U, 7U, 1U, 3U },
+    TensorShape{ 1U, 7U, 1U, 3U },
+    TensorShape{ 3U, 1U, 3U, 2U },
+    TensorShape{ 2U, 1U, 3U, 2U },
     TensorShape{ 149U, 5U, 1U, 2U },
     TensorShape{ 166U, 5U, 1U, 2U },
     TensorShape{ 322U, 5U, 1U, 2U },
@@ -53,6 +51,22 @@ const auto ArgMinMaxSmallDataset = framework::dataset::make("Shape",
     TensorShape{ 2560, 2U, 2U, 2U },
 });
 
+const auto ArgMinMaxSmallDatasetAxis0 = framework::dataset::make("Shape",
+{
+    TensorShape{ 1U, 5U },
+    TensorShape{ 2U, 3U },
+    TensorShape{ 1U },
+    TensorShape{ 3U },
+    TensorShape{ 2U },
+    TensorShape{ 5U },
+    TensorShape{ 17U },
+    TensorShape{ 15U, 2U },
+});
+
+const auto OpsDataset   = framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX });
+const auto AxisDataset  = framework::dataset::make("Axis", { 0, 1, 2, 3 });
+const auto QInfoDataset = framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) });
+
 const auto ArgMinMaxLargeDataset = framework::dataset::make("Shape",
 { TensorShape{ 517U, 123U, 13U, 2U } });
 } // namespace
@@ -85,47 +99,78 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
 // clang-format on
 // *INDENT-ON*
 
-template <typename T>
-using CLArgMinMaxValidationFixture = ArgMinMaxValidationFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T>;
+template <typename T1, typename T2>
+using CLArgMinMaxValidationFixture = ArgMinMaxValidationFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T1, T2>;
+
+using CLArgMinMaxValidationFixture_S32_S32 = CLArgMinMaxValidationFixture<int32_t, int32_t>;
+using CLArgMinMaxValidationFixture_F16_S32 = CLArgMinMaxValidationFixture<half, int32_t>;
+using CLArgMinMaxValidationFixture_F32_S32 = CLArgMinMaxValidationFixture<float, int32_t>;
+using CLArgMinMaxValidationFixture_F32_S64 = CLArgMinMaxValidationFixture<float, int64_t>;
 
 TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmallAxis0,
+                       CLArgMinMaxValidationFixture_S32_S32,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(ArgMinMaxSmallDatasetAxis0,
+                                                       framework::dataset::make("DataTypeIn", DataType::S32)),
+                                               framework::dataset::make("DataTypeOut", DataType::S32)),
+                                       framework::dataset::make("Axis", { 0 })),
+                               OpsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunSmall,
-                       CLArgMinMaxValidationFixture<int32_t>,
+                       CLArgMinMaxValidationFixture_S32_S32,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                               framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+                       combine(combine(combine(combine(ArgMinMaxSmallDataset,
+                                                       framework::dataset::make("DataTypeIn", DataType::S32)),
+                                               framework::dataset::make("DataTypeOut", DataType::S32)),
+                                       AxisDataset),
+                               OpsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge,
-                       CLArgMinMaxValidationFixture<int32_t>,
+                       CLArgMinMaxValidationFixture_S32_S32,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::S32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                               framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+                       combine(combine(combine(combine(ArgMinMaxLargeDataset,
+                                                       framework::dataset::make("DataTypeIn", DataType::S32)),
+                                               framework::dataset::make("DataTypeOut", DataType::S32)),
+                                       AxisDataset),
+                               OpsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+
 TEST_SUITE_END() // S32
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall,
-                       CLArgMinMaxValidationFixture<half>,
+                       CLArgMinMaxValidationFixture_F16_S32,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                               framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+                       combine(combine(combine(combine(ArgMinMaxSmallDataset,
+                                                       framework::dataset::make("DataTypeIn", DataType::F16)),
+                                               framework::dataset::make("DataTypeOut", DataType::S32)),
+                                       AxisDataset),
+                               OpsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge,
-                       CLArgMinMaxValidationFixture<half>,
+                       CLArgMinMaxValidationFixture_F16_S32,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                               framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+                       combine(combine(combine(combine(ArgMinMaxLargeDataset,
+                                                       framework::dataset::make("DataTypeIn", DataType::F16)),
+                                               framework::dataset::make("DataTypeOut", DataType::S32)),
+                                       AxisDataset),
+                               OpsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -134,49 +179,77 @@ TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall,
-                       CLArgMinMaxValidationFixture<float>,
+                       CLArgMinMaxValidationFixture_F32_S32,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                               framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+                       combine(combine(combine(combine(ArgMinMaxSmallDataset,
+                                                       framework::dataset::make("DataTypeIn", DataType::F32)),
+                                               framework::dataset::make("DataTypeOut", DataType::S32)),
+                                       AxisDataset),
+                               OpsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall_F32_S64,
+                       CLArgMinMaxValidationFixture_F32_S64,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(ArgMinMaxSmallDataset,
+                                                       framework::dataset::make("DataTypeIn", DataType::F32)),
+                                               framework::dataset::make("DataTypeOut", DataType::S64)),
+                                       AxisDataset),
+                               OpsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge,
-                       CLArgMinMaxValidationFixture<float>,
+                       CLArgMinMaxValidationFixture_F32_S32,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                               framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })))
+                       combine(combine(combine(combine(ArgMinMaxLargeDataset,
+                                                       framework::dataset::make("DataTypeIn", DataType::F32)),
+                                               framework::dataset::make("DataTypeOut", DataType::S32)),
+                                       AxisDataset),
+                               OpsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
-template <typename T>
-using CLArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T>;
+template <typename T1, typename T2>
+using CLArgMinMaxQuantizedValidationFixture = ArgMinMaxValidationQuantizedFixture<CLTensor, CLAccessor, CLArgMinMaxLayer, T1, T2>;
+
+using CLArgMinMaxQuantizedValidationFixture_U8_S32 = CLArgMinMaxQuantizedValidationFixture<uint8_t, int32_t>;
+using CLArgMinMaxQuantizedValidationFixture_S8_S32 = CLArgMinMaxQuantizedValidationFixture<int8_t, int32_t>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall,
-                       CLArgMinMaxQuantizedValidationFixture<uint8_t>,
+                       CLArgMinMaxQuantizedValidationFixture_U8_S32,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                                       framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
-                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+                       combine(combine(combine(combine(combine(ArgMinMaxSmallDataset,
+                                                               framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+                                                       framework::dataset::make("DataTypeOut", DataType::S32)),
+                                               AxisDataset),
+                                       OpsDataset),
+                               QInfoDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-
 FIXTURE_DATA_TEST_CASE(RunLarge,
-                       CLArgMinMaxQuantizedValidationFixture<uint8_t>,
+                       CLArgMinMaxQuantizedValidationFixture_U8_S32,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                                       framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
-                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+                       combine(combine(combine(combine(combine(ArgMinMaxLargeDataset,
+                                                               framework::dataset::make("DataTypeIn", DataType::QASYMM8)),
+                                                       framework::dataset::make("DataTypeOut", DataType::S32)),
+                                               AxisDataset),
+                                       OpsDataset),
+                               QInfoDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -185,28 +258,32 @@ TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall,
-                       CLArgMinMaxQuantizedValidationFixture<int8_t>,
+                       CLArgMinMaxQuantizedValidationFixture_S8_S32,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(ArgMinMaxSmallDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                                       framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
-                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+                       combine(combine(combine(combine(combine(ArgMinMaxSmallDataset,
+                                                               framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+                                                       framework::dataset::make("DataTypeOut", DataType::S32)),
+                                               AxisDataset),
+                                       OpsDataset),
+                               QInfoDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-
 FIXTURE_DATA_TEST_CASE(RunLarge,
-                       CLArgMinMaxQuantizedValidationFixture<int8_t>,
+                       CLArgMinMaxQuantizedValidationFixture_S8_S32,
                        framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(ArgMinMaxLargeDataset, framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                                       framework::dataset::make("Operation", { ReductionOperation::ARG_IDX_MIN, ReductionOperation::ARG_IDX_MAX })),
-                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })))
+                       combine(combine(combine(combine(combine(ArgMinMaxLargeDataset,
+                                                               framework::dataset::make("DataTypeIn", DataType::QASYMM8_SIGNED)),
+                                                       framework::dataset::make("DataTypeOut", DataType::S32)),
+                                               AxisDataset),
+                                       OpsDataset),
+                               QInfoDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END() // QASYMM8_SIGNED
-
 TEST_SUITE_END() // Quantized
 TEST_SUITE_END() // ArgMinMax
 TEST_SUITE_END() // CL
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index c74f6a3b23..1ed3a105dc 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,26 +41,12 @@ namespace test
 {
 namespace validation
 {
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Add.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
 namespace
 {
 /** Input data sets **/
-const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType",
-                                                 DataType::U8));
-const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                      framework::dataset::make("DataType",
-                                                                               DataType::QASYMM8));
-const auto ArithmeticAdditionQASYMM8SignedDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                                            framework::dataset::make("DataType",
-                                                                                     DataType::QASYMM8_SIGNED));
-const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
-                                                      framework::dataset::make("DataType",
-                                                                               DataType::QSYMM16));
-const auto ArithmeticAdditionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
-                                                  framework::dataset::make("DataType", DataType::S16));
-const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
-                                                   framework::dataset::make("DataType", DataType::F16));
-const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
-                                                   framework::dataset::make("DataType", DataType::F32));
 const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
 { ActivationLayerInfo() });
 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
@@ -68,6 +54,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
 });
+const auto InPlaceDataSet    = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
 } // namespace
 
 TEST_SUITE(CL)
@@ -76,22 +64,19 @@ TEST_SUITE(ArithmeticAddition)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                       }),
-               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, true, false, false})),
+               framework::dataset::make("Expected", { true, false, false})),
                input1_info, input2_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLArithmeticAddition::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
@@ -129,8 +114,10 @@ using CLArithmeticAdditionFixture = ArithmeticAdditionValidationFixture<CLTensor
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionU8Dataset),
-                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                  DataType::U8)),
+                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                  OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -138,15 +125,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<uint8_t>, framework
 TEST_SUITE_END() // U8
 
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS16Dataset),
-                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                  DataType::S16)),
+                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                  OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticAdditionS16Dataset),
-                                                                                                                framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                        DataType::S16)),
+                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -159,48 +150,65 @@ using CLArithmeticAdditionQuantizedFixture = ArithmeticAdditionValidationQuantiz
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                       ArithmeticAdditionQASYMM8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 template <typename T>
 using CLArithmeticAdditionBroadcastQuantizedFixture = ArithmeticAdditionValidationQuantizedBroadcastFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
-                       ArithmeticAdditionQASYMM8Dataset),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
-                       framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
-                       framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+                                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                               framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                                       framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+                                               framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                               OutOfPlaceDataSet))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInPlace, CLArithmeticAdditionBroadcastQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+                                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                               framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                                       framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+                                               framework::dataset::make("Src1QInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+                                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+                               InPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END() // QASYMM8
 TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                       ArithmeticAdditionQASYMM8SignedDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 10) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                       ArithmeticAdditionQSYMM16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
+                       framework::dataset::make("DataType", DataType::QSYMM16)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
-                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -213,16 +221,21 @@ using CLArithmeticAdditionFloatFixture = ArithmeticAdditionValidationFloatFixtur
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                      DataType::F16)),
                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                              EmptyActivationFunctionsDataset))
+                                                                                                                      EmptyActivationFunctionsDataset),
+                                                                                                              OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionFP16Dataset),
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
+                                                                                                                       framework::dataset::make("DataType",
+                                                                                                                               DataType::F16)),
                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                       ActivationFunctionsDataset))
+                                                                                                                       ActivationFunctionsDataset),
+                                                                                                                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -230,24 +243,32 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<half>
 TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+                                                                                                                     framework::dataset::make("DataType",
+                                                                                                                             DataType::F32)),
                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                     EmptyActivationFunctionsDataset))
+                                                                                                                     EmptyActivationFunctionsDataset),
+                                                                                                                     InPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
+                                                                                                                        framework::dataset::make("DataType",
+                                                                                                                                DataType::F32)),
                                                                                                                         framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                        ActivationFunctionsDataset))
+                                                                                                                        ActivationFunctionsDataset),
+                                                                                                                        OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::F32)),
                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                   EmptyActivationFunctionsDataset))
+                                                                                                                   EmptyActivationFunctionsDataset),
+                                                                                                                   OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -256,27 +277,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticAdditionFloatFixture<float>, framew
 template <typename T>
 using CLArithmeticAdditionBroadcastFloatFixture = ArithmeticAdditionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticAddition, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(),
-                       ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+                       framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
-                       ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapesBroadcast(),
+                       framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       ActivationFunctionsDataset))
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(),
-                       ArithmeticAdditionFP32Dataset),
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticAdditionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapesBroadcast(),
+                       framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/CL/ArithmeticDivision.cpp b/tests/validation/CL/ArithmeticDivision.cpp
index 36567dc02a..94bacba7e5 100644
--- a/tests/validation/CL/ArithmeticDivision.cpp
+++ b/tests/validation/CL/ArithmeticDivision.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,13 +51,16 @@ const auto ArithmeticDivisionFP16Dataset = combine(combine(framework::dataset::m
                                                    framework::dataset::make("DataType", DataType::F16));
 const auto ArithmeticDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
                                                    framework::dataset::make("DataType", DataType::F32));
-const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
-{ ActivationLayerInfo() });
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ArithmeticDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S32)),
+                                                  framework::dataset::make("DataType", DataType::S32));
+const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo", { ActivationLayerInfo() });
+const auto ActivationFunctionsDataset      = framework::dataset::make("ActivationInfo",
 {
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
 });
+const auto InPlaceDataSet    = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
 } // namespace
 
 TEST_SUITE(CL)
@@ -89,19 +92,44 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 // clang-format on
 // *INDENT-ON*
 
+using CLArithmeticDivisionIntegerFixture = ArithmeticDivisionValidationIntegerFixture<CLTensor, CLAccessor, CLArithmeticDivision, int>;
+
+TEST_SUITE(Integer)
+TEST_SUITE(S32)
+
+FIXTURE_DATA_TEST_CASE(RunSmallInteger, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
+                                                                                                                       EmptyActivationFunctionsDataset),
+                                                                                                                       InPlaceDataSet))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunIntegerWithActivation, CLArithmeticDivisionIntegerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionS32Dataset),
+                       ActivationFunctionsDataset),
+                       InPlaceDataSet))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 template <typename T>
 using CLArithmeticDivisionFloatFixture = ArithmeticDivisionValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP16Dataset),
-                                                                                                              EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP16Dataset),
+                                                                                                                      EmptyActivationFunctionsDataset),
+                                                                                                              InPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP16Dataset),
-                                                                                                                       ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP16Dataset),
+                                                                                                                       ActivationFunctionsDataset),
+                                                                                                                       InPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -109,21 +137,24 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<half>
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP32Dataset),
-                                                                                                                     EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticDivisionFP32Dataset),
+                                                                                                                     EmptyActivationFunctionsDataset),
+                                                                                                                     InPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP32Dataset),
-                                                                                                                        ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ArithmeticDivisionFP32Dataset),
+                                                                                                                        ActivationFunctionsDataset),
+                                                                                                                        InPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), ArithmeticDivisionFP32Dataset),
-                                                                                                                   EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticDivisionFP32Dataset),
+                                                                                                                   EmptyActivationFunctionsDataset),
+                                                                                                                   InPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -132,24 +163,27 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFloatFixture<float>, framew
 template <typename T>
 using CLArithmeticDivisionBroadcastFloatFixture = ArithmeticDivisionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapesBroadcast(),
                        ArithmeticDivisionFP32Dataset),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
                        ArithmeticDivisionFP32Dataset),
-                       ActivationFunctionsDataset))
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticDivisionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapesBroadcast(),
                        ArithmeticDivisionFP32Dataset),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index 2709fcaedb..5825ce2e5d 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,27 +41,12 @@ namespace test
 {
 namespace validation
 {
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Sub.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
 namespace
 {
 /** Input data sets **/
-const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)),
-                                                    framework::dataset::make("DataType",
-                                                                             DataType::U8));
-const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                         framework::dataset::make("DataType",
-                                                                                  DataType::QASYMM8));
-const auto ArithmeticSubtractionQASYMM8SignedDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                                               framework::dataset::make("DataType",
-                                                                                        DataType::QASYMM8_SIGNED));
-const auto ArithmeticSubtractionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
-                                                         framework::dataset::make("DataType",
-                                                                                  DataType::QSYMM16));
-const auto ArithmeticSubtractionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
-                                                     framework::dataset::make("DataType", DataType::S16));
-const auto ArithmeticSubtractionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
-                                                      framework::dataset::make("DataType", DataType::F16));
-const auto ArithmeticSubtractionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)),
-                                                      framework::dataset::make("DataType", DataType::F32));
 const auto EmptyActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
 { ActivationLayerInfo() });
 const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
@@ -79,22 +64,19 @@ TEST_SUITE(ArithmeticSubtraction)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                       }),
-               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, true, false, false})),
+               framework::dataset::make("Expected", { true, false, false})),
                input1_info, input2_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLArithmeticSubtraction::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), ConvertPolicy::WRAP)) == expected, framework::LogLevel::ERRORS);
@@ -159,7 +141,8 @@ using CLArithmeticSubtractionFixture = ArithmeticSubtractionValidationFixture<CL
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionU8Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                       DataType::U8)),
                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                                                                                                                OutOfPlaceDataSet))
 {
@@ -169,7 +152,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<uint8_t>, framew
 TEST_SUITE_END() // U8
 
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                     DataType::S16)),
                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                                                                                                                      OutOfPlaceDataSet))
 {
@@ -177,7 +161,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFixture<int16_t>, framew
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), ArithmeticSubtractionS16Dataset),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::S16)),
                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                                                                                                                    OutOfPlaceDataSet))
 {
@@ -193,11 +178,22 @@ using CLArithmeticSubtractionQuantizedFixture = ArithmeticSubtractionValidationQ
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                       ArithmeticSubtractionQASYMM8Dataset),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
                        framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                       OutOfPlaceDataSet))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunTinyInPlace, CLArithmeticSubtractionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::TinyShapes(),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                       framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+                       framework::dataset::make("Src1QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        InPlaceDataSet))
 {
     // Validate output
@@ -206,12 +202,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<uint8_t
 TEST_SUITE_END() // QASYMM8
 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                       ArithmeticSubtractionQASYMM8SignedDataset),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 10) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
                        framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
-                       InPlaceDataSet))
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -219,7 +215,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int8_t>
 TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE(QSYMM16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                       ArithmeticSubtractionQSYMM16Dataset),
+                       framework::dataset::make("DataType", DataType::QSYMM16)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
@@ -237,7 +233,8 @@ using CLArithmeticSubtractionFloatFixture = ArithmeticSubtractionValidationFloat
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionFP16Dataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                 DataType::F16)),
                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                                                                                                                  EmptyActivationFunctionsDataset),
                                                                                                                  OutOfPlaceDataSet))
@@ -246,7 +243,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<half>, fram
     validate(CLAccessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
-                       ArithmeticSubtractionFP16Dataset),
+                       framework::dataset::make("DataType", DataType::F16)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        ActivationFunctionsDataset),
                        InPlaceDataSet))
@@ -258,7 +255,7 @@ TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
-                                                                                                                        ArithmeticSubtractionFP32Dataset),
+                                                                                                                        framework::dataset::make("DataType", DataType::F32)),
                                                                                                                         framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                                                                                                                         EmptyActivationFunctionsDataset),
                                                                                                                         OutOfPlaceDataSet))
@@ -267,7 +264,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticSubtractionFloatFixture<float>, fra
     validate(CLAccessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapes(),
-                       ArithmeticSubtractionFP32Dataset),
+                       framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        ActivationFunctionsDataset),
                        InPlaceDataSet))
@@ -277,7 +274,7 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLArithmeticSubtractionFloatFixture<fl
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticSubtractionFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
-                                                                                                                      ArithmeticSubtractionFP32Dataset),
+                                                                                                                      framework::dataset::make("DataType", DataType::F32)),
                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                                                                                                                       EmptyActivationFunctionsDataset),
                                                                                                                       OutOfPlaceDataSet))
@@ -290,7 +287,7 @@ template <typename T>
 using CLArithmeticSubtractionBroadcastFloatFixture = ArithmeticSubtractionBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLArithmeticSubtraction, T>;
 
 FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
-                       ArithmeticSubtractionFP32Dataset),
+                       framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        EmptyActivationFunctionsDataset),
                        OutOfPlaceDataSet))
@@ -298,8 +295,18 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLArithmeticSubtractionBroadcastFloatF
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+FIXTURE_DATA_TEST_CASE(RunTinyBroadcastInplace, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+                                                       framework::dataset::make("DataType", DataType::F32)),
+                                               framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                       EmptyActivationFunctionsDataset),
+                               InPlaceDataSet))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
 FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::TinyShapesBroadcast(),
-                       ArithmeticSubtractionFP32Dataset),
+                       framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        ActivationFunctionsDataset),
                        OutOfPlaceDataSet))
@@ -309,7 +316,7 @@ FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLArithmeticSubtractionBroadc
 }
 
 FIXTURE_DATA_TEST_CASE(RunLargeBroadcast, CLArithmeticSubtractionBroadcastFloatFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapesBroadcast(),
-                       ArithmeticSubtractionFP32Dataset),
+                       framework::dataset::make("DataType", DataType::F32)),
                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                        EmptyActivationFunctionsDataset),
                        OutOfPlaceDataSet))
diff --git a/tests/validation/CL/BatchNormalizationLayer.cpp b/tests/validation/CL/BatchNormalizationLayer.cpp
index 8b3bdbc3ea..3b87b9d1b5 100644
--- a/tests/validation/CL/BatchNormalizationLayer.cpp
+++ b/tests/validation/CL/BatchNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,7 +50,7 @@ namespace
 {
 RelativeTolerance<float>           rel_tolerance_f32(0.05f);   /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 constexpr AbsoluteTolerance<float> abs_tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);       /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.02f);       /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
 const auto                         act_infos = framework::dataset::make("ActivationInfo",
 {
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
diff --git a/tests/validation/CL/BatchToSpaceLayer.cpp b/tests/validation/CL/BatchToSpaceLayer.cpp
index e90ac921c5..ca12b76e8a 100644
--- a/tests/validation/CL/BatchToSpaceLayer.cpp
+++ b/tests/validation/CL/BatchToSpaceLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,56 +50,38 @@ using CLBatchToSpaceLayerFixture = BatchToSpaceLayerValidationFixture<CLTensor,
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blockx > blocky
-                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blocky > blockx
-                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Mismatching data types
-                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),     // Wrong data type block shape
-                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U, 4U), 1, DataType::F32), // Wrong tensor shape
-                                                     }),
-               framework::dataset::make("BlockShapeInfo",{ TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
-                                                       TensorInfo(TensorShape(2U, 4U), 1, DataType::S32),
-                                                      TensorInfo(TensorShape(4U, 2U), 1, DataType::S32),
-                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
-                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::F16),
-                                                       TensorInfo(TensorShape(2U, 2U), 1, DataType::S32),
-                                                     })),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
-
-                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F16),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U, 2U), 1, DataType::F32),
-                                                     })),
-               framework::dataset::make("Expected", { true, true,true, false, false, false})),
-               input_info, block_shape_info, output_info, expected)
-{
-    bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), &block_shape_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false)));
-    ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
-}
-DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(
+DATA_TEST_CASE(ValidateStatic, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blockx > blocky
-                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // blockx != blocky && blocky > blockx
-                                                       TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),    // Mismatching data types
-                                                       TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),    // Negative block shapes
-                                                       TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32), // Wrong tensor shape
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // Supported: blockx != blocky && blockx > blocky
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // Supported: blockx != blocky && blocky > blockx
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),     // Invalid: Mismatching data types
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 4U), 1, DataType::F32),     // Invalid: Negative block shapes
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 4U, 4U), 1, DataType::F32),// Unsupported tensor rank
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // Invalid output tensor shape (invalid batch dimension)
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // Invalid output tensor shape (invalid spatial dimension)
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // Supported: correct tensor shape with cropping
+                                                       TensorInfo(TensorShape(16U, 8U, 2U, 16U), 1, DataType::F32),    // Invalid tensor shape with cropping
                                                      }),
-               framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2 })),
-               framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2 })),
+               framework::dataset::make("BlockShapeX", { 2, 4, 2, 2, 2, 2, 2, 2, 2, 2 })),
+               framework::dataset::make("BlockShapeY", { 2, 2, 4, 2, -2, 2, 2, 2, 2, 2 })),
+               framework::dataset::make("CropInfo", {
+                CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{}, CropInfo{3, 2, 1, 3}, CropInfo{3, 2, 1, 3}
+               })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(64U, 16U, 2U, 1U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(32U, 32U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(64U, 16U, 2U, 2U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 32U, 2U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 8U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 1U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(33U, 32U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(27, 12U, 2U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(32U, 16U, 2U, 4U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, true,true, false, false, false})),
-               input_info, block_shape_x, block_shape_y, output_info, expected)
+               framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, true, false})),
+               input_info, block_shape_x, block_shape_y, crop_info, output_info, expected)
 {
-    bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false)));
+    bool has_error = bool(CLBatchToSpaceLayer::validate(&input_info.clone()->set_is_resizable(false), block_shape_x, block_shape_y, &output_info.clone()->set_is_resizable(false), crop_info));
     ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -114,6 +96,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<float>, framework::D
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType",
+                                                                                                                       DataType::F32)),
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
                                                                                                                      DataType::F32)),
                                                                                                              framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
@@ -131,6 +123,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchToSpaceLayerFixture<half>, framework::Da
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithCropping, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(datasets::SmallBatchToSpaceLayerWithCroppingDataset(), framework::dataset::make("DataType",
+                                                                                                                       DataType::F16)),
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge, CLBatchToSpaceLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeBatchToSpaceLayerDataset(), framework::dataset::make("DataType",
                                                                                                                     DataType::F16)),
                                                                                                             framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
diff --git a/tests/validation/CL/BoundingBoxTransform.cpp b/tests/validation/CL/BoundingBoxTransform.cpp
index 2a7f1667d6..2584b1a5b6 100644
--- a/tests/validation/CL/BoundingBoxTransform.cpp
+++ b/tests/validation/CL/BoundingBoxTransform.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,7 +24,6 @@
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/functions/CLBoundingBoxTransform.h"
 #include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
 #include "tests/Globals.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
diff --git a/tests/validation/CL/Box3x3.cpp b/tests/validation/CL/Box3x3.cpp
deleted file mode 100644
index 6fd531b798..0000000000
--- a/tests/validation/CL/Box3x3.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLBox3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Box3x3Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Box3x3)
-
-template <typename T>
-using CLBox3x3Fixture = Box3x3ValidationFixture<CLTensor, CLAccessor, CLBox3x3, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLBox3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::U8)),
-                                                                                                    datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLBox3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::U8)),
-                                                                                                    datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/CannyEdge.cpp b/tests/validation/CL/CannyEdge.cpp
deleted file mode 100644
index c127eaca84..0000000000
--- a/tests/validation/CL/CannyEdge.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLCannyEdge.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ImageFileDatasets.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/CannyEdgeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Allowed ratio of mismatches between target and reference (1.0 = 100%) */
-const float allowed_mismatch_ratio = 0.1f;
-
-const auto data = combine(framework::dataset::make("GradientSize",
-{ 3, 5, 7 }),
-combine(framework::dataset::make("Normalization", { MagnitudeType::L1NORM, MagnitudeType::L2NORM }), datasets::BorderModes()));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(CannyEdge)
-
-template <typename T>
-using CLCannyEdgeFixture = CannyEdgeValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLCannyEdge, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLCannyEdgeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallImageFiles(), data), framework::dataset::make("Format", Format::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, AbsoluteTolerance<uint8_t>(0), allowed_mismatch_ratio);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLCannyEdgeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data), framework::dataset::make("Format", Format::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, AbsoluteTolerance<uint8_t>(0), allowed_mismatch_ratio);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Cast.cpp b/tests/validation/CL/Cast.cpp
index 2ca8b58040..2f943e84d8 100644
--- a/tests/validation/CL/Cast.cpp
+++ b/tests/validation/CL/Cast.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2022-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,6 +48,10 @@ constexpr AbsoluteTolerance<float> one_tolerance(1);
 constexpr AbsoluteTolerance<float> zero_tolerance(0);
 
 /** Input data sets **/
+// QASYMM8
+const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32));
+const auto CastQSYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QSYMM8), framework::dataset::make("DataType", DataType::F32));
+
 // U8
 const auto CastU8toS8Dataset  = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S8));
 const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
@@ -119,6 +123,26 @@ const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", Da
 const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32));
 const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32));
 const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
+
+// U64
+const auto CastU64toU8Dataset  = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U8));
+const auto CastU64toS8Dataset  = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S8));
+const auto CastU64toU16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U16));
+const auto CastU64toS16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S16));
+const auto CastU64toU32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::U32));
+const auto CastU64toS32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::S32));
+const auto CastU64toF16Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F16));
+const auto CastU64toF32Dataset = combine(framework::dataset::make("DataType", DataType::U64), framework::dataset::make("DataType", DataType::F32));
+
+// S64
+const auto CastS64toU8Dataset  = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U8));
+const auto CastS64toS8Dataset  = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S8));
+const auto CastS64toU16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U16));
+const auto CastS64toS16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S16));
+const auto CastS64toU32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::U32));
+const auto CastS64toS32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::S32));
+const auto CastS64toF16Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F16));
+const auto CastS64toF32Dataset = combine(framework::dataset::make("DataType", DataType::S64), framework::dataset::make("DataType", DataType::F32));
 } // namespace
 
 TEST_SUITE(CL)
@@ -149,6 +173,12 @@ using CLCastToF32Fixture = CastValidationFixture<CLTensor, CLAccessor, CLCast, T
     }                                                                                                                            \
     TEST_SUITE_END()
 
+// QASYMM8
+CAST_SUITE(QASYMM8_to_F32, DataType::QASYMM8, DataType::F32, CLCastToF32Fixture<uint8_t>, CastQASYMM8toF32Dataset, zero_tolerance)
+// QSYMM8
+CAST_SUITE(QSYMM8_to_F32, DataType::QSYMM8, DataType::F32, CLCastToF32Fixture<int8_t>, CastQSYMM8toF32Dataset, zero_tolerance)
+
+
 // U8
 CAST_SUITE(U8_to_S8, DataType::U8, DataType::S8, CLCastToS8Fixture<uint8_t>, CastU8toS8Dataset, zero_tolerance)
 CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, CLCastToU16Fixture<uint8_t>, CastU8toU16Dataset, zero_tolerance)
@@ -221,6 +251,26 @@ CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, CLCastToU32Fixture<float>,
 CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, CLCastToS32Fixture<float>, CastF32toS32Dataset, one_tolerance)
 CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, CLCastToF16Fixture<float>, CastF32toF16Dataset, zero_tolerance)
 
+// S64
+CAST_SUITE(S64_to_U8, DataType::S64, DataType::U8, CLCastToU8Fixture<int64_t>, CastS64toU8Dataset, one_tolerance)
+CAST_SUITE(S64_to_S8, DataType::S64, DataType::S8, CLCastToS8Fixture<int64_t>, CastS64toS8Dataset, one_tolerance)
+CAST_SUITE(S64_to_U16, DataType::S64, DataType::U16, CLCastToU16Fixture<int64_t>, CastS64toU16Dataset, one_tolerance)
+CAST_SUITE(S64_to_S16, DataType::S64, DataType::S16, CLCastToS16Fixture<int64_t>, CastS64toS16Dataset, one_tolerance)
+CAST_SUITE(S64_to_U32, DataType::S64, DataType::U32, CLCastToU32Fixture<int64_t>, CastS64toU32Dataset, one_tolerance)
+CAST_SUITE(S64_to_S32, DataType::S64, DataType::S32, CLCastToS32Fixture<int64_t>, CastS64toS32Dataset, one_tolerance)
+CAST_SUITE(S64_to_F16, DataType::S64, DataType::F16, CLCastToF16Fixture<int64_t>, CastS64toF16Dataset, zero_tolerance)
+CAST_SUITE(S64_to_F32, DataType::S64, DataType::F32, CLCastToF32Fixture<int64_t>, CastS64toF32Dataset, zero_tolerance)
+
+// U64
+CAST_SUITE(U64_to_U8, DataType::U64, DataType::U8, CLCastToU8Fixture<uint64_t>, CastU64toU8Dataset, one_tolerance)
+CAST_SUITE(U64_to_S8, DataType::U64, DataType::S8, CLCastToS8Fixture<uint64_t>, CastU64toS8Dataset, one_tolerance)
+CAST_SUITE(U64_to_U16, DataType::U64, DataType::U16, CLCastToU16Fixture<uint64_t>, CastU64toU16Dataset, one_tolerance)
+CAST_SUITE(U64_to_S16, DataType::U64, DataType::S16, CLCastToS16Fixture<uint64_t>, CastU64toS16Dataset, one_tolerance)
+CAST_SUITE(U64_to_U32, DataType::U64, DataType::U32, CLCastToU32Fixture<uint64_t>, CastU64toU32Dataset, one_tolerance)
+CAST_SUITE(U64_to_S32, DataType::U64, DataType::S32, CLCastToS32Fixture<uint64_t>, CastU64toS32Dataset, one_tolerance)
+CAST_SUITE(U64_to_F16, DataType::U64, DataType::F16, CLCastToF16Fixture<uint64_t>, CastU64toF16Dataset, zero_tolerance)
+CAST_SUITE(U64_to_F32, DataType::U64, DataType::F32, CLCastToF32Fixture<uint64_t>, CastU64toF32Dataset, zero_tolerance)
+
 TEST_SUITE_END() // Cast
 TEST_SUITE_END() // CL
 } // namespace validation
diff --git a/tests/validation/CL/ChannelCombine.cpp b/tests/validation/CL/ChannelCombine.cpp
deleted file mode 100644
index 2ed0765eb9..0000000000
--- a/tests/validation/CL/ChannelCombine.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMultiImage.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLChannelCombine.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ChannelCombineFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(ChannelCombine)
-
-template <typename T>
-using CLChannelCombineFixture = ChannelCombineValidationFixture<CLMultiImage, CLTensor, CLAccessor, CLChannelCombine, T>;
-
-TEST_SUITE(RGBA)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 })))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 })))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-TEST_SUITE_END() // RGBA
-
-TEST_SUITE(YUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 })))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 })))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-TEST_SUITE_END() // YUV
-
-TEST_SUITE(YUVPlanar)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("FormatType", { Format::NV12, Format::NV21, Format::IYUV, Format::YUV444 })))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelCombineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("FormatType", { Format::NV12, Format::NV21, Format::IYUV, Format::YUV444 })))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-TEST_SUITE_END() // YUVPlanar
-
-TEST_SUITE_END() // ChannelCombine
-TEST_SUITE_END() // CL
-
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/ChannelExtract.cpp b/tests/validation/CL/ChannelExtract.cpp
deleted file mode 100644
index b02741f1de..0000000000
--- a/tests/validation/CL/ChannelExtract.cpp
+++ /dev/null
@@ -1,106 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMultiImage.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLChannelExtract.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ConvertPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ChannelExtractFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-// Input data sets
-const auto ChannelExtractRGBADataset = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }),
-                                               framework::dataset::make("ChannelType", { Channel::R, Channel::G, Channel::B, Channel::A }));
-const auto ChannelExtractYUVDataset = combine(framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 }),
-                                              framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
-const auto ChannelExtractYUVPlanarDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444, Format::NV12, Format::NV21 }),
-                                                    framework::dataset::make("ChannelType", { Channel::Y, Channel::U, Channel::V }));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(ChannelExtract)
-
-template <typename T>
-using CLChannelExtractFixture = ChannelExtractValidationFixture<CLMultiImage, CLTensor, CLAccessor, CLChannelExtract, T>;
-
-TEST_SUITE(RGBA)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractRGBADataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractRGBADataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // RGBA
-
-TEST_SUITE(YUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractYUVDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // YUV
-
-TEST_SUITE(YUVPlanar)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ChannelExtractYUVPlanarDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLChannelExtractFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ChannelExtractYUVPlanarDataset))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // YUVPlanar
-
-TEST_SUITE_END() // ChannelExtract
-TEST_SUITE_END() // CL
-
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Col2Im.cpp b/tests/validation/CL/Col2Im.cpp
index b651bf8918..4b004e2472 100644
--- a/tests/validation/CL/Col2Im.cpp
+++ b/tests/validation/CL/Col2Im.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLCol2ImKernel.h"
+#include "src/gpu/cl/kernels/ClCol2ImKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/framework/Asserts.h"
@@ -40,7 +40,7 @@ namespace validation
 TEST_SUITE(CL)
 TEST_SUITE(Col2Im)
 
-using CLCol2Im = CLSynthetizeFunction<CLCol2ImKernel>;
+using ClCol2Im = ClSynthetizeOperatorWithBorder<opencl::kernels::ClCol2ImKernel>;
 
 /** Negative tests
  *
@@ -59,7 +59,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto input     = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::SIZET);
         const auto output    = TensorInfo(TensorShape(3U, 4U, 10U, 1U, 2U), 1, DataType::F32);
         const auto conv_size = Size2D(3, 4);
-        const auto status    = CLCol2ImKernel::validate(&input, &output, conv_size);
+        const auto status    = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -68,7 +68,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto input     = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32);
         const auto output    = TensorInfo(TensorShape(3U, 4U, 10U, 1U, 2U), 1, DataType::F32, DataLayout::NHWC);
         const auto conv_size = Size2D(3, 4);
-        const auto status    = CLCol2ImKernel::validate(&input, &output, conv_size);
+        const auto status    = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -77,13 +77,13 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto input     = TensorInfo(TensorShape(10U, 12U, 1U, 2U), 1, DataType::F32);
         const auto output    = TensorInfo(TensorShape(3U, 4U, 10U, 2U, 2U), 1, DataType::F32);
         const auto conv_size = Size2D(3, 4);
-        const auto status    = CLCol2ImKernel::validate(&input, &output, conv_size);
+        const auto status    = opencl::kernels::ClCol2ImKernel::validate(&input, &output, conv_size);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 }
 
 template <typename T>
-using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im, T, true>;
+using ClCol2ImFixture = Col2ImOpValidationFixture<CLTensor, CLAccessor, ClCol2Im, T, true>;
 
 /** Test kernel for single-precision floating point
  *
@@ -99,7 +99,7 @@ using CLCol2ImFixture = Col2ImValidationFixture<CLTensor, CLAccessor, CLCol2Im,
  *  Kernel tested col2im
  */
 FIXTURE_DATA_TEST_CASE(FP32,
-                       CLCol2ImFixture<float>,
+                       ClCol2ImFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(
                                                    framework::dataset::make("InputShape", { TensorShape(8U, 16U, 3U, 1U), TensorShape(17U, 16U, 3U, 1U), TensorShape(7U, 16U, 3U, 1U) }),
@@ -125,7 +125,7 @@ FIXTURE_DATA_TEST_CASE(FP32,
  *  Kernel tested col2im
  */
 FIXTURE_DATA_TEST_CASE(F16,
-                       CLCol2ImFixture<half>,
+                       ClCol2ImFixture<half>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(
                                                    framework::dataset::make("InputShape", TensorShape(17U, 16U, 3U, 1U)),
@@ -151,7 +151,7 @@ FIXTURE_DATA_TEST_CASE(F16,
  *  Kernel tested col2im
  */
 FIXTURE_DATA_TEST_CASE(QASYMM8,
-                       CLCol2ImFixture<uint8_t>,
+                       ClCol2ImFixture<uint8_t>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(
                                                    framework::dataset::make("InputShape", TensorShape(17U, 16U, 3U, 1U)),
diff --git a/tests/validation/CL/ColorConvert.cpp b/tests/validation/CL/ColorConvert.cpp
deleted file mode 100644
index 0d672a09ff..0000000000
--- a/tests/validation/CL/ColorConvert.cpp
+++ /dev/null
@@ -1,241 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLMultiImage.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLColorConvert.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ColorConvertFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<uint8_t> tolerance_nv(2);
-constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1);
-
-// Input data sets
-const auto RGBDataset  = framework::dataset::make("FormatType", { Format::RGB888, Format::RGBA8888 });
-const auto YUYVDataset = framework::dataset::make("FormatType", { Format::YUYV422, Format::UYVY422 });
-
-const auto ColorConvert_RGBA_to_RGB = combine(framework::dataset::make("FormatType", { Format::RGBA8888 }),
-                                              framework::dataset::make("FormatType", { Format::RGB888 }));
-
-const auto ColorConvert_RGB_to_RGBA = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
-                                              framework::dataset::make("FormatType", { Format::RGBA8888 }));
-
-const auto ColorConvert_RGB_to_U8 = combine(framework::dataset::make("FormatType", { Format::RGB888 }),
-                                            framework::dataset::make("FormatType", { Format::U8 }));
-
-const auto ColorConvert_YUYV_to_RGBDataset = combine(YUYVDataset,
-                                                     RGBDataset);
-
-const auto ColorConvert_YUVPlanar_to_RGBDataset = combine(framework::dataset::make("FormatType", { Format::IYUV, Format::NV12, Format::NV21 }),
-                                                          RGBDataset);
-
-const auto ColorConvert_RGBDataset_to_NVDataset = combine(RGBDataset,
-                                                          framework::dataset::make("FormatType", { Format::NV12, Format::IYUV, Format::YUV444 }));
-
-const auto ColorConvert_YUYVDataset_to_NVDataset = combine(YUYVDataset,
-                                                           framework::dataset::make("FormatType", { Format::NV12, Format::IYUV }));
-
-const auto ColorConvert_NVDataset_to_YUVDataset = combine(framework::dataset::make("FormatType", { Format::NV12, Format::NV21 }),
-                                                          framework::dataset::make("FormatType", { Format::IYUV, Format::YUV444 }));
-
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(ColorConvert)
-
-template <typename T>
-using CLColorConvertFixture = ColorConvertValidationFixture<CLMultiImage, CLTensor, CLAccessor, CLColorConvert, T>;
-
-TEST_SUITE(RGBA)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGBA_to_RGB))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGBA_to_RGB))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(RGB)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_RGBA))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_RGBA))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(RGBtoU8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGB_to_U8))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_u8);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGB_to_U8))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_u8);
-    }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(YUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUYV_to_RGBDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUYV_to_RGBDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(YUVPlanar)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUVPlanar_to_RGBDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUVPlanar_to_RGBDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(NV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_RGBDataset_to_NVDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_nv);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_RGBDataset_to_NVDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx], tolerance_nv);
-    }
-}
-TEST_SUITE_END()
-
-TEST_SUITE(YUYVtoNV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_YUYVDataset_to_NVDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_YUYVDataset_to_NVDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-
-TEST_SUITE_END()
-
-TEST_SUITE(NVtoYUV)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), ColorConvert_NVDataset_to_YUVDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLColorConvertFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), ColorConvert_NVDataset_to_YUVDataset))
-{
-    // Validate output
-    for(unsigned int plane_idx = 0; plane_idx < _dst_num_planes; ++plane_idx)
-    {
-        validate(CLAccessor(*_target.cl_plane(plane_idx)), _reference[plane_idx]);
-    }
-}
-
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Comparisons.cpp b/tests/validation/CL/Comparisons.cpp
index d015528b0e..dd3dbd8d59 100644
--- a/tests/validation/CL/Comparisons.cpp
+++ b/tests/validation/CL/Comparisons.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,7 +59,7 @@ TEST_SUITE(Comparison)
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
         framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid output type
                                                  TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching input types
-                                                 TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+                                                 TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                                  TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
                                                  TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
         }),
@@ -75,7 +75,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                 TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
         })),
-        framework::dataset::make("Expected", { false, false, false, false, true})),
+        framework::dataset::make("Expected", { false, false, true, false, true})),
         input1_info, input2_info, output_info, expected)
 {
     Status s = CLComparison::validate(&input1_info.clone()->set_is_resizable(false),
diff --git a/tests/validation/CL/Convolution.cpp b/tests/validation/CL/Convolution.cpp
deleted file mode 100644
index 1608e7c66d..0000000000
--- a/tests/validation/CL/Convolution.cpp
+++ /dev/null
@@ -1,285 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLConvolution.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ConvolutionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(CustomConvolution)
-TEST_SUITE(Square3x3)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution3x3, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 3 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 3 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square 3x3
-
-TEST_SUITE(Square5x5)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 5 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 5 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square5x5
-
-TEST_SUITE(Square7x7)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 7 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 7 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square7x7
-
-TEST_SUITE(Square9x9)
-
-template <typename T>
-using CLConvolutionFixture = ConvolutionSquareValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 9 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 9 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Square9x9
-
-TEST_SUITE(Rectangle)
-template <typename T>
-using CLConvolutionFixture = ConvolutionRectangleValidationFixture<CLTensor, CLAccessor, CLConvolutionRectangle, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                                 framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
-                                                                                                         framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                                 framework::dataset::make("filter_width", { 3, 5, 7, 9 })),
-                                                                                                         framework::dataset::make("filter_height", { 3, 5, 7, 9 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Rectangle
-
-TEST_SUITE(Separable5x5)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution5x5, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 5 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 5 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Separable5x5
-
-TEST_SUITE(Separable7x7)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution7x7, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 7 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 7 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // S16
-TEST_SUITE_END() // Separable7x7
-
-TEST_SUITE(Separable9x9)
-template <typename T>
-using CLConvolutionFixture = ConvolutionSeparableValidationFixture<CLTensor, CLAccessor, CLConvolution9x9, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::U8)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 9 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunLarge, CLConvolutionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(concat(datasets::SmallShapes(), datasets::LargeShapes()),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::S16)),
-                                                                                                                 datasets::BorderModes()),
-                                                                                                         framework::dataset::make("filter_size", { 9 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(_height / 2, _width / 2)));
-}
-TEST_SUITE_END()
-TEST_SUITE_END() // Separable9x9
-
-TEST_SUITE_END() // Custom Convolution
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Convolution3D.cpp b/tests/validation/CL/Convolution3D.cpp
new file mode 100644
index 0000000000..a2848560c3
--- /dev/null
+++ b/tests/validation/CL/Convolution3D.cpp
@@ -0,0 +1,300 @@
+/*
+ * Copyright (c) 2021, 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLConv3D.h"
+#include "arm_compute/runtime/FunctionDescriptors.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DirectConvolution3DFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const RelativeTolerance<half>        rel_tolerance_fp16(half(0.2)); /**< Relative tolerance for FP16 tests */
+constexpr float                      abs_tolerance_fp16(0.05f);     /**< Absolute tolerance for FP16 tests */
+constexpr RelativeTolerance<float>   rel_tolerance_fp32(0.05f);     /**< Relative tolerance for FP32 tests */
+constexpr float                      abs_tolerance_fp32(0.0001f);   /**< Absolute tolerance for FP32 tests*/
+constexpr AbsoluteTolerance<uint8_t> abs_tolerance_qasymm8(1);      /**< Absolute tolerance for quantized tests */
+constexpr float                      tolerance_num = 0.07f;         /**< Tolerance number */
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DirectConvolution3D)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 5U, 3U), // Unsupported data layout
+                                                        TensorShape(27U, 13U, 5U, 3U), // Unsupported activation enabled
+                                                        TensorShape(27U, 13U, 5U, 3U), // Mismatching data type
+                                                        TensorShape(27U, 13U, 5U, 3U), // Unsupported data type
+                                                        TensorShape(27U, 13U, 5U, 3U), // Mismatching input feature maps
+                                                        TensorShape(27U, 13U, 5U, 3U), // Mismatching output feature maps
+                                                        TensorShape(27U, 13U, 5U, 3U), // Mismatching bias shape
+                                                        TensorShape(27U, 13U, 5U, 3U), // Unsupported number of weights dimensions
+                                                        TensorShape(27U, 13U, 5U, 3U), // Unsupported number of biases dimensions
+                                                        TensorShape(27U, 13U, 5U, 3U), // Mismatching output shape
+                                                        TensorShape(27U, 13U, 5U, 3U)
+                                                     }),
+               framework::dataset::make("WeightsShape", { TensorShape(4U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 32U, 3U, 3U, 3U),
+                                                          TensorShape(8U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U, 2U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U),
+                                                          TensorShape(4U, 27U, 3U, 3U, 3U)
+                                                     })),
+               framework::dataset::make("BiasesShape", { TensorShape(4U),
+                                                         TensorShape(4U),
+                                                         TensorShape(4U),
+                                                         TensorShape(4U),
+                                                         TensorShape(4U),
+                                                         TensorShape(4U),
+                                                         TensorShape(8U),
+                                                         TensorShape(4U),
+                                                         TensorShape(4U),
+                                                         TensorShape(4U),
+                                                         TensorShape(4U)
+                                                     })),
+               framework::dataset::make("OutputShape", { TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U, 2U),
+                                                         TensorShape(4U, 11U, 5U, 3U),
+                                                         TensorShape(4U, 13U, 5U, 3U)
+                                                     })),
+               framework::dataset::make("Conv3dInfo",  { Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false),
+                                                         Conv3dInfo(Size3D(1U, 1U, 1U), Padding3D(1U, 1U, 1U), ActivationLayerInfo(), Size3D(1U, 1U, 1U), DimensionRoundingType::FLOOR, false)
+                                                      })),
+                framework::dataset::make("SrcDataType", { DataType::F32,
+                                                          DataType::F32,
+                                                          DataType::F32,
+                                                          DataType::U32,
+                                                          DataType::F32,
+                                                          DataType::F32,
+                                                          DataType::F32,
+                                                          DataType::F32,
+                                                          DataType::F32,
+                                                          DataType::F32,
+                                                          DataType::F32
+                                                      })),
+                framework::dataset::make("WeightsDataType", { DataType::F32,
+                                                              DataType::F32,
+                                                              DataType::F16,
+                                                              DataType::U32,
+                                                              DataType::F32,
+                                                              DataType::F32,
+                                                              DataType::F32,
+                                                              DataType::F32,
+                                                              DataType::F32,
+                                                              DataType::F32,
+                                                              DataType::F32
+                                                      })),
+                framework::dataset::make("DataLayout", { DataLayout::NCDHW,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC,
+                                                         DataLayout::NDHWC
+                                                      })),
+               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
+               input_shape, weights_shape, biases_shape, output_shape, conv3d_info, src_data_type, weights_data_type, data_layout, expected)
+{
+    TensorInfo input_info   = TensorInfo(input_shape, 1, src_data_type);
+    TensorInfo weights_info = TensorInfo(weights_shape, 1, weights_data_type);
+    TensorInfo biases_info  = TensorInfo(biases_shape, 1, src_data_type);
+    TensorInfo output_info  = TensorInfo(output_shape, 1, src_data_type);
+
+    input_info.set_data_layout(data_layout);
+    weights_info.set_data_layout(data_layout);
+    biases_info.set_data_layout(data_layout);
+    output_info.set_data_layout(data_layout);
+
+    bool is_valid = bool(CLConv3D::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv3d_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+
+template <typename T>
+using CLDirectConvolution3DFixture = DirectConvolution3DValidationFixture<CLTensor, CLAccessor, CLConv3D, T>;
+template <typename T>
+using CLDirectConvolution3DQuantizedFixture = DirectConvolution3DValidationQuantizedFixture<CLTensor, CLAccessor, CLConv3D, T>;
+
+TEST_SUITE(NDHWC)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DFixture<half>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+                       framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+                                                                TensorShape(15U, 7U, 11U, 7U),
+                                                                TensorShape(19U, 5U, 16U, 4U),
+                                                                TensorShape(13U, 5U, 17U, 2U)
+                                                              }),
+                       framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+                       framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+                       framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+                       framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+                       framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+                       framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+                       framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+                       framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+                       framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+                       framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+                       framework::dataset::make("HasBias", { true, true, true, false })),
+                       framework::dataset::make("Activation", ActivationLayerInfo())),
+                       framework::dataset::make("DataType", DataType::F16)),
+                       framework::dataset::make("DataLayout", DataLayout::NDHWC)))
+{
+    validate(CLAccessor(_target), _reference, rel_tolerance_fp16, tolerance_num, abs_tolerance_fp16);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DFixture<float>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+                       framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+                                                                TensorShape(15U, 7U, 11U, 7U),
+                                                                TensorShape(19U, 5U, 16U, 4U),
+                                                                TensorShape(13U, 5U, 17U, 2U)
+                                                              }),
+                       framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+                       framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+                       framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+                       framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+                       framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+                       framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+                       framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+                       framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+                       framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+                       framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+                       framework::dataset::make("HasBias", { true, true, true, false })),
+                       framework::dataset::make("Activation", ActivationLayerInfo())),
+                       framework::dataset::make("DataType", DataType::F32)),
+                       framework::dataset::make("DataLayout", DataLayout::NDHWC)))
+{
+    validate(CLAccessor(_target), _reference, rel_tolerance_fp32, 0.0, abs_tolerance_fp32);
+}
+
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+                                                                                                                   framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+                                                                                                                           TensorShape(15U, 7U, 11U, 7U),
+                                                                                                                           TensorShape(19U, 5U, 16U, 4U),
+                                                                                                                           TensorShape(13U, 5U, 17U, 2U)
+                                                                                                                                                          }),
+                                                                                                                   framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+                                                                                                               framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+                                                                                                           framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+                                                                                                       framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+                                                                                                   framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+                                                                                               framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+                                                                                           framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+                                                                                       framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+                                                                                   framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+                                                                               framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+                                                                           framework::dataset::make("HasBias", { true, true, true, false })),
+                                                                       framework::dataset::make("Activation", ActivationLayerInfo())),
+                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                       framework::dataset::make("DataLayout", DataLayout::NDHWC)),
+                                               framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))),
+                                       framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))),
+                               framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5))))
+{
+    validate(CLAccessor(_target), _reference, abs_tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolution3DQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(zip(
+                                                                                                                   framework::dataset::make("InputShape", { TensorShape(7U, 5U, 3U, 13U, 3U),
+                                                                                                                           TensorShape(15U, 7U, 11U, 7U),
+                                                                                                                           TensorShape(19U, 5U, 16U, 4U),
+                                                                                                                           TensorShape(13U, 5U, 17U, 2U)
+                                                                                                                                                          }),
+                                                                                                                   framework::dataset::make("StrideX", { 1, 3, 2, 1 })),
+                                                                                                               framework::dataset::make("StrideY", { 2, 1, 3, 1 })),
+                                                                                                           framework::dataset::make("StrideZ", { 3, 2, 1, 1 })),
+                                                                                                       framework::dataset::make("PadX", { 0, 2, 1, 0 })),
+                                                                                                   framework::dataset::make("PadY", { 1, 0, 2, 0 })),
+                                                                                               framework::dataset::make("PadZ", { 2, 1, 0, 0 })),
+                                                                                           framework::dataset::make("KernelWidth", { 3, 7, 5, 1 })),
+                                                                                       framework::dataset::make("KernelHeight", { 5, 3, 7, 1 })),
+                                                                                   framework::dataset::make("KernelDepth", { 7, 5, 3, 1 })),
+                                                                               framework::dataset::make("NumKernels", { 5, 3, 1, 11 })),
+                                                                           framework::dataset::make("HasBias", { true, true, true, false })),
+                                                                       framework::dataset::make("Activation", ActivationLayerInfo())),
+                                                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                       framework::dataset::make("DataLayout", DataLayout::NDHWC)),
+                                               framework::dataset::make("SrcQuantizationInfo", QuantizationInfo(0.1f, 10))),
+                                       framework::dataset::make("WeightsQuantizationInfo", QuantizationInfo(0.3f, 20))),
+                               framework::dataset::make("DstQuantizationInfo", QuantizationInfo(0.2f, 5))))
+{
+    validate(CLAccessor(_target), _reference, abs_tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // NDHWC
+TEST_SUITE_END() // DirectConvolution3D
+TEST_SUITE_END() // CL
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp
index b66cfd97e7..8820a6a31e 100644
--- a/tests/validation/CL/ConvolutionLayer.cpp
+++ b/tests/validation/CL/ConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"
@@ -37,12 +38,16 @@
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/ConvolutionLayerFixture.h"
 
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
+ *  Please check there for any differences in the coverage
+ */
 namespace arm_compute
 {
 namespace test
 {
 namespace validation
 {
+using framework::dataset::make;
 namespace
 {
 class SmallConvolutionLayerDatasetCases final : public datasets::ConvolutionLayerDataset
@@ -61,32 +66,32 @@ constexpr AbsoluteTolerance<float>  tolerance_qasymm8(1);                 /**< T
 constexpr float                     tolerance_num = 0.07f;                /**< Tolerance number */
 
 /** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
+const auto CNNDataTypes = make("DataType",
 {
     DataType::F16,
-    DataType::F32,
-    DataType::QASYMM8,
-    DataType::QASYMM8_SIGNED,
+             DataType::F32,
+             DataType::QASYMM8,
+             DataType::QASYMM8_SIGNED,
 });
 
 /** Grouped CNN data types */
-const auto GroupedCNNDataTypes = framework::dataset::make("DataType",
+const auto GroupedCNNDataTypes = make("DataType",
 {
     DataType::F16,
-    DataType::F32
+             DataType::F32
 });
 
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsDataset      = make("ActivationInfo",
 {
     ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
+                        ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+                        ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+                        ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
 });
-const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
 {
     ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
+                        ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
 });
 } // namespace
 
@@ -96,7 +101,7 @@ TEST_SUITE(ConvolutionLayer)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
-                                          framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),            // Select GEMM
+                                          make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),            // Select GEMM
                                                                                   TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),            // Select GEMM
                                                                                   TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32),        // Select GEMM
                                                                                   TensorInfo(TensorShape(23U, 27U, 31U, 4U), 1, DataType::F32),       // Select WINOGRAD
@@ -106,7 +111,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
                                                                                   TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),            // Select GEMM
                                                                                   TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::QASYMM8_SIGNED), // Select GEMM
                                           }),
-                                          framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
+                                          make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
                                                                                     TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
                                                                                     TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
                                                                                     TensorInfo(TensorShape(3U, 3U, 31U, 21U), 1, DataType::F32),
@@ -116,7 +121,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
                                                                                     TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32),
                                                                                     TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::QASYMM8_SIGNED),
                                           })),
-                                          framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+                                          make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
@@ -126,7 +131,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
                                                                                    TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F32),
                                                                                    TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::QASYMM8_SIGNED),
                                           })),
-                                          framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1),
+                                          make("ConvInfo", { PadStrideInfo(1, 2, 1, 1),
                                                                                  PadStrideInfo(1, 2, 1, 1),
                                                                                  PadStrideInfo(1, 1, 0, 0),
                                                                                  PadStrideInfo(1, 1, 0, 0),
@@ -136,7 +141,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
                                                                                  PadStrideInfo(1, 1, 2, 2),
                                                                                  PadStrideInfo(1, 1, 2, 2),
                                           })),
-                                          framework::dataset::make("GpuTarget", { GPUTarget::BIFROST,
+                                          make("GpuTarget", { GPUTarget::BIFROST,
                                                                                   GPUTarget::MIDGARD,
                                                                                   GPUTarget::G71,
                                                                                   GPUTarget::G71,
@@ -146,7 +151,7 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
                                                                                   GPUTarget::BIFROST,
                                                                                   GPUTarget::BIFROST,
                                           })),
-                                          framework::dataset::make("Dilation", { Size2D(1U, 1U),
+                                          make("Dilation", { Size2D(1U, 1U),
                                                                  Size2D(1U, 1U),
                                                                  Size2D(1U, 1U),
                                                                  Size2D(1U, 1U),
@@ -156,8 +161,8 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
                                                                  Size2D(2U, 1U),
                                                                  Size2D(2U, 1U),
                                           })),
-                                         framework::dataset::make("EnableFastMath", { false, false, false, false, false, false, true, true, true })),
-                                         framework::dataset::make("Expected",{ ConvolutionMethod::GEMM,
+                                         make("EnableFastMath", { false, false, false, false, false, false, true, true, true })),
+                                         make("Expected",{ ConvolutionMethod::GEMM,
                                                                                ConvolutionMethod::GEMM,
                                                                                ConvolutionMethod::GEMM,
                                                                                ConvolutionMethod::WINOGRAD,
@@ -186,15 +191,16 @@ TEST_SUITE_END() // ConvolutionLayer
 TEST_SUITE(GEMMConvolutionLayer)
 template <typename T>
 using CLGEMMConvolutionLayerFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
+template <typename T>
+using CLGEMMConvolutionLayerMixedDataLayoutFixture = ConvolutionValidationFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, true>;
+template <typename T>
+using CLConvolutionValidationWithPaddingFixture = ConvolutionValidationWithPaddingFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                   framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                                   framework::dataset::make("DataType",
-                                                                                                                           DataType::F16)),
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                   make("ReshapeWeights", { true })), make("DataType", DataType::F16)), make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                                                                                            ActivationFunctionsSmallDataset))
 {
     // Validate output
@@ -205,93 +211,231 @@ TEST_SUITE_END() // FP16
 TEST_SUITE(FP32)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                                                                    framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F32)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    make("ReshapeWeights", { true })), make("DataType", DataType::F32)), make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                                                                                             ActivationFunctionsSmallDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                           make("Input", TensorShape(23U, 27U, 5U)),
+                                                                                           make("Weights", TensorShape(3U, 3U, 5U, 2U))),
+                                                                                       make("Bias", TensorShape(2U))),
+                                                                               make("Output", TensorShape(11U, 25U, 2U))),
+                                                                       make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0))),
+                                                               make("Dilation", Size2D(1, 1))),
+                                                       make("ReshapeWeights", { true })),
+                                               make("DataType", DataType::F32)),
+                                       make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                               ActivationFunctionsSmallDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallWithPadding, CLConvolutionValidationWithPaddingFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerPrePaddingDataset(),
+                                                               make("ReshapeWeights", { true })),
+                                                       make("DataType", DataType::F32)),
+                                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                       make("ActivationInfo", { ActivationLayerInfo() })),
+make("PrePadLayer", { PaddingList({ { 1, 1 }, { 1, 1 } }) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
 template <typename T>
 using CLGEMMConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
 template <typename T>
+using CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, true>;
+template <typename T>
 using CLGEMMConvolutionLayerQuantizedPerChannelFixture = ConvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T, int8_t>;
 
-const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
-{
-    ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
-});
-const auto QuantizedActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
-{
-    ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
-});
-
 TEST_SUITE(Quantized)
 
-const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+const auto QuantizationData = make("QuantizationInfo",
 {
     QuantizationInfo(0.5f, 10),
     QuantizationInfo(0.3f, 3),
     QuantizationInfo(1.1f, 10),
 });
+
+/// @note: Every asymmetric quantized test has a version with or without activation because the quantization info given
+/// is ignored when there is no activation. Instead of using the same quantization information for all the tensors, the
+/// fixture generates separate quantization info for each input and the output tensor.
+/// When we can also support dynamic quantization with the presence of activation, these two versions should be merged
+/// again, with the explicitly specified quantization info removed
+const auto NoActivation = make("ActivationInfo", ActivationLayerInfo());
+
+const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo());
+
+const auto QuantizedActivationFunctionsSmallDataset = make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
+});
+
 TEST_SUITE(QASYMM8)
 
 FIXTURE_DATA_TEST_CASE(RunSmallCases, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(combine(SmallConvolutionLayerDatasetCases(),
-                                                               framework::dataset::make("ReshapeWeights", { true })),
-                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                       QuantizationData),
-                               QuantizedActivationFunctionsSmallDataset))
+    combine(SmallConvolutionLayerDatasetCases(),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        IgnoredQuantizationInfo,
+        NoActivation))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallCasesWithActivation, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+    combine(SmallConvolutionLayerDatasetCases(),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        QuantizationData,
+        QuantizedActivationFunctionsSmallDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                               framework::dataset::make("ReshapeWeights", { true })),
-                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                       QuantizationData),
-                               QuantizedActivationFunctionsSmallDataset))
+    combine(datasets::SmallConvolutionLayerDataset(),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        IgnoredQuantizationInfo,
+        NoActivation))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+    combine(datasets::SmallConvolutionLayerDataset(),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        QuantizationData,
+        QuantizedActivationFunctionsSmallDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL,
+    combine(
+        make("Input", TensorShape(23U, 27U, 5U)),
+        make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+        make("Bias", TensorShape(2U)),
+        make("Output", TensorShape(11U, 25U, 2U)),
+        make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+        make("Dilation", Size2D(1, 1)),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        IgnoredQuantizationInfo,
+        NoActivation))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL,
+    combine(
+        make("Input", TensorShape(23U, 27U, 5U)),
+        make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+        make("Bias", TensorShape(2U)),
+        make("Output", TensorShape(11U, 25U, 2U)),
+        make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+        make("Dilation", Size2D(1, 1)),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        QuantizationData,
+        QuantizedActivationFunctionsSmallDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END() // QASYMM8
 TEST_SUITE(QASYMM8_SIGNED)
-
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                               framework::dataset::make("ReshapeWeights", { true })),
-                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                       QuantizationData),
-                               QuantizedActivationFunctionsSmallDataset))
+    combine(datasets::SmallConvolutionLayerDataset(),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        IgnoredQuantizationInfo,
+        NoActivation))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
+    combine(datasets::SmallConvolutionLayerDataset(),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        QuantizationData,
+        QuantizedActivationFunctionsSmallDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL,
+    combine(
+        make("Input", TensorShape(23U, 27U, 5U)),
+        make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+        make("Bias", TensorShape(2U)),
+        make("Output", TensorShape(11U, 25U, 2U)),
+        make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+        make("Dilation", Size2D(1, 1)),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        IgnoredQuantizationInfo,
+        NoActivation))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLGEMMConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL,
+    combine(
+        make("Input", TensorShape(23U, 27U, 5U)),
+        make("Weights", TensorShape(3U, 3U, 5U, 2U)),
+        make("Bias", TensorShape(2U)),
+        make("Output", TensorShape(11U, 25U, 2U)),
+        make("PadStrideInfo", PadStrideInfo(2, 1, 0, 0)),
+        make("Dilation", Size2D(1, 1)),
+        make("ReshapeWeights", { true }),
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        QuantizationData,
+        QuantizedActivationFunctionsSmallDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE(QSYMM8_PER_CHANNEL)
+const auto QuantizedActivationFunctionsSmallPerChannelDataset = make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
+});
+
 
 FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLGEMMConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                       framework::dataset::make("ReshapeWeights", { true })),
-                                                               framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
-                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                       make("ReshapeWeights", { true })),
+                                                               make("DataType", { DataType::QASYMM8_SIGNED })),
+                                                       make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                                QuantizationData),
-                                       QuantizedActivationFunctionsSmallDataset),
-                               framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+                                       QuantizedActivationFunctionsSmallPerChannelDataset),
+                               make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -299,12 +443,12 @@ FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLGEMMConvolutionLayerQuantizedPerChannel
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
-                                                                       framework::dataset::make("ReshapeWeights", { true })),
-                                                               framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                       make("ReshapeWeights", { true })),
+                                                               make("DataType", { DataType::QASYMM8 })),
+                                                       make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
                                                QuantizationData),
-                                       QuantizedActivationFunctionsSmallDataset),
-                               framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+                                       QuantizedActivationFunctionsSmallPerChannelDataset),
+                               make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -323,9 +467,7 @@ TEST_SUITE(Float)
 TEST_SUITE(FP32)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(),
-                                                                                                                   framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                                   framework::dataset::make("DataType", DataType::F32)),
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                   make("ReshapeWeights", { true })), make("DataType", DataType::F32)), make("DataLayout", { DataLayout::NCHW })),
                                                                                                                    ActivationFunctionsSmallDataset))
 {
     // Validate output
@@ -334,9 +476,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<float>, fr
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
                        combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(),
-                                                       framework::dataset::make("ReshapeWeights", { true })),
-                                               framework::dataset::make("DataType", DataType::F32)),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                       make("ReshapeWeights", { true })),
+                                               make("DataType", DataType::F32)),
+                                       make("DataLayout", { DataLayout::NCHW })),
                                ActivationFunctionsDataset))
 {
     // Validate output
@@ -347,9 +489,7 @@ TEST_SUITE_END() // FP32
 TEST_SUITE(FP16)
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallGroupedConvolutionLayerDataset(),
-                                                                                                                  framework::dataset::make("ReshapeWeights", { true })),
-                                                                                                                  framework::dataset::make("DataType", DataType::F16)),
-                                                                                                                  framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                                                                                  make("ReshapeWeights", { true })), make("DataType", DataType::F16)), make("DataLayout", { DataLayout::NCHW })),
                                                                                                                   ActivationFunctionsSmallDataset))
 {
     // Validate output
@@ -358,9 +498,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMGroupedConvolutionLayerFixture<half>, fra
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMGroupedConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
                        combine(combine(combine(combine(datasets::LargeGroupedConvolutionLayerDataset(),
-                                                       framework::dataset::make("ReshapeWeights", { true })),
-                                               framework::dataset::make("DataType", DataType::F16)),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                                       make("ReshapeWeights", { true })),
+                                               make("DataType", DataType::F16)),
+                                       make("DataLayout", { DataLayout::NCHW })),
                                ActivationFunctionsDataset))
 {
     // Validate output
diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp
index c284cdcee3..d1508fd902 100644
--- a/tests/validation/CL/DeconvolutionLayer.cpp
+++ b/tests/validation/CL/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,27 +53,29 @@ const auto data9x9_small_asymm = framework::dataset::make("InputShape", TensorSh
                                  *framework::dataset::make("PadLeft", 3)
                                  *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
 
-const auto data9x9_large_asymm = framework::dataset::make("InputShape", TensorShape{ 640U, 360U, 56U, 1U }) *framework::dataset::make("StrideX", 2) *framework::dataset::make("StrideY",
-                                 2)
-                                 *framework::dataset::make("PadLeft", 3)
-                                 *framework::dataset::make("PadRight", 4) *framework::dataset::make("PadTop", 3) *framework::dataset::make("PadBottom", 4) *framework::dataset::make("NumKernels", { 1 });
-
-const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3)
-                     * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 });
-
-const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 2)
+const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4, 2) * framework::dataset::make("StrideY", 2, 4) * framework::dataset::make("PadX", 1, 3)
                      * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
 
+const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2, 4) * framework::dataset::make("StrideY", 1, 4, 2) * framework::dataset::make("PadX", 1, 2)
+                     * framework::dataset::make("PadY", 1, 3) * framework::dataset::make("NumKernels", { 3 });
+
 const auto data3x3_asymm = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadLeft", 0, 1)
                            * framework::dataset::make("PadRight", 0, 1) * framework::dataset::make("PadTop", 0, 1) * framework::dataset::make("PadBottom", 0, 1) * framework::dataset::make("NumKernels", { 3 });
 
 const auto data3x3_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 2) * framework::dataset::make("StrideY", 1, 2) * framework::dataset::make("PadX", 0, 2)
                                * framework::dataset::make("PadY", 0, 2) * framework::dataset::make("NumKernels", { 3 });
 
+const auto data3x3_precommit_large_channels = datasets::SmallDeconvolutionShapesWithLargerChannels() * framework::dataset::make("StrideX", 2) * framework::dataset::make("StrideY", 2)
+                                              * framework::dataset::make("PadX", 1)
+                                              * framework::dataset::make("PadY", 2) * framework::dataset::make("NumKernels", { 5 });
+
 const auto data2x2_precommit = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2) * framework::dataset::make("StrideY", 2) * framework::dataset::make("PadX", 1)
                                * framework::dataset::make("PadY", 1) * framework::dataset::make("NumKernels", { 3 });
 
-const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1)
+const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4, 2) * framework::dataset::make("StrideY", 2, 4) * framework::dataset::make("PadX", 0, 1)
+                     * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 });
+
+const auto data5x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 2, 4) * framework::dataset::make("StrideY", 1, 4, 2) * framework::dataset::make("PadX", 0, 1)
                      * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("NumKernels", { 3 });
 
 const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC });
@@ -87,42 +89,73 @@ TEST_SUITE(DeconvolutionLayer)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
-    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),   // Mismatching data type
-                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),   // Invalid weights shape
-                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),   // Non supported data type
-                                            TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),  // Invalid bias shape
-                                            TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink
-                                            TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
-                                          }),
+    framework::dataset::make("InputInfo",   { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),   // Mismatching data type
+                                              TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),   // Invalid weights shape
+                                              TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),   // Non supported data type
+                                              TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),   // Invalid bias shape
+                                              TensorInfo(TensorShape(13U, 11U, 4U, 3U), 1, DataType::F32), // Window shrink
+                                              TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC),   // Mismatching data type
+                                              TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC),   // Invalid weights shape
+                                              TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16, DataLayout::NHWC),   // Non supported data type
+                                              TensorInfo(TensorShape(2U, 13U, 27U), 1, DataType::F32, DataLayout::NHWC),   // Invalid bias shape
+                                              TensorInfo(TensorShape(4U, 11U, 13U, 3U), 1, DataType::F32, DataLayout::NHWC), // Window shrink
+                                              TensorInfo(TensorShape(2U, 16U, 32U), 1, DataType::F32, DataLayout::NHWC),
+                                            }),
     framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
-                                            TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
-                                            TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16),
+                                              TensorInfo(TensorShape(3U, 2U, 2U, 2U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(3U, 3U, 4U), 1, DataType::F32),
                                               TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
-                                          })),
-    framework::dataset::make("BiasInfo",  { TensorInfo(TensorShape(1U), 1, DataType::F16),
-                                            TensorInfo(TensorShape(1U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(1U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(25U, 11U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(1U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(4U), 1, DataType::F32),
-                                          })),
-    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
-                                            TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
-                                          })),
+                                              TensorInfo(TensorShape(2U, 3U, 3U, 2U), 1, DataType::F16, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(3U, 3U, 2U, 2U), 1, DataType::F16, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(2U, 2U, 3U, 2U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(4U, 3U, 3U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(2U, 2U, 2U, 4U), 1, DataType::F32, DataLayout::NHWC),
+                                            })),
+    framework::dataset::make("BiasInfo",    { TensorInfo(TensorShape(1U), 1, DataType::F16),
+                                              TensorInfo(TensorShape(1U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(1U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(25U, 11U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(1U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(4U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(1U), 1, DataType::F16, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(25U, 11U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(1U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+                                            })),
+    framework::dataset::make("OutputInfo",  { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
+                                              TensorInfo(TensorShape(25U, 10U, 2U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(13U, 13U, 2U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(11U, 9U, 1U, 3U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
+                                              TensorInfo(TensorShape(2U, 11U, 25U), 1, DataType::F16, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(2U, 10U, 25U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(2U, 13U, 13U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(1U, 9U, 11U, 3U), 1, DataType::F32, DataLayout::NHWC),
+                                              TensorInfo(TensorShape(4U, 43U, 91U), 1, DataType::F32, DataLayout::NHWC),
+                                            })),
     framework::dataset::make("PadStrideInfo", { PadStrideInfo(1, 1, 0, 0),
                                                 PadStrideInfo(1, 1, 0, 0),
                                                 PadStrideInfo(1, 1, 0, 0),
                                                 PadStrideInfo(1, 1, 0, 0),
                                                 PadStrideInfo(1, 1, 1, 1),
                                                 PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 0, 0),
+                                                PadStrideInfo(1, 1, 1, 1),
+                                                PadStrideInfo(3, 3, 2, 2),
                                            })),
-    framework::dataset::make("Expected", { false, false, false, false, false, true })),
+    framework::dataset::make("Expected", { false, false, false, false, false, true,            // NCHW
+                                           false, false, false, false, false, true })),        // NHWC
     input_info, weights_info, bias_info, output_info, pad_info, expected)
 {
     bool is_valid = bool(CLDeconvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info));
@@ -149,6 +182,9 @@ using CLDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture<CLTensor,
 template <typename T>
 using CLDeconvolutionLayerAsymmFixture9x9 = DeconvolutionValidationAsymmFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 9, 9>;
 
+template <typename T>
+using CLDeconvolutionLayerFixture5x1 = DeconvolutionValidationFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 5, 1>;
+
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 
@@ -171,6 +207,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture3x3<float>, framewor
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
+
+FIXTURE_DATA_TEST_CASE(RunSmallWithLargeChannels, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data3x3_precommit_large_channels,
+                       framework::dataset::make("DataType",
+                                                DataType::F32)),
+                       data_layouts_dataset),
+                       framework::dataset::make("AddBias", { true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+
 FIXTURE_DATA_TEST_CASE(RunAsymm, CLDeconvolutionLayerAsymmFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3_asymm, framework::dataset::make("DataType",
                                                                                                                       DataType::F32)),
                                                                                                                       data_layouts_dataset),
@@ -180,8 +227,8 @@ FIXTURE_DATA_TEST_CASE(RunAsymm, CLDeconvolutionLayerAsymmFixture3x3<float>, fra
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F32)),
-                                                                                                                 data_layouts_dataset),
-                                                                                                                 add_bias_dataset))
+                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                 framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -202,7 +249,7 @@ TEST_SUITE_END() // W2x2
 TEST_SUITE(W1x1)
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)),
                                                                                                                     data_layouts_dataset),
-                                                                                                            add_bias_dataset))
+                                                                                                            framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -218,6 +265,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerAsymmFixture9x9<float>, fra
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 TEST_SUITE_END() // W9x9
+
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                    data_layouts_dataset),
+                                                                                                            framework::dataset::make("AddBias", { true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_fp32);
+}
+TEST_SUITE_END() // W5x1
+
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
@@ -225,7 +283,7 @@ TEST_SUITE(FP16)
 TEST_SUITE(W4x4)
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)),
                                                                                                                    data_layouts_dataset),
-                                                                                                           add_bias_dataset))
+                                                                                                           framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -242,8 +300,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerFixture3x3<half>, framework
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)),
-                                                                                                                        data_layouts_dataset),
-                                                                                                                add_bias_dataset))
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -264,13 +322,23 @@ TEST_SUITE_END() // W2x2
 TEST_SUITE(W1x1)
 FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data1x1, framework::dataset::make("DataType", DataType::F16)),
                                                                                                                    data_layouts_dataset),
-                                                                                                           add_bias_dataset))
+                                                                                                           framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 TEST_SUITE_END() // W1x1
 
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture5x1<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data5x1, framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                   data_layouts_dataset),
+                                                                                                           framework::dataset::make("AddBias", { true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 
@@ -286,6 +354,24 @@ using CLDeconvolutionLayerQuantizedFixture2x2 = DeconvolutionValidationQuantized
 template <typename T>
 using CLDeconvolutionLayerQuantizedFixture1x1 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 1, 1>;
 
+template <typename T>
+using CLDeconvolutionLayerQuantizedFixture5x1 = DeconvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, 5, 1>;
+
+template <typename T>
+using CLDeconvolutionLayerQuantizedPerChannelFixture4x4 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 4, 4>;
+
+template <typename T>
+using CLDeconvolutionLayerQuantizedPerChannelFixture3x3 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 3, 3>;
+
+template <typename T>
+using CLDeconvolutionLayerQuantizedPerChannelFixture2x2 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 2, 2>;
+
+template <typename T>
+using CLDeconvolutionLayerQuantizedPerChannelFixture1x1 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 1, 1>;
+
+template <typename T>
+using CLDeconvolutionLayerQuantizedPerChannelFixture5x1 = DeconvolutionValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDeconvolutionLayer, T, int8_t, 5, 1>;
+
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 
@@ -295,7 +381,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<uint8_t>, fr
                                                                                                                        data_layouts_dataset),
                                                                                                                        framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
                                                                                                                        framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
-                                                                                                                       add_bias_dataset))
+                                                                                                                       framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -315,12 +401,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t
     validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       data_layouts_dataset),
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                        framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 128) })),
                        framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 128), QuantizationInfo(4.f / 255.f, 128) })),
-                       add_bias_dataset))
+                       framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -346,13 +431,26 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, fr
                                                                                                                        data_layouts_dataset),
                                                                                                                        framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
                                                                                                                        framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
-                                                                                                                       add_bias_dataset))
+                                                                                                                       framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
 }
 TEST_SUITE_END() // W1x1
 
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType",
+                                                                                                                       DataType::QASYMM8)),
+                                                                                                                       data_layouts_dataset),
+                                                                                                                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
+                                                                                                                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
+                                                                                                                       framework::dataset::make("AddBias", { true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
 TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
@@ -366,7 +464,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<int8_t>, fra
                                                                                                                       data_layouts_dataset),
                                                                                                                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
                                                                                                                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
-                                                                                                                      add_bias_dataset))
+                                                                                                                      framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -388,12 +486,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture3x3<int8_t>
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
-                       framework::dataset::make("DataType",
-                                                DataType::QASYMM8_SIGNED)),
-                       data_layouts_dataset),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
                        framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10), QuantizationInfo(2.f / 255.f, 127) })),
                        framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 64), QuantizationInfo(4.f / 255.f, -128) })),
-                       add_bias_dataset))
+                       framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
@@ -414,20 +511,192 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedFixture2x2<int8_t>
 TEST_SUITE_END() // W2x2
 
 TEST_SUITE(W1x1) // DirectDeconvolution and GEMMDeconvolution
-FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType",
-                                                                                                                      DataType::QASYMM8_SIGNED)),
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1,
+                                                                                                                      framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
                                                                                                                       data_layouts_dataset),
                                                                                                                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 0), QuantizationInfo(2.f / 255.f, 0) })),
                                                                                                                       framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0), QuantizationInfo(4.f / 255.f, 0) })),
-                                                                                                                      add_bias_dataset))
+                                                                                                                      framework::dataset::make("AddBias", { true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
 }
 TEST_SUITE_END() // W1x1
 
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data5x1, framework::dataset::make("DataType",
+                                                                                                                      DataType::QASYMM8_SIGNED)),
+                                                                                                                      data_layouts_dataset),
+                                                                                                                      framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(2.f / 255.f, 5) })),
+                                                                                                                      framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 5), QuantizationInfo(4.f / 255.f, 10) })),
+                                                                                                                      framework::dataset::make("AddBias", { true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
 TEST_SUITE_END() // QASYMM8_SIGNED
 
+const auto input_qinfo_dataset         = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) });
+const auto output_qinfo_dataset        = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 0) });
+const auto input_signed_qinfo_dataset  = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, -10) });
+const auto output_signed_qinfo_dataset = framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(3.f / 255.f, 10) });
+
+TEST_SUITE(QSYMM8_PER_CHANNEL)
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data4x4,
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       input_qinfo_dataset),
+                       output_qinfo_dataset),
+                       framework::dataset::make("AddBias", { true })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture4x4<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data4x4,
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       input_signed_qinfo_dataset),
+                       output_signed_qinfo_dataset),
+                       framework::dataset::make("AddBias", { true })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W4x4
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data3x3,
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       input_qinfo_dataset),
+                       output_qinfo_dataset),
+                       framework::dataset::make("AddBias", { true })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data3x3,
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       input_signed_qinfo_dataset),
+                       output_signed_qinfo_dataset),
+                       framework::dataset::make("AddBias", { true })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallSignedPrecommit, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<int8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(data3x3_precommit,
+                                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                               data_layouts_dataset),
+                                                       input_signed_qinfo_dataset),
+                                               output_signed_qinfo_dataset),
+                                       add_bias_dataset),
+                               framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data3x3_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       input_qinfo_dataset),
+                       output_qinfo_dataset),
+                       add_bias_dataset),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+
+TEST_SUITE(W2x2)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data2x2_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       input_qinfo_dataset),
+                       output_qinfo_dataset),
+                       add_bias_dataset),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture2x2<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(data2x2_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       input_signed_qinfo_dataset),
+                       output_signed_qinfo_dataset),
+                       add_bias_dataset),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W2x2
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data1x1,
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       input_qinfo_dataset),
+                       output_qinfo_dataset),
+                       framework::dataset::make("AddBias", { false })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data1x1,
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       input_signed_qinfo_dataset),
+                       output_signed_qinfo_dataset),
+                       framework::dataset::make("AddBias", { true })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE(W5x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDeconvolutionLayerQuantizedPerChannelFixture5x1<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1,
+                       framework::dataset::make("DataType", DataType::QASYMM8)),
+                       data_layouts_dataset),
+                       input_qinfo_dataset),
+                       output_qinfo_dataset),
+                       framework::dataset::make("AddBias", { true })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallSigned, CLDeconvolutionLayerQuantizedPerChannelFixture5x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(data5x1,
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       data_layouts_dataset),
+                       input_signed_qinfo_dataset),
+                       output_signed_qinfo_dataset),
+                       framework::dataset::make("AddBias", { false })),
+                       framework::dataset::make("WeightsDataType", { DataType::QSYMM8_PER_CHANNEL })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+}
+TEST_SUITE_END() // W5x1
+
+TEST_SUITE_END() // QSYMM8_PER_CHANNEL
+
 TEST_SUITE_END() // Quantized
 
 TEST_SUITE_END() // DeconvolutionLayer
diff --git a/tests/validation/CL/DepthConvertLayer.cpp b/tests/validation/CL/DepthConvertLayer.cpp
index a823b278fc..490b38ccf6 100644
--- a/tests/validation/CL/DepthConvertLayer.cpp
+++ b/tests/validation/CL/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,17 +44,16 @@ namespace validation
 namespace
 {
 /** Input data sets **/
-const auto DepthConvertLayerU8toU16Dataset        = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
-const auto DepthConvertLayerU8toS16Dataset        = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
-const auto DepthConvertLayerU8toS32Dataset        = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertLayerU16toU8Dataset        = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertLayerU16toU32Dataset       = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
-const auto DepthConvertLayerS16toU8Dataset        = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
-const auto DepthConvertLayerS16toS32Dataset       = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
-const auto DepthConvertLayerF16toF32Dataset       = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
-const auto DepthConvertLayerF32toF16Dataset       = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
-const auto DepthConvertLayerShiftDatasetNightly   = framework::dataset::make("Shift", 0, 7);
-const auto DepthConvertLayerShiftDatasetPrecommit = framework::dataset::make("Shift", { 0, 3, 6 });
+const auto DepthConvertLayerU8toU16Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
+const auto DepthConvertLayerU8toS16Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
+const auto DepthConvertLayerU8toS32Dataset   = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerU16toU8Dataset   = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerU16toU32Dataset  = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
+const auto DepthConvertLayerS16toU8Dataset   = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto DepthConvertLayerS16toS32Dataset  = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerF16toF32Dataset  = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerF32toF16Dataset  = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
+const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shift", 0);
 } // namespace
 
 TEST_SUITE(CL)
@@ -63,7 +62,7 @@ TEST_SUITE(DepthConvertLayer)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Invalid data type combination
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Support upcasting from QASYMM8 to S16
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Mismatching shapes
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Invalid shift
@@ -84,8 +83,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                                    ConvertPolicy::WRAP,
                                                    ConvertPolicy::WRAP,
                                                      })),
-               framework::dataset::make("Shift",{ 1, 1, 8, 1, 1, 1, })),
-               framework::dataset::make("Expected", { false, false, false, false, false, true})),
+               framework::dataset::make("Shift",{ 0, 0, 0, 1, 1, 0, })),
+               framework::dataset::make("Expected", { true, false, false, false, false, true})),
                input_info, output_info, policy, shift, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLDepthConvertLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), policy, shift)) == expected, framework::LogLevel::ERRORS);
@@ -111,7 +110,7 @@ using CLDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture<CLTen
 TEST_SUITE(U8_to_U16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toU16Dataset),
                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                      DepthConvertLayerShiftDatasetPrecommit))
+                                                                                                                      DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -119,7 +118,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU16Fixture<uint8_t>, frame
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toU16Dataset),
                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                    DepthConvertLayerShiftDatasetNightly))
+                                                                                                                    DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -129,7 +128,7 @@ TEST_SUITE_END() // U8_to_U16
 TEST_SUITE(U8_to_S16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS16Dataset),
                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                      DepthConvertLayerShiftDatasetPrecommit))
+                                                                                                                      DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -137,7 +136,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS16Fixture<uint8_t>, frame
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS16Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS16Dataset),
                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                    DepthConvertLayerShiftDatasetNightly))
+                                                                                                                    DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -146,7 +145,7 @@ TEST_SUITE_END() // U8_to_S16
 TEST_SUITE(U8_to_S32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toS32Dataset),
                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                      DepthConvertLayerShiftDatasetPrecommit))
+                                                                                                                      DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -154,7 +153,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<uint8_t>, frame
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toS32Dataset),
                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                    DepthConvertLayerShiftDatasetNightly))
+                                                                                                                    DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -164,14 +163,14 @@ TEST_SUITE_END() // U8_to_S32
 TEST_SUITE(U16_to_U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU8Dataset),
                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                      DepthConvertLayerShiftDatasetPrecommit))
+                                                                                                                      DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU8Dataset),
                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                    DepthConvertLayerShiftDatasetNightly))
+                                                                                                                    DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -181,14 +180,14 @@ TEST_SUITE_END() // U16_to_U8
 TEST_SUITE(U16_to_U32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU16toU32Dataset),
                                                                                                                        framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                       DepthConvertLayerShiftDatasetPrecommit))
+                                                                                                                       DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU32Fixture<uint16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU16toU32Dataset),
                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                     DepthConvertLayerShiftDatasetNightly))
+                                                                                                                     DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -198,14 +197,14 @@ TEST_SUITE_END() // U16_to_U32
 TEST_SUITE(S16_to_U8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toU8Dataset),
                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                     DepthConvertLayerShiftDatasetPrecommit))
+                                                                                                                     DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToU8Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toU8Dataset),
                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                   DepthConvertLayerShiftDatasetNightly))
+                                                                                                                   DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -215,14 +214,14 @@ TEST_SUITE_END() // S16_to_U8
 TEST_SUITE(S16_to_S32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS16toS32Dataset),
                                                                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                      DepthConvertLayerShiftDatasetPrecommit))
+                                                                                                                      DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthConvertLayerToS32Fixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS16toS32Dataset),
                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
-                                                                                                                    DepthConvertLayerShiftDatasetNightly))
+                                                                                                                    DepthConvertLayerZeroShiftDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
index 351819ae55..d4dbcec9d9 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,6 +41,9 @@ namespace test
 {
 namespace validation
 {
+
+using framework::dataset::make;
+
 namespace
 {
 RelativeTolerance<half_float::half>  tolerance_f16(half_float::half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
@@ -48,16 +51,47 @@ constexpr RelativeTolerance<float>   tolerance_f32(0.01f);                  /**<
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(0);                  /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */
 constexpr float                      tolerance_num = 0.05f;                 /**< Tolerance number */
 
-const auto depth_multipliers       = framework::dataset::make("DepthMultiplier", { 1, 2, 5 });
-const auto large_depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 5, 8 });
+const auto depth_multipliers       = make("DepthMultiplier", { 1, 4 });
+const auto large_depth_multipliers = make("DepthMultiplier", { 2, 5, 8 });
 
-//Activation Functions
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+// Activation Functions
+const auto NoActivation = make("ActivationInfo", ActivationLayerInfo());
+
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
 {
     ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 2.f, 0.f)
+});
+
+const auto ActivationFunctionsDataset = make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
+});
+
+const auto ActivationFunctionsQuantizedSmallDataset = make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 2.f, 0.f)
+});
+
+const auto ActivationFunctionsQuantizedDataset = make("ActivationInfo",
+{
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.f)
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.3f, -1.5f),
 });
+
+const auto IgnoredQuantizationInfo = make("IgnoredQuantizationInfo", QuantizationInfo());
+
 } // namespace
 
 TEST_SUITE(CL)
@@ -65,85 +99,85 @@ TEST_SUITE(DepthwiseConvolutionLayer)
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
-                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching data type input/weights
-                                                        TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32),    // Mismatching input feature maps
-                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching depth multiplier
-                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases size
-                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases dimensions
-                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid output size
-                                                        TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // patch size bigger than input width
-                                                        TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // dilation < 1
-                                                        TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
-                                                      }),
-                framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
-                                                          TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
-                                                          TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
-                                                        })),
-                framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(4U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(16U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(24U), 1, DataType::S32),
-                                                       })),
-                framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
-                                                         TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
-                                                       })),
-                framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 1, 0),
-                                                      })),
-                framework::dataset::make("DepthMultiplier", { 1,
-                                                              1,
-                                                              3,
-                                                              1,
-                                                              1,
-                                                              1,
-                                                              2,
-                                                              2,
-                                                              2,
-                                                              3,
-                                                             })),
-                framework::dataset::make("Dilation", { Size2D(1U, 1U),
-                                                              Size2D(1U, 1U),
-                                                              Size2D(1U, 1U),
-                                                              Size2D(1U, 1U),
-                                                              Size2D(1U, 1U),
-                                                              Size2D(1U, 1U),
-                                                              Size2D(20U, 1U),
-                                                              Size2D(0U, 1U),
-                                                              Size2D(1U, 1U),
-                                                              Size2D(1U, 1U),
-                                                             })),
-                framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, true, true })),
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+                make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching data type input/weights
+                                TensorInfo(TensorShape(27U, 13U, 3U), 1, DataType::F32),    // Mismatching input feature maps
+                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Mismatching depth multiplier
+                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases size
+                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid biases dimensions
+                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),    // Invalid output size
+                                TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // patch size bigger than input width
+                                TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),    // dilation < 1
+                                TensorInfo(TensorShape(27U, 13U, 8U), 1, DataType::F32),
+                                TensorInfo(TensorShape(32U, 13U, 8U), 1, DataType::QASYMM8),
+                                }),
+                make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F16),
+                                TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(3U, 3U, 24U), 1, DataType::QASYMM8),
+                        }),
+                make("BiasesInfo", { TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(4U), 1, DataType::F32),
+                                TensorInfo(TensorShape(2U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(24U), 1, DataType::S32),
+                        }),
+                make("OutputInfo", { TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+                                TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(25U, 11U, 16U), 1, DataType::F32),
+                                TensorInfo(TensorShape(32U, 11U, 24U), 1, DataType::QASYMM8),
+                        }),
+                make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 0, 0),
+                                PadStrideInfo(1, 1, 1, 0),
+                                }),
+                make("DepthMultiplier", { 1,
+                                        1,
+                                        3,
+                                        1,
+                                        1,
+                                        1,
+                                        2,
+                                        2,
+                                        2,
+                                        3,
+                                }),
+                make("Dilation", { Size2D(1U, 1U),
+                                Size2D(1U, 1U),
+                                Size2D(1U, 1U),
+                                Size2D(1U, 1U),
+                                Size2D(1U, 1U),
+                                Size2D(1U, 1U),
+                                Size2D(20U, 1U),
+                                Size2D(0U, 1U),
+                                Size2D(1U, 1U),
+                                Size2D(1U, 1U),
+                                }),
+                make("Expected", { false, false, false, false, false, false, false, false, true, true })),
                 input_info, weights_info, biases_info, output_info, conv_info, depth_multiplier, dilation, expected)
 {
     bool is_valid = bool(CLDepthwiseConvolutionLayer::validate(&input_info.clone()->set_is_resizable(true), &weights_info.clone()->set_is_resizable(true), &biases_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), conv_info, depth_multiplier,ActivationLayerInfo(), dilation));
@@ -154,47 +188,34 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
 
 template <typename T>
 using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
+template <typename T>
+using CLDepthwiseConvolutionLayerMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, true>;
+template <typename T>
+using CLDepthwiseConvolutionLayerInPlaceFixture = DepthwiseConvolutionLayerValidationFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, false, true>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 TEST_SUITE(W3x3)
 TEST_SUITE(NCHW)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F16)),
-                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
-                               ActivationFunctionsDataset))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                                                                                    large_depth_multipliers),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NCHW)),
-                                                                                                                    ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
+    combine(
+        framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                        datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+        depth_multipliers,
+        make("DataType", DataType::F16),
+        make("DataLayout", DataLayout::NCHW),
+        ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                                                                        depth_multipliers),
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::F16)),
-                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                                                                                                ActivationFunctionsDataset))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                                                                    large_depth_multipliers),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                                                                                                    ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
+    combine(
+        datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+        depth_multipliers,
+        make("DataType", DataType::F16),
+        make("DataLayout", { DataLayout::NCHW }),
+        ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
@@ -202,41 +223,43 @@ TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // NCHW
 
 TEST_SUITE(NHWC)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F16)),
-                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL,
+    combine(
+        datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+        depth_multipliers,
+        make("DataType", DataType::F16),
+        make("DataLayout", DataLayout::NHWC),
+        ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                                                                                    large_depth_multipliers),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", DataLayout::NHWC)),
-                                                                                                                    ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+    combine(
+        datasets::LargeDepthwiseConvolutionLayerDataset3x3Fp16Subset(),
+        large_depth_multipliers,
+        make("DataType", DataType::F16),
+        make("DataLayout", DataLayout::NHWC),
+        make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                                                                        depth_multipliers),
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::F16)),
-                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                                                                                                ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    make("DataType",
+                                                                                                                            DataType::F16)),
+                                                                                                                    make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                   ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                                                                    large_depth_multipliers),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                                                                                                    ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3Fp16Subset(),
+                                                           large_depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F16)),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
@@ -245,133 +268,153 @@ TEST_SUITE_END() // NHWC
 TEST_SUITE_END() // W3x3
 
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                                                                                                                        depth_multipliers),
-                                                                                                                        framework::dataset::make("DataType",
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    make("DataType",
+                                                                                                                            DataType::F16)),
+                                                                                                                    make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                   ActivationFunctionsSmallDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDatasetFp16Subset(),
+                                                                                                                        large_depth_multipliers),
+                                                                                                                        make("DataType",
                                                                                                                                 DataType::F16)),
-                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                ActivationFunctionsDataset))
+                                                                                                                        make("DataLayout", { DataLayout::NHWC })),
+                                                                                                                        make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
-                                                                                                                    large_depth_multipliers),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                    ActivationFunctionsDataset))
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+    combine(
+        make("In", TensorShape(33U, 27U, 11U, 3U)),
+        make("Weights", Size2D(3U, 4U)),
+        make("Info", PadStrideInfo(1, 2, 0, 1)),
+        make("Dilation", Size2D(2U, 2U)),
+        make("DepthMultiplier", { 2 }),
+        make("DataType", DataType::F16),
+        make("DataLayout", { DataLayout::NHWC }),
+        ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
-                                                                                                                        depth_multipliers),
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::F16)),
-                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                    depth_multipliers),
+                                                                                                                    make("DataType",
+                                                                                                                            DataType::F16)),
+                                                                                                                    make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                   ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
-                                                                                                                    large_depth_multipliers),
-                                                                                                                    framework::dataset::make("DataType",
-                                                                                                                            DataType::F16)),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                    ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDatasetFp16Subset(),
+                                                           large_depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F16)),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // Generic
+
+TEST_SUITE(InPlace)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<half>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(),
+                                                           make("DepthMultiplier", { 1 })),
+                                                   make("DataType",
+                                                                            DataType::F16)),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                  ActivationFunctionsSmallDataset))
+{
+    validate(CLAccessor(_src), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END() // InPlace
 TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
 TEST_SUITE(W3x3)
 TEST_SUITE(NCHW)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                                  datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F32)),
-                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
-                               ActivationFunctionsDataset))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                                                                                     large_depth_multipliers),
-                                                                                                                     framework::dataset::make("DataType",
-                                                                                                                             DataType::F32)),
-                                                                                                                     framework::dataset::make("DataLayout", DataLayout::NCHW)),
-                                                                                                                     ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                      datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()),
+                                                           depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", DataLayout::NCHW)),
+                                  ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F32)),
-                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
-                               ActivationFunctionsDataset))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                       large_depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F32)),
-                                       framework::dataset::make("DataLayout", DataLayout::NCHW)),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", DataLayout::NCHW)),
+                                  ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-
 TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // NCHW
+
 TEST_SUITE(NHWC)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F32)),
-                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                           depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", DataLayout::NHWC)),
+                                  ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                                                                                     large_depth_multipliers),
-                                                                                                                     framework::dataset::make("DataType",
-                                                                                                                             DataType::F32)),
-                                                                                                                     framework::dataset::make("DataLayout", DataLayout::NHWC)),
-                                                                                                                     ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                           make("DepthMultiplier", { 2 })),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", DataLayout::NHWC)),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                           large_depth_multipliers),
+                           make("DataType",
+                                                    DataType::F32)),
+                           make("DataLayout", DataLayout::NHWC)),
+                           make("ActivationInfo", ActivationLayerInfo())))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
 TEST_SUITE(Dilation)
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                       depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F32)),
-                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                           depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", DataLayout::NHWC)),
+                                  ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                       large_depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F32)),
-                                       framework::dataset::make("DataLayout", DataLayout::NHWC)),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                           large_depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", DataLayout::NHWC)),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
@@ -380,148 +423,259 @@ TEST_SUITE_END() // NHWC
 TEST_SUITE_END() // W3x3
 
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                                                                                                                 depth_multipliers),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::F32)),
-                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                 ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                                                                     depth_multipliers),
+                                                                                                                     make("DataType",
+                                                                                                                             DataType::F32)),
+                                                                                                                     make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
-                                                                                                                     large_depth_multipliers),
-                                                                                                                     framework::dataset::make("DataType",
-                                                                                                                             DataType::F32)),
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                     ActivationFunctionsDataset))
+
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                           large_depth_multipliers),
+                           make("DataType",
+                                                    DataType::F32)),
+                           make("DataLayout", { DataLayout::NHWC })),
+                           make("ActivationInfo", ActivationLayerInfo())))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunLargeKernelSize, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset(),
+                                                           make("DepthMultiplier", { 1 })),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                  ActivationFunctionsSmallDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+    combine(
+        make("In", TensorShape(33U, 27U, 11U, 3U)),
+        make("Weights", Size2D(3U, 4U)),
+        make("Info", PadStrideInfo(1, 2, 0, 1)),
+        make("Dilation", Size2D(2U, 2U)),
+        make("DepthMultiplier", { 2 }),
+        make("DataType", DataType::F32),
+        make("DataLayout", { DataLayout::NHWC }),
+        ActivationFunctionsDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
-                                                                                                                 depth_multipliers),
-                                                                                                                 framework::dataset::make("DataType",
-                                                                                                                         DataType::F32)),
-                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                 ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                                                     depth_multipliers),
+                                                                                                                     make("DataType",
+                                                                                                                             DataType::F32)),
+                                                                                                                     make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                    ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                       large_depth_multipliers),
-                                               framework::dataset::make("DataType",
-                                                                        DataType::F32)),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                           large_depth_multipliers),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // Generic
+
+TEST_SUITE(InPlace)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerInPlaceFixture<float>, framework::DatasetMode::ALL,
+                           combine(combine(combine(combine(datasets::SmallInPlaceDepthwiseConvolutionLayerDataset(),
+                                                           make("DepthMultiplier", { 1 })),
+                                                   make("DataType",
+                                                                            DataType::F32)),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                  ActivationFunctionsSmallDataset))
+{
+    validate(CLAccessor(_src), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // InPlace
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
 template <typename T>
 using CLDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T>;
 template <typename T>
+using CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture = DepthwiseConvolutionLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, true>;
+template <typename T>
 using CLDepthwiseConvolutionLayerQuantizedPerChannelFixture = DepthwiseConvolutionLayerValidationQuantizedPerChannelFixture<CLTensor, CLAccessor, CLDepthwiseConvolutionLayer, T, int8_t>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                                                                       depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 128), QuantizationInfo(2.2f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(1.f, 128) }),
+        make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+        ActivationFunctionsQuantizedSmallDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+    combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+        large_depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NHWC }),
+        NoActivation))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
-                                                                       large_depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+    combine(
+        make("In", TensorShape(33U, 27U, 11U, 3U)),
+        make("Weights", Size2D(3U, 4U)),
+        make("Info", PadStrideInfo(1, 2, 0, 1)),
+        make("Dilation", Size2D(2U, 2U)),
+        make("DepthMultiplier", { 2U }),
+        make("DataType", DataType::QASYMM8),
+        make("SrcQuantizationInfo", { QuantizationInfo(2.2f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.1f, 128) }),
+        make("DataLayout", { DataLayout::NHWC }),
+        ActivationFunctionsQuantizedDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
-                                                                       depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })), // NCHW is tested with int8
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) }),
+        make("DataLayout", { DataLayout::NHWC }), // NCHW is tested with int8
+        ActivationFunctionsQuantizedSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
-                                                                       large_depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(1.3f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+    combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+        large_depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NHWC }),
+        NoActivation))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // Generic
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                       depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NHWC }),
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+        make("DataLayout", { DataLayout::NHWC }),
+        ActivationFunctionsQuantizedSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                                       large_depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+    combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+        large_depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NHWC }),
+        NoActivation))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                       depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) }),
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        ActivationFunctionsQuantizedSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                       large_depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+        make("DepthMultiplier", { 2 }),
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }),
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+    combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+        large_depth_multipliers,
+        make("DataType", DataType::QASYMM8),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NHWC }),
+        NoActivation))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
@@ -531,26 +685,74 @@ TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                                                                       depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8_SIGNED),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NCHW }),
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10), QuantizationInfo(2.2f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+        make("DataLayout", { DataLayout::NCHW }),
+        ActivationFunctionsQuantizedSmallDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunMixedDataLayout, CLDepthwiseConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+        make("DepthMultiplier", { 2 }),
+        make("DataType", DataType::QASYMM8_SIGNED),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NCHW }),
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+    combine(
+        make("In", TensorShape(33U, 27U, 11U, 3U)),
+        make("Weights", Size2D(3U, 4U)),
+        make("Info", PadStrideInfo(1, 2, 0, 1)),
+        make("Dilation", Size2D(2U, 2U)),
+        make("DepthMultiplier", { 2U }),
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+        make("DataLayout", { DataLayout::NHWC }),
+        ActivationFunctionsQuantizedDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
-                                                                       depth_multipliers),
-                                                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8_SIGNED),
+        IgnoredQuantizationInfo,
+        IgnoredQuantizationInfo,
+        make("DataLayout", { DataLayout::NCHW }),
+        NoActivation))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunSmallWithActivation, CLDepthwiseConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+        depth_multipliers,
+        make("DataType", DataType::QASYMM8_SIGNED),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10), QuantizationInfo(2.2f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) }),
+        make("DataLayout", { DataLayout::NCHW }),
+        ActivationFunctionsQuantizedSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
@@ -560,104 +762,120 @@ TEST_SUITE_END() // QASYMM8_SIGNED
 
 TEST_SUITE(QSYMM8_PER_CHANNEL)
 TEST_SUITE(Generic)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
-                                                                               depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(),
+                                                                                   depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+                                           make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                  ActivationFunctionsSmallDataset))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
+                                                                                   large_depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(),
-                                                                               large_depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.7f, 2) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+    combine(
+        make("In", TensorShape(33U, 27U, 11U, 3U)),
+        make("Weights", Size2D(3U, 4U)),
+        make("Info", PadStrideInfo(1, 2, 0, 1)),
+        make("Dilation", Size2D(2U, 2U)),
+        make("DepthMultiplier", { 2U }),
+        make("SrcDataType", DataType::QASYMM8_SIGNED),
+        make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL),
+        make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) }),
+        make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) }),
+        make("DataLayout", { DataLayout::NHWC }),
+        ActivationFunctionsQuantizedDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
-                                                                               depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                   depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.8, 1) })),
+                                           make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                  ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
-                                                                               large_depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(),
+                                                                                   large_depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.9f, 11) })),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END() // Dilation
 TEST_SUITE_END() // Generic
 TEST_SUITE(W3x3)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
-                                                                               depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(),
+                                                                                   depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.3f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.5f, 4) })),
+                                           make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                  ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
-                                                                               large_depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(),
+                                                                                   large_depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE(Dilation)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                               depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                   depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                           make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                  ActivationFunctionsSmallDataset))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
-                                                                               large_depth_multipliers),
-                                                                       framework::dataset::make("SrcDataType", DataType::QASYMM8)),
-                                                               framework::dataset::make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
-                                                       framework::dataset::make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                               framework::dataset::make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                               ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerQuantizedPerChannelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                           combine(combine(combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(),
+                                                                                   large_depth_multipliers),
+                                                                           make("SrcDataType", DataType::QASYMM8)),
+                                                                   make("WeightsDataType", DataType::QSYMM8_PER_CHANNEL)),
+                                                           make("SrcQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                                   make("DstQuantizationInfo", { QuantizationInfo(0.5f, 10) })),
+                                           make("DataLayout", { DataLayout::NHWC })),
+                                   make("ActivationInfo", ActivationLayerInfo())))
 {
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
diff --git a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
index b1cd379574..012018c0fc 100644
--- a/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
+++ b/tests/validation/CL/DepthwiseConvolutionLayerNative.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,7 +30,6 @@
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -43,6 +42,7 @@ namespace test
 {
 namespace validation
 {
+using framework::dataset::make;
 using namespace arm_compute::misc::shape_calculator;
 
 // Create function for CLDepthwiseConvolutionLayerNativeKernel
@@ -63,64 +63,89 @@ RelativeTolerance<half_float::half>  rel_tolerance_f16(half_float::half(0.01f));
 constexpr float                      abs_tolerance_f16(0.03f);
 
 /** Width values to test - Precommit */
-const auto width_values_precommit = framework::dataset::make("width", { 37U } );
+const auto width_values_precommit = make("width", { 1U, 33U } );
 
 /** Width values to test - Nightly */
-const auto width_values_nightly = framework::dataset::make("width", { 53U, 47U } );
+const auto width_values_nightly = make("width", { 53U, 47U } );
 
 /** Height values to test - Precommit */
-const auto height_values_precommit = framework::dataset::make("height", { 19U } );
+const auto height_values_precommit = make("height", { 19U } );
 
 /** Height values to test - Nightly */
-const auto height_values_nightly = framework::dataset::make("height", { 39U, 43U } );
+const auto height_values_nightly = make("height", { 39U, 43U } );
 
 /** Channel values to test - Precommit */
-const auto channel_values_precommit = framework::dataset::make("channels", { 15U });
+const auto channel_values_precommit = make("channels", { 15U });
 
 /** Channel values to test - Nightly */
-const auto channel_values_nightly = framework::dataset::make("channels", { 33U, 19U });
+const auto channel_values_nightly = make("channels", { 33U, 19U });
+
+/** Channel values to test with cl_image support - Precommit */
+const auto channel_values_export_to_cl_image_precommit = make("channels", { 16U });
+
+/** Channel values to test with cl_image support - Nightly */
+const auto channel_values_export_to_cl_image_nightly = make("channels", { 32U });
 
 /** Batch values to test - Precommit */
-const auto batch_values_precommit = framework::dataset::make("batch", { 1U, 2U });
+const auto batch_values_precommit = make("batch", { 1U, 2U });
 
 /** Batch values to test - Nightly */
-const auto batch_values_nightly = framework::dataset::make("batch", { 1U, 3U });
+const auto batch_values_nightly = make("batch", { 3U });
 
 /** Kernel size values to test - Precommit */
-const auto kernel_sz_values_precommit = framework::dataset::make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U), Size2D(5U, 5U) });
+const auto kernel_sz_values_precommit = make("kernel_size", { Size2D(1U, 1U), Size2D(1U, 3U), Size2D(5U, 5U) });
 
 /** Kernel size values to test - Nightly */
-const auto kernel_sz_values_nightly = framework::dataset::make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) });
+const auto kernel_sz_values_nightly = make("kernel_size", { Size2D(3U, 5U), Size2D(5U, 1U), Size2D(1U, 7U), Size2D(9U, 7U) });
 
 /** Depth multiplier values to test - All */
-const auto depth_multiplier_values = framework::dataset::make("depth_multiplier", {3U});
+const auto depth_multiplier_values = make("depth_multiplier", {3U});
 
 /** Dilation values to test - All */
-const auto dilation_values = framework::dataset::make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) });
+const auto dilation_values = make("dilation", { Size2D(1U, 1U), Size2D(3U, 3U) });
 
 /** Stride values to test - All */
-const auto stride_values = framework::dataset::make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) });
+const auto stride_values = make("stride", { Size2D(1U, 1U), Size2D(3U, 2U) });
 
-/** Padding values to test - All */
-const auto padding_valid_values = framework::dataset::make("padding_valid", { true, false });
+/** Padding values to test - Precommit */
+const auto padding_valid_values = make("padding_valid", { true, false });
 
-/** Data type values to test - All */
-const auto data_type_values = framework::dataset::make("data_type", { DataType::F32, DataType::F16 });
+/** Padding values to test - Nightly */
+const auto padding_valid_values_nightly = make("padding_valid", { false });
 
 /** Data layout values to test - All */
-const auto data_layout_values = framework::dataset::make("data_layout", { DataLayout::NHWC });
+const auto data_layout_values = make("data_layout", { DataLayout::NHWC });
 
 /** N0 values to test - Precommit */
-const auto n0_values_precommit = framework::dataset::make("N0", {2, 4});
+const auto n0_values_precommit = make("N0", {2, 4});
 
 /** N0 values to test - Nightly */
-const auto n0_values_nightly = framework::dataset::make("N0", {3, 8});
+const auto n0_values_nightly = make("N0", {3, 8});
+
+/** N0 values to test with cl_image support - Precommit */
+const auto n0_values_export_to_cl_image_precommit = make("N0", {4});
+
+/** N0 values to test with cl_image support - Nightly */
+const auto n0_values_export_to_cl_image_nightly = make("N0", {8});
 
-/** Activation values to test */
-const auto act_values = framework::dataset::make("Activation",
+/** Activation values to test in precommit */
+const auto act_values = make("Activation", { ActivationLayerInfo() });
+
+const auto activations_rest = make("Activation",
 {
-    ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f, 0.5f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
 });
 
 } // namespace
@@ -129,95 +154,265 @@ TEST_SUITE(CL)
 TEST_SUITE(DepthwiseConvolutionLayerNative)
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_precommit,
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
                                                                                                 batch_values_precommit),
                                                                                                 kernel_sz_values_precommit),
-                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                make("depth_multiplier", 1)),
                                                                                                 dilation_values),
                                                                                                 stride_values),
                                                                                                 padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                n0_values_precommit))
+                                                                                                n0_values_precommit),
+                                                                                                make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_nightly,
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
                                                                                                 batch_values_nightly),
                                                                                                 kernel_sz_values_nightly),
-                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                make("depth_multiplier", 1)),
                                                                                                 dilation_values),
                                                                                                 stride_values),
-                                                                                                padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                padding_valid_values_nightly),
+                                                                                                make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
-                                                                                                act_values),
-                                                                                                n0_values_nightly))
+                                                                                                make("Activation", { ActivationLayerInfo() })),
+                                                                                                n0_values_nightly),
+                                                                                                make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
-TEST_SUITE_END() // FP32
 
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+    combine(
+        make("width", { 33U } ),
+        height_values_precommit,
+        channel_values_precommit,
+        make("batch", { 2U } ),
+        make("kernel_size", { Size2D(5U, 5U) }),
+        make("depth_multiplier", 1),
+        make("dilation", Size2D(3U, 3U)),
+        make("stride", Size2D(3U, 2U)),
+        padding_valid_values_nightly,
+        make("DataType", DataType::F32),
+        data_layout_values,
+        activations_rest,
+        n0_values_precommit,
+        make("ExportToCLImage", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_precommit,
                                                                                                 height_values_precommit),
-                                                                                                channel_values_precommit),
+                                                                                                channel_values_export_to_cl_image_precommit),
                                                                                                 batch_values_precommit),
                                                                                                 kernel_sz_values_precommit),
-                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                make("depth_multiplier", 1)),
                                                                                                 dilation_values),
                                                                                                 stride_values),
                                                                                                 padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F16)),
+                                                                                                make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                n0_values_precommit))
+                                                                                                n0_values_export_to_cl_image_precommit),
+                                                                                                make("ExportToCLImage", true)))
 {
-    // Validate output
-        validate(CLAccessor(_target), _reference, rel_tolerance_f16);
+   // Validate output
+    if(_validate_output)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
                                                                                                 width_values_nightly,
                                                                                                 height_values_nightly),
-                                                                                                channel_values_nightly),
+                                                                                                channel_values_export_to_cl_image_nightly),
                                                                                                 batch_values_nightly),
                                                                                                 kernel_sz_values_nightly),
-                                                                                                framework::dataset::make("depth_multiplier", 1)),
+                                                                                                make("depth_multiplier", 1)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values_nightly),
+                                                                                                make("DataType", DataType::F32)),
+                                                                                                data_layout_values),
+                                                                                                make("Activation", { ActivationLayerInfo() })),
+                                                                                                n0_values_export_to_cl_image_nightly),
+                                                                                                make("ExportToCLImage", true)))
+{
+   // Validate output
+    if(_validate_output)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_SUITE_END() // ExportWeightsToCLImage
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                width_values_precommit,
+                                                                                                height_values_precommit),
+                                                                                                channel_values_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                make("depth_multiplier", 1)),
                                                                                                 dilation_values),
                                                                                                 stride_values),
                                                                                                 padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F16)),
+                                                                                                make("DataType", DataType::F16)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                n0_values_nightly))
+                                                                                                n0_values_precommit),
+                                                                                                make("ExportToCLImage", false)))
+{
+    // Validate output
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 47U } ),
+                                                                                                make("height", { 39U } )),
+                                                                                                make("channels", { 19U } )),
+                                                                                                batch_values_nightly),
+                                                                                                make("kernel_size", { Size2D(5U, 5U) })),
+                                                                                                make("depth_multiplier", 1)),
+                                                                                                make("dilation", { Size2D(3U, 3U) })),
+                                                                                                make("stride", { Size2D(3U, 2U) })),
+                                                                                                padding_valid_values_nightly),
+                                                                                                make("DataType", DataType::F16)),
+                                                                                                data_layout_values),
+                                                                                                make("Activation", { ActivationLayerInfo() })),
+                                                                                                n0_values_nightly),
+                                                                                                make("ExportToCLImage", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE_NEW(RunActivations, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+    combine(
+        make("width", { 33U } ),
+        height_values_precommit,
+        channel_values_precommit,
+        make("batch", { 2U } ),
+        make("kernel_size", { Size2D(5U, 5U) }),
+        make("depth_multiplier", 4),
+        make("dilation", Size2D(3U, 3U)),
+        make("stride", Size2D(3U, 2U)),
+        padding_valid_values_nightly,
+        make("DataType", DataType::F16),
+        data_layout_values,
+        activations_rest,
+        n0_values_precommit,
+        make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
 }
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                width_values_precommit,
+                                                                                                height_values_precommit),
+                                                                                                channel_values_export_to_cl_image_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                make("depth_multiplier", 1)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values),
+                                                                                                make("DataType", DataType::F16)),
+                                                                                                data_layout_values),
+                                                                                                act_values),
+                                                                                                n0_values_export_to_cl_image_precommit),
+                                                                                                make("ExportToCLImage", true)))
+{
+   // Validate output
+    if(_validate_output)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 47U } ),
+                                                                                                make("height", { 39U } )),
+                                                                                                channel_values_export_to_cl_image_nightly),
+                                                                                                batch_values_nightly),
+                                                                                                make("kernel_size", { Size2D(5U, 5U) })),
+                                                                                                make("depth_multiplier", 1)),
+                                                                                                make("dilation", { Size2D(3U, 3U) })),
+                                                                                                make("stride", { Size2D(3U, 2U) })),
+                                                                                                padding_valid_values_nightly),
+                                                                                                make("DataType", DataType::F16)),
+                                                                                                data_layout_values),
+                                                                                                make("Activation", { ActivationLayerInfo() })),
+                                                                                                n0_values_export_to_cl_image_nightly),
+                                                                                                make("ExportToCLImage", true)))
+{
+   // Validate output
+    if(_validate_output)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // ExportWeightsToCLImage
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
+
 TEST_SUITE(DepthMultiplier)
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                                                width_values_precommit,
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 33U } ),
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
                                                                                                 batch_values_precommit),
@@ -226,18 +421,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>
                                                                                                 dilation_values),
                                                                                                 stride_values),
                                                                                                 padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                make("N0", 1)),
+                                                                                                make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                                                width_values_nightly,
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 53U } ),
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
                                                                                                 batch_values_nightly),
@@ -245,21 +441,77 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<float>
                                                                                                 depth_multiplier_values),
                                                                                                 dilation_values),
                                                                                                 stride_values),
+                                                                                                padding_valid_values_nightly),
+                                                                                                make("DataType", DataType::F32)),
+                                                                                                data_layout_values),
+                                                                                                make("Activation", { ActivationLayerInfo() })),
+                                                                                                make("N0", 1)),
+                                                                                                make("ExportToCLImage", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+TEST_SUITE(DepthMultiplierMultipleOfOutputChannels)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 33U } ),
+                                                                                                height_values_precommit),
+                                                                                                channel_values_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                make("depth_multiplier", 2)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
                                                                                                 padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                make("DataType", DataType::F32)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                make("N0", {2})),
+                                                                                                make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 33U } ),
+                                                                                                height_values_precommit),
+                                                                                                channel_values_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                make("depth_multiplier", 4)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values),
+                                                                                                make("DataType", DataType::F32)),
+                                                                                                data_layout_values),
+                                                                                                act_values),
+                                                                                                make("N0", {4})),
+                                                                                                make("ExportToCLImage", true)))
+{
+   // Validate output
+    if(_validate_output)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // ExportWeightsToCLImage
+TEST_SUITE_END() // DepthMultiplierMultipleOfOutputChannels
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                                                width_values_precommit,
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 33U } ),
                                                                                                 height_values_precommit),
                                                                                                 channel_values_precommit),
                                                                                                 batch_values_precommit),
@@ -268,18 +520,19 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>,
                                                                                                 dilation_values),
                                                                                                 stride_values),
                                                                                                 padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F16)),
+                                                                                                make("DataType", DataType::F16)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                make("N0", 1)),
+                                                                                                make("ExportToCLImage", false)))
 {
     // Validate output
         validate(CLAccessor(_target), _reference, rel_tolerance_f16);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                                                width_values_nightly,
+FIXTURE_DATA_TEST_CASE_NEW(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::NIGHTLY,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 53U } ),
                                                                                                 height_values_nightly),
                                                                                                 channel_values_nightly),
                                                                                                 batch_values_nightly),
@@ -287,15 +540,71 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerNativeFixture<half>,
                                                                                                 depth_multiplier_values),
                                                                                                 dilation_values),
                                                                                                 stride_values),
+                                                                                                padding_valid_values_nightly),
+                                                                                                make("DataType", DataType::F16)),
+                                                                                                data_layout_values),
+                                                                                                make("Activation", { ActivationLayerInfo() })),
+                                                                                                make("N0", 1)),
+                                                                                                make("ExportToCLImage", false)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+TEST_SUITE(DepthMultiplierMultipleOfOutputChannels)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 33U } ),
+                                                                                                height_values_precommit),
+                                                                                                channel_values_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                make("depth_multiplier", 2)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
                                                                                                 padding_valid_values),
-                                                                                                framework::dataset::make("DataType", DataType::F16)),
+                                                                                                make("DataType", DataType::F16)),
                                                                                                 data_layout_values),
                                                                                                 act_values),
-                                                                                                framework::dataset::make("N0", 1)))
+                                                                                                make("N0", {2})),
+                                                                                                make("ExportToCLImage", false)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
 }
+
+TEST_SUITE(ExportWeightsToCLImage)
+FIXTURE_DATA_TEST_CASE_NEW(RunSmall, CLDepthwiseConvolutionLayerNativeFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                make("width", { 33U } ),
+                                                                                                height_values_precommit),
+                                                                                                channel_values_precommit),
+                                                                                                batch_values_precommit),
+                                                                                                kernel_sz_values_precommit),
+                                                                                                make("depth_multiplier", 4)),
+                                                                                                dilation_values),
+                                                                                                stride_values),
+                                                                                                padding_valid_values),
+                                                                                                make("DataType", DataType::F16)),
+                                                                                                data_layout_values),
+                                                                                                act_values),
+                                                                                                make("N0", {4})),
+                                                                                                make("ExportToCLImage", true)))
+{
+   // Validate output
+    if(_validate_output)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // ExportWeightsToCLImage
+TEST_SUITE_END() // DepthMultiplierMultipleOfOutputChannels
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // DepthMultiplier
diff --git a/tests/validation/CL/Derivative.cpp b/tests/validation/CL/Derivative.cpp
deleted file mode 100644
index b8f6856a47..0000000000
--- a/tests/validation/CL/Derivative.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLDerivative.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/GradientDimensionDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DerivativeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Derivative)
-
-using CLDerivativeFixture = DerivativeValidationFixture<CLTensor, CLAccessor, CLDerivative, uint8_t, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                       Format::U8)),
-                                                                                               datasets::GradientDimensions()))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDerivativeFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                       Format::U8)),
-                                                                                               datasets::GradientDimensions()))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Dilate.cpp b/tests/validation/CL/Dilate.cpp
deleted file mode 100644
index c5fdb3faa7..0000000000
--- a/tests/validation/CL/Dilate.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLDilate.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/DilateFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Dilate)
-
-template <typename T>
-using CLDilateFixture = DilateValidationFixture<CLTensor, CLAccessor, CLDilate, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDilateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::U8)),
-                                                                                                    datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDilateFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::U8)),
-                                                                                                    datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/DilatedConvolutionLayer.cpp b/tests/validation/CL/DilatedConvolutionLayer.cpp
index 9a9df2c7e4..776bf34151 100644
--- a/tests/validation/CL/DilatedConvolutionLayer.cpp
+++ b/tests/validation/CL/DilatedConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -167,13 +167,18 @@ template <typename T>
 using CLGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLGEMMConvolutionLayer, T>;
 
 TEST_SUITE(Quantized)
+/// @note: Every asymmetric quantized test where there's no fused activation will have its quantization info ignored
+/// This is because instead of using the same quantization information for all the tensors, the fixture generates
+/// separate quantization info for each input and the output tensor.
+/// When we can also support dynamic quantization with the presence of activation, we can remove the explicit
+/// quantization info.
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
                        combine(combine(combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(),
                                                                framework::dataset::make("ReshapeWeights", { true })),
                                                        framework::dataset::make("DataType", DataType::QASYMM8)),
                                                framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                                       framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })),
                                framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })))
 {
     // Validate output
@@ -185,7 +190,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerQuantizedFixture<u
                                                                framework::dataset::make("ReshapeWeights", { true })),
                                                        framework::dataset::make("DataType", DataType::QASYMM8)),
                                                framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) })),
+                                       framework::dataset::make("IgnoredQuantizationInfo", { QuantizationInfo() })),
                                framework::dataset::make("ActivationLayerInfo", { ActivationLayerInfo() })))
 {
     // Validate output
diff --git a/tests/validation/CL/DirectConvolutionLayer.cpp b/tests/validation/CL/DirectConvolutionLayer.cpp
index ae2f22dd1e..ff22ae5ef0 100644
--- a/tests/validation/CL/DirectConvolutionLayer.cpp
+++ b/tests/validation/CL/DirectConvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -35,6 +35,9 @@
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/DirectConvolutionLayerFixture.h"
 
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/DirectConv2d.cpp
+ *  Please check there for any differences in the coverage
+ */
 namespace arm_compute
 {
 namespace test
@@ -43,11 +46,12 @@ namespace validation
 {
 namespace
 {
-RelativeTolerance<half>              tolerance_fp16(half(0.2));   /**< Tolerance for floating point tests */
-RelativeTolerance<float>             tolerance_fp32(0.05f);       /**< Tolerance for floating point tests */
-AbsoluteTolerance<float>             tolerance_fp32_abs(0.0003f); /**< Absolute Tolerance for floating point tests */
-constexpr float                      tolerance_num = 0.07f;       /**< Tolerance number */
-constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);        /**< Tolerance for quantized tests */
+RelativeTolerance<half>  tolerance_fp16(half(0.2));  /**< Tolerance for floating point tests */
+RelativeTolerance<float> tolerance_fp32(0.05f);      /**< Tolerance for floating point tests */
+constexpr float          abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/
+
+constexpr float                      tolerance_num = 0.07f; /**< Tolerance number */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);  /**< Tolerance for quantized tests */
 
 const auto data_strides          = combine(framework::dataset::make("StrideX", 1, 3), framework::dataset::make("StrideY", 1, 3));
 const auto data_strides_small    = combine(framework::dataset::make("StrideX", 1), framework::dataset::make("StrideY", 1));
@@ -88,55 +92,132 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
 TEST_SUITE(CL)
 TEST_SUITE(DirectConvolutionLayer)
 
+/** Check whether the configuration of a Direct Convolution layer with no
+ * bias leads to a successful execution.
+ */
+TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT)
+{
+    const auto     src_shape     = TensorShape(27U, 13U, 2U);
+    const auto     weights_shape = TensorShape(3U, 3U, 2U, 4U);
+    const auto     bias_shape    = TensorShape(4U);
+    const auto     dst_shape     = TensorShape(25U, 11U, 4U);
+    constexpr auto dt            = DataType::F32;
+
+    auto src     = create_tensor<CLTensor>(src_shape, dt);
+    auto weights = create_tensor<CLTensor>(weights_shape, dt);
+    auto dst     = create_tensor<CLTensor>(dst_shape, dt);
+
+    const auto conv_info = PadStrideInfo(1, 1, 0, 0);
+
+    // Create Direct Convolution function
+    CLDirectConvolutionLayer conv{};
+    conv.configure(&src, &weights, nullptr, &dst, conv_info);
+
+    src.allocator()->allocate();
+    weights.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    library->fill_tensor_value(CLAccessor(src), 1.f);
+    library->fill_tensor_value(CLAccessor(weights), 1.f);
+
+    conv.run();
+
+    // Compute reference to compare
+    SimpleTensor<float> ref_src{ src_shape, dt };
+    SimpleTensor<float> ref_weights{ weights_shape, dt };
+    SimpleTensor<float> ref_bias{ bias_shape, dt };
+    library->fill_tensor_value(ref_src, 1.f);
+    library->fill_tensor_value(ref_weights, 1.f);
+    // No bias
+    library->fill_tensor_value(ref_bias, 0.f);
+    auto ref_dst = reference::convolution_layer<float>(ref_src, ref_weights, ref_bias, dst_shape, conv_info);
+
+    validate(CLAccessor(dst), ref_dst);
+}
+
+/** Check whether the case of rectangle kernels i.e. when width and height of the weight_shape are not equal
+ *  would lead to successful run
+ */
+TEST_CASE(NonSquareKernel, framework::DatasetMode::PRECOMMIT)
+{
+    auto           src_shape     = TensorShape(33U, 27U, 3U);
+    auto           weights_shape = TensorShape(5U, 7U, 3U, 4U); // non-square kernel
+    const auto     bias_shape    = TensorShape(4U);
+    auto           dst_shape     = TensorShape(11U, 12U, 4U);
+    constexpr auto dt            = DataType::F32;
+
+    TensorShape src_shape_nhwc(src_shape);
+    TensorShape weights_shape_nhwc(weights_shape);
+    TensorShape dst_shape_nhwc(dst_shape);
+
+    // Non-square shapes are only allowed for NHWC
+    permute(src_shape_nhwc, PermutationVector(2U, 0U, 1U));
+    permute(weights_shape_nhwc, PermutationVector(2U, 0U, 1U));
+    permute(dst_shape_nhwc, PermutationVector(2U, 0U, 1U));
+
+    auto       src       = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC);
+    auto       weights   = create_tensor<CLTensor>(weights_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC);
+    auto       dst       = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), DataLayout::NHWC);
+    const auto conv_info = PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR);
+
+    // Create direct convolution function
+    CLDirectConvolutionLayer conv{};
+    conv.configure(&src, &weights, nullptr, &dst, conv_info);
+
+    src.allocator()->allocate();
+    weights.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    library->fill_tensor_value(CLAccessor(src), 1.f);
+    library->fill_tensor_value(CLAccessor(weights), 1.f);
+
+    conv.run();
+
+    // Compute reference to compare
+    SimpleTensor<float> ref_src{ src_shape, dt };
+    SimpleTensor<float> ref_weights{ weights_shape, dt };
+    SimpleTensor<float> ref_bias{ bias_shape, dt };
+    library->fill_tensor_value(ref_src, 1.f);
+    library->fill_tensor_value(ref_weights, 1.f);
+    // No bias
+    library->fill_tensor_value(ref_bias, 0.f);
+    auto ref_dst = reference::convolution_layer<float>(ref_src, ref_weights, ref_bias, dst_shape, conv_info);
+
+    validate(CLAccessor(dst), ref_dst);
+}
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/weights
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching input feature maps
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported kernel width
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non-rectangular weights dimensions
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching data type input/weights
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid: Mismatching input feature maps
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid weights dimensions
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid stride
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases size
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid biases dimensions
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases size
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Unsupported biases dimensions
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Invalid output size
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
                                                        TensorInfo(TensorShape(32U, 16U, 2U), 1, DataType::F32),
                                                      }),
                framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F16),
                                                         TensorInfo(TensorShape(3U, 3U, 3U, 4U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(11U, 11U, 2U, 4U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(3U, 3U, 2U, 4U, 3U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
-                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32),
                                                         TensorInfo(TensorShape(1U, 1U, 2U, 4U), 1, DataType::F32),
                                                      })),
                framework::dataset::make("BiasesInfo",{ TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(3U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(4U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(4U), 1, DataType::F32),
                                                      })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(26U, 11U, 4U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 16U, 4U), 1, DataType::F32),
                                                      })),
                framework::dataset::make("ConvInfo",  { PadStrideInfo(1, 1, 0, 0),
@@ -144,18 +225,20 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
                                                        PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(3, 3, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
-                                                       PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
                                                        PadStrideInfo(1, 1, 0, 0),
                                                       })),
                        framework::dataset::make("ActivationInfo",
 {
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
 })),
-               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, false, true })),
+               framework::dataset::make("Expected", { false, false, false, false, false, false, true })),
                input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
 {
     bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
@@ -167,7 +250,307 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
 template <typename T>
 using CLDirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
 template <typename T>
+using CLDirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T, true>;
+template <typename T>
 using CLDirectConvolutionValidationWithTensorShapesFixture = DirectConvolutionValidationWithTensorShapesFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+template <typename T>
+using CLDirectConvolutionLayerQuantizedFixture = DirectConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+template <typename T>
+using CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture = DirectConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T, true>;
+template <typename T>
+using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
+
+TEST_SUITE(NHWC)
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", {
+                                                       TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Arbitrary weight sizes for NHWC are supported
+                                                       TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Non-rectangular weights dimensions for NHWC are supported
+                                                       TensorInfo(TensorShape(2U, 27U, 13U), 1, DataType::F32, DataLayout::NHWC), // Strides > 2 for any kernel sizes for NHWC are supported
+                                                     }),
+               framework::dataset::make("WeightsInfo",{
+                                                        TensorInfo(TensorShape(2U, 13U, 13U, 4U), 1, DataType::F32, DataLayout::NHWC),
+                                                        TensorInfo(TensorShape(2U, 5U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+                                                        TensorInfo(TensorShape(2U, 3U, 3U, 4U), 1, DataType::F32, DataLayout::NHWC),
+                                                     })),
+               framework::dataset::make("BiasesInfo",{
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NHWC),
+                                                     })),
+               framework::dataset::make("OutputInfo",{
+                                                       TensorInfo(TensorShape(4U, 15U, 1U), 1, DataType::F32, DataLayout::NHWC),
+                                                       TensorInfo(TensorShape(4U, 23U, 11U), 1, DataType::F32, DataLayout::NHWC),
+                                                       TensorInfo(TensorShape(4U, 9U, 4U), 1, DataType::F32, DataLayout::NHWC),
+                                                     })),
+               framework::dataset::make("ConvInfo",  {
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(3, 3, 0, 0),
+                                                      })),
+                       framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+})),
+               framework::dataset::make("Expected", { true, true, true })),
+               input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
+{
+    bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+               framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+               framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+               framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+               framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+               framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+               framework::dataset::make("DataType",  DataType::F16)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 1 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 9 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::F16)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+               framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+               framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+               framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+               framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+               framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+               framework::dataset::make("DataType",  DataType::F32)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 2 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 3 })),
+               framework::dataset::make("KernelSize", { 3 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::F32)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 1 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 9 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::F32)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+               framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+               framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+               framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+               framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+               framework::dataset::make("NumKernels", { 7, 3, 1, 3 })),
+               framework::dataset::make("DataType",  DataType::QASYMM8)),
+               framework::dataset::make("QuantizationInfo", QuantizationInfo(1.1f / 255, 10))),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 2 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 3 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::QASYMM8)),
+               framework::dataset::make("QuantizationInfo", QuantizationInfo(1.1f / 255, 10))),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+               combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 1 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 9 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::QASYMM8)),
+               framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+               framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+               framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+               framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+               framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+               framework::dataset::make("NumKernels", { 7, 3, 1, 3 })),
+               framework::dataset::make("DataType",  DataType::QASYMM8_SIGNED)),
+               framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 1 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 3 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::QASYMM8_SIGNED)),
+               framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+               combine(combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 1 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 9 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::QASYMM8_SIGNED)),
+               framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255, 10))),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // NHWC
+
+TEST_SUITE(NCHW)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", {
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Unsupported kernel width
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW), // Non-rectangular weights dimensions are unsupported
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, DataLayout::NCHW)  // Unsupported stride
+                                                     }),
+               framework::dataset::make("WeightsInfo",{
+                                                        TensorInfo(TensorShape(11U, 11U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW),
+                                                        TensorInfo(TensorShape(5U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW),
+                                                        TensorInfo(TensorShape(3U, 3U, 2U, 4U), 1, DataType::F32, DataLayout::NCHW)
+                                                     })),
+               framework::dataset::make("BiasesInfo",{
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW),
+                                                       TensorInfo(TensorShape(4U), 1, DataType::F32, DataLayout::NCHW)
+                                                     })),
+               framework::dataset::make("OutputInfo",{
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW),
+                                                       TensorInfo(TensorShape(23U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW),
+                                                       TensorInfo(TensorShape(25U, 11U, 4U), 1, DataType::F32, DataLayout::NCHW)
+                                                     })),
+               framework::dataset::make("ConvInfo",  {
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(1, 1, 0, 0),
+                                                       PadStrideInfo(3, 3, 0, 0)
+                                                      })),
+                       framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)
+})),
+               framework::dataset::make("Expected", { false, false, false})),
+               input_info, weights_info, biases_info, output_info, conv_info, act_info, expected)
+{
+    bool is_valid = bool(CLDirectConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &biases_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info, act_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
@@ -185,58 +568,29 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<half>, framewor
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_9x9, framework::dataset::make("DataType",
-                                                                                                                    DataType::F16)),
-                                                                                                                    ActivationFunctionsDataset),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NHWC })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
-}
-FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit_9x9, framework::dataset::make("DataType",
-                                                                                                                      DataType::F16)),
-                                                                                                                      ActivationFunctionsDataset),
-                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NHWC })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
-}
 TEST_SUITE_END() // FP16
 
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit, framework::dataset::make("DataType",
                                                                                                                     DataType::F32)),
                                                                                                                     ActivationFunctionsDataset),
-                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_fp32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly, framework::dataset::make("DataType", DataType::F32)),
-                                                                                                                  ActivationFunctionsDataset),
-                                                                                                                  framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_fp32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_9x9, framework::dataset::make("DataType",
-                                                                                                                     DataType::F32)),
-                                                                                                                     ActivationFunctionsDataset),
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
-    validate(CLAccessor(_target), _reference, tolerance_fp32);
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit_9x9, framework::dataset::make("DataType",
-                                                                                                                       DataType::F32)),
-                                                                                                                       ActivationFunctionsDataset),
-                                                                                                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(data_precommit,
+                       framework::dataset::make("DataType",
+                                                DataType::F32)),
+                       ActivationFunctionsDataset),
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
-    validate(CLAccessor(_target), _reference, tolerance_fp32);
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
 }
-
-FIXTURE_DATA_TEST_CASE(RunLargeUsecase, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly_usecase, framework::dataset::make("DataType",
-                       DataType::F32)),
-                       framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })),
-                       framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(data_nightly, framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                  ActivationFunctionsDataset),
+                                                                                                                  framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.f, tolerance_fp32_abs);
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
 }
 TEST_SUITE_END() // FP32
 
@@ -246,96 +600,202 @@ FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesFixture
                        ActivationFunctionsDataset))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_fp32);
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
 }
 TEST_SUITE_END() // FP32_CustomDataset
 TEST_SUITE_END() // Float
 
-template <typename T>
-using CLDirectConvolutionLayerQuantizedFixture = DirectConvolutionValidationQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
-template <typename T>
-using CLDirectConvolutionValidationWithTensorShapesQuantizedFixture = DirectConvolutionValidationWithTensorShapesQuantizedFixture<CLTensor, CLAccessor, CLDirectConvolutionLayer, T>;
-
+/// @note: Every quantized test has a version with or without activation because the quantization info given is
+/// ignored when there is no activation. Instead of using the same quantization information for all the tensors, the
+/// fixture generates separate quantization info for each input and the output tensor.
+/// When we can also support dynamic quantization with the presence of activation, these two versions should be merged
+/// again, with the explicitly specified quantization info removed
 const auto QuantizedActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
 {
-    ActivationLayerInfo(),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)
 });
+const auto NoActivation = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo()
+});
+const auto IgnoredQuantizationInfo = framework::dataset::make("IgnoredQuantizationInfo",
+{
+    QuantizationInfo()
+});
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(data_precommit,
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8),
+                       IgnoredQuantizationInfo,
+                       NoActivation,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10) }),
+                       QuantizedActivationFunctionsDataset,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8),
+                       IgnoredQuantizationInfo,
+                       NoActivation,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit,
+                       framework::dataset::make("DataType", DataType::QASYMM8),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+                       QuantizedActivationFunctionsDataset,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit_9x9,
                        framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
-                       QuantizedActivationFunctionsDataset),
-                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                DataType::QASYMM8),
+                       IgnoredQuantizationInfo,
+                       NoActivation,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(data_precommit_9x9,
+FIXTURE_DATA_TEST_CASE(RunSmall9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(data_precommit_9x9,
                        framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) })),
-                       QuantizedActivationFunctionsDataset),
-                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                                                DataType::QASYMM8),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+                       QuantizedActivationFunctionsDataset,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(data_nightly, framework::dataset::make("DataType",
-                       DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
-                       QuantizedActivationFunctionsDataset),
-                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly, framework::dataset::make("DataType",
+                       DataType::QASYMM8),
+                       IgnoredQuantizationInfo,
+                       NoActivation,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeWithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly, framework::dataset::make("DataType",
+                       DataType::QASYMM8),
+                       framework::dataset::make("QuantizationInfoIf", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+                       QuantizedActivationFunctionsDataset,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(data_nightly_9x9,
+FIXTURE_DATA_TEST_CASE(RunLarge9x9, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly_9x9,
                        framework::dataset::make("DataType",
-                                                DataType::QASYMM8)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) })),
-                       QuantizedActivationFunctionsDataset),
+                                                DataType::QASYMM8),
+                       IgnoredQuantizationInfo,
+                       NoActivation,
                        framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-
-TEST_SUITE_END() // QASYMM8
-
-TEST_SUITE(QASYMM8_CustomDataset)
-FIXTURE_DATA_TEST_CASE(Run, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::DirectConvolutionLayerDataset(),
-                                                       framework::dataset::make("DataType", DataType::QASYMM8)),
-                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })),
-                                       QuantizedActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunLarge9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(data_nightly_9x9,
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(3.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+                       QuantizedActivationFunctionsDataset,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(CustomDataset, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::DirectConvolutionLayerDataset(),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8),
+                                               IgnoredQuantizationInfo,
+                                       NoActivation,
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-TEST_SUITE_END() // QASYMM8_CustomDataset
+FIXTURE_DATA_TEST_CASE(CustomDatasetWithActivation, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::DirectConvolutionLayerDataset(),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) }),
+                                       QuantizedActivationFunctionsDataset,
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(data_precommit, framework::dataset::make("DataType",
-                                                                                                                        DataType::QASYMM8_SIGNED)),
-                                                                                                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, -10) })),
-                                                                                                                        QuantizedActivationFunctionsDataset),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, framework::dataset::make("DataType",
+                                                                                                                        DataType::QASYMM8_SIGNED),
+                                                                                                                        IgnoredQuantizationInfo,
+                                                                                                                        NoActivation,
                                                                                                                         framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-
-FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(data_precommit_9x9,
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit, framework::dataset::make("DataType",
+                                                                                                                        DataType::QASYMM8_SIGNED),
+                                                                                                                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, -10) }),
+                                                                                                                        QuantizedActivationFunctionsDataset,
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit,
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED),
+                       IgnoredQuantizationInfo,
+                       NoActivation,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLDirectConvolutionLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit,
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.1f, -10) }),
+                       QuantizedActivationFunctionsDataset,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit_9x9,
                        framework::dataset::make("DataType",
-                                                DataType::QASYMM8_SIGNED)),
-                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) })),
-                       QuantizedActivationFunctionsDataset),
+                                                DataType::QASYMM8_SIGNED),
+                       IgnoredQuantizationInfo,
+                       NoActivation,
+                       framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall9x9WithActivation, CLDirectConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(data_precommit_9x9,
+                       framework::dataset::make("DataType",
+                                                DataType::QASYMM8_SIGNED),
+                       framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 10), QuantizationInfo(1.1f, 10) }),
+                       QuantizedActivationFunctionsDataset,
                        framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
     // Validate output
@@ -343,22 +803,33 @@ FIXTURE_DATA_TEST_CASE(RunSmall9x9, CLDirectConvolutionLayerQuantizedFixture<int
 }
 
 FIXTURE_DATA_TEST_CASE(RunCustomDataset, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::DirectConvolutionLayerDataset(),
-                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) })),
-                                       QuantizedActivationFunctionsDataset),
-                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::DirectConvolutionLayerDataset(),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
+                                               IgnoredQuantizationInfo,
+                                       NoActivation,
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 
-TEST_SUITE_END() // QASYMM8_SIGNED
+FIXTURE_DATA_TEST_CASE(RunCustomDatasetWithActivation, CLDirectConvolutionValidationWithTensorShapesQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::DirectConvolutionLayerDataset(),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
+                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), QuantizationInfo(1.1f, 10) }),
+                                       QuantizedActivationFunctionsDataset,
+                               framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
 
+TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE_END() // Quantized
-
+TEST_SUITE_END() // NCHW
 TEST_SUITE_END() // DirectConvolutionLayer
-TEST_SUITE_END() // Float
+TEST_SUITE_END() // CL
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/ElementwiseMax.cpp b/tests/validation/CL/ElementwiseMax.cpp
index b9444b2795..bd47c23256 100644
--- a/tests/validation/CL/ElementwiseMax.cpp
+++ b/tests/validation/CL/ElementwiseMax.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,7 +58,7 @@ const auto ElementwiseMaxQASYMM8SignedDataset = combine(combine(framework::datas
 const auto ElementwiseMaxQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
                                                   framework::dataset::make("DataType",
                                                                            DataType::QSYMM16));
-const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ElementwiseMaxS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
                                               framework::dataset::make("DataType", DataType::S16));
 const auto ElementwiseMaxFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
                                                framework::dataset::make("DataType", DataType::F16));
@@ -71,6 +71,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
 });
+const auto InPlaceDataSet    = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
 } // namespace
 
 TEST_SUITE(CL)
@@ -80,21 +82,18 @@ TEST_SUITE(ElementwiseMax)
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                       }),
                framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("OutputInfo",{TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, true, false, false})),
+               framework::dataset::make("Expected", { true, false, false})),
                input1_info, input2_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLElementwiseMax::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -107,7 +106,8 @@ using CLElementwiseMaxFixture = ElementwiseMaxValidationFixture<CLTensor, CLAcce
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMaxU8Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMaxU8Dataset),
+                                                                                                              OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -115,7 +115,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<uint8_t>, framework::Da
 TEST_SUITE_END()
 
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxS16Dataset),
+                                                                                                        OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -128,33 +129,36 @@ using CLElementwiseMaxQuantizedFixture = ElementwiseMaxValidationQuantizedFixtur
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                                                                                                                        ElementwiseMaxQASYMM8Dataset),
                                                                                                                        framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                                                                                                                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                                                                                                                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
 }
 TEST_SUITE_END()
 TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                                                                                                                       ElementwiseMaxQASYMM8SignedDataset),
                                                                                                                       framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                                                                                                                       framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                                                                                                                      framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                                                                                                                      framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                                                                                                                      OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END()
 TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                                                                                                                        ElementwiseMaxQSYMM16Dataset),
                                                                                                                        framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
                                                                                                                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
-                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+                                                                                                                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -167,13 +171,16 @@ using CLElementwiseMaxFloatFixture = ElementwiseMaxValidationFloatFixture<CLTens
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset), EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxFP16Dataset),
+                                                                                                                  EmptyActivationFunctionsDataset),
+                                                                                                          OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMaxFP16Dataset),
-                                                                                                                   ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMaxFP16Dataset),
+                                                                                                                   ActivationFunctionsDataset),
+                                                                                                                   OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -181,14 +188,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<half>, fr
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset),
-                                                                                                           EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMaxFP32Dataset),
+                                                                                                                   EmptyActivationFunctionsDataset),
+                                                                                                           OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMaxFP32Dataset),
-                                                                                                                    ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMaxFP32Dataset),
+                                                                                                                    ActivationFunctionsDataset),
+                                                                                                                    OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -197,16 +206,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMaxFloatFixture<float>, f
 template <typename T>
 using CLElementwiseMaxBroadcastFloatFixture = ElementwiseMaxBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseMax, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
                        ElementwiseMaxFP32Dataset),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMaxBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
                        ElementwiseMaxFP32Dataset),
-                       ActivationFunctionsDataset))
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/ElementwiseMin.cpp b/tests/validation/CL/ElementwiseMin.cpp
index 8f53b241ab..ee229a0941 100644
--- a/tests/validation/CL/ElementwiseMin.cpp
+++ b/tests/validation/CL/ElementwiseMin.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -58,7 +58,7 @@ const auto ElementwiseMinQASYMM8SignedDataset = combine(combine(framework::datas
 const auto ElementwiseMinQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
                                                   framework::dataset::make("DataType",
                                                                            DataType::QSYMM16));
-const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ElementwiseMinS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
                                               framework::dataset::make("DataType", DataType::S16));
 const auto ElementwiseMinFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
                                                framework::dataset::make("DataType", DataType::F16));
@@ -71,6 +71,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
 });
+const auto InPlaceDataSet    = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
 } // namespace
 
 TEST_SUITE(CL)
@@ -80,21 +82,18 @@ TEST_SUITE(ElementwiseMin)
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                       }),
                framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, true, false, false})),
+               framework::dataset::make("Expected", { true, false, false})),
                input1_info, input2_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLElementwiseMin::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -107,7 +106,8 @@ using CLElementwiseMinFixture = ElementwiseMinValidationFixture<CLTensor, CLAcce
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseMinU8Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseMinU8Dataset),
+                                                                                                              OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -115,7 +115,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<uint8_t>, framework::Da
 TEST_SUITE_END()
 
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseMinS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinS16Dataset),
+                                                                                                        OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -128,33 +129,36 @@ using CLElementwiseMinQuantizedFixture = ElementwiseMinValidationQuantizedFixtur
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                                                                                                                        ElementwiseMinQASYMM8Dataset),
                                                                                                                        framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                                                                                                                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                                                                                                                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
 }
 TEST_SUITE_END()
 TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                                                                                                                       ElementwiseMinQASYMM8SignedDataset),
                                                                                                                       framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                                                                                                                       framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                                                                                                                      framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                                                                                                                      framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                                                                                                                      OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END()
 TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                                                                                                                        ElementwiseMinQSYMM16Dataset),
                                                                                                                        framework::dataset::make("SrcQInfo0", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
                                                                                                                        framework::dataset::make("SrcQInfo1", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
-                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+                                                                                                                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+                                                                                                                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -167,13 +171,16 @@ using CLElementwiseMinFloatFixture = ElementwiseMinValidationFloatFixture<CLTens
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset), EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMinFP16Dataset),
+                                                                                                                  EmptyActivationFunctionsDataset),
+                                                                                                          OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMinFP16Dataset),
-                                                                                                                   ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMinFP16Dataset),
+                                                                                                                   ActivationFunctionsDataset),
+                                                                                                                   OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -181,14 +188,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<half>, fr
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset),
-                                                                                                           EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseMinFP32Dataset),
+                                                                                                                   EmptyActivationFunctionsDataset),
+                                                                                                           OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseMinFP32Dataset),
-                                                                                                                    ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseMinFP32Dataset),
+                                                                                                                    ActivationFunctionsDataset),
+                                                                                                                    OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -196,16 +205,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseMinFloatFixture<float>, f
 template <typename T>
 using CLElementwiseMinBroadcastFloatFixture = ElementwiseMinBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseMin, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
                        ElementwiseMinFP32Dataset),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseMinBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
                        ElementwiseMinFP32Dataset),
-                       ActivationFunctionsDataset))
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/ElementwisePower.cpp b/tests/validation/CL/ElementwisePower.cpp
index a2d3ba6c09..c2aeb6e045 100644
--- a/tests/validation/CL/ElementwisePower.cpp
+++ b/tests/validation/CL/ElementwisePower.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
 });
+const auto InPlaceDataSet    = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
 } // namespace
 
 TEST_SUITE(CL)
@@ -96,29 +98,33 @@ using CLElementwisePowerBroadcastFloatFixture = ElementwisePowerBroadcastValidat
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset),
-                                                                                                            EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwisePowerFP16Dataset),
+                                                                                                                    EmptyActivationFunctionsDataset),
+                                                                                                            OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwisePowerFP16Dataset),
-                                                                                                                     ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwisePowerFP16Dataset),
+                                                                                                                     ActivationFunctionsDataset),
+                                                                                                                     OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
                        ElementwisePowerFP16Dataset),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
                        ElementwisePowerFP16Dataset),
-                       ActivationFunctionsDataset))
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -126,29 +132,33 @@ FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFl
 TEST_SUITE_END() //FP16
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset),
-                                                                                                             EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwisePowerFP32Dataset),
+                                                                                                                     EmptyActivationFunctionsDataset),
+                                                                                                             OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwisePowerFP32Dataset),
-                                                                                                                      ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwisePowerFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwisePowerFP32Dataset),
+                                                                                                                      ActivationFunctionsDataset),
+                                                                                                                      OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
                        ElementwisePowerFP32Dataset),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwisePowerBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
                        ElementwisePowerFP32Dataset),
-                       ActivationFunctionsDataset))
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/ElementwiseSquaredDiff.cpp b/tests/validation/CL/ElementwiseSquaredDiff.cpp
index 0a4ab6627b..ee0279df33 100644
--- a/tests/validation/CL/ElementwiseSquaredDiff.cpp
+++ b/tests/validation/CL/ElementwiseSquaredDiff.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,7 +57,7 @@ const auto ElementwiseSquaredDiffQASYMM8Dataset = combine(combine(framework::dat
 const auto ElementwiseSquaredDiffQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
                                                           framework::dataset::make("DataType",
                                                                                    DataType::QSYMM16));
-const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto ElementwiseSquaredDiffS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
                                                       framework::dataset::make("DataType", DataType::S16));
 const auto ElementwiseSquaredDiffFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
                                                        framework::dataset::make("DataType", DataType::F16));
@@ -70,6 +70,8 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
 });
+const auto InPlaceDataSet    = framework::dataset::make("InPlace", { false, true });
+const auto OutOfPlaceDataSet = framework::dataset::make("InPlace", { false });
 } // namespace
 
 TEST_SUITE(CL)
@@ -79,21 +81,18 @@ TEST_SUITE(ElementwiseSquaredDiff)
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                       }),
                framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, true, false, false})),
+               framework::dataset::make("Expected", { true, false, false})),
                input1_info, input2_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLElementwiseSquaredDiff::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -106,7 +105,8 @@ using CLElementwiseSquaredDiffFixture = ElementwiseSquaredDiffValidationFixture<
 
 TEST_SUITE(Integer)
 TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ElementwiseSquaredDiffU8Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffU8Dataset),
+                                                                                                                      OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -114,7 +114,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<uint8_t>, frame
 TEST_SUITE_END()
 
 TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFixture<int16_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffS16Dataset),
+                                                                                                                OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -127,22 +128,24 @@ using CLElementwiseSquaredDiffQuantizedFixture = ElementwiseSquaredDiffValidatio
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                        ElementwiseSquaredDiffQASYMM8Dataset),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
-                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                       framework::dataset::make("OutQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01);
 }
 TEST_SUITE_END()
 TEST_SUITE(QSYMM16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffQuantizedFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
                        ElementwiseSquaredDiffQSYMM16Dataset),
                        framework::dataset::make("Src0QInfo", { QuantizationInfo(1.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
                        framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 32768.f, 0), QuantizationInfo(5.f / 32768.f, 0) })),
-                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })))
+                       framework::dataset::make("OutQInfo", { QuantizationInfo(5.f / 32768.f, 0) })),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qsymm16);
@@ -155,14 +158,16 @@ using CLElementwiseSquaredDiffFloatFixture = ElementwiseSquaredDiffValidationFlo
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset),
-                                                                                                                  EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP16Dataset),
+                                                                                                                  EmptyActivationFunctionsDataset),
+                                                                                                                  OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP16Dataset),
-                       ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP16Dataset),
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp16, 0.01);
@@ -170,14 +175,16 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<h
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset),
-                                                                                                                   EmptyActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ElementwiseSquaredDiffFP32Dataset),
+                                                                                                                   EmptyActivationFunctionsDataset),
+                                                                                                                   OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP32Dataset),
-                       ActivationFunctionsDataset))
+FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapes(), ElementwiseSquaredDiffFP32Dataset),
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
@@ -185,16 +192,18 @@ FIXTURE_DATA_TEST_CASE(RunWithActivation, CLElementwiseSquaredDiffFloatFixture<f
 template <typename T>
 using CLElementwiseSquaredDiffBroadcastFloatFixture = ElementwiseSquaredDiffBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLElementwiseSquaredDiff, T>;
 
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapesBroadcast(),
                        ElementwiseSquaredDiffFP32Dataset),
-                       EmptyActivationFunctionsDataset))
+                       EmptyActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
 }
-FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(datasets::TinyShapesBroadcast(),
+FIXTURE_DATA_TEST_CASE(RunWithActivationBroadcast, CLElementwiseSquaredDiffBroadcastFloatFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::TinyShapesBroadcast(),
                        ElementwiseSquaredDiffFP32Dataset),
-                       ActivationFunctionsDataset))
+                       ActivationFunctionsDataset),
+                       OutOfPlaceDataSet))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_fp32);
diff --git a/tests/validation/CL/EqualizeHistogram.cpp b/tests/validation/CL/EqualizeHistogram.cpp
deleted file mode 100644
index 3585825782..0000000000
--- a/tests/validation/CL/EqualizeHistogram.cpp
+++ /dev/null
@@ -1,63 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/runtime/CL/functions/CLEqualizeHistogram.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/EqualizeHistogramFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(EqualizeHistogram)
-
-template <typename T>
-using CLEqualizeHistogramFixture = EqualizeHistogramValidationFixture<CLTensor, CLAccessor, CLEqualizeHistogram, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                               DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLEqualizeHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                               DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Erode.cpp b/tests/validation/CL/Erode.cpp
deleted file mode 100644
index dd75b59a6b..0000000000
--- a/tests/validation/CL/Erode.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLErode.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ErodeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Erode)
-
-template <typename T>
-using CLErodeFixture = ErodeValidationFixture<CLTensor, CLAccessor, CLErode, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLErodeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                           DataType::U8)),
-                                                                                                   datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLErodeFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                           DataType::U8)),
-                                                                                                   datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/ExpLayer.cpp b/tests/validation/CL/ExpLayer.cpp
index 16e75a64b4..1797046e5d 100644
--- a/tests/validation/CL/ExpLayer.cpp
+++ b/tests/validation/CL/ExpLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/CL/CLAccessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ElementWiseUnaryFixture.h"
+#include "tests/validation/fixtures/ElementwiseUnaryFixture.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/CL/FFT.cpp b/tests/validation/CL/FFT.cpp
index 1115ddcd8b..99a83abe5c 100644
--- a/tests/validation/CL/FFT.cpp
+++ b/tests/validation/CL/FFT.cpp
@@ -64,8 +64,10 @@ const auto ActivationFunctionsSmallDataset = framework::dataset::make("Activatio
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f)
 });
 
-RelativeTolerance<float> tolerance_f32(0.1f);   /**< Relative tolerance value for FP32 */
-constexpr float          tolerance_num = 0.07f; /**< Tolerance number */
+RelativeTolerance<float> tolerance_f32(0.1f);       /**< Relative tolerance value for FP32 */
+RelativeTolerance<half>  tolerance_f16(half(0.1f)); /**< Relative tolerance value for FP16 */
+constexpr float          tolerance_num_f32 = 0.07f; /**< Tolerance number for FP32*/
+constexpr float          tolerance_num_f16 = 0.15f; /**< Tolerance number for FP32*/
 
 } // namespace
 TEST_SUITE(CL)
@@ -108,9 +110,16 @@ TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLFFT1DFixture<float>, framework::DatasetMode::ALL, combine(shapes_1d, framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num_f32);
 }
 TEST_SUITE_END() // FP32
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFFT1DFixture<half>, framework::DatasetMode::ALL, combine(shapes_1d, framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
+}
+TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 
 TEST_SUITE_END() // FFT1D
@@ -149,9 +158,16 @@ TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLFFT2DFixture<float>, framework::DatasetMode::ALL, combine(shapes_2d, framework::dataset::make("DataType", DataType::F32)))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num_f32);
 }
 TEST_SUITE_END() // FP32
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFFT2DFixture<half>, framework::DatasetMode::ALL, combine(shapes_2d, framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
+}
+TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // FFT2D
 
@@ -159,6 +175,8 @@ TEST_SUITE(FFTConvolutionLayer)
 
 template <typename T>
 using CLFFTConvolutionLayerFixture = FFTConvolutionValidationFixture<CLTensor, CLAccessor, CLFFTConvolutionLayer, T>;
+template <typename T>
+using CLFFTConvolutionLayerMixedDataLayoutFixture = FFTConvolutionValidationFixture<CLTensor, CLAccessor, CLFFTConvolutionLayer, T, true>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
@@ -168,9 +186,27 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLFFTConvolutionLayerFixture<float>, framework:
                                                                                                                  ActivationFunctionsSmallDataset))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFFTConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFFTConvolutionLayerDataset(),
+                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+                                                                                                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                 ActivationFunctionsSmallDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num_f32);
 }
 TEST_SUITE_END() // FP32
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFFTConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFFTConvolutionLayerDataset(),
+                                                                                                                        framework::dataset::make("DataType", DataType::F16)),
+                                                                                                                        framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+                                                                                                                ActivationFunctionsSmallDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
+}
+TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // FFTConvolutionLayer
 
diff --git a/tests/validation/CL/FastCorners.cpp b/tests/validation/CL/FastCorners.cpp
deleted file mode 100644
index 40a511e000..0000000000
--- a/tests/validation/CL/FastCorners.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLFastCorners.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ImageFileDatasets.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/FastCornersFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Tolerance used to compare corner strengths */
-const AbsoluteTolerance<float> tolerance(0.5f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(FastCorners)
-
-template <typename T>
-using CLFastCornersFixture = FastCornersValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLFastCorners, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFastCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallImageFiles(), framework::dataset::make("Format", Format::U8)),
-                                                                                                                 framework::dataset::make("SuppressNonMax", { false, true })),
-                                                                                                         framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
-{
-    // Validate output
-    CLArrayAccessor<KeyPoint> array(_target);
-    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFastCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeImageFiles(), framework::dataset::make("Format", Format::U8)),
-                                                                                                                 framework::dataset::make("SuppressNonMax", { false, true })),
-                                                                                                         framework::dataset::make("BorderMode", BorderMode::UNDEFINED)))
-{
-    // Validate output
-    CLArrayAccessor<KeyPoint> array(_target);
-    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), tolerance);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Floor.cpp b/tests/validation/CL/Floor.cpp
index 2961cfa3f2..a63f16b3c0 100644
--- a/tests/validation/CL/Floor.cpp
+++ b/tests/validation/CL/Floor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,18 +47,16 @@ TEST_SUITE(Floor)
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
         framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),  // Wrong data type
-                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
         }),
         framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
                                                 TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
         })),
-        framework::dataset::make("Expected", { false, false, false, false, true })),
+        framework::dataset::make("Expected", { false, false, false, true })),
         input_info, output_info, expected)
 {
     const Status status = CLFloor::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false));
diff --git a/tests/validation/CL/FullyConnectedLayer.cpp b/tests/validation/CL/FullyConnectedLayer.cpp
index 78195a556b..2f0c86499b 100644
--- a/tests/validation/CL/FullyConnectedLayer.cpp
+++ b/tests/validation/CL/FullyConnectedLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,7 @@ namespace test
 {
 namespace validation
 {
+using framework::dataset::make;
 namespace
 {
 /** Tolerance for float operations */
@@ -51,24 +52,20 @@ constexpr float                     tolerance_num = 0.07f;      /**< Tolerance n
 /** Tolerance for quantized asymmetric operations */
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
 
-/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
-{
-    DataType::F16,
-    DataType::F32,
-    DataType::QASYMM8,
-    DataType::QASYMM8_SIGNED,
-});
-
-const auto FullyConnectedParameters = combine(framework::dataset::make("TransposeWeights", { false, true }), framework::dataset::make("ReshapeWeights", { false, true }));
+const auto FullyConnectedParameters = combine(make("TransposeWeights", { false, true }), make("ReshapeWeights", { false, true }));
 
-const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+const auto QuantizationData = make("QuantizationInfo",
 {
     QuantizationInfo(1.f / 255.f, 10),
     QuantizationInfo(1.1f, 10),
 });
 
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto IgnoredQuantizationData = make("IgnoredQuantizationInfo",
+{
+    QuantizationInfo(),
+});
+
+const auto ActivationFunctionsDataset = make("ActivationInfo",
 {
     ActivationLayerInfo(),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
@@ -77,13 +74,16 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)
 });
 
-const auto ActivationFunctionsQuantizedDataset = framework::dataset::make("ActivationInfo",
+// This dataset case only runs with dynamic quantization
+const auto NoActivationFunctionsQuantizedDataset = make("ActivationInfo",
 {
-    ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f)
+    ActivationLayerInfo()
 });
+
+const auto ActivationFunctionsQuantizedDataset = concat(concat(
+                                                        make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)),
+                                                        make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f))),
+                                                        make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f)));
 } // namespace
 
 TEST_SUITE(CL)
@@ -92,33 +92,33 @@ TEST_SUITE(FullyConnectedLayer)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
-    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Mismatching data types
+    make("InputInfo", { TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Mismatching data types
                                             TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
                                             TensorInfo(TensorShape(8U, 4U, 6U, 4U), 1, DataType::F32),
                                             TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Invalid weights dimensions
                                             TensorInfo(TensorShape(9U, 5U, 7U, 3U), 1, DataType::F32),    // Wrongly reshaped weights
                                           }),
-    framework::dataset::make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
+    make("WeightsInfo",{ TensorInfo(TensorShape(315U, 271U), 1, DataType::F16),
                                              TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
                                              TensorInfo(TensorShape(192U, 192U), 1, DataType::F32),
                                              TensorInfo(TensorShape(217U, 231U), 1, DataType::F32),
                                              TensorInfo(TensorShape(217U, 315U), 1, DataType::F32),
                                           })),
-    framework::dataset::make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
+    make("BiasInfo",{ TensorInfo(TensorShape(271U), 1, DataType::F32),
                                           TensorInfo(TensorShape(192U), 1, DataType::F32),
                                           TensorInfo(TensorShape(192U), 1, DataType::F32),
                                           TensorInfo(TensorShape(271U), 1, DataType::F32),
                                           TensorInfo(TensorShape(271U), 1, DataType::F32),
                                           })),
-    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+    make("OutputInfo",{ TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
                                             TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
                                             TensorInfo(TensorShape(192U, 4U), 1, DataType::F32),
                                             TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
                                             TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
                                            })),
-    framework::dataset::make("TransposeWeights",{ true, true, false, true, true })),
-    framework::dataset::make("ReshapedWeights",{ false, false, false, false, false})),
-    framework::dataset::make("Expected", { false, true, true, false, false })),
+    make("TransposeWeights",{ true, true, false, true, true })),
+    make("ReshapedWeights",{ false, false, false, false, false})),
+    make("Expected", { false, true, true, false, false })),
     input_info, weights_info, bias_info, output_info, transpose_weights, reshaped_weights, expected)
 {
     // Create Fully Connected layer info
@@ -138,37 +138,73 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(
 
 template <typename T>
 using CLFullyConnectedLayerFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
+template <typename T>
+using CLFullyConnectedLayerMixedDataLayoutFixture = FullyConnectedLayerValidationFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, true>;
+template <typename T>
+using CLFullyConnectedLayerDynamicWeightsFixture = FullyConnectedWithDynamicWeightsFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
+template <typename T>
+using CLFullyConnectedNoBiasFixture = FullyConnectedDynamicNoBiasFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(),
-                                                                                                                        FullyConnectedParameters),
-                                                                                                                        framework::dataset::make("DataType", DataType::F16)),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+                                                                                                                        FullyConnectedParameters,
+                                                                                                                        make("DataType", DataType::F16),
                                                                                                                 ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(),
-                                                                                                                      FullyConnectedParameters),
-                                                                                                                      framework::dataset::make("DataType", DataType::F16)),
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(),
+                                                                                                                      FullyConnectedParameters,
+                                                                                                                      make("DataType", DataType::F16),
                                                                                                               ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+                       make("DataType", DataType::F16),
+                       make("ActivationInfo", ActivationLayerInfo()),
+                       make("WeightsReshaped", { false, true })))
+{
+}
 TEST_SUITE_END()
 
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                                 framework::dataset::make("DataType", DataType::F32)),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters,
+                                                                                                                 make("DataType", DataType::F32),
                                                                                                                  ActivationFunctionsDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters),
-                                                                                                                       framework::dataset::make("DataType", DataType::F32)),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(
+                           make("Input", TensorShape(9U, 5U, 7U)),
+                           make("Weights", TensorShape(315U, 271U)),
+                       make("Biases", TensorShape(271U)),
+                       make("Output", TensorShape(271U)),
+                       FullyConnectedParameters,
+                       make("DataType", DataType::F32),
+                       make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+                       make("DataType", DataType::F32),
+                       make("ActivationInfo", ActivationLayerInfo()),
+                       make("WeightsReshaped", { false, true })))
+{
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicNoBias, CLFullyConnectedNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+                       make("DataType", DataType::F32),
+                       make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) }),
+                       make("WeightsReshaped", { false })))
+{
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters,
+                                                                                                                make("DataType", DataType::F32),
                                                                                                                ActivationFunctionsDataset))
 {
     // Validate output
@@ -179,37 +215,141 @@ TEST_SUITE_END()
 
 template <typename T>
 using CLFullyConnectedLayerQuantizedFixture = FullyConnectedLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T>;
+template <typename T>
+using CLFullyConnectedLayerQuantizedMixedDataLayoutFixture = FullyConnectedLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLFullyConnectedLayer, T, true>;
 
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), QuantizationData,
                                ActivationFunctionsQuantizedDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8)), QuantizationData),
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                                                                        combine(
+                                                                           make("Input", TensorShape(9U, 5U, 7U)),
+                                                                           make("Weights", TensorShape(315U, 271U)),
+                                                                       make("Biases", TensorShape(271U)),
+                                                               make("Output", TensorShape(271U)),
+                                                       FullyConnectedParameters,
+                                               make("DataType", DataType::QASYMM8),
+                                       QuantizationData,
+                               make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeWithActivation, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                            combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), QuantizationData,
                                ActivationFunctionsQuantizedDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
+
+// Dynamic Quantization Tests
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), IgnoredQuantizationData,
+                        NoActivationFunctionsQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLFullyConnectedLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                        combine(datasets::LargeFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8), IgnoredQuantizationData,
+                               NoActivationFunctionsQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+                       make("DataType", DataType::QASYMM8),
+                        NoActivationFunctionsQuantizedDataset,
+                       make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ })))
+{
+}
+
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                                                                    combine(
+                                                                           make("Input", TensorShape(9U, 5U, 7U)),
+                                                                           make("Weights", TensorShape(315U, 271U)),
+                                                                       make("Biases", TensorShape(271U)),
+                                                               make("Output", TensorShape(271U)),
+                                                       FullyConnectedParameters,
+                                               make("DataType", DataType::QASYMM8),
+                                       IgnoredQuantizationData,
+                               NoActivationFunctionsQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
 TEST_SUITE_END() /* QASYMM8 */
 TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), QuantizationData),
+FIXTURE_DATA_TEST_CASE(RunSmallWithActivation, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8_SIGNED), QuantizationData,
                                ActivationFunctionsQuantizedDataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
-TEST_SUITE_END() /* QASYMM8_SIGNED */
-TEST_SUITE_END() /* Quantized */
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayoutWithActivation, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                                                                            combine(
+                                                                           make("Input", TensorShape(9U, 5U, 7U)),
+                                                                           make("Weights", TensorShape(315U, 271U)),
+                                                                       make("Biases", TensorShape(271U)),
+                                                               make("Output", TensorShape(271U)),
+                                                       FullyConnectedParameters,
+                                               make("DataType", DataType::QASYMM8_SIGNED),
+                                       QuantizationData,
+                               make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
 
-TEST_SUITE_END()
-TEST_SUITE_END()
+// Dynamic Quantization tests below
+FIXTURE_DATA_TEST_CASE(RunSmall, CLFullyConnectedLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                            combine(datasets::SmallFullyConnectedLayerDataset(), FullyConnectedParameters, make("DataType", DataType::QASYMM8_SIGNED), IgnoredQuantizationData,
+                               NoActivationFunctionsQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLFullyConnectedLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+                                                                    combine(
+                                                                           make("Input", TensorShape(9U, 5U, 7U)),
+                                                                           make("Weights", TensorShape(315U, 271U)),
+                                                                       make("Biases", TensorShape(271U)),
+                                                               make("Output", TensorShape(271U)),
+                                                       FullyConnectedParameters,
+                                               make("DataType", DataType::QASYMM8_SIGNED),
+                                       IgnoredQuantizationData,
+                               NoActivationFunctionsQuantizedDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunDynamicWeights, CLFullyConnectedLayerDynamicWeightsFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+                       make("DataType", DataType::QASYMM8_SIGNED),
+                       make("ActivationInfo", ActivationLayerInfo()),
+                       make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ })))
+{
+}
+FIXTURE_DATA_TEST_CASE(RunDynamicNoBias, CLFullyConnectedNoBiasFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallFullyConnectedLayerDataset(),
+                       make("DataType", DataType::QASYMM8_SIGNED),
+                       make("ActivationInfo", ActivationLayerInfo()),
+                       make("WeightsReshaped", { false /* COMPMID-6000: Support FullyConnected with quantized dynamic weights already reshaped */ })))
+{
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // FullyConnectedLayer
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/GEMM.cpp b/tests/validation/CL/GEMM.cpp
index 392eeb1510..16ca14f1d6 100644
--- a/tests/validation/CL/GEMM.cpp
+++ b/tests/validation/CL/GEMM.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -49,8 +49,7 @@ RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for compari
 constexpr float          abs_tolerance_f32(
     0.0001f);                                                 /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */
 RelativeTolerance<half_float::half> tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
-constexpr float                     tolerance_num   = 0.02f;  /**< Tolerance number */
-const auto                          data_interleave = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14);
+constexpr float                     tolerance_num = 0.02f;    /**< Tolerance number */
 
 /** CNN data types */
 const auto CNNDataTypes = framework::dataset::make("DataType",
@@ -60,11 +59,32 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
 });
 } // namespace
 
-const auto data_transpose = framework::dataset::make("M", 8, 14) * framework::dataset::make("N", 7, 14);
-
 TEST_SUITE(CL)
 TEST_SUITE(GEMM)
 
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::S32), // Unsupported data type
+                                                       TensorInfo(TensorShape(27U, 13U), 1, DataType::F32),
+                                                     }),
+               framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(8U, 27U), 1, DataType::S32),
+                                                        TensorInfo(TensorShape(8U, 27U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(8U, 13U), 1, DataType::S32),
+                                                        TensorInfo(TensorShape(8U, 13U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("Expected", { false, true })),
+               lhs_info, rhs_info, output_info, expected)
+{
+    constexpr float alpha = 1.0;
+    constexpr float beta = 0.0;
+    const auto gemm_info = GEMMInfo();
+    bool is_valid = bool(CLGEMM::validate(&lhs_info.clone()->set_is_resizable(true), &rhs_info.clone()->set_is_resizable(true), nullptr, &output_info.clone()->set_is_resizable(true), alpha, beta, gemm_info));
+    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
 template <typename T>
 using CLGEMMFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T>;
 
@@ -74,6 +94,9 @@ using CLGEMMOutput3DFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM
 template <typename T>
 using CLGEMMInputOutput3DFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T, false, true, true>;
 
+template <typename T>
+using CLBatchedMatMulFixture = GEMMValidationFixture<CLTensor, CLAccessor, CLGEMM, T, true, false, false, false, false, true>;
+
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallGEMMDataset(),
@@ -184,10 +207,32 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMOutput3DFixture<half>, framework::Dataset
     validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
 }
 TEST_SUITE_END() // FP16
-
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // OUTPUT_3D
 
+TEST_SUITE(BATCHED_MATMUL)
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchedMatMulFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
+                                                                                                                   framework::dataset::make("ReshapeWeights", { false })),
+                                                                                                           framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLBatchedMatMulFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallBatchedMatMulDataset(),
+                                                                                                                  framework::dataset::make("ReshapeWeights", { false })),
+                                                                                                          framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
+}
+TEST_SUITE_END()
+TEST_SUITE_END() // BATCHED_MATMUL
+
 TEST_SUITE_END() // GEMM
 TEST_SUITE_END() // CL
 } // namespace validation
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index 5a1971b54c..78d794a9bb 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,9 @@ namespace test
 {
 namespace validation
 {
+
+using framework::dataset::make;
+
 namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
@@ -53,6 +56,7 @@ TEST_SUITE(GEMMLowp)
 
 TEST_SUITE(MatrixMultiplyCore)
 using CLGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
+using CLGEMMLowpBatchedMatMulFixture      = GEMMLowpMatrixMultiplyCoreValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, true>;
 
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
 {
@@ -66,18 +70,74 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework:
     validate(CLAccessor(_target), _reference);
 }
 
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
+    GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, uint8_t, uint8_t, true>;
+TEST_SUITE(BatchedMatMul)
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { false })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8
+
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
+    GEMMLowpBatchedMatrixMultiplyCoreFusedOffsetOutputFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+        make("DataType", { DataType::QASYMM8_SIGNED }),
+        make("reshape_b_only_on_first_run", { false })))
+{
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE_END() // BatchedMatMul
+
 TEST_SUITE(FusedOffsetOutput)
 TEST_SUITE(QASYMM8)
 using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8 })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8 })))
+TEST_SUITE(Output3D)
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture =
+    GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // Output3D
+
+TEST_SUITE(InputOutput3D)
+using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture =
+    GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, true, true>;
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // InputOutput3D
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY,
+    combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -86,8 +146,10 @@ TEST_SUITE_END() // QASYMM8
 TEST_SUITE(QASYMM8_SIGNED)
 using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture =
     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(),
+        make("DataType", { DataType::QASYMM8_SIGNED }),
+        make("reshape_b_only_on_first_run", { false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -130,24 +192,24 @@ TEST_SUITE(QuantizeDownInt32Scale)
 
 TEST_SUITE(QASYMM8)
 
-const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
-                                                      3)
-                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_uint8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3)
+                                                      * make("min", 0) * make("max", 255) * make("addBias", { false, true });
 
-const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
-                                                           2)
-                                                           * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_uint8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2)
+                                                           * make("result_shift", 2, 3) * make("min", 0, 2) * make("max", 171, 173) * make("addBias", { false, true });
 
 using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
 TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -158,24 +220,24 @@ TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
 
-const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
-                                                     3)
-                                                     * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3)
+                                                     * make("min", -128) * make("max", 127) * make("addBias", { false, true });
 
-const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
-                                                          2)
-                                                          * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 73) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2)
+                                                          * make("result_shift", 2, 3) * make("min", -100, -98) * make("max", 71, 73) * make("addBias", { false, true });
 
 using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
 TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -185,140 +247,6 @@ TEST_SUITE_END() // BoundedReLu
 TEST_SUITE_END() // QASYMM8_SIGNED
 TEST_SUITE_END() // QuantizeDownInt32Scale
 
-TEST_SUITE(QuantizeDownInt32ScaleByFixedPoint)
-
-TEST_SUITE(QASYMM8)
-
-const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
-                                                                    2)
-                                                                    * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
-                                                                         2)
-                                                                         * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 174) * framework::dataset::make("addBias", { false, true });
-using CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture =
-    GEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
-                       quantize_down_int32_to_uint8_scale_by_fixedpoint_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(),
-                       quantize_down_int32_to_uint8_scale_by_fixedpoint_relu_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QASYMM8
-TEST_SUITE(QASYMM8_SIGNED)
-const auto quantize_down_int32_to_int8_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
-                                                                   * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1, 2)
-                                                                        * framework::dataset::make("result_offset_after_shift", 2, 3) * framework::dataset::make("min", -128, -126) * framework::dataset::make("max", 110, 112) * framework::dataset::make("addBias", { false, true });
-using CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture =
-    GEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_int8_scale_by_fixedpoint_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_int8_scale_by_fixedpoint_relu_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QASYMM8_SIGNED
-TEST_SUITE(QSYMM16)
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
-                                                                    2)
-                                                                    * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600, 254601602) * framework::dataset::make("result_shift", 1,
-                                                                         2)
-                                                                         * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases = framework::dataset::make("result_fixedpoint_multiplier", 1073741823,
-                                                                                                        1073741825)
-                                                                               * framework::dataset::make("result_shift", -3,
-                                                                                                          -2)
-                                                                               * framework::dataset::make("min", -32768) * framework::dataset::make("max", 32767) * framework::dataset::make("addBias", { false, true });
-
-const auto quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases = framework::dataset::make("result_fixedpoint_multiplier", 254601600,
-                                                                                                             254601602)
-                                                                                    * framework::dataset::make("result_shift", -3,
-                                                                                                               -1)
-                                                                                    * framework::dataset::make("min", -2, 0) * framework::dataset::make("max", 1, 3) * framework::dataset::make("addBias", { false, true });
-
-using CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture =
-    GEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointValidationFixture<CLTensor, CLAccessor, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPoint>;
-
-TEST_SUITE(NoRelu)
-TEST_SUITE(MultSmallerEq1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_int16_scale_by_fixedpoint_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultSmallerEq1
-TEST_SUITE(MultGreater1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultGreater1
-TEST_SUITE_END() // NoRelu
-TEST_SUITE(BoundedReLu)
-TEST_SUITE(MultSmallerEq1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_int16_scale_by_fixedpoint_relu_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultSmallerEq1
-TEST_SUITE(MultGreater1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ToInt16ScaleByFixedPointFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(),
-                       quantize_down_int32_to_int16_scale_by_fixedpoint_multgreat1_relu_cases))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END() // MultGreater1
-TEST_SUITE_END() // BoundedReLu
-TEST_SUITE_END() // QSYMM16
-TEST_SUITE_END() // QuantizeDownInt32ScaleByFixedPoint
-
 TEST_SUITE(QuantizeDownInt32ScaleByFloat)
 
 TEST_SUITE(QASYMM8)
@@ -326,13 +254,14 @@ using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture =
     GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, uint8_t>;
 
 FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8),
-                                                                       datasets::TinyShapes()),
-                                                               framework::dataset::make("result_real_multiplier", 0.33f)),
-                                                       framework::dataset::make("result_offset", 2, 3)),
-                                               framework::dataset::make("min", 0)),
-                                       framework::dataset::make("max", 255)),
-                               framework::dataset::make("addBias", { false, true })))
+    combine(
+        make("DataType", DataType::QASYMM8),
+        datasets::TinyShapes(),
+        make("result_real_multiplier", 0.33f),
+        make("result_offset", 2, 3),
+        make("min", 0),
+        make("max", 255),
+        make("addBias", { false, true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -343,13 +272,14 @@ TEST_SUITE(QASYMM8_SIGNED)
 using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed =
     GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, int8_t>;
 FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
-                                                                       datasets::TinyShapes()),
-                                                               framework::dataset::make("result_real_multiplier", 0.33f)),
-                                                       framework::dataset::make("result_offset", 2, 3)),
-                                               framework::dataset::make("min", -128)),
-                                       framework::dataset::make("max", 127)),
-                               framework::dataset::make("addBias", { false, true })))
+    combine(
+        make("DataType", DataType::QASYMM8_SIGNED),
+        datasets::TinyShapes(),
+        make("result_real_multiplier", 0.33f),
+        make("result_offset", 2, 3),
+        make("min", -128),
+        make("max", 127),
+        make("addBias", { false, true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -363,4 +293,4 @@ TEST_SUITE_END() // GEMMLowp
 TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
-} // namespace arm_compute
-\ No newline at end of file
+} // namespace arm_compute
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
index 1cfeac59af..d0d06a8ddb 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyNative.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,7 +23,7 @@
  */
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/framework/Asserts.h"
@@ -41,7 +41,7 @@ namespace validation
 using namespace arm_compute::misc::shape_calculator;
 
 // Create function for CLGEMMMatrixMultiplyNativeKernel
-using CLGEMMLowpMatrixMultiplyNative = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyNativeKernel>;
+using CLGEMMLowpMatrixMultiplyNative = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyNativeKernel>;
 
 // Fixture for CLGEMMLowpMatrixMultiplyNative
 using CLGEMMLowpMatrixMultiplyNativeFixture = GEMMLowpMatrixMultiplyNativeValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyNative>;
@@ -53,8 +53,15 @@ namespace
 {
 // *INDENT-OFF*
 // clang-format off
-/** M values to test */
-const auto m_values = framework::dataset::make("M", 37);
+/** M, N combinations to test
+ *  1: Special 1x1 case
+ *  2: Special multples of processor size in both dimensions
+ *  3: Non multiples of processor size in both dimensions
+*/
+const auto m_n_values = zip(
+    framework::dataset::make("M", {1, 16, 37}),
+    framework::dataset::make("N", {1, 16, 51})
+    );
 
 /** M_W values to test */
 const auto m_w_values = framework::dataset::make("M_W", 5);
@@ -81,7 +88,7 @@ const auto n0_values_precommit = framework::dataset::make("N0", { 4 });
 const auto k0_values_precommit = framework::dataset::make("K0", { 16 });
 
 /** M0 values to test - Nightly */
-const auto m0_values_nightly = framework::dataset::make("M0", 1, 2, 7);
+const auto m0_values_nightly = framework::dataset::make("M0", {1, 2, 7});
 
 /** N0 values to test - Nightly */
 const auto n0_values_nightly = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
@@ -93,8 +100,7 @@ const auto k0_values_nightly = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 1
 TEST_SUITE(CL)
 TEST_SUITE(GEMMLowpMatrixMultiplyNative)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyNativeFixture, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(m_values,
-                                                                n_values),
+                combine(combine(combine(combine(combine(m_n_values,
                                                                 k_values),
                                                                 b_values),
                                                                 m0_values_precommit),
@@ -106,8 +112,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyNativeFixture, framewor
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyNativeFixture, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(m_values,
-                                                                n_values),
+                combine(combine(combine(combine(combine(m_n_values,
                                                                 k_values),
                                                                 b_values),
                                                                 m0_values_nightly),
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
index 0c651cddc2..88455bdeb8 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshaped.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,9 +23,9 @@
  */
 #include "arm_compute/core/Types.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/framework/Asserts.h"
@@ -42,14 +42,14 @@ namespace validation
 {
 using namespace arm_compute::misc::shape_calculator;
 
-// Create function for CLGEMMReshapeLHSMatrixKernel
-using CLGEMMReshapeLHSMatrix = CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>;
+// Create function for ClGemmReshapeLhsMatrixKernel
+using CLGEMMReshapeLHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeLhsMatrixKernel>;
 
-// Create function for CLGEMMReshapeRHSMatrixKernel
-using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>;
+// Create function for ClGemmReshapeRhsMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>;
 
-// Create function for CLGEMMMatrixMultiplyReshapedKernel
-using CLGEMMLowpMatrixMultiplyReshaped = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedKernel>;
+// Create function for CLGEMMLowpMatrixMultiplyReshapedKernel
+using CLGEMMLowpMatrixMultiplyReshaped = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedKernel>;
 
 // Fixture for CLGEMMLowpMatrixMultiplyReshaped
 using CLGEMMLowpMatrixMultiplyReshapedFixture = GEMMLowpMatrixMultiplyReshapedValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshaped>;
@@ -62,8 +62,19 @@ namespace
 {
 // *INDENT-OFF*
 // clang-format off
+
+/** M, N combinations to test
+ *  1: Special 1x1 case
+ *  2: Special multples of processor size in both dimensions
+ *  3: Non multiples of processor size in both dimensions
+*/
+const auto m_n_values = zip(
+    framework::dataset::make("M", {1, 16, 37}),
+    framework::dataset::make("N", {1, 16, 51})
+    );
+
 /** M values to test */
-const auto m_values = framework::dataset::make("M", 37);
+const auto m_values = framework::dataset::make("M", {1, 37});
 
 /** M_W values to test */
 const auto m_w_values = framework::dataset::make("M_W", 5);
@@ -72,7 +83,7 @@ const auto m_w_values = framework::dataset::make("M_W", 5);
 const auto m_h_values = framework::dataset::make("M_H", 7);
 
 /** N values to test */
-const auto n_values = framework::dataset::make("N", 51);
+const auto n_values = framework::dataset::make("N", {1, 51});
 
 /** K values to test */
 const auto k_values = framework::dataset::make("K", 23);
@@ -125,9 +136,8 @@ TEST_SUITE(QUANTIZED)
 
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyReshapedFixture, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_values,
-                                                                   n_values),
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_n_values,
                                                                    k_values),
                                                                    b_values),
                                                                    m0_values_precommit_1),
@@ -205,9 +215,8 @@ TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyReshapedFixture, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_values,
-                                                                   n_values),
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_n_values,
                                                                    k_values),
                                                                    b_values),
                                                                    m0_values_precommit_2),
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
index fa256280ca..c56901effc 100644
--- a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRHS.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,8 +25,8 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
@@ -46,10 +46,10 @@ namespace validation
 using namespace arm_compute::misc::shape_calculator;
 
 // Create function for CLGEMMReshapeRHSMatrixKernel
-using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>;
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>;
 
 // Create function for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
-using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeFunction<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel>;
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>;
 
 // Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS
 using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFixture = GEMMLowpMatrixMultiplyReshapedOnlyRHSValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>;
@@ -157,7 +157,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned
 
     // Create and configure function
     CLGEMMLowpMatrixMultiplyReshapedOnlyRHS gemm;
-    gemm.configure(&lhs, &rhs_reshaped, &dst, gemm_info);
+    gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info);
 }
 } // namespace
 
diff --git a/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp
new file mode 100644
index 0000000000..a0d13c3e39
--- /dev/null
+++ b/tests/validation/CL/GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL.cpp
@@ -0,0 +1,206 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/functions/CLCast.h"
+#include "arm_compute/runtime/CL/functions/CLReductionOperation.h"
+#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/fixtures/GEMMLowpFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::opencl::kernels;
+
+// Create function for CLGEMMReshapeRHSMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<opencl::kernels::ClGemmReshapeRhsMatrixKernel>;
+
+// Create function for CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<opencl::kernels::ClGemmLowpMatrixMultiplyReshapedOnlyRhsMMULKernel>;
+
+// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture =
+    GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS>;
+
+// Fixture for CLGEMMLowpMatrixMultiplyReshapedOnlyRHS
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureSigned =
+    GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture<int8_t, CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS, CLReductionOperation, CLCast>;
+
+using CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureUnsigned =
+    GEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageValidationFixture<uint8_t, CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, CLGEMMLowpMatrixMultiplyReshapedOnlyRHS, CLReductionOperation, CLCast>;
+
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+
+/** M values to test */
+const auto m_values = framework::dataset::make("M", {16, 49});
+
+/** N values to test */
+const auto n_values = framework::dataset::make("N", {16, 259});
+
+/** K values to test */
+const auto k_values = framework::dataset::make("K", {192});
+
+/** Batch size values to test */
+const auto b_values = framework::dataset::make("batch_size", {1, 2});
+
+/** M0 values to test - Precommit */
+const auto m0 = framework::dataset::make("M0", {1, 2, 4});
+
+/** N0 values to test - Precommit */
+const auto n0 = framework::dataset::make("N0", { 1, 4, 8});
+
+/** K0 values to test - Precommit */
+const auto k0 = framework::dataset::make("K0", { 4 });
+
+/** H0 values to test - Precommit */
+const auto h0 = framework::dataset::make("H0", 1);
+
+/** Interleave values to test with RHS matrix */
+const auto i_values_rhs = framework::dataset::make("interleave_rhs", { false });
+
+/** Transpose values to test with RHS matrix */
+const auto t_values_rhs = framework::dataset::make("transpose_rhs", { true });
+
+const auto broadcast_bias = framework::dataset::make("broadcast_bias", {true, false});
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL)
+FIXTURE_DATA_TEST_CASE(Signed, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0),
+                                                                   n0),
+                                                                   k0),
+                                                                   h0),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                    framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })))
+{
+    // Validate output
+    if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+FIXTURE_DATA_TEST_CASE(Unsigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULFixture, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0),
+                                                                   n0),
+                                                                   k0),
+                                                                   h0),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                    framework::dataset::make("DataType", { DataType::QASYMM8})))
+{
+    // Validate output
+    if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+FIXTURE_DATA_TEST_CASE(OutputStageSigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureSigned, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0),
+                                                                   n0),
+                                                                   k0),
+                                                                   h0),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   broadcast_bias),
+                    framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED})))
+{
+    // Validate output
+    if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+FIXTURE_DATA_TEST_CASE(OutputStageUnsigned, CLGEMMLowpMatrixMultiplyReshapedOnlyRHSMMULOutputStageFixtureUnsigned, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0),
+                                                                   n0),
+                                                                   k0),
+                                                                   h0),
+                                                                   i_values_rhs),
+                                                                   t_values_rhs),
+                                                                   broadcast_bias),
+                    framework::dataset::make("DataType", { DataType::QASYMM8})))
+{
+    // Validate output
+    if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+    {
+        validate(CLAccessor(_target), _reference);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // GEMMLowpMatrixMultiplyReshapedOnlyRhsMMUL
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+\ No newline at end of file
diff --git a/tests/validation/CL/GEMMMatrixMultiply.cpp b/tests/validation/CL/GEMMMatrixMultiply.cpp
deleted file mode 100644
index 5d2e211d91..0000000000
--- a/tests/validation/CL/GEMMMatrixMultiply.cpp
+++ /dev/null
@@ -1,332 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/Helper.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/GEMMFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-using namespace arm_compute::misc::shape_calculator;
-
-// Create function for CLGEMMMatrixMultiplyKernel
-using CLGEMMMatrixMultiplyNative = CLSynthetizeFunction<CLGEMMMatrixMultiplyKernel>;
-
-// Fixture for GEMMMatrixMultiplyValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyNativeFixture = GEMMMatrixMultiplyValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;
-
-// Fixture for GEMMMatrixMultiply3DValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyNative3DFixture = GEMMMatrixMultiply3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMMatrixMultiplyNative>;
-
-namespace
-{
-// *INDENT-OFF*
-// clang-format off
-RelativeTolerance<float> rel_tolerance_f32(0.001f);
-constexpr float          abs_tolerance_f32(0.0001f);
-
-RelativeTolerance<half> rel_tolerance_f16(half(0.2));
-constexpr float         tolerance_num_f16 = 0.02f;
-
-/** Alpha values to test */
-const auto alpha_values = framework::dataset::make("alpha", {1.0f, -0.75f} );
-
-/** Beta values to test */
-const auto beta_values = framework::dataset::make("beta", {-0.35f, 0.0f} );
-
-/** M values to test */
-const auto m_values = framework::dataset::make("M", {37, 1});
-
-/** N values to test */
-const auto n_values = framework::dataset::make("N", {51, 1003});
-
-/** K values to test */
-const auto k_values = framework::dataset::make("K", 23);
-
-/** M_W values to test */
-const auto m_w_values = framework::dataset::make("M_W", 5);
-
-/** M_H values to test */
-const auto m_h_values = framework::dataset::make("M_H", 7);
-
-/** Batch size values to test */
-const auto b_values = framework::dataset::make("batch_size", 1, 3);
-
-/** Activation values to test */
-const auto act_values = framework::dataset::make("Activation",
-{
-    ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
-});
-
-/** Broadcast bias from vector to matrix */
-const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
-
-/** GPU architectures values to test */
-const auto gpu_arch_values = framework::dataset::make("GPUArch",
-{
-    GPUTarget::MIDGARD,
-    GPUTarget::BIFROST
-});
-
-/** Data types values to test in the configuration */
-const auto data_type_values = framework::dataset::make("DataType",
-{
-    DataType::F32,
-    DataType::F16
-});
-
-/** M values to test */
-const auto fp16_mixed_precision_values = framework::dataset::make("fp16_mixed_precision", {true, false});
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(GEMMMatrixMultiply)
-TEST_CASE(Negative, framework::DatasetMode::ALL)
-{
-    // Unsupported QASYMM8 data type
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::QASYMM8);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::QASYMM8);
-        const auto out                       = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::QASYMM8);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Unsupported SIZE_T data type
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::SIZET);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::SIZET);
-        const auto out                       = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::SIZET);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Mixed precision with F32
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info  = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = true;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Max number of dimensions LHS matrix
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 1U, 4U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Max number of dimensions RHS matrix
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 4U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 4U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(14U, 12U, 1U, 4U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Broadcast bias
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F16);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F16);
-        // The correct shape should be bias = TensorInfo(TensorShape(14U, 1U, 1U, 1U), 1, DataType::F32);
-        const auto bias                      = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F16);
-        const auto out                       = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F16);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, true);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = false;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Invalid dimensions for the bias
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
-        // The correct shape should be bias = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
-        const auto bias                      = TensorInfo(TensorShape(14U, 8U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = false;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Invalid dimensions for the output
-    {
-        const auto lhs                       = TensorInfo(TensorShape(13U, 12U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(14U, 13U, 1U, 1U), 1, DataType::F32);
-        // The correct shape should be out = TensorInfo(TensorShape(14U, 12U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(14U, 7U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = false;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(12, 14, 13, 1, 1, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, nullptr, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-}
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_values,
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   broadcast_bias_values),
-                                                                   framework::dataset::make("fp16_mixed_precision", false)),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F32)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyNative3DFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_w_values,
-                                                                   m_h_values),
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   broadcast_bias_values),
-                                                                   framework::dataset::make("fp16_mixed_precision", false)),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F32)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-TEST_SUITE_END() // FP32
-
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyNativeFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_values,
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   broadcast_bias_values),
-                                                                   fp16_mixed_precision_values),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F16)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyNative3DFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_w_values,
-                                                                   m_h_values),
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   broadcast_bias_values),
-                                                                   fp16_mixed_precision_values),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F16)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // Float
-TEST_SUITE_END() // GEMMMatrixMuliplty
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp b/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp
deleted file mode 100644
index b2701e7f6c..0000000000
--- a/tests/validation/CL/GEMMMatrixMultiplyInterleavedTransposed.cpp
+++ /dev/null
@@ -1,328 +0,0 @@
-/*
- * Copyright (c) 2019-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/Helper.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/GEMMFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-using namespace arm_compute::misc::shape_calculator;
-
-// Create function for CLGEMMReshapeLHSMatrixKernel
-using CLGEMMReshapeLHSMatrix = CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>;
-
-// Create function for CLGEMMReshapeRHSMatrixKernel
-using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>;
-
-// Create function for CLGEMMMatrixMultiplyKernel
-using CLGEMMMatrixMultiplyReshaped = CLSynthetizeFunction<CLGEMMMatrixMultiplyKernel>;
-
-// Fixture for GEMMMatrixMultiplyInterleavedTransposedValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyReshapedFixture =
-    GEMMMatrixMultiplyInterleavedTransposedValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>;
-
-// Fixture for GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture
-template <typename T>
-using CLGEMMMatrixMultiplyReshaped3DFixture =
-    GEMMMatrixMultiplyInterleavedTransposed3DValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeLHSMatrix, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshaped>;
-
-namespace
-{
-// *INDENT-OFF*
-// clang-format off
-RelativeTolerance<float> rel_tolerance_f32(0.001f);
-constexpr float          abs_tolerance_f32(0.0001f);
-
-RelativeTolerance<half> rel_tolerance_f16(half(0.2));
-constexpr float         tolerance_num_f16 = 0.02f;
-
-/** Alpha values to test */
-const auto alpha_values = framework::dataset::make("alpha", {1.0f, -0.75f} );
-
-/** Beta values to test */
-const auto beta_values = framework::dataset::make("beta", {-0.35f, 0.0f} );
-
-/** M values to test */
-const auto m_values = framework::dataset::make("M", {37, 1});
-
-/** N values to test */
-const auto n_values = framework::dataset::make("N", 51);
-
-/** K values to test */
-const auto k_values = framework::dataset::make("K", 23);
-
-/** M_W values to test */
-const auto m_w_values = framework::dataset::make("M_W", 5);
-
-/** M_H values to test */
-const auto m_h_values = framework::dataset::make("M_H", 7);
-
-/** Batch size values to test */
-const auto b_values = framework::dataset::make("batch_size", 1, 3);
-
-/** Activation values to test */
-const auto act_values = framework::dataset::make("Activation",
-{
-    ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
-});
-
-/** V0 values to test */
-const auto v0_values = framework::dataset::make("V0", 2);
-
-/** H0 values to test */
-const auto h0_values = framework::dataset::make("H0", 4);
-
-/** Broadcast bias from vector to matrix */
-const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", {false, true} );
-
-/** GPU architectures values to test */
-const auto gpu_arch_values = framework::dataset::make("GPUArch",
-{
-    GPUTarget::MIDGARD,
-    GPUTarget::BIFROST
-});
-
-/** Data types values to test in the configuration */
-const auto data_type_values = framework::dataset::make("DataType",
-{
-    DataType::F32,
-    DataType::F16
-});
-
-/** M values to test */
-const auto fp16_mixed_precision_values = framework::dataset::make("fp16_mixed_precision", {true, false});
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(GEMMMatrixMultiplyInterleavedTransposed)
-TEST_CASE(Negative, framework::DatasetMode::ALL)
-{
-    // The following tests are already integrated in the GEMMMatrixMultiply validation because
-    // in common with this validation
-    // - Unsupported QASYMM8 data type
-    // - Unsupported SIZE_T data type
-    // - Mixed precision with F32
-    // - Max number of dimensions LHS matrix
-    // - Max number of dimensions RHS matrix
-
-    // Invalid LHS dimensions
-    {
-        // The correct shape should be: lhs = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
-        const auto lhs                       = TensorInfo(TensorShape(256U, 2U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
-        const auto bias                      = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = true;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = false;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Invalid RHS dimensions
-    {
-        const auto lhs                       = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
-        // The correct shape should be rhs = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(104U, 4U, 1U, 1U), 1, DataType::F32);
-        const auto bias                      = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = true;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = false;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Broadcast bias
-    {
-        const auto lhs                       = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
-        // The correct shape should be bias = TensorInfo(TensorShape(24U, 1U, 1U, 1U), 1, DataType::F32);
-        const auto bias                      = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = true;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, true);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = false;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Invalid dimensions for the bias
-    {
-        const auto lhs                       = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
-        // The correct shape should be bias = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        const auto bias                      = TensorInfo(TensorShape(25U, 16U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = true;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = false;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-
-    // Invalid dimensions for the output
-    {
-        const auto lhs                       = TensorInfo(TensorShape(256U, 1U, 1U, 1U), 1, DataType::F32);
-        const auto rhs                       = TensorInfo(TensorShape(104U, 3U, 1U, 1U), 1, DataType::F32);
-        const auto bias                      = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        // The correct shape should be out = TensorInfo(TensorShape(24U, 16U, 1U, 1U), 1, DataType::F32);
-        const auto out                       = TensorInfo(TensorShape(24U, 13U, 1U, 1U), 1, DataType::F32);
-        constexpr float alpha                = 1.3f;
-        constexpr float beta                 = 0.7f;
-        const bool is_interleaved_transposed = true;
-        const GEMMReshapeInfo reshape_info = GEMMReshapeInfo(16, 24, 13, 2, 4, 0, false, false);
-        const GPUTarget gpu_target           = GPUTarget::MIDGARD;
-        const bool fp_mixed_precision        = false;
-        const auto status    = CLGEMMMatrixMultiplyKernel::validate(&lhs, &rhs, &bias, &out, alpha, beta, is_interleaved_transposed, reshape_info, gpu_target, fp_mixed_precision);
-        ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
-    }
-}
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_values,
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   v0_values),
-                                                                   h0_values),
-                                                                   broadcast_bias_values),
-                                                                   framework::dataset::make("fp16_mixed_precision", false)),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F32)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_w_values,
-                                                                   m_h_values),
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   v0_values),
-                                                                   h0_values),
-                                                                   broadcast_bias_values),
-                                                                   framework::dataset::make("fp16_mixed_precision", false)),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F32)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
-}
-
-TEST_SUITE_END() // FP32
-
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_values,
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   v0_values),
-                                                                   h0_values),
-                                                                   broadcast_bias_values),
-                                                                   fp16_mixed_precision_values),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F16)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL,
-                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
-                                                                   m_w_values,
-                                                                   m_h_values),
-                                                                   n_values),
-                                                                   k_values),
-                                                                   b_values),
-                                                                   alpha_values),
-                                                                   beta_values),
-                                                                   v0_values),
-                                                                   h0_values),
-                                                                   broadcast_bias_values),
-                                                                   fp16_mixed_precision_values),
-                                                                   act_values),
-                                                                   framework::dataset::make("DataType", DataType::F16)),
-                                                                   gpu_arch_values))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num_f16);
-}
-
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // Float
-TEST_SUITE_END() // GEMMMatrixMulipltyInterleavedTransposed
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
-\ No newline at end of file
diff --git a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
index ec6b87fbae..0ddf43766f 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyNative.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,7 +26,7 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h"
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
@@ -44,9 +44,10 @@ namespace test
 namespace validation
 {
 using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::opencl::kernels;
 
-// Create function for CLGEMMMatrixMultiplyNativeKernel
-using CLGEMMMatrixMultiplyNative = CLSynthetizeFunction<CLGEMMMatrixMultiplyNativeKernel>;
+// Create function for ClGemmMatrixMultiplyNativeKernel
+using CLGEMMMatrixMultiplyNative = CLSynthetizeOperator<ClGemmMatrixMultiplyNativeKernel>;
 
 // Fixture for CLGEMMMatrixMultiplyNative
 template <typename T>
@@ -90,8 +91,8 @@ const auto b_values = framework::dataset::make("batch_size", 1, 3);
 /** Activation values to test */
 const auto act_values = framework::dataset::make("Activation",
 {
-    ActivationLayerInfo(),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
 });
 
 /** M0 values to test - Precommit */
@@ -184,7 +185,7 @@ void validate_configuration(unsigned int m_value, unsigned int n_value, unsigned
 
     // Create and configure function
     CLGEMMMatrixMultiplyNative gemm;
-    gemm.configure(&lhs, &rhs, &bias, &dst, 1.0f, 1.0f, lhs_info, rhs_info, kernel_info);
+    gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), 1.0f, 1.0f, lhs_info, rhs_info, kernel_info);
 }
 } // namespace
 
@@ -322,6 +323,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyNative3DFixture<float>, f
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
 }
+
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // GEMMMatrixMulipltyNative
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
index 52afb716e4..b06e4bf213 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshaped.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,9 +26,9 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
@@ -46,15 +46,16 @@ namespace test
 namespace validation
 {
 using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::opencl::kernels;
 
-// Create function for CLGEMMReshapeLHSMatrixKernel
-using CLGEMMReshapeLHSMatrix = CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>;
+// Create function for ClGemmReshapeLhsMatrixKernel
+using CLGEMMReshapeLHSMatrix = CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>;
 
-// Create function for CLGEMMReshapeRHSMatrixKernel
-using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>;
+// Create function for ClGemmReshapeRhsMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>;
 
-// Create function for CLGEMMMatrixMultiplyReshapedKernel
-using CLGEMMMatrixMultiplyReshaped = CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedKernel>;
+// Create function for ClGemmMatrixMultiplyReshapedKernel
+using CLGEMMMatrixMultiplyReshaped = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedKernel>;
 
 // Fixture for CLGEMMMatrixMultiplyReshaped
 template <typename T>
@@ -109,6 +110,7 @@ const auto b_values = framework::dataset::make("batch_size", 2, 3);
 const auto act_values = framework::dataset::make("Activation",
 {
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 8.f, 2.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
 });
 
 /** Alpha values to test - Precommit */
@@ -327,7 +329,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
                framework::dataset::make("Expected", { true, true, false, false, false, true, true,true})),
                     input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
 {
-   ARM_COMPUTE_EXPECT(bool(CLGEMMMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
+    ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
                                                           &input1_info.clone()->set_is_resizable(true),
                                                           &input2_info.clone()->set_is_resizable(true),
                                                           &output_info.clone()->set_is_resizable(true),1.f,1.f,
@@ -335,6 +337,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
                                                           rhs_info,
                                                           gemm_info)) == expected, framework::LogLevel::ERRORS);
 }
+
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
 
@@ -360,7 +363,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<float>, fra
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, framework::DatasetMode::DISABLED,
@@ -385,7 +396,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<float>, fra
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::ALL,
@@ -410,7 +429,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>,
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>, framework::DatasetMode::DISABLED,
@@ -435,8 +462,17 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>,
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
+
 TEST_SUITE(ExportToCLImage)
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip(
                framework::dataset::make("Input0Info", { TensorInfo(TensorShape(256U, 16U, 2U), 1, DataType::F32),  // OK or incorrect if cl_khr_image2d_from_buffer not supported
@@ -559,10 +595,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
                                                       true,
                                                       true,
                                                       false,
-                                                      false})),
+                                                      true})),
                     input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
 {
-   ARM_COMPUTE_EXPECT(bool(CLGEMMMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
+   ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
                                                           &input1_info.clone()->set_is_resizable(true),
                                                           &input2_info.clone()->set_is_resizable(true),
                                                           &output_info.clone()->set_is_resizable(true),1.f,1.f,
@@ -703,6 +739,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<float>,
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
+
 TEST_SUITE_END() // ExportToCLImage
 TEST_SUITE_END() // FP32
 
@@ -730,7 +767,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedFixture<half>, fram
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, framework::DatasetMode::DISABLED,
@@ -755,7 +800,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedFixture<half>, fram
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::ALL,
@@ -780,7 +833,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>,
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>, framework::DatasetMode::DISABLED,
@@ -805,7 +866,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>,
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 TEST_SUITE(ExportToCLImage)
@@ -930,10 +999,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zi
                                                       true,
                                                       true,
                                                       false,
-                                                      false})),
+                                                      true})),
                     input0_info ,input1_info, input2_info, output_info, lhs_info, rhs_info, gemm_info, expected)
 {
-   ARM_COMPUTE_EXPECT(bool(CLGEMMMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
+   ARM_COMPUTE_EXPECT(bool(ClGemmMatrixMultiplyReshapedKernel::validate(&input0_info.clone()->set_is_resizable(true),
                                                           &input1_info.clone()->set_is_resizable(true),
                                                           &input2_info.clone()->set_is_resizable(true),
                                                           &output_info.clone()->set_is_resizable(true),1.f,1.f,
@@ -1074,6 +1143,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DFixture<half>,
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
+
 TEST_SUITE_END() // ExportToCLImage
 TEST_SUITE_END() // FP16
 
@@ -1101,7 +1171,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixtu
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED,
@@ -1126,7 +1204,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMMatrixMultiplyReshapedMixedPrecisionFixtu
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::ALL,
@@ -1151,7 +1237,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionF
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionFixture<half>, framework::DatasetMode::DISABLED,
@@ -1176,8 +1270,17 @@ FIXTURE_DATA_TEST_CASE(RunLarge3D, CLGEMMMatrixMultiplyReshaped3DMixedPrecisionF
                                                                    act_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16_mixed_precision, 0.f, abs_tolerance_f16_mixed_precision);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
 }
+
 TEST_SUITE_END() // MixedPrecision
 TEST_SUITE_END() // Float
 TEST_SUITE_END() // GEMMMatrixMultiplyReshaped
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
index 7cde3d04ca..dafc8dc5ec 100644
--- a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRHS.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -26,8 +26,8 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
@@ -45,12 +45,13 @@ namespace test
 namespace validation
 {
 using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::opencl::kernels;
 
-// Create function for CLGEMMReshapeRHSMatrixKernel
-using CLGEMMReshapeRHSMatrix = CLSynthetizeFunction<CLGEMMReshapeRHSMatrixKernel>;
+// Create function for ClGemmReshapeRhsMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>;
 
-// Create function for CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
-using CLGEMMMatrixMultiplyReshapedOnlyRHS = CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>;
+// Create function for ClGemmMatrixMultiplyReshapedOnlyRhsKernel
+using CLGEMMMatrixMultiplyReshapedOnlyRHS = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>;
 
 // Fixture for CLGEMMMatrixMultiplyReshapedOnlyRHS
 template <typename T>
@@ -97,7 +98,8 @@ const auto b_values = framework::dataset::make("batch_size", 2);
 /** Activation values to test */
 const auto act_values = framework::dataset::make("Activation",
 {
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, -0.8f, 10.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 10.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
 });
 
 /** M0 values to test - precommit */
@@ -210,6 +212,7 @@ bool validate_configuration(unsigned int m_value, unsigned int n_value, unsigned
     CLGEMMMatrixMultiplyReshapedOnlyRHS gemm;
     return bool(gemm.validate(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info));
 }
+
 } // namespace
 
 TEST_SUITE(CL)
@@ -461,6 +464,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
+
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
@@ -589,6 +593,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly3D, CLGEMMMatrixMultiplyReshapedOnlyRHS3DFixtur
         framework::ARM_COMPUTE_PRINT_INFO();
     }
 }
+
 TEST_SUITE_END() // FP16
 
 TEST_SUITE_END() // Float
diff --git a/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp
new file mode 100644
index 0000000000..3b3cf85317
--- /dev/null
+++ b/tests/validation/CL/GEMMMatrixMultiplyReshapedOnlyRhsMMUL.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/GEMMFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+using namespace arm_compute::opencl::kernels;
+
+// Create function for ClGemmReshapeRhsMatrixKernel
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperator<ClGemmReshapeRhsMatrixKernel>;
+
+// Create function for ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel
+using CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL = CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedOnlyRhsMMULKernel>;
+
+// Fixture for CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL
+template <typename T>
+using CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture = GEMMMatrixMultiplyReshapedOnlyRhsMMULValidationFixture<CLTensor, CLAccessor, T, CLGEMMReshapeRHSMatrix, CLGEMMMatrixMultiplyReshapedOnlyRhsMMUL>;
+
+namespace
+{
+// *INDENT-OFF*
+// clang-format off
+RelativeTolerance<float> rel_tolerance_f32(0.001f);
+constexpr float          abs_tolerance_f32(0.0001f);
+RelativeTolerance<half_float::half> rel_tolerance_f16(half_float::half(0.001f));
+constexpr float          abs_tolerance_f16(0.3f);
+
+/** Alpha values to test - Precommit */
+const auto a_values = framework::dataset::make("alpha", {1.0f, 0.75f} );
+
+/** Beta values to test - Precommit */
+const auto beta_values = framework::dataset::make("beta", {0.0f, -0.75f} );
+
+/** M values to test */
+const auto m_values = framework::dataset::make("M", {49});
+
+/** N values to test */
+const auto n_values = framework::dataset::make("N", {257});
+
+/** K values to test */
+/** The test case requires this to be multiple of 4*/
+const auto k_values = framework::dataset::make("K", {192});
+
+/** Batch size values to test */
+const auto b_values = framework::dataset::make("batch_size", {1, 2});
+
+/** Activation values to test */
+const auto act_values = framework::dataset::make("Activation",
+{
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+});
+
+/** M0 values to test - Precommit */
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 2, 4 });
+
+/** N0 values to test - Precommit */
+const auto n0_values_precommit = framework::dataset::make("N0", { 4, 8 });
+
+/** K0 values to test - Precommit */
+const auto k0_values_precommit = framework::dataset::make("K0", { 1 });
+
+/** Broadcast bias from vector to matrix */
+const auto broadcast_bias_values = framework::dataset::make("broadcast_bias", { false, true } );
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(GEMMMatrixMultiplyReshapedOnlyRhsMMUL)
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   framework::dataset::make("ExportToCLImage", false)),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values),
+                                                                   beta_values),
+                                                                   broadcast_bias_values),
+                                                                   act_values))
+{
+    // Validate output
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   framework::dataset::make("ExportToCLImage", false)),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                   a_values),
+                                                                   beta_values),
+                                                                   broadcast_bias_values),
+                                                                   act_values))
+{
+    // Validate output
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(ExportToCLImage)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<float>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   framework::dataset::make("ExportToCLImage", true)),
+                                                                   framework::dataset::make("DataType", DataType::F32)),
+                                                                   a_values),
+                                                                   beta_values),
+                                                                   broadcast_bias_values),
+                                                                   act_values))
+{
+    // Validate output
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMMatrixMultiplyReshapedOnlyRhsMMULFixture<half>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                   m_values,
+                                                                   n_values),
+                                                                   k_values),
+                                                                   b_values),
+                                                                   m0_values_precommit),
+                                                                   n0_values_precommit),
+                                                                   k0_values_precommit),
+                                                                   framework::dataset::make("ExportToCLImage", true)),
+                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                   a_values),
+                                                                   beta_values),
+                                                                   broadcast_bias_values),
+                                                                   act_values))
+{
+    // Validate output
+    if(validate_result)
+    {
+        validate(CLAccessor(_target), _reference, rel_tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // ExportToCLImage
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // GEMMMatrixMultiplyReshapedOnlyRhsMMUL
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
index 34c37dffde..0dd9b811f6 100644
--- a/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
+++ b/tests/validation/CL/GEMMReshapeLHSMatrix.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,7 +25,7 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
@@ -43,9 +43,10 @@ namespace test
 namespace validation
 {
 using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::opencl::kernels;
 
 // Initialize the output tensor with zero and fill the border with zero
-using CLGEMMReshapeLHSMatrix = CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder<CLGEMMReshapeLHSMatrixKernel, 16>;
+using CLGEMMReshapeLHSMatrix = CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder<ClGemmReshapeLhsMatrixKernel, 16>;
 
 template <typename T>
 using CLGEMMReshapeLHSMatrixFixture = GEMMReshapeLHSMatrixValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeLHSMatrix, T, false>;
@@ -65,8 +66,10 @@ const auto b_values = framework::dataset::make("batchsize", 1, 3);
 
 /** M0 values to test */
 const auto m0_values_s32 = framework::dataset::make("M0", { 2, 3 });
-const auto m0_values_s16 = framework::dataset::make("M0", { 4, 5 });
-const auto m0_values_s8 = framework::dataset::make("M0", { 6, 7, 8 });
+const auto m0_values_s16 = framework::dataset::make("M0", { 4 });
+const auto m0_values_s16_nt = framework::dataset::make("M0", { 5 });
+const auto m0_values_s8_nt = framework::dataset::make("M0", { 6,7 });
+const auto m0_values_s8 = framework::dataset::make("M0", { 8 });
 
 /** K0 values to test */
 const auto k0_values_s32 = framework::dataset::make("K0", { 2, 3 });
@@ -100,6 +103,7 @@ FIXTURE_DATA_TEST_CASE(S32, CLGEMMReshapeLHSMatrixFixture<int>, framework::Datas
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+
 FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::DatasetMode::ALL,
                 combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                    b_values),
@@ -113,6 +117,7 @@ FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::Dat
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+
 FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::DatasetMode::ALL,
                 combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
                                                                    b_values),
@@ -127,6 +132,37 @@ FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::Datas
     validate(CLAccessor(_target), _reference);
 }
 
+TEST_SUITE(NotTransposed)
+FIXTURE_DATA_TEST_CASE(S16, CLGEMMReshapeLHSMatrixFixture<short>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
+                                                                   b_values),
+                                                                   framework::dataset::make("DataType", DataType::S16)),
+                                                                   m0_values_s16_nt),
+                                                                   k0_values_s16),
+                                                                   v0_values),
+                                                                   i_values),
+                                                                   framework::dataset::make("transpose", { false })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(S8, CLGEMMReshapeLHSMatrixFixture<char>, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape2DShapes(),
+                                                                   b_values),
+                                                                   framework::dataset::make("DataType", DataType::S8)),
+                                                                   m0_values_s8_nt),
+                                                                   k0_values_s8),
+                                                                   v0_values),
+                                                                   i_values),
+                                                                   framework::dataset::make("transpose", { false })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END()
+
 TEST_SUITE(ReinterpretInputAs3D)
 FIXTURE_DATA_TEST_CASE(S32, CLGEMMReshapeLHSMatrix3DFixture<int>, framework::DatasetMode::ALL,
                 combine(combine(combine(combine(combine(combine(combine(datasets::SmallGEMMReshape3DShapes(),
diff --git a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
index 14048e81ec..f8462058a6 100644
--- a/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
+++ b/tests/validation/CL/GEMMReshapeRHSMatrix.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -25,7 +25,7 @@
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
+#include "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
@@ -73,9 +73,10 @@ const auto i_values = framework::dataset::make("interleave", { true, false });
 } // namespace
 
 using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::opencl::kernels;
 
 // Initialize the output tensor with zero and fill the border with zero
-using CLGEMMReshapeRHSMatrix = CLSynthetizeFunctionInitOutputWithZeroAndWithZeroConstantBorder<CLGEMMReshapeRHSMatrixKernel, 16>;
+using CLGEMMReshapeRHSMatrix = CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder<ClGemmReshapeRhsMatrixKernel, 16>;
 
 template <typename T>
 using CLGEMMReshapeRHSMatrixFixture = GEMMReshapeRHSMatrixValidationFixture<CLTensor, CLAccessor, CLGEMMReshapeRHSMatrix, T>;
@@ -117,7 +118,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
     rhs_info.transpose = true;
     rhs_info.interleave = true;
 
-    bool has_error = bool(CLGEMMReshapeRHSMatrixKernel::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), rhs_info));
+    bool has_error = bool(ClGemmReshapeRhsMatrixKernel::validate(&input_info.clone()->set_is_resizable(false), (output_info.total_size() == 0) ? nullptr : &output_info.clone()->set_is_resizable(false), rhs_info));
     ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
 }
 
@@ -158,9 +159,9 @@ DATA_TEST_CASE(ValidatePadding, framework::DatasetMode::ALL, combine(combine(com
         padding = round_up_width - output_shape[0];
     }
 
-    CLGEMMReshapeRHSMatrixKernel kernel;
+    ClGemmReshapeRhsMatrixKernel kernel;
 
-    kernel.configure(&input, &output, rhs_info);
+    kernel.configure(CLKernelLibrary::get().get_compile_context(), input.info(), output.info(), rhs_info);
 
     ARM_COMPUTE_EXPECT((output.info()->padding().right == padding), framework::LogLevel::ERRORS);
 }
diff --git a/tests/validation/CL/Gather.cpp b/tests/validation/CL/Gather.cpp
index f0b87d7d9f..7619baae1e 100644
--- a/tests/validation/CL/Gather.cpp
+++ b/tests/validation/CL/Gather.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,19 +48,21 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
         framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 27U), 1, DataType::F16),
                                                 TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),     // Invalid Indices data type
-                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),     // Invalid Indices dimensionality
-                                                TensorInfo(TensorShape(5U, 5U, 5U, 5U, 5U), 1, DataType::F32),    // Invalid Input dimensionality
-                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F16),     // Mismatching data type input/output
-                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),     // Invalid positive axis value
-                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F16),     // Invalid negative axis value
+                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),                // Invalid Output shape
+                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),                // Invalid Indices data type
+                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),                // Invalid Indices dimensionality
+                                                TensorInfo(TensorShape(5U, 5U, 5U, 5U, 5U), 1, DataType::F32),      // Invalid Input dimensionality
+                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F16),                // Mismatching data type input/output
+                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),                // Invalid positive axis value
+                                                TensorInfo(TensorShape(27U, 27U), 1, DataType::F16),                // Invalid negative axis value
         }),
         framework::dataset::make("IndicesInfo", {
                                                 TensorInfo(TensorShape(10U), 1, DataType::U32),
                                                 TensorInfo(TensorShape(10U), 1, DataType::U32),
                                                 TensorInfo(TensorShape(10U), 1, DataType::U32),
-                                                TensorInfo(TensorShape(10U), 1, DataType::U8),
                                                 TensorInfo(TensorShape(10U, 10U), 1, DataType::U32),
+                                                TensorInfo(TensorShape(10U), 1, DataType::U8),
+                                                TensorInfo(TensorShape(10U, 10U, 10U, 10U), 1, DataType::U32),
                                                 TensorInfo(TensorShape(10U), 1, DataType::U32),
                                                 TensorInfo(TensorShape(10U), 1, DataType::U32),
                                                 TensorInfo(TensorShape(10U), 1, DataType::U32),
@@ -71,7 +73,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                                 TensorInfo(TensorShape(27U, 10U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(10U, 27U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(10U, 27U), 1, DataType::F32),
-                                                TensorInfo(TensorShape(27U, 10U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(10U, 27U), 1, DataType::F32),
+                                                TensorInfo(TensorShape(27U, 10U, 10U, 10U, 10U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(10U, 5U, 5U, 5U, 5U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(27U, 10U), 1, DataType::F32),
                                                 TensorInfo(TensorShape(27U, 27U), 1, DataType::F32),
@@ -82,13 +85,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                             1,
                                             -2,
                                             0,
+                                            0,
                                             1,
                                             0,
                                             1,
                                             2,
                                             -3,
         })),
-        framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, false })),
+        framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, false, false })),
         input_info, indices_info, output_info, axis, expected)
 {
     const Status status = CLGather::validate(&input_info.clone()->set_is_resizable(true), &indices_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), axis);
@@ -111,6 +115,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(CLAccessor(_target), _reference);
 }
 
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+                       CLGatherFixture<half>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLGatherFixture<half>,
                        framework::DatasetMode::NIGHTLY,
@@ -131,6 +144,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(CLAccessor(_target), _reference);
 }
 
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+                       CLGatherFixture<float>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLGatherFixture<float>,
                        framework::DatasetMode::NIGHTLY,
@@ -152,6 +174,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(CLAccessor(_target), _reference);
 }
 
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+                       CLGatherFixture<uint8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLGatherFixture<uint8_t>,
                        framework::DatasetMode::NIGHTLY,
@@ -172,6 +203,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(CLAccessor(_target), _reference);
 }
 
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDimIndices,
+                       CLGatherFixture<uint16_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::CLSmallGatherMultiDimIndicesDataset(), framework::dataset::make("DataType", DataType::U16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLGatherFixture<uint16_t>,
                        framework::DatasetMode::NIGHTLY,
diff --git a/tests/validation/CL/Gaussian3x3.cpp b/tests/validation/CL/Gaussian3x3.cpp
deleted file mode 100644
index b7672bdac0..0000000000
--- a/tests/validation/CL/Gaussian3x3.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Gaussian3x3Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Gaussian3x3)
-
-template <typename T>
-using CLGaussian3x3Fixture = Gaussian3x3ValidationFixture<CLTensor, CLAccessor, CLGaussian3x3, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGaussian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::U8)),
-                                                                                                         datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGaussian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::U8)),
-                                                                                                         datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Gaussian5x5.cpp b/tests/validation/CL/Gaussian5x5.cpp
deleted file mode 100644
index f2a1a30f33..0000000000
--- a/tests/validation/CL/Gaussian5x5.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLGaussian5x5.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Gaussian5x5Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 5;              /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Gaussian5x5)
-
-template <typename T>
-using CLGaussian5x5Fixture = Gaussian5x5ValidationFixture<CLTensor, CLAccessor, CLGaussian5x5, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGaussian5x5Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::U8)),
-                                                                                                         datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGaussian5x5Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                                 DataType::U8)),
-                                                                                                         datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/GaussianPyramid.cpp b/tests/validation/CL/GaussianPyramid.cpp
deleted file mode 100644
index d29f6752d6..0000000000
--- a/tests/validation/CL/GaussianPyramid.cpp
+++ /dev/null
@@ -1,89 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/GaussianPyramidHalfFixture.h"
-#include "tests/validation/reference/Utils.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-const auto small_gaussian_pyramid_levels = combine(datasets::Medium2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 4);
-const auto large_gaussian_pyramid_levels = combine(datasets::Large2DShapes(), datasets::BorderModes()) * framework::dataset::make("numlevels", 2, 5);
-
-template <typename T>
-inline void validate_gaussian_pyramid(const CLPyramid &target, const std::vector<SimpleTensor<T>> &reference, BorderMode border_mode)
-{
-    ValidRegion prev_valid_region = shape_to_valid_region(reference[0].shape());
-
-    for(size_t i = 1; i < reference.size(); ++i)
-    {
-        const ValidRegion valid_region = shape_to_valid_region_gaussian_pyramid_half(reference[i - 1].shape(), prev_valid_region, (border_mode == BorderMode::UNDEFINED));
-
-        // Validate outputs
-        validate(CLAccessor(*(target.get_pyramid_level(i))), reference[i], valid_region);
-
-        // Keep the valid region for the next level
-        prev_valid_region = valid_region;
-    }
-}
-
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(GaussianPyramid)
-TEST_SUITE(Half)
-template <typename T>
-using CLGaussianPyramidHalfFixture = GaussianPyramidHalfValidationFixture<CLTensor, CLAccessor, CLGaussianPyramidHalf, T, CLPyramid>;
-
-FIXTURE_DATA_TEST_CASE(RunSmallGaussianPyramidHalf, CLGaussianPyramidHalfFixture<uint8_t>, framework::DatasetMode::NIGHTLY, small_gaussian_pyramid_levels)
-{
-    validate_gaussian_pyramid(_target, _reference, _border_mode);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLargeGaussianPyramidHalf, CLGaussianPyramidHalfFixture<uint8_t>, framework::DatasetMode::NIGHTLY, large_gaussian_pyramid_levels)
-{
-    validate_gaussian_pyramid(_target, _reference, _border_mode);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/GenerateProposalsLayer.cpp b/tests/validation/CL/GenerateProposalsLayer.cpp
index d0ceef5106..646a0e0530 100644
--- a/tests/validation/CL/GenerateProposalsLayer.cpp
+++ b/tests/validation/CL/GenerateProposalsLayer.cpp
@@ -22,12 +22,13 @@
  * SOFTWARE.
  */
 #include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/functions/CLComputeAllAnchors.h"
 #include "arm_compute/runtime/CL/functions/CLGenerateProposalsLayer.h"
 #include "arm_compute/runtime/CL/functions/CLPermute.h"
 #include "arm_compute/runtime/CL/functions/CLSlice.h"
+#include "src/core/CL/kernels/CLGenerateProposalsLayerKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/CLArrayAccessor.h"
+#include "tests/CL/Helper.h"
 #include "tests/Globals.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
@@ -43,6 +44,8 @@ namespace validation
 {
 namespace
 {
+using CLComputeAllAnchors = CLSynthetizeFunction<CLComputeAllAnchorsKernel>;
+
 template <typename U, typename T>
 inline void fill_tensor(U &&tensor, const std::vector<T> &v)
 {
diff --git a/tests/validation/CL/HOGDescriptor.cpp b/tests/validation/CL/HOGDescriptor.cpp
deleted file mode 100644
index a73e563283..0000000000
--- a/tests/validation/CL/HOGDescriptor.cpp
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLHOG.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/HOGDescriptorDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HOGDescriptorFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-RelativeTolerance<float> tolerance(0.001f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HOGDescriptor)
-
-using CLHOGDescriptorFixture = HOGDescriptorValidationFixture<CLTensor, CLHOG, CLAccessor, CLHOGDescriptor, uint8_t, float>;
-
-// *INDENT-OFF*
-// clang-format off
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHOGDescriptorFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(
-                       datasets::SmallHOGDescriptorDataset(),
-                       framework::dataset::make("Format", Format::U8)),
-                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHOGDescriptorFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(
-                       datasets::LargeHOGDescriptorDataset(),
-                       framework::dataset::make("Format", Format::U8)),
-                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/HOGDetector.cpp b/tests/validation/CL/HOGDetector.cpp
deleted file mode 100644
index 2d1904ffe8..0000000000
--- a/tests/validation/CL/HOGDetector.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDetector.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/CL/CLHOGAccessor.h"
-#include "tests/datasets/HOGDescriptorDataset.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HOGDetectorFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Set the tolerance (percentage) used when validating the score of detection window. */
-RelativeTolerance<float> tolerance(0.01f);
-
-/* Input dataset (values must be a multiple of the HOGInfo block_size) */
-const auto DetectionWindowStrideDataset = framework::dataset::make("DetectionWindowStride", { Size2D(8, 8), Size2D(16, 16) });
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HOGDetector)
-
-// *INDENT-OFF*
-// clang-format off
-using CLHOGDetectorFixture = HOGDetectorValidationFixture<CLTensor,
-                                                          CLHOG,
-                                                          CLDetectionWindowArray,
-                                                          CLHOGDescriptor,
-                                                          CLAccessor,
-                                                          CLArrayAccessor<DetectionWindow>,
-                                                          CLHOGAccessor,
-                                                          CLHOGDetector,
-                                                          uint8_t,
-                                                          float>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHOGDetectorFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       DetectionWindowStrideDataset,
-                       datasets::SmallHOGDescriptorDataset()),
-                       framework::dataset::make("Format", Format::U8)),
-                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-
-{
-    // Validate output
-    validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHOGDetectorFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       DetectionWindowStrideDataset,
-                       datasets::LargeHOGDescriptorDataset()),
-                       framework::dataset::make("Format", Format::U8)),
-                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})))
-{
-    // Validate output
-    validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/HOGMultiDetection.cpp b/tests/validation/CL/HOGMultiDetection.cpp
deleted file mode 100644
index 4ca1dab32e..0000000000
--- a/tests/validation/CL/HOGMultiDetection.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLMultiHOG.h"
-#include "arm_compute/runtime/CL/functions/CLHOGDescriptor.h"
-#include "arm_compute/runtime/CL/functions/CLHOGMultiDetection.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/CL/CLHOGAccessor.h"
-#include "tests/datasets/HOGMultiDetectionDataset.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HOGMultiDetectionFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Set the tolerance (percentage) used when validating the strength of detection window. */
-RelativeTolerance<float> tolerance(0.1f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HOGMultiDetection)
-
-// *INDENT-OFF*
-// clang-format off
-using CLHOGMultiDetectionFixture = HOGMultiDetectionValidationFixture<CLTensor,
-                                                                      CLHOG,
-                                                                      CLMultiHOG,
-                                                                      CLDetectionWindowArray,
-                                                                      CLSize2DArray,
-                                                                      CLAccessor,
-                                                                      CLArrayAccessor<Size2D>,
-                                                                      CLArrayAccessor<DetectionWindow>,
-                                                                      CLHOGAccessor,
-                                                                      CLHOGMultiDetection,
-                                                                      uint8_t,
-                                                                      float>;
-
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHOGMultiDetectionFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       datasets::SmallHOGMultiDetectionDataset(),
-                       framework::dataset::make("Format", Format::U8)),
-                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})),
-                       framework::dataset::make("NonMaximaSuppression", {false, true})))
-{
-    // Validate output
-    validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHOGMultiDetectionFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       datasets::LargeHOGMultiDetectionDataset(),
-                       framework::dataset::make("Format", Format::U8)),
-                       framework::dataset::make("BorderMode", {BorderMode::CONSTANT, BorderMode::REPLICATE})),
-                       framework::dataset::make("NonMaximaSuppression", {false, true})))
-{
-    // Validate output
-    validate_detection_windows(_target.begin(), _target.end(), _reference.begin(), _reference.end(), tolerance);
-}
-
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/HarrisCorners.cpp b/tests/validation/CL/HarrisCorners.cpp
deleted file mode 100644
index 157102f268..0000000000
--- a/tests/validation/CL/HarrisCorners.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLHarrisCorners.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ImageFileDatasets.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HarrisCornersFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-const auto data_nightly   = combine(framework::dataset::make("GradientSize", { 3, 5, 7 }), combine(framework::dataset::make("BlockSize", { 3, 5, 7 }), datasets::BorderModes()));
-const auto data_precommit = combine(framework::dataset::make("GradientSize", { 3 }), combine(framework::dataset::make("BlockSize", { 3 }), datasets::BorderModes()));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(HarrisCorners)
-
-template <typename T>
-using CLHarrisCornersFixture = HarrisCornersValidationFixture<CLTensor, CLAccessor, CLKeyPointArray, CLHarrisCorners, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallImageFiles(), data_precommit), framework::dataset::make("Format",
-                                                                                                           Format::U8)))
-{
-    // Validate output
-    CLArrayAccessor<KeyPoint> array(_target);
-    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), RelativeTolerance<float>(0.0001f));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHarrisCornersFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeImageFiles(), data_nightly), framework::dataset::make("Format",
-                                                                                                           Format::U8)))
-{
-    // Validate output
-    CLArrayAccessor<KeyPoint> array(_target);
-    validate_keypoints(array.buffer(), array.buffer() + array.num_values(), _reference.begin(), _reference.end(), RelativeTolerance<float>(0.0001f));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Histogram.cpp b/tests/validation/CL/Histogram.cpp
deleted file mode 100644
index 2619a0067c..0000000000
--- a/tests/validation/CL/Histogram.cpp
+++ /dev/null
@@ -1,68 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLDistribution1D.h"
-#include "arm_compute/runtime/CL/functions/CLHistogram.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/HistogramFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Histogram)
-
-template <typename T>
-using CLHistogramFixture = HistogramValidationFixture<CLTensor, CLAccessor, CLHistogram, T, CLDistribution1D>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                       DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLHistogramFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                       DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Im2Col.cpp b/tests/validation/CL/Im2Col.cpp
index a31aec4d0c..1f5b781690 100644
--- a/tests/validation/CL/Im2Col.cpp
+++ b/tests/validation/CL/Im2Col.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLIm2ColKernel.h"
+#include "src/gpu/cl/kernels/ClIm2ColKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/framework/Asserts.h"
@@ -40,7 +40,7 @@ namespace validation
 TEST_SUITE(CL)
 TEST_SUITE(Im2Col)
 
-using CLIm2Col = CLSynthetizeFunction<CLIm2ColKernel>;
+using ClIm2Col = ClSynthetizeOperatorWithBorder<opencl::kernels::ClIm2ColKernel>;
 
 /** Negative tests
  *
@@ -63,7 +63,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto output    = TensorInfo(TensorShape(9U, 10U, 12U, 2U), 1, DataType::F32);
         const auto conv_size = Size2D(3, 3);
         const bool has_bias  = false;
-        const auto status    = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+        const auto status    = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -73,7 +73,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto output    = TensorInfo(TensorShape(9U, 80U, 2U), 1, DataType::QASYMM8);
         const auto conv_size = Size2D(3, 3);
         const bool has_bias  = true;
-        const auto status    = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+        const auto status    = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -84,7 +84,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto conv_size = Size2D(3, 3);
         const auto dilation  = Size2D(0, 1);
         const bool has_bias  = false;
-        const auto status    = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation);
+        const auto status    = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -96,7 +96,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto         dilation   = Size2D(1, 1);
         const bool         has_bias   = false;
         const unsigned int num_groups = 2;
-        const auto         status     = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
+        const auto         status     = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -108,7 +108,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto         dilation   = Size2D(1, 1);
         const bool         has_bias   = false;
         const unsigned int num_groups = 2;
-        const auto         status     = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
+        const auto         status     = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias, dilation, num_groups);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -118,7 +118,7 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto output    = TensorInfo(TensorShape(9U, 81U, 2U), 1, DataType::F32);
         const auto conv_size = Size2D(3, 3);
         const bool has_bias  = false;
-        const auto status    = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+        const auto status    = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 
@@ -128,13 +128,13 @@ TEST_CASE(Negative, framework::DatasetMode::ALL)
         const auto output    = TensorInfo(TensorShape(1U, 1U, 1U, 2U), 1, DataType::F32, DataLayout::NHWC);
         const auto conv_size = Size2D(9, 9);
         const bool has_bias  = false;
-        const auto status    = CLIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
+        const auto status    = opencl::kernels::ClIm2ColKernel::validate(&input, &output, conv_size, PadStrideInfo(), has_bias);
         ARM_COMPUTE_EXPECT(bool(status) == false, framework::LogLevel::ERRORS);
     }
 }
 
 template <typename T>
-using CLIm2ColFixture = Im2ColValidationFixture<CLTensor, CLAccessor, CLIm2Col, T, true>;
+using ClIm2ColFixture = Im2ColOpValidationFixture<CLTensor, CLAccessor, ClIm2Col, T, true>;
 
 TEST_SUITE(NHWC)
 
@@ -150,12 +150,12 @@ TEST_SUITE(NHWC)
  *  Kernel tested im2col3x3_nhwc
  */
 FIXTURE_DATA_TEST_CASE(W3x3,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape",
 {
-    TensorShape(2U, 5U, 7U, 2U), TensorShape(3U, 4U, 6U, 2U), TensorShape(1U, 5U, 3U, 2U),
+    TensorShape(5U, 7U, 2U, 2U), TensorShape(4U, 6U, 3U, 2U), TensorShape(5U, 3U, 1U, 2U),
 }),
 framework::dataset::make("DataType", DataType::F32)),
 framework::dataset::make("Kernel", Size2D(3, 3))),
@@ -180,12 +180,12 @@ framework::dataset::make("Groups", 1)))
  *  Kernel tested im2col9x9_nhwc
  */
 FIXTURE_DATA_TEST_CASE(W9x9,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape",
 {
-    TensorShape(2U, 13U, 15U, 2U), TensorShape(3U, 15U, 12U, 2U), TensorShape(1U, 13U, 22U, 2U),
+    TensorShape(13U, 15U, 2U, 2U), TensorShape(15U, 12U, 3U, 2U), TensorShape(13U, 22U, 1U, 2U),
 }),
 framework::dataset::make("DataType", DataType::F32)),
 framework::dataset::make("Kernel", Size2D(9, 9))),
@@ -210,12 +210,12 @@ framework::dataset::make("Groups", 1)))
  *  Kernel tested im2col_generic_nhwc
  */
 FIXTURE_DATA_TEST_CASE(Generic,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape",
 {
-    TensorShape(4U, 13U, 15U, 2U), TensorShape(7U, 15U, 12U, 1U), TensorShape(1U, 5U, 3U, 1U),
+    TensorShape(13U, 15U, 4U, 2U), TensorShape(15U, 12U, 7U, 1U), TensorShape(5U, 3U, 1U, 1U),
 }),
 framework::dataset::make("DataType", DataType::F32)),
 framework::dataset::make("Kernel", Size2D(5, 3))),
@@ -243,7 +243,7 @@ TEST_SUITE(NCHW)
  *  Kernel tested im2col1x1_stridex1_nchw
  */
 FIXTURE_DATA_TEST_CASE(W1x1_Stride1_NoPad,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", { TensorShape(4U, 4U, 3U, 2U), TensorShape(5U, 4U, 3U, 2U), TensorShape(3U, 4U, 3U, 2U) }),
@@ -267,7 +267,7 @@ FIXTURE_DATA_TEST_CASE(W1x1_Stride1_NoPad,
  *  Kernel tested im2col3x3_nchw
  */
 FIXTURE_DATA_TEST_CASE(W3x3,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", TensorShape(4U, 4U, 3U, 2U)),
@@ -291,7 +291,7 @@ FIXTURE_DATA_TEST_CASE(W3x3,
  *  Kernel tested im2col5x5_nchw
  */
 FIXTURE_DATA_TEST_CASE(W5x5,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", TensorShape(7U, 4U, 3U, 2U)),
@@ -317,7 +317,7 @@ FIXTURE_DATA_TEST_CASE(W5x5,
  * Kernel tested im2col11x11_padx0_pady0_nchw
  */
 FIXTURE_DATA_TEST_CASE(W11x11_NoPad,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", { TensorShape(11U, 11U, 2U, 2U), TensorShape(14U, 13U, 1U, 2U) }),
@@ -341,7 +341,7 @@ FIXTURE_DATA_TEST_CASE(W11x11_NoPad,
  * Kernel tested im2col_generic_padx0_pady0_nchw
  */
 FIXTURE_DATA_TEST_CASE(GenericZeroPad,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", TensorShape(13U, 11U, 2U, 2U)),
@@ -367,7 +367,7 @@ TEST_SUITE_END() // NCHW
  * Kernel tested im2col_generic_(nchw|nhwc)
  */
 FIXTURE_DATA_TEST_CASE(Generic,
-                       CLIm2ColFixture<float>,
+                       ClIm2ColFixture<float>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", TensorShape(13U, 11U, 5U, 2U)),
@@ -393,7 +393,7 @@ FIXTURE_DATA_TEST_CASE(Generic,
  *  - im2col9x9_nhwc
  */
 FIXTURE_DATA_TEST_CASE(Quantized,
-                       CLIm2ColFixture<uint8_t>,
+                       ClIm2ColFixture<uint8_t>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", TensorShape(13U, 11U, 11U, 2U)),
@@ -419,7 +419,7 @@ FIXTURE_DATA_TEST_CASE(Quantized,
  *  - im2col9x9_nhwc
  */
 FIXTURE_DATA_TEST_CASE(FP16,
-                       CLIm2ColFixture<half>,
+                       ClIm2ColFixture<half>,
                        framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(combine(
                                                                    framework::dataset::make("InputShape", TensorShape(13U, 11U, 11U, 2U)),
diff --git a/tests/validation/CL/Remap.cpp b/tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp
index f73073105b..67f70685d1 100644
--- a/tests/validation/CL/Remap.cpp
+++ b/tests/validation/CL/IndirectConv2dAddressPrecalculation.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2022 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,18 +22,19 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLRemap.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "src/gpu/cl/kernels/ClIndirectConv2dAddressPrecalculationKernel.h"
 #include "tests/CL/CLAccessor.h"
+#include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/RemapFixture.h"
+#include "tests/validation/fixtures/IndirectConv2dAddressPrecalculationFixture.h"
 
 namespace arm_compute
 {
@@ -41,37 +42,48 @@ namespace test
 {
 namespace validation
 {
+using namespace arm_compute::misc::shape_calculator;
+using namespace arm_compute::opencl::kernels;
+
+using CLIndirectConv2dAddressPrecalculation = CLSynthetizeOperator<ClIndirectConv2dAddressPrecalculationKernel>;
+
+using CLIndirectConv2dAddressPrecalculationFixture = IndirectConv2dAddressPrecalculationValidationFixture<CLTensor, CLAccessor, CLIndirectConv2dAddressPrecalculation>;
+
+// *INDENT-OFF*
+// clang-format off
+/** Data types */
+
 namespace
 {
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-constexpr float                      tolerance_number = 0.2f;
+const auto src_w_values  = framework::dataset::make("src_w", {91});
+const auto src_h_values  = framework::dataset::make("src_h", {103});
+const auto src_b_values  = framework::dataset::make("src_b", {1, 2});
+const auto wei_w_values  = framework::dataset::make("wei_w", {3, 5});
+const auto wei_h_values  = framework::dataset::make("wei_h", {1, 6});
+const auto pad_values    = framework::dataset::make("pad", {1, 2, 3});
+const auto stride_values = framework::dataset::make("stride", {1, 2});
+const auto m0_values     = framework::dataset::make("M0", { 1, 2, 4, 5, 7 });
 } // namespace
 
 TEST_SUITE(CL)
-TEST_SUITE(Remap)
-template <typename T>
-using CLRemapFixture = RemapValidationFixture<CLTensor, CLAccessor, CLRemap, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                           framework::dataset::make("DataType",
-                                                                                                                   DataType::U8)),
-                                                                                                   framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
-}
+TEST_SUITE(IndirectConv2dAddressPrecalculation)
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLRemapFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                           framework::dataset::make("DataType",
-                                                                                                                   DataType::U8)),
-                                                                                                   framework::dataset::make("BorderModes", { BorderMode::UNDEFINED, BorderMode::CONSTANT })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConv2dAddressPrecalculationFixture, framework::DatasetMode::ALL,
+                combine(combine(combine(combine(combine(combine(combine(src_w_values,
+                                                                        src_h_values),
+                                                                        src_b_values),
+                                                                        wei_w_values),
+                                                                        wei_h_values),
+                                                                        pad_values),
+                                                                        stride_values),
+                                                                        m0_values))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
+    validate(CLAccessor(_target), _reference);
 }
 
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // IndirectConv2dAddressPrecalculation
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/IndirectConvolutionLayer.cpp b/tests/validation/CL/IndirectConvolutionLayer.cpp
new file mode 100644
index 0000000000..aedf070e6b
--- /dev/null
+++ b/tests/validation/CL/IndirectConvolutionLayer.cpp
@@ -0,0 +1,268 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLIndirectConvolutionLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/DirectConvolutionLayerFixture.h"
+
+// Note: Since the interface of indirect convolution is the same of direct convolution, we can reuse
+// the direct convolution fixture
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<half>  tolerance_fp16(half(0.2));  /**< Tolerance for floating point tests */
+RelativeTolerance<float> tolerance_fp32(0.05f);      /**< Tolerance for floating point tests */
+constexpr float          abs_tolerance_f32(0.0001f); /**< Absolute tolerance for FP32 tests*/
+constexpr float          tolerance_num = 0.07f;      /**< Tolerance number */
+
+/** Activation function Dataset*/
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f) });
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(IndirectConvolutionLayer)
+
+/** Check whether the configuration of a indirect convolution layer with no
+ * bias leads to a successful run.
+ */
+TEST_CASE(NoBias, framework::DatasetMode::PRECOMMIT)
+{
+    const TensorShape    src_shape_nhwc = TensorShape(8U, 27U, 13U);
+    const TensorShape    wei_shape_nhwc = TensorShape(8U, 3U, 3U, 4U);
+    const TensorShape    bia_shape      = TensorShape(4U);
+    const TensorShape    dst_shape_nhwc = TensorShape(4U, 25U, 11U);
+    constexpr DataType   dt             = DataType::F32;
+    constexpr DataLayout data_layout    = DataLayout::NHWC;
+
+    auto src_nhwc = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+    auto wei_nhwc = create_tensor<CLTensor>(wei_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+    auto dst_nhwc = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+
+    TensorShape src_shape_nchw = src_shape_nhwc;
+    TensorShape wei_shape_nchw = wei_shape_nhwc;
+    TensorShape dst_shape_nchw = dst_shape_nhwc;
+
+    permute(src_shape_nchw, PermutationVector(1U, 2U, 0U));
+    permute(wei_shape_nchw, PermutationVector(1U, 2U, 0U, 3U));
+    permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U));
+
+    const PadStrideInfo conv_info = PadStrideInfo(1, 1, 0, 0);
+
+    // Create indirect Convolution function
+    CLIndirectConvolutionLayer conv{};
+    conv.configure(&src_nhwc, &wei_nhwc, nullptr, &dst_nhwc, conv_info);
+
+    src_nhwc.allocator()->allocate();
+    wei_nhwc.allocator()->allocate();
+    dst_nhwc.allocator()->allocate();
+
+    library->fill_tensor_value(CLAccessor(src_nhwc), 1.f);
+    library->fill_tensor_value(CLAccessor(wei_nhwc), 1.f);
+
+    conv.run();
+
+    // Compute reference to compare
+    SimpleTensor<float> ref_src{ src_shape_nchw, dt };
+    SimpleTensor<float> ref_wei{ wei_shape_nchw, dt };
+    SimpleTensor<float> ref_bia{ bia_shape, dt };
+    library->fill_tensor_value(ref_src, 1.f);
+    library->fill_tensor_value(ref_wei, 1.f);
+    // No bias
+    library->fill_tensor_value(ref_bia, 0.f);
+    auto ref_dst = reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw, conv_info);
+
+    validate(CLAccessor(dst_nhwc), ref_dst);
+}
+
+/** Check whether the case of rectangle kernels i.e. when width and height of the weight_shape are not equal
+ *  would lead to successful run
+ */
+TEST_CASE(NonSquareKernel, framework::DatasetMode::PRECOMMIT)
+{
+    const TensorShape    src_shape_nhwc = TensorShape(3U, 33U, 27U);
+    const TensorShape    wei_shape_nhwc = TensorShape(3U, 5U, 7U, 4U); // non-square kernel
+    const TensorShape    bia_shape      = TensorShape(4U);
+    const TensorShape    dst_shape_nhwc = TensorShape(4U, 11U, 12U);
+    constexpr DataType   dt             = DataType::F32;
+    constexpr DataLayout data_layout    = DataLayout::NHWC;
+
+    auto src_nhwc = create_tensor<CLTensor>(src_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+    auto wei_nhwc = create_tensor<CLTensor>(wei_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+    auto dst_nhwc = create_tensor<CLTensor>(dst_shape_nhwc, dt, 1, QuantizationInfo(), data_layout);
+
+    TensorShape src_shape_nchw = src_shape_nhwc;
+    TensorShape wei_shape_nchw = wei_shape_nhwc;
+    TensorShape dst_shape_nchw = dst_shape_nhwc;
+
+    permute(src_shape_nchw, PermutationVector(1U, 2U, 0U));
+    permute(wei_shape_nchw, PermutationVector(1U, 2U, 0U, 3U));
+    permute(dst_shape_nchw, PermutationVector(1U, 2U, 0U));
+
+    const PadStrideInfo conv_info = PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR);
+
+    // Create indirect convolution function
+    CLIndirectConvolutionLayer conv{};
+    conv.configure(&src_nhwc, &wei_nhwc, nullptr, &dst_nhwc, conv_info);
+
+    src_nhwc.allocator()->allocate();
+    wei_nhwc.allocator()->allocate();
+    dst_nhwc.allocator()->allocate();
+
+    library->fill_tensor_value(CLAccessor(src_nhwc), 1.f);
+    library->fill_tensor_value(CLAccessor(wei_nhwc), 1.f);
+
+    conv.run();
+
+    // Compute reference to compare
+    SimpleTensor<float> ref_src{ src_shape_nchw, dt };
+    SimpleTensor<float> ref_wei{ wei_shape_nchw, dt };
+    SimpleTensor<float> ref_bia{ bia_shape, dt };
+    library->fill_tensor_value(ref_src, 1.f);
+    library->fill_tensor_value(ref_wei, 1.f);
+    // No bias
+    library->fill_tensor_value(ref_bia, 0.f);
+    auto ref_dst = reference::convolution_layer<float>(ref_src, ref_wei, ref_bia, dst_shape_nchw, conv_info);
+
+    validate(CLAccessor(dst_nhwc), ref_dst);
+}
+// *INDENT-OFF*
+// clang-format off
+// Note: Since the interface of indirect convolution is the same of direct convolution, we can reuse
+// the direct convolution fixture
+template <typename T>
+using CLIndirectConvolutionLayerFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLIndirectConvolutionLayer, T>;
+template <typename T>
+using CLIndirectConvolutionLayerMixedDataLayoutFixture = DirectConvolutionValidationFixture<CLTensor, CLAccessor, CLIndirectConvolutionLayer, T, true>;
+
+TEST_SUITE(NHWC)
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConvolutionLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+               framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+               framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+               framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+               framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+               framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+               framework::dataset::make("DataType",  DataType::F16)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLIndirectConvolutionLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 1 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 9 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::F16)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp16, tolerance_num);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLIndirectConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1, 3, 1, 1 })),
+               framework::dataset::make("StrideY", { 1, 3, 2, 1 })),
+               framework::dataset::make("PadX", { 1, 3, 0, 4 })),
+               framework::dataset::make("PadY", { 1, 3, 0, 4 })),
+               framework::dataset::make("KernelSize", { 3, 8, 1, 9 })),
+               framework::dataset::make("NumKernels", { 17, 3, 1, 19 })),
+               framework::dataset::make("DataType",  DataType::F32)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLIndirectConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 23U),
+                                                        TensorShape(19U, 5U, 16U, 4U),
+                                                        TensorShape(13U, 5U, 17U, 2U),
+                                                        TensorShape(32U, 37U, 13U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 2 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 3 })),
+               framework::dataset::make("KernelSize", { 3 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::F32)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLIndirectConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+               combine(combine(combine(zip(zip(zip(zip(zip(zip(
+               framework::dataset::make("InputShape", { TensorShape(800U, 800U, 3U) } ),
+               framework::dataset::make("StrideX", { 1 })),
+               framework::dataset::make("StrideY", { 1 })),
+               framework::dataset::make("PadX", { 1 })),
+               framework::dataset::make("PadY", { 1 })),
+               framework::dataset::make("KernelSize", { 9 })),
+               framework::dataset::make("NumKernels", { 3 })),
+               framework::dataset::make("DataType",  DataType::F32)),
+               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::IDENTITY) )),
+               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    validate(CLAccessor(_target), _reference, tolerance_fp32, 0.0, abs_tolerance_f32);
+}
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // NHWC
+TEST_SUITE_END() // IndirectConvolutionLayer
+TEST_SUITE_END() // CL
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/LaplacianPyramid.cpp b/tests/validation/CL/LaplacianPyramid.cpp
deleted file mode 100644
index 801115ea09..0000000000
--- a/tests/validation/CL/LaplacianPyramid.cpp
+++ /dev/null
@@ -1,114 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/LaplacianPyramidFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/* Absolute tolerance value for comparing reference's output against implementation's output for DataType::S16
- * Tolerance is needed for calculation uncertainties introduced from the layers
- */
-AbsoluteTolerance<int16_t> tolerance_int16(1);
-const auto                 small_laplacian_pyramid_levels = framework::dataset::make("NumLevels", 2, 3);
-const auto                 large_laplacian_pyramid_levels = framework::dataset::make("NumLevels", 2, 5);
-
-const auto formats = combine(framework::dataset::make("FormatIn", Format::U8), framework::dataset::make("FormatOut", Format::S16));
-
-template <typename T>
-inline void validate_laplacian_pyramid(const CLPyramid &target, const std::vector<SimpleTensor<T>> &reference, BorderMode border_mode)
-{
-    CLTensor   *level_image  = target.get_pyramid_level(0);
-    ValidRegion valid_region = shape_to_valid_region(reference[0].shape(), border_mode == BorderMode::UNDEFINED, BorderSize(2));
-
-    // Validate lowest level
-    validate(CLAccessor(*level_image), reference[0], valid_region);
-
-    // Validate remaining levels
-    for(size_t lev = 1; lev < target.info()->num_levels(); lev++)
-    {
-        level_image                = target.get_pyramid_level(lev);
-        CLTensor *prev_level_image = target.get_pyramid_level(lev - 1);
-
-        valid_region = shape_to_valid_region_laplacian_pyramid(prev_level_image->info()->tensor_shape(),
-                                                               prev_level_image->info()->valid_region(),
-                                                               border_mode == BorderMode::UNDEFINED);
-
-        // Validate level
-        validate(CLAccessor(*level_image), reference[lev], valid_region, tolerance_int16);
-    }
-}
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(LaplacianPyramid)
-
-// *INDENT-OFF*
-// clang-format off
-
-using CLLaplacianPyramidFixture = LaplacianPyramidValidationFixture<CLTensor, CLAccessor, CLLaplacianPyramid, uint8_t, int16_t, CLPyramid>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLLaplacianPyramidFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       datasets::Medium2DShapes(),
-                       datasets::BorderModes()),
-                       small_laplacian_pyramid_levels),
-                       formats))
-{
-    validate_laplacian_pyramid(_target, _reference, _border_mode);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLLaplacianPyramidFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       datasets::Large2DShapes(),
-                       datasets::BorderModes()),
-                       large_laplacian_pyramid_levels),
-                       formats))
-{
-    validate_laplacian_pyramid(_target, _reference, _border_mode);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/LaplacianReconstruct.cpp b/tests/validation/CL/LaplacianReconstruct.cpp
deleted file mode 100644
index e6e32ce910..0000000000
--- a/tests/validation/CL/LaplacianReconstruct.cpp
+++ /dev/null
@@ -1,100 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLLaplacianReconstruct.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/LaplacianReconstructFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-const auto small_laplacian_reconstruct_levels = framework::dataset::make("NumLevels", 2, 3);
-const auto large_laplacian_reconstruct_levels = framework::dataset::make("NumLevels", 2, 5);
-
-const auto formats = combine(framework::dataset::make("FormatIn", Format::S16), framework::dataset::make("FormatOut", Format::U8));
-
-template <typename T>
-void validate_laplacian_reconstruct(CLTensor &target, const SimpleTensor<T> &reference, BorderMode border_mode, size_t num_levels)
-{
-    const unsigned int filter_size = 5;
-    const unsigned int border_size(filter_size / 2);
-
-    BorderSize border(std::pow(border_size, num_levels));
-
-    // Validate output
-    ValidRegion valid_region = shape_to_valid_region(reference.shape(), border_mode == BorderMode::UNDEFINED, border);
-    validate(CLAccessor(target), reference, valid_region);
-}
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(LaplacianReconstruct)
-
-// *INDENT-OFF*
-// clang-format off
-
-using CLLaplacianReconstructFixture = LaplacianReconstructValidationFixture<CLTensor, CLAccessor, CLLaplacianReconstruct, CLLaplacianPyramid, int16_t, uint8_t, CLPyramid>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLLaplacianReconstructFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       datasets::Medium2DShapes(),
-                       datasets::BorderModes()),
-                       small_laplacian_reconstruct_levels),
-                       formats))
-{
-    validate_laplacian_reconstruct(_target, _reference, _border_mode, _pyramid_levels);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLLaplacianReconstructFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(
-                       datasets::Large2DShapes(),
-                       datasets::BorderModes()),
-                       large_laplacian_reconstruct_levels),
-                       formats))
-{
-    validate_laplacian_reconstruct(_target, _reference, _border_mode, _pyramid_levels);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/LogLayer.cpp b/tests/validation/CL/LogLayer.cpp
index 95c4f1226e..895c306841 100644
--- a/tests/validation/CL/LogLayer.cpp
+++ b/tests/validation/CL/LogLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/CL/CLAccessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ElementWiseUnaryFixture.h"
+#include "tests/validation/fixtures/ElementwiseUnaryFixture.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/CL/Magnitude.cpp b/tests/validation/CL/Magnitude.cpp
deleted file mode 100644
index bf5879b527..0000000000
--- a/tests/validation/CL/Magnitude.cpp
+++ /dev/null
@@ -1,91 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMagnitude.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/MagnitudeFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-template <typename T>
-AbsoluteTolerance<T> tolerance(MagnitudeType magnitude_type)
-{
-    return AbsoluteTolerance<T>((MagnitudeType::L1NORM == magnitude_type) ? 0 : 1);
-}
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Magnitude)
-
-template <typename T>
-using CLMagnitudeFixture = MagnitudeValidationFixture<CLTensor, CLAccessor, CLMagnitude, T>;
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMagnitudeFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S16)),
-                                                                                                       framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMagnitudeFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
-                                                                                                       framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance<int16_t>(_magnitude_type));
-}
-TEST_SUITE_END() // S16
-
-TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMagnitudeFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S32)),
-                                                                                                       framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance<int32_t>(_magnitude_type));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMagnitudeFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S32)),
-                                                                                                       framework::dataset::make("MagnitudeType", { MagnitudeType::L1NORM, MagnitudeType::L2NORM })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance<int32_t>(_magnitude_type));
-}
-TEST_SUITE_END() // S32
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/MatMul.cpp b/tests/validation/CL/MatMul.cpp
new file mode 100644
index 0000000000..844597f3e9
--- /dev/null
+++ b/tests/validation/CL/MatMul.cpp
@@ -0,0 +1,232 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLMatMul.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
+#include "tests/framework/DatasetModes.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/TestCase.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/validation/fixtures/MatMulFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */
+constexpr float          abs_tolerance_f32(
+    0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for fp32 data type in case using relative tolerance fails because of small values */
+constexpr float abs_tolerance_f16(
+    0.001f);                                                    /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16  data type in case using relative tolerance fails because of small values */
+RelativeTolerance<half_float::half>  tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */
+constexpr AbsoluteTolerance<uint8_t> tolerance_quant(1);        /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+} // namespace
+
+template <typename T>
+using CLMatMulFixture = MatMulValidationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLQuantizedMatMulFixture = QuantizedMatMulValidationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLMatMulActivationFixture = MatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLMatMulActivationAlphaBetaFixture = MatMulValidationWithActivationAlphaBetaFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+template <typename T>
+using CLQuantizedMatMulActivationFixture = QuantizedMatMulValidationWithActivationFixture<CLTensor, CLAccessor, CLMatMul, GpuMatMulSettings, T>;
+
+/* The main act functions matmul (float) is expected to support */
+const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+{
+    ActivationLayerInfo(),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.75f, 0.25f),
+});
+
+/* (Float datatype only) Larger activation functions dataset, used during some nightly tests. */
+const auto AllActivationsDataset = combine(datasets::ActivationFunctions(), framework::dataset::make("AlphaBeta", { 0.5f, 1.f }));
+
+// Alpha beta values should be integer values
+// This is for testing purposes with quantized datatypes and is not a limitation of the kernel.
+// To properly remove this restriction, dst_qinfo should be auto-initialised with consideration for alpha beta values
+// The main act functions quantized matmul kernels are expected to support
+const auto ActivationFunctionsQuantizedDataset = concat(concat(concat(
+                                                                   framework::dataset::make("ActivationInfo", ActivationLayerInfo()),
+                                                                   framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))),
+                                                               framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 1.f))),
+                                                        framework::dataset::make("ActivationInfo", ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 2.f, 1.f)));
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMul)
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                        framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                framework::dataset::make("DataType", DataType::F32)),
+                                                                                                        ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                    framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                    framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                    framework::dataset::make("DataType", DataType::F32)),
+                                                                                                            ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunAllActivations, CLMatMulActivationAlphaBetaFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SmallerMatMulDataset(),
+                       framework::dataset::make("TransposeA", { false })),
+                       framework::dataset::make("TransposeB", { true })),
+                       framework::dataset::make("DataType", DataType::F32)),
+                       AllActivationsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulActivationFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                       framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                       framework::dataset::make("TransposeB", { false, true })),
+                                                                                                               framework::dataset::make("DataType", DataType::F16)),
+                                                                                                       ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLMatMulActivationFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                   framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                   framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                   framework::dataset::make("DataType", DataType::F16)),
+                                                                                                           ActivationFunctionsDataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                                     datasets::SmallMatMulDataset(),
+                                                                                                                     framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                 framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                 framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                 ActivationFunctionsQuantizedDataset),
+                                                                                                                 framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                 framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
+                                                                                                                 framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
+                                                                                                         framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(
+        datasets::LargeMatMulDataset(),
+        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                     framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                     framework::dataset::make("DataType", DataType::QASYMM8)),
+                                                                                                                     ActivationFunctionsQuantizedDataset),
+                                                                                                                     framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                     framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
+                                                                                                                     framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
+                                                                                                             framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(combine(
+        datasets::SmallMatMulDataset(),
+        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                        framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                        framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                                                                                        ActivationFunctionsQuantizedDataset),
+                                                                                                                        framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                        framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })),
+                                                                                                                framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })),
+                                                                                                        framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 2) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLQuantizedMatMulFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine(
+                                                                                                                        datasets::LargeMatMulDataset(),
+                                                                                                                        framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                    framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                    framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                                                                                                                    ActivationFunctionsQuantizedDataset),
+                                                                                                                    framework::dataset::make("NumberOfExtraRuns", { 0, 1 })),
+                                                                                                                    framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })),
+                                                                                                                    framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })),
+                                                                                                            framework::dataset::make("DstQInfo", { QuantizationInfo(1.f, 50) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE_END() // Quantized
+
+TEST_SUITE_END() // MatMul
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulKernel.cpp b/tests/validation/CL/MatMulKernel.cpp
new file mode 100644
index 0000000000..b47f8bc924
--- /dev/null
+++ b/tests/validation/CL/MatMulKernel.cpp
@@ -0,0 +1,650 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+constexpr float          abs_tolerance_f32(
+    0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */
+constexpr float abs_tolerance_f16(
+    0.001f);                                                   /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16  data types in case using relative tolerance fails because of small values */
+RelativeTolerance<half_float::half> tolerance_f16(half(0.01)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+} // namespace
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** K0 values to test --precommit*/
+const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+const auto m0_values_nightly_lhs_t  = framework::dataset::make("M0", { 1, 2, 3, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 });
+const auto n0_values_nightly_rhs_t  = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
+
+/** K0 values to test --nightly*/
+const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 });
+const auto k0_values_nightly_rhs_t         = framework::dataset::make("K0", { 1, 2, 3, 4, 8 });
+const auto k0_values_nightly_lhs_t_rhs_nt  = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+
+template <typename T>
+using CLMatMulKernelFixture = MatMulKernelValidationFixture<T, ClMatMulNativeKernel>;
+
+template <typename T>
+using CLMatMulKernelBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulNativeKernel>;
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL)
+{
+    using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+    const std::vector<MatMulConfigurationPair> supported_block_sizes =
+    {
+        // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+        // Lhs not-transposed, Rhs-not-transposed
+        { MatMulKernelInfo(false, false, 0, 1, 1), false },  // M0 should be > 0
+        { MatMulKernelInfo(false, false, 3, 5, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 6, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 3, 7), false },  // K0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 9, 1, 2), true },
+        { MatMulKernelInfo(false, false, 3, 16, 3), true },
+        { MatMulKernelInfo(false, false, 7, 3, 4), true },
+        { MatMulKernelInfo(false, false, 7, 3, 4, true), false },  // N0 not in {4, 8, 16}
+        { MatMulKernelInfo(false, false, 7, 1, 4, true), false },  // N0 not in {4, 8, 16}
+        { MatMulKernelInfo(false, false, 7, 12, 4, true), false }, // N0 not in {4, 8, 16}
+        { MatMulKernelInfo(false, false, 7, 4, 4, true), true },
+        { MatMulKernelInfo(false, false, 7, 8, 4, true), true },
+        { MatMulKernelInfo(false, false, 7, 16, 4, true), true },
+
+        // Lhs not-transposed, Rhs transposed
+        { MatMulKernelInfo(false, true, 0, 1, 1), false },  // M0 should be > 0
+        { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, true, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, true, 3, 3, 12), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, true, 3, 3, 6), false },  // K0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, true, 5, 1, 2), true },
+        { MatMulKernelInfo(false, true, 3, 3, 3), true },
+        { MatMulKernelInfo(false, true, 2, 4, 8), true },
+        { MatMulKernelInfo(false, true, 2, 4, 5, true), false }, // K0 not in {4, 8, 16}
+        { MatMulKernelInfo(false, true, 2, 4, 9, true), false }, // K0 not in {4, 8, 16}
+        { MatMulKernelInfo(false, true, 2, 4, 3, true), false }, // K0 not in {4, 8, 16}
+        { MatMulKernelInfo(false, true, 2, 4, 4, true), true },
+        { MatMulKernelInfo(false, true, 2, 4, 8, true), true },
+        { MatMulKernelInfo(false, true, 2, 8, 16, true), true },
+
+        // Lhs transposed, Rhs-not-transposed
+        { MatMulKernelInfo(true, false, 1, 1, 0), false },  // K0 should be > 0
+        { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, false, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, false, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, false, 5, 3, 6), false },  // M0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, false, 4, 1, 22), true },
+        { MatMulKernelInfo(true, false, 3, 3, 3), true },
+        { MatMulKernelInfo(true, false, 2, 4, 8), true },
+        { MatMulKernelInfo(true, false, 2, 3, 8, true), false }, // N0 not in {4, 8, 16}
+        { MatMulKernelInfo(true, false, 2, 7, 8, true), false }, // N0 not in {4, 8, 16}
+        { MatMulKernelInfo(true, false, 2, 5, 8, true), false }, // N0 not in {4, 8, 16}
+        { MatMulKernelInfo(true, false, 2, 4, 8, true), true },
+        { MatMulKernelInfo(true, false, 2, 8, 8, true), true },
+        { MatMulKernelInfo(true, false, 2, 16, 8, true), true },
+
+        // Lhs transposed, Rhs-transposed
+        { MatMulKernelInfo(true, true, 2, 1, 5), false },  // K0 should in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, true, 1, 8, 7), false },  // K0 should in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, true, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, true, 6, 3, 12), false }, // M0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, true, 5, 3, 6), false },  // M0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(true, true, 4, 8, 16), true },
+        { MatMulKernelInfo(true, true, 3, 3, 4), true },
+        { MatMulKernelInfo(true, true, 16, 4, 8), true },
+        { MatMulKernelInfo(true, true, 2, 2, 1, true), false }, // K0 not in {4, 8, 16}
+        { MatMulKernelInfo(true, true, 2, 2, 5, true), false }, // K0 not in {4, 8, 16}
+        { MatMulKernelInfo(true, true, 2, 4, 7, true), false }, // K0 not in {4, 8, 16}
+        { MatMulKernelInfo(true, true, 2, 4, 4, true), true },
+        { MatMulKernelInfo(true, true, 2, 8, 8, true), true },
+        { MatMulKernelInfo(true, true, 2, 8, 16, true), true },
+    };
+
+    // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+    // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+    // not the shapes themselves.
+    const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+    const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+
+    const bool export_to_cl_image_supported = image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+    for(auto &pair : supported_block_sizes)
+    {
+        TensorInfo output_info;
+        Status     status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
+
+        if(!pair.first.export_rhs_to_cl_image || export_to_cl_image_supported)
+        {
+            ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
+        }
+    }
+}
+
+TEST_CASE(ExportToCLImage, framework::DatasetMode::ALL)
+{
+    // We skip this test if the hardware does not support exporting to CL Image
+    if(image2d_from_buffer_supported(CLKernelLibrary::get().get_device()))
+    {
+        constexpr size_t pixel_size  = 4;
+        const size_t     max_image_w = pixel_size * CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_WIDTH>();
+        const size_t     max_image_h = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_IMAGE2D_MAX_HEIGHT>();
+
+        using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, bool, bool, bool>;
+        const std::vector<ShapeConfigurationTuple> shape_configurations =
+        {
+            // lhs_shape, rhs_shape, adj_lhs, adj_rhs, expected
+            // Lhs t/Nt, Rhs Nt
+            // Transposition of Lhs doesn't add any value to the tests, therefore always assumed false below
+            { TensorShape(5U, 1U), TensorShape(3U, 5U), false, false, false },  // N should be multiple of 4
+            { TensorShape(5U, 1U), TensorShape(14U, 5U), false, false, false }, // N should be multiple of 4
+            { TensorShape(5U, 1U), TensorShape(12U, 5U), false, false, true },
+            { TensorShape(5U, 1U), TensorShape(8U, 5U), false, false, true },
+            { TensorShape(5U, 1U), TensorShape(4U, 5U), false, false, true },
+            { TensorShape(max_image_h + 1, 1U), TensorShape(4U, max_image_h + 1), false, false, false }, // Cannot fit into CL Image memory's height
+            { TensorShape(5U, 1U), TensorShape(max_image_w + 1, 5U), false, false, false },              // Cannot fit into CL Image memory's width
+            { TensorShape(max_image_h, 1U), TensorShape(4U, max_image_h), false, false, true },          // Barely fits into CL Image memory's height
+            { TensorShape(5U, 1U), TensorShape(max_image_w, 5U), false, false, true },                   // Barely fits into CL Image memory's width
+
+            // Lhs Nt/T , Rhs T
+            { TensorShape(5U, 1U), TensorShape(5U, 3U), false, true, false },  // K should be multiple of 4
+            { TensorShape(5U, 1U), TensorShape(5U, 14U), false, true, false }, // K should be multiple of 4
+            { TensorShape(4U, 1U), TensorShape(4U, 10U), false, true, true },
+            { TensorShape(8U, 1U), TensorShape(8U, 9U), false, true, true },
+            { TensorShape(12U, 1U), TensorShape(12U, 6U), false, true, true },
+        };
+
+        for(auto &tuple : shape_configurations)
+        {
+            TensorShape lhs_shape = std::get<0>(tuple);
+            TensorShape rhs_shape = std::get<1>(tuple);
+
+            const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
+            const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);
+
+            const bool adj_lhs = std::get<2>(tuple);
+            const bool adj_rhs = std::get<3>(tuple);
+
+            // We choose M0, N0, K0 equal to 4 so that they're always valid for CLImage in any combination
+            const MatMulKernelInfo matmul_kernel_info
+            {
+                adj_lhs, adj_rhs, 4, 4, 4, true /* export_rhs_to_cl_image */
+            };
+
+            TensorInfo output_info;
+            Status     status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, matmul_kernel_info);
+
+            const bool expected = std::get<4>(tuple);
+            ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+        }
+    }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+    // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+    using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>;
+    const std::vector<ShapeConfigurationTuple> shape_configurations =
+    {
+        { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U), true },
+        { TensorShape(10U, 12U), TensorShape(3U, 10U), TensorShape(3U), true },
+        { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true },
+        { TensorShape(8U, 4U), TensorShape(2U, 5U), TensorShape(2U), false }, // Mismatch in the K dimension
+        { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension
+        { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+        { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting
+        { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension
+        { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(1U), false },                                 // Unsupported bias broadcasting.
+        { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U, 3U), false },                             // 2D bias is unsupported.
+        { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(6U), false },                                 // bias first dimension != dst first dimension
+    };
+
+    for(auto &tuple : shape_configurations)
+    {
+        const bool expected = std::get<3>(tuple);
+
+        for(bool adj_lhs :
+            {
+                false, true
+            })
+        {
+            for(bool adj_rhs :
+                {
+                    false, true
+                })
+            {
+                TensorShape lhs_shape = std::get<0>(tuple);
+                TensorShape rhs_shape = std::get<1>(tuple);
+                TensorShape bia_shape = std::get<2>(tuple);
+
+                if(adj_lhs)
+                {
+                    permute(lhs_shape, PermutationVector(1U, 0U));
+                }
+
+                if(adj_rhs)
+                {
+                    permute(rhs_shape, PermutationVector(1U, 0U));
+                }
+
+                const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
+                const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);
+                const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::F32);
+                TensorInfo       output_info;
+
+                MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ };
+
+                Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+                ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+            }
+        }
+    }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+    // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+    using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, bool>;
+    const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+    {
+        { DataType::F32, DataType::F32, DataType::F32, true },
+        { DataType::F16, DataType::F16, DataType::F16, true },
+        { DataType::F16, DataType::F32, DataType::F32, false },                                              // no mixed precision
+        { DataType::F64, DataType::F64, DataType::F64, false },                                              // no double precision
+        { DataType::QASYMM8, DataType::QASYMM8, DataType::QASYMM8, false },                                  // no quantized types
+        { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, false },             // no quantized types
+        { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types
+        { DataType::QASYMM16, DataType::QASYMM16, DataType::QASYMM16, false },                               // no quantized types
+        { DataType::QSYMM16, DataType::QSYMM16, DataType::QSYMM16, false },                                  // no quantized types
+        { DataType::QSYMM8, DataType::QSYMM8, DataType::QSYMM8, false },                                     // no quantized types
+        { DataType::S64, DataType::S64, DataType::S64, false },                                              // no integral types
+        { DataType::S32, DataType::S32, DataType::S32, false },                                              // no integral types
+        { DataType::S16, DataType::S16, DataType::S16, false },                                              // no integral types
+        { DataType::S8, DataType::S8, DataType::S8, false },                                                 // no integral types
+        { DataType::U64, DataType::U64, DataType::U64, false },                                              // no integral types
+        { DataType::U32, DataType::U32, DataType::U32, false },                                              // no integral types
+        { DataType::U16, DataType::U16, DataType::U16, false },                                              // no integral types
+        { DataType::U8, DataType::U8, DataType::U8, false },                                                 // no integral types
+    };
+
+    const TensorShape      shape = TensorShape(10U, 10U);
+    const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false };
+    for(auto &tuple : data_type_configurations)
+    {
+        const bool expected = std::get<3>(tuple);
+
+        const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+        const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+        TensorInfo       output_info(shape, 1, std::get<2>(tuple));
+
+        Status status = ClMatMulNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, matmul_kernel_info);
+        ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(),
+                                                                                                                   framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                   framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                   m0_values_precommit),
+                                                                                                                   n0_values_precommit),
+                                                                                                                   k0_values_precommit),
+                                                                                                           framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                   framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                    framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                    framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                    m0_values_precommit),
+                                                                                                                    n0_values_precommit),
+                                                                                                                    k0_values_precommit),
+                                                                                                            framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                    framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulKernelBiasFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                   framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                   framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                   m0_values_precommit),
+                                                                                                                   n0_values_precommit),
+                                                                                                                   k0_values_precommit),
+                                                                                                                   framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                           framework::dataset::make("DataType", DataType::F32)))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                   framework::dataset::make("TransposeA", { false })),
+                                                                                                                   framework::dataset::make("TransposeB", { false })),
+                                                                                                                   m0_values_nightly_lhs_nt),
+                                                                                                                   n0_values_nightly_rhs_nt),
+                                                                                                                   k0_values_nightly_lhs_nt_rhs_nt),
+                                                                                                                   framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                   framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                     framework::dataset::make("TransposeA", { false })),
+                                                                                                                     framework::dataset::make("TransposeB", { true })),
+                                                                                                                     m0_values_nightly_lhs_nt),
+                                                                                                                     n0_values_nightly_rhs_t),
+                                                                                                                     k0_values_nightly_rhs_t),
+                                                                                                                     framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                     framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                     framework::dataset::make("TransposeA", { true })),
+                                                                                                                     framework::dataset::make("TransposeB", { false })),
+                                                                                                                     m0_values_nightly_lhs_t),
+                                                                                                                     n0_values_nightly_rhs_nt),
+                                                                                                                     k0_values_nightly_lhs_t_rhs_nt),
+                                                                                                                     framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                     framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_values_nightly_rhs_t),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0
+// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(),
+                                                                                                                      framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                      framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                      framework::dataset::make("M0", { 2 })),
+                                                                                                                      framework::dataset::make("N0", { 2 })),
+                                                                                                                      framework::dataset::make("K0", { 2 })),
+                                                                                                                      framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                              framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE(ExportRhsToCLImage)
+FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               framework::dataset::make("M0", { 2 })),
+                                                       framework::dataset::make("N0", { 4, 8, 16 })),
+                                               framework::dataset::make("K0", { 2, 4 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
+                                                       framework::dataset::make("N0", { 4, 8, 16 })),
+                                               framework::dataset::make("K0", { 1, 2, 3, 4 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               framework::dataset::make("M0", { 2 })),
+                                                       framework::dataset::make("N0", { 2, 4 })),
+                                               framework::dataset::make("K0", { 4, 8, 16 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
+                                                       framework::dataset::make("N0", { 1, 2, 3, 4 })),
+                                               framework::dataset::make("K0", { 4, 8, 16 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+TEST_SUITE_END() // ExportRhsToCLImage
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                   framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                   framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                   m0_values_precommit),
+                                                                                                                   n0_values_precommit),
+                                                                                                                   k0_values_precommit),
+                                                                                                           framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                   framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                  framework::dataset::make("TransposeA", { false })),
+                                                                                                                  framework::dataset::make("TransposeB", { false })),
+                                                                                                                  m0_values_nightly_lhs_nt),
+                                                                                                                  n0_values_nightly_rhs_nt),
+                                                                                                                  k0_values_nightly_lhs_nt_rhs_nt),
+                                                                                                                  framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                  framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                    framework::dataset::make("TransposeA", { false })),
+                                                                                                                    framework::dataset::make("TransposeB", { true })),
+                                                                                                                    m0_values_nightly_lhs_nt),
+                                                                                                                    n0_values_nightly_rhs_t),
+                                                                                                                    k0_values_nightly_rhs_t),
+                                                                                                                    framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                    framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                                                                    framework::dataset::make("TransposeA", { true })),
+                                                                                                                    framework::dataset::make("TransposeB", { false })),
+                                                                                                                    m0_values_nightly_lhs_t),
+                                                                                                                    n0_values_nightly_rhs_nt),
+                                                                                                                    k0_values_nightly_lhs_t_rhs_nt),
+                                                                                                                    framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                    framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_values_nightly_rhs_t),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE(ExportRhsToCLImage)
+FIXTURE_DATA_TEST_CASE(RunSmallRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsNT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               framework::dataset::make("M0", { 2 })),
+                                                       framework::dataset::make("N0", { 4, 8, 16 })),
+                                               framework::dataset::make("K0", { 2, 4 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsNotTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsNT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
+                                                       framework::dataset::make("N0", { 4, 8, 16 })),
+                                               framework::dataset::make("K0", { 1, 2, 3, 4 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunSmallRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDatasetRhsExportToCLImageRhsT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               framework::dataset::make("M0", { 2 })),
+                                                       framework::dataset::make("N0", { 2, 4 })),
+                                               framework::dataset::make("K0", { 4, 8, 16 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDatasetRhsExportToCLImageRhsT(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               framework::dataset::make("M0", { 2 })), // Choices of M0 does not matter much because it's related to Lhs tensor
+                                                       framework::dataset::make("N0", { 1, 2, 3, 4 })),
+                                               framework::dataset::make("K0", { 4, 8, 16 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { true })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    if(_device_supports_export_to_cl_image)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+TEST_SUITE_END() // ExportRhsToCLImage
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // MatMulKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulLowpNativeKernel.cpp b/tests/validation/CL/MatMulLowpNativeKernel.cpp
new file mode 100644
index 0000000000..90eee4fb82
--- /dev/null
+++ b/tests/validation/CL/MatMulLowpNativeKernel.cpp
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "src/gpu/cl/kernels/ClMatMulLowpNativeKernel.h"
+
+#include "tests/datasets/LargeMatMulDataset.h"
+#include "tests/datasets/SmallMatMulDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+}
+template <typename T>
+using CLMatMulLowpNativeKernelFixture = MatMulKernelValidationFixture<T, ClMatMulLowpNativeKernel>;
+
+template <typename T>
+using CLMatMulLowpKernelWithBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulLowpNativeKernel>;
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** K0 values to test --precommit*/
+const auto k0_values_precommit = framework::dataset::make("K0", { 2, 3 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+const auto m0_values_nightly_lhs_t  = framework::dataset::make("M0", { 1, 2, 3, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 });
+const auto n0_values_nightly_rhs_t  = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
+
+/** K0 values to test --nightly*/
+const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", { 1, 2, 3, 4, 8, 16 });
+const auto k0_values_nightly_rhs_t         = framework::dataset::make("K0", { 1, 2, 3, 4, 8 });
+const auto k0_values_nightly_lhs_t_rhs_nt  = framework::dataset::make("K0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulLowpNativeKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedKernelConfigurations, framework::DatasetMode::ALL)
+{
+    using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+    const std::vector<MatMulConfigurationPair> supported_block_sizes =
+    {
+        // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+        // Lhs not-transposed, Rhs-not-transposed
+        { MatMulKernelInfo(false, false, 0, 1, 1), false },  // M0 should be > 0
+        { MatMulKernelInfo(false, false, 3, 5, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 6, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 3, 17), false }, // K0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 3, 7), false },  // K0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 9, 1, 2), true },
+        { MatMulKernelInfo(false, false, 3, 16, 3), true },
+        { MatMulKernelInfo(false, false, 7, 3, 4), true },
+        { MatMulKernelInfo(false, false, 7, 3, 4, true), true }, // export to CLImage is unsupported for quantized types
+    };
+
+    // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+    // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+    // not the shapes themselves.
+    const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::QASYMM8_SIGNED);
+    const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::QASYMM8_SIGNED);
+
+    for(auto &pair : supported_block_sizes)
+    {
+        TensorInfo output_info;
+        Status     status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
+
+        ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+    // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+    using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>;
+    const std::vector<ShapeConfigurationTuple> shape_configurations =
+    {
+        { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U), true },
+        { TensorShape(10U, 12U), TensorShape(3U, 10U), TensorShape(3U), true },
+        { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true },
+        { TensorShape(8U, 4U), TensorShape(2U, 5U), TensorShape(2U), false }, // Mismatch in the K dimension
+        { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension
+        { TensorShape(5U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+        { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting
+        { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension
+        { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(1U), false },                                 // invalid broadcast of bias
+        { TensorShape(5U, 1U), TensorShape(3U, 5U), TensorShape(3U, 3U), false },                             // 2d bias is invalid
+    };
+
+    for(auto &tuple : shape_configurations)
+    {
+        const bool expected = std::get<3>(tuple);
+
+        for(bool adj_lhs :
+            {
+                false, true
+            })
+        {
+            for(bool adj_rhs :
+                {
+                    false, true
+                })
+            {
+                TensorShape lhs_shape = std::get<0>(tuple);
+                TensorShape rhs_shape = std::get<1>(tuple);
+                TensorShape bia_shape = std::get<2>(tuple);
+
+                if(adj_lhs)
+                {
+                    permute(lhs_shape, PermutationVector(1U, 0U));
+                }
+
+                if(adj_rhs)
+                {
+                    permute(rhs_shape, PermutationVector(1U, 0U));
+                }
+
+                const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::QASYMM8_SIGNED);
+                const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::QASYMM8_SIGNED);
+                const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::S32);
+                TensorInfo       output_info;
+
+                MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ };
+
+                Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+                ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+            }
+        }
+    }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+    using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>;
+    const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+    {
+        { DataType::F32, DataType::F32, DataType::F32, DataType::F32, false }, // no floating point types
+        { DataType::F16, DataType::F16, DataType::F16, DataType::F16, false }, // no floating point types
+        { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision
+        { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, true },
+        { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, true },
+        { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // only qasymm8/qasymm8_signed is supported
+        { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false },                               // only qasymm8/qasymm8_signed is supported
+        { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false },                                  // only qasymm8/qasymm8_signed is supported
+        { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false },                                     // only qasymm8/qasymm8_signed is supported
+        { DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8, false },                           // no mixed data types
+        { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false },                                              // no integral types
+        { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false },                                              // no integral types
+        { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false },                                              // no integral types
+        { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false },                                                  // no integral types
+        { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false },                                              // no integral types
+        { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false },                                              // no integral types
+        { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false },                                              // no integral types
+        { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false },                                                  // no integral types
+        { DataType::QASYMM8, DataType::QASYMM8, DataType::F32, DataType::QASYMM8, false }                                   // Only S32 bias is supported
+    };
+
+    // It's enough to test a single shape and block size configuration while checking data types
+    const TensorShape      shape     = TensorShape(10U, 10U);
+    const TensorShape      bia_shape = TensorShape(10U);
+    const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false };
+    for(auto &tuple : data_type_configurations)
+    {
+        const bool expected = std::get<4>(tuple);
+
+        const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+        const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+        const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple));
+        TensorInfo       output_info(shape, 1, std::get<3>(tuple));
+
+        Status status = ClMatMulLowpNativeKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+        ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(),
+                                                                                                                      framework::dataset::make("TransposeA", { true, false })),
+                                                                                                                      framework::dataset::make("TransposeB", { true, false })),
+                                                                                                                      m0_values_precommit),
+                                                                                                                      n0_values_precommit),
+                                                                                                                      k0_values_precommit),
+                                                                                                                      framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                              framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                       framework::dataset::make("TransposeA", { true, false })),
+                                                                                                                       framework::dataset::make("TransposeB", { true, false })),
+                                                                                                                       m0_values_precommit),
+                                                                                                                       n0_values_precommit),
+                                                                                                                       k0_values_precommit),
+                                                                                                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpKernelWithBiasFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                    framework::dataset::make("TransposeA", { true, false })),
+                                                                                                                    framework::dataset::make("TransposeB", { true, false })),
+                                                                                                                    m0_values_precommit),
+                                                                                                                    n0_values_precommit),
+                                                                                                                    k0_values_precommit),
+                                                                                                                    framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                    framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_values_nightly_lhs_nt_rhs_nt),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_values_nightly_rhs_t),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_values_nightly_lhs_t_rhs_nt),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_values_nightly_rhs_t),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+// Running High Dimensional test is enough for qasymm8_signed, because we're stressing the number of dimensions, not data type or M0/N0/K0
+// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulLowpNativeKernelFixture<int8_t>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { true, false })),
+                                                                       framework::dataset::make("TransposeB", { true, false })),
+                                                               framework::dataset::make("M0", { 2 })),
+                                                       framework::dataset::make("N0", { 2 })),
+                                               framework::dataset::make("K0", { 2 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(),
+                                                                                                                       framework::dataset::make("TransposeA", { true, false })),
+                                                                                                                       framework::dataset::make("TransposeB", { true, false })),
+                                                                                                                       m0_values_precommit),
+                                                                                                                       n0_values_precommit),
+                                                                                                                       k0_values_precommit),
+                                                                                                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                               framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(),
+                                                                                                                        framework::dataset::make("TransposeA", { true, false })),
+                                                                                                                        framework::dataset::make("TransposeB", { true, false })),
+                                                                                                                        m0_values_precommit),
+                                                                                                                        n0_values_precommit),
+                                                                                                                        k0_values_precommit),
+                                                                                                                        framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                                framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_values_nightly_lhs_nt_rhs_nt),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_values_nightly_rhs_t),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_values_nightly_lhs_t_rhs_nt),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulLowpNativeKernelFixture<uint8_t>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_values_nightly_rhs_t),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_quant);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // MatMulLowpNativeKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp b/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp
new file mode 100644
index 0000000000..ac46b67c9e
--- /dev/null
+++ b/tests/validation/CL/MatMulLowpNativeMMULKernel.cpp
@@ -0,0 +1,394 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.h"
+
+#include "tests/datasets/MatMulLowpMMULDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+}
+using framework::dataset::make;
+
+template <typename T>
+using CLMatMulLowpNativeMMULKernelFixture = MatMulKernelValidationFixture<T, ClMatMulLowpNativeMMULKernel, true /* use_mmul */>;
+
+template <typename T>
+using CLMatMulLowpNativeMMULKernelWithBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulLowpNativeMMULKernel, true /* use_mmul */>;
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 2, 4, 5, 8 });
+const auto m0_values_nightly_lhs_t  = framework::dataset::make("M0", { 2, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly = framework::dataset::make("N0", { 1, 3, 8, 16 });
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulLowpNativeMMULKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedKernelConfigurations, framework::DatasetMode::ALL)
+{
+    using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+    const std::vector<MatMulConfigurationPair> supported_block_sizes =
+    {
+        // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+        { MatMulKernelInfo(false, false, 0, 1, 4), false }, // M0 should be > 0
+        { MatMulKernelInfo(false, true, 3, 5, 4), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 6, 4), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+        { MatMulKernelInfo(false, false, 3, 3, 8), false }, // K0 not in 4
+        { MatMulKernelInfo(true, false, 5, 3, 4), false },  // M0 not in {1, 2, 3, 4, 8, 16} when Lhs is transposed
+        { MatMulKernelInfo(false, false, 9, 1, 4), true },
+        { MatMulKernelInfo(false, true, 3, 16, 4), true },
+        { MatMulKernelInfo(false, false, 7, 3, 4), true },
+        { MatMulKernelInfo(true, false, 8, 3, 4), true },
+        { MatMulKernelInfo(true, true, 4, 3, 4), true },
+        { MatMulKernelInfo(false, false, 7, 3, 4, true), false }, // export to CLImage is unsupported for quantized types
+    };
+
+    // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+    // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+    // not the shapes themselves.
+    const TensorInfo lhs_info = TensorInfo(TensorShape(64U, 64U), 1, DataType::QASYMM8_SIGNED);
+    const TensorInfo rhs_info = TensorInfo(TensorShape(64U, 64U), 1, DataType::QASYMM8_SIGNED);
+
+    for(auto &pair : supported_block_sizes)
+    {
+        TensorInfo output_info;
+        Status     status   = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
+        const bool expected = (pair.second && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()));
+
+        ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+    // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+    using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>;
+    const std::vector<ShapeConfigurationTuple> shape_configurations =
+    {
+        { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(3U), true },
+        { TensorShape(16U, 12U), TensorShape(3U, 16U), TensorShape(3U), true },
+        { TensorShape(64U, 4U), TensorShape(2U, 64U), TensorShape(2U), true },
+        { TensorShape(16U, 4U), TensorShape(2U, 32U), TensorShape(2U), false }, // Mismatch in the K dimension
+        { TensorShape(16U, 0U), TensorShape(2U, 16U), TensorShape(2U), false }, // Invalid dimension
+        { TensorShape(32U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+        { TensorShape(32U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // no batch broadcasting
+        { TensorShape(32U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 32U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // mismatch in batch dimension
+        { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(1U), false },                                 // invalid broadcast of bias
+        { TensorShape(32U, 1U), TensorShape(3U, 32U), TensorShape(3U, 3U), false },                             // 2d bias is invalid
+        { TensorShape(12U, 12U), TensorShape(3U, 12U), TensorShape(3U), false },                                // K must be multiple of 16
+    };
+
+    for(auto &tuple : shape_configurations)
+    {
+        const bool expected = (std::get<3>(tuple) && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()));
+
+        for(bool adj_lhs :
+            {
+                false, true
+            })
+        {
+            for(bool adj_rhs :
+                {
+                    false, true
+                })
+            {
+                TensorShape lhs_shape = std::get<0>(tuple);
+                TensorShape rhs_shape = std::get<1>(tuple);
+                TensorShape bia_shape = std::get<2>(tuple);
+
+                if(adj_lhs)
+                {
+                    permute(lhs_shape, PermutationVector(1U, 0U));
+                }
+
+                if(adj_rhs)
+                {
+                    permute(rhs_shape, PermutationVector(1U, 0U));
+                }
+
+                const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::QASYMM8_SIGNED);
+                const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::QASYMM8_SIGNED);
+                const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::S32);
+                TensorInfo       output_info;
+
+                MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 4, false /* export_rhs_to_cl_image */ };
+
+                Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+                ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+            }
+        }
+    }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+    using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>;
+    const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+    {
+        { DataType::F32, DataType::F32, DataType::F32, DataType::F32, false }, // no floating point types
+        { DataType::F16, DataType::F16, DataType::F16, DataType::F16, false }, // no floating point types
+        { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false }, // no double precision
+        { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, true },
+        { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, true },
+        { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // only qasymm8/qasymm8_signed is supported
+        { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false },                               // only qasymm8/qasymm8_signed is supported
+        { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false },                                  // only qasymm8/qasymm8_signed is supported
+        { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false },                                     // only qasymm8/qasymm8_signed is supported
+        { DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8, false },                           // no mixed data types
+        { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false },                                              // no integral types
+        { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false },                                              // no integral types
+        { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false },                                              // no integral types
+        { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false },                                                  // no integral types
+        { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false },                                              // no integral types
+        { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false },                                              // no integral types
+        { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false },                                              // no integral types
+        { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false },                                                  // no integral types
+        { DataType::QASYMM8, DataType::QASYMM8, DataType::F32, DataType::QASYMM8, false }                                   // Only S32 bias is supported
+    };
+
+    // It's enough to test a single shape and block size configuration while checking data types
+    const TensorShape      shape     = TensorShape(48U, 48U);
+    const TensorShape      bia_shape = TensorShape(48U);
+    const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 4, false };
+    for(auto &tuple : data_type_configurations)
+    {
+        const bool expected = (std::get<4>(tuple) && arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()));
+
+        const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+        const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+        const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple));
+        TensorInfo       output_info(shape, 1, std::get<3>(tuple));
+
+        Status status = ClMatMulLowpNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+
+        ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::SmallMatMulLowpMMULDataset(),
+                               make("TransposeA", { false, true }),
+                               make("TransposeB", { false, true }),
+                               m0_values_precommit,
+                               n0_values_precommit,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpNativeMMULKernelWithBiasFixture<int8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::SmallMatMulLowpMMULWithBiasDataset(),
+                               make("TransposeA", { false, true }),
+                               make("TransposeB", { false, true }),
+                               m0_values_precommit,
+                               n0_values_precommit,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsNotTransposed, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeMatMulLowpMMULDataset(),
+                               make("TransposeA", { false }),
+                               make("TransposeB", { false, true }),
+                               m0_values_nightly_lhs_nt,
+                               n0_values_nightly,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeMatMulLowpMMULDataset(),
+                               make("TransposeA", { true }),
+                               make("TransposeB", { false, true }),
+                               m0_values_nightly_lhs_t,
+                               n0_values_nightly,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+// Running High Dimensional test is enough for qasymm8_signed, because we're stressing the number of dimensions, not data type or M0/N0/K0
+// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulLowpNativeMMULKernelFixture<int8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::HighDimensionalMatMulLowpMMULDataset(),
+                               make("TransposeA", { false, true }),
+                               make("TransposeB", { false, true }),
+                               make("M0", { 2 }),
+                               make("N0", { 2 }),
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8_SIGNED)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulLowpNativeMMULKernelFixture<uint8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::SmallMatMulLowpMMULDatasetSubset(),
+                               make("TransposeA", { false, true }),
+                               make("TransposeB", { false, true }),
+                               m0_values_precommit,
+                               n0_values_precommit,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulLowpNativeMMULKernelWithBiasFixture<uint8_t>,
+                       framework::DatasetMode::ALL,
+                       combine(datasets::SmallMatMulLowpMMULWithBiasDataset(),
+                               make("TransposeA", { false, true }),
+                               make("TransposeB", { false, true }),
+                               m0_values_precommit,
+                               n0_values_precommit,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsNotTransposed, CLMatMulLowpNativeMMULKernelFixture<uint8_t>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeMatMulLowpMMULDataset(),
+                               make("TransposeA", { false }),
+                               make("TransposeB", { false, true }),
+                               m0_values_nightly_lhs_nt,
+                               n0_values_nightly,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulLowpNativeMMULKernelFixture<uint8_t>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeMatMulLowpMMULDataset(),
+                               make("TransposeA", { true }),
+                               make("TransposeB", { false, true }),
+                               m0_values_nightly_lhs_t,
+                               n0_values_nightly,
+                               make("K0", { 4 }),
+                               make("ExportRhsToCLImage", { false }),
+                               make("DataType", DataType::QASYMM8)))
+{
+    if(_device_supports_mmul)
+    {
+        // Validate output
+        validate(CLAccessor(_target), _reference, tolerance_quant);
+    }
+}
+
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+TEST_SUITE_END() // MatMulLowpNativeMMULKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MatMulNativeMMULKernel.cpp b/tests/validation/CL/MatMulNativeMMULKernel.cpp
new file mode 100644
index 0000000000..655dd354dc
--- /dev/null
+++ b/tests/validation/CL/MatMulNativeMMULKernel.cpp
@@ -0,0 +1,501 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "src/gpu/cl/kernels/ClMatMulNativeMMULKernel.h"
+#include "tests/datasets/LargeMatMulMMULDataset.h"
+#include "tests/datasets/SmallMatMulMMULDataset.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/MatMulKernelFixture.h"
+#include "tests/validation/reference/Permute.h"
+
+#include <tuple>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+constexpr float          abs_tolerance_f32(
+    0.0001f); /**< Absolute tolerance value for comparing reference's output against implementation's output for floating point data types in case using relative tolerance fails because of small values */
+constexpr float abs_tolerance_f16(
+    0.02f);                                                   /**< Absolute tolerance value for comparing reference's output against implementation's output for fp16  data types in case using relative tolerance fails because of small values */
+RelativeTolerance<half_float::half> tolerance_f16(half(0.02)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */
+} // namespace
+
+/** M0 values to test --precommit*/
+const auto m0_values_precommit = framework::dataset::make("M0", { 1, 3 });
+
+/** N0 values to test --precommit*/
+const auto n0_values_precommit = framework::dataset::make("N0", { 2, 4 });
+
+/** M0 values to test --nightly*/
+const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", { 1, 2, 3, 4, 5, 6, 7, 8 });
+const auto m0_values_nightly_lhs_t  = framework::dataset::make("M0", { 1, 2, 3, 4, 8 });
+
+/** N0 values to test --nightly*/
+const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", { 1, 2, 3, 4, 8, 16 });
+const auto n0_values_nightly_rhs_t  = framework::dataset::make("N0", { 1, 2, 3, 4, 8 });
+
+/** K0 value -- Fixed to 1 */
+const auto k0_value = framework::dataset::make("K0", { 1 });
+
+template <typename T>
+using CLMatMulNativeMMULKernelFixture = MatMulKernelValidationFixture<T, ClMatMulNativeMMULKernel, true /*use_mmul*/>;
+
+template <typename T>
+using CLMatMulKernelBiasFixture = MatMulKernelWithBiasValidation<T, ClMatMulNativeMMULKernel, true /*use_mmul*/>;
+
+TEST_SUITE(CL)
+TEST_SUITE(MatMulNativeMMULKernel)
+TEST_SUITE(Validate)
+
+TEST_CASE(SupportedBlockSizes, framework::DatasetMode::ALL)
+{
+    if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+    {
+        using MatMulConfigurationPair = std::pair<MatMulKernelInfo, bool>;
+
+        const std::vector<MatMulConfigurationPair> supported_block_sizes =
+        {
+            // MatMulKernelInfo(adj_lhs, adj_rhs, M0, N0, K0, export_rhs_to_cl_image = false)
+            // Lhs not-transposed, Rhs not-transposed
+            { MatMulKernelInfo(false, false, 0, 1, 1), false }, // M0 should be > 0
+            { MatMulKernelInfo(false, false, 3, 5, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(false, false, 3, 6, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(false, false, 3, 3, 4), false }, // K0 not 1
+            { MatMulKernelInfo(false, false, 9, 1, 1), true },
+            { MatMulKernelInfo(false, false, 3, 16, 1), true },
+            { MatMulKernelInfo(false, false, 7, 3, 1), true },
+
+            // Lhs transposed, Rhs not-transposed
+            { MatMulKernelInfo(true, false, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, false, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, false, 6, 3, 1), false },  // M0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, false, 5, 3, 1), false },  // M0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, false, 2, 2, 2), false },  // K0 is not 1
+            { MatMulKernelInfo(true, false, 4, 1, 1), true },
+            { MatMulKernelInfo(true, false, 3, 3, 1), true },
+            { MatMulKernelInfo(true, false, 2, 4, 1), true },
+
+            // Lhs not-transposed, Rhs not-transposed
+            { MatMulKernelInfo(false, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8}
+            { MatMulKernelInfo(false, true, 2, 17, 1), false }, // N0 not in {1, 2, 3, 4, 8}
+            { MatMulKernelInfo(false, true, 4, 5, 1), false },  // N0 not in {1, 2, 3, 4, 8}
+            { MatMulKernelInfo(false, true, 4, 4, 7), false },  // K0 is not 1
+            { MatMulKernelInfo(false, true, 4, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8}
+            { MatMulKernelInfo(false, true, 3, 8, 1), true },
+            { MatMulKernelInfo(false, true, 8, 16, 1), true },
+            { MatMulKernelInfo(false, true, 2, 4, 1), true },
+
+            // Lhs transposed, Rhs transposed
+            { MatMulKernelInfo(true, true, 3, 11, 1), false }, // N0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, true, 3, 7, 1), false },  // N0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, true, 6, 3, 1), false },  // M0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, true, 5, 3, 1), false },  // M0 not in {1, 2, 3, 4, 8, 16}
+            { MatMulKernelInfo(true, true, 4, 8, 2), false },  // K0 is not 1
+            { MatMulKernelInfo(true, true, 4, 8, 1), true },
+            { MatMulKernelInfo(true, true, 3, 3, 1), true },
+            { MatMulKernelInfo(true, true, 16, 4, 1), true },
+        };
+
+        // Set big enough shapes so that block sizes are not truncated. Also, set all dimensions equal
+        // so that it doesn't fail for different NT/T configurations. We aim to test the block sizes here,
+        // not the shapes themselves.
+        const TensorInfo lhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+        const TensorInfo rhs_info = TensorInfo(TensorShape(100U, 100U), 1, DataType::F32);
+
+        for(auto &pair : supported_block_sizes)
+        {
+            TensorInfo output_info;
+            Status     status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, nullptr, &output_info, pair.first);
+            ARM_COMPUTE_EXPECT(bool(status) == pair.second, framework::LogLevel::ERRORS);
+        }
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_CASE(ValidateInputShapes, framework::DatasetMode::ALL)
+{
+    if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+    {
+        // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+        using ShapeConfigurationTuple = std::tuple<TensorShape, TensorShape, TensorShape, bool>; // lhs, rhs, bias, result
+        const std::vector<ShapeConfigurationTuple> shape_configurations =
+        {
+            { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(3U), true },
+            { TensorShape(12U, 12U), TensorShape(3U, 12U), TensorShape(3U), true },
+            { TensorShape(8U, 4U), TensorShape(2U, 8U), TensorShape(2U), true },
+            { TensorShape(8U, 4U), TensorShape(2U, 4U), TensorShape(2U), false }, // Mismatch in the K dimension
+            { TensorShape(5U, 0U), TensorShape(2U, 5U), TensorShape(2U), false }, // Invalid dimension
+            { TensorShape(5U, 7U), TensorShape(2U, 5U), TensorShape(2U), false }, // K not a multiple of 4 (MMUL_K0)
+            { TensorShape(8U, 4U, 3U, 4U, 5U, 6U), TensorShape(2U, 8U, 3U, 4U, 5U, 6U), TensorShape(2U), true },
+            { TensorShape(5U, 4U, 3U, 4U, 5U, 1U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // No batch broadcasting
+            { TensorShape(5U, 4U, 3U, 4U, 9U, 6U), TensorShape(2U, 5U, 3U, 4U, 5U, 6U), TensorShape(2U), false }, // Mismatch in batch dimension
+            { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(1U), false },                                 // Bias first dimensions != dst first dimension.
+            { TensorShape(4U, 1U), TensorShape(3U, 4U), TensorShape(5U, 6U), false },                             // Bias is 2d which is invalid.
+        };
+
+        for(auto &tuple : shape_configurations)
+        {
+            const bool expected = std::get<3>(tuple);
+
+            for(bool adj_lhs :
+                {
+                    false, true
+                })
+            {
+                for(bool adj_rhs :
+                    {
+                        false, true
+                    })
+                {
+                    TensorShape lhs_shape = std::get<0>(tuple);
+                    TensorShape rhs_shape = std::get<1>(tuple);
+                    TensorShape bia_shape = std::get<2>(tuple);
+
+                    if(adj_lhs)
+                    {
+                        permute(lhs_shape, PermutationVector(1U, 0U));
+                    }
+
+                    if(adj_rhs)
+                    {
+                        permute(rhs_shape, PermutationVector(1U, 0U));
+                    }
+
+                    const TensorInfo lhs_info = TensorInfo(lhs_shape, 1, DataType::F32);
+                    const TensorInfo rhs_info = TensorInfo(rhs_shape, 1, DataType::F32);
+                    const TensorInfo bia_info = TensorInfo(bia_shape, 1, DataType::F32);
+                    TensorInfo       output_info;
+
+                    MatMulKernelInfo matmul_kernel_info{ adj_lhs, adj_rhs, 1, 1, 1, false /* export_rhs_to_cl_image */ };
+
+                    Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+                    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+                }
+            }
+        }
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_CASE(ValidateDataTypes, framework::DatasetMode::ALL)
+{
+    if(arm_matrix_multiply_supported(CLKernelLibrary::get().get_device()))
+    {
+        // Configurations are assumed to be Nt/Nt, but will be transposed inside the test to test other configurations
+        using DataTypeConfigurationTuple = std::tuple<DataType, DataType, DataType, DataType, bool>;
+        const std::vector<DataTypeConfigurationTuple> data_type_configurations =
+        {
+            { DataType::F32, DataType::F32, DataType::F32, DataType::F32, true },
+            { DataType::F16, DataType::F16, DataType::F16, DataType::F16, true },
+            { DataType::F32, DataType::F32, DataType::F32, DataType::F32, true },
+            { DataType::F32, DataType::F32, DataType::F16, DataType::F32, false },                                              // incorrect bias type
+            { DataType::F16, DataType::F32, DataType::F32, DataType::F32, false },                                              // no mixed precision
+            { DataType::F64, DataType::F64, DataType::F64, DataType::F64, false },                                              // no double precision
+            { DataType::QASYMM8, DataType::QASYMM8, DataType::S32, DataType::QASYMM8, false },                                  // no quantized types
+            { DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, false },             // no quantized types
+            { DataType::QSYMM8_PER_CHANNEL, DataType::QSYMM8_PER_CHANNEL, DataType::S32, DataType::QSYMM8_PER_CHANNEL, false }, // no quantized types
+            { DataType::QASYMM16, DataType::QASYMM16, DataType::S32, DataType::QASYMM16, false },                               // no quantized types
+            { DataType::QSYMM16, DataType::QSYMM16, DataType::S32, DataType::QSYMM16, false },                                  // no quantized types
+            { DataType::QSYMM8, DataType::QSYMM8, DataType::S32, DataType::QSYMM8, false },                                     // no quantized types
+            { DataType::S64, DataType::S64, DataType::S64, DataType::S64, false },                                              // no integral types
+            { DataType::S32, DataType::S32, DataType::S32, DataType::S32, false },                                              // no integral types
+            { DataType::S16, DataType::S16, DataType::S16, DataType::S16, false },                                              // no integral types
+            { DataType::S8, DataType::S8, DataType::S8, DataType::S8, false },                                                  // no integral types
+            { DataType::U64, DataType::U64, DataType::U64, DataType::U64, false },                                              // no integral types
+            { DataType::U32, DataType::U32, DataType::U32, DataType::U32, false },                                              // no integral types
+            { DataType::U16, DataType::U16, DataType::U16, DataType::U16, false },                                              // no integral types
+            { DataType::U8, DataType::U8, DataType::U8, DataType::U8, false },                                                  // no integral types
+        };
+
+        const TensorShape      shape     = TensorShape(8U, 8U);
+        const TensorShape      bia_shape = TensorShape(8U);
+        const MatMulKernelInfo matmul_kernel_info{ false, false, 1, 1, 1, false };
+        for(auto &tuple : data_type_configurations)
+        {
+            const bool expected = std::get<4>(tuple);
+
+            const TensorInfo lhs_info(shape, 1, std::get<0>(tuple));
+            const TensorInfo rhs_info(shape, 1, std::get<1>(tuple));
+            const TensorInfo bia_info(bia_shape, 1, std::get<2>(tuple));
+            TensorInfo       output_info(shape, 1, std::get<3>(tuple));
+
+            Status status = ClMatMulNativeMMULKernel::validate(&lhs_info, &rhs_info, &bia_info, &output_info, matmul_kernel_info);
+            ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+        }
+    }
+    else
+    {
+        ARM_COMPUTE_TEST_INFO("cl_arm_matrix_multiply not supported. TEST skipped");
+        framework::ARM_COMPUTE_PRINT_INFO();
+    }
+}
+
+TEST_SUITE_END() // Validate
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunTiny, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulMMULDataset(),
+                                                                                                                     framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                     framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                     m0_values_precommit),
+                                                                                                                     n0_values_precommit),
+                                                                                                                     k0_value),
+                                                                                                                     framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                             framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(),
+                                                                                                                      framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                      framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                      m0_values_precommit),
+                                                                                                                      n0_values_precommit),
+                                                                                                                      k0_value),
+                                                                                                                      framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                              framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunWithBias, CLMatMulKernelBiasFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(),
+                                                                                                                   framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                   framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                   m0_values_precommit),
+                                                                                                                   n0_values_precommit),
+                                                                                                                   k0_value),
+                                                                                                                   framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                           framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTranspose, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0
+// It's a good idea to test for each Lhs/Rhs T/NT combinations because they're different CL kernels
+FIXTURE_DATA_TEST_CASE(RunHighDimensional, CLMatMulNativeMMULKernelFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { false, true })),
+                                                                       framework::dataset::make("TransposeB", { false, true })),
+                                                               framework::dataset::make("M0", { 2 })),
+                                                       framework::dataset::make("N0", { 2 })),
+                                               framework::dataset::make("K0", { 1 })),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32);
+    }
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+TEST_SUITE(Buffer)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulMMULDataset(),
+                                                                                                                     framework::dataset::make("TransposeA", { false, true })),
+                                                                                                                     framework::dataset::make("TransposeB", { false, true })),
+                                                                                                                     m0_values_precommit),
+                                                                                                                     n0_values_precommit),
+                                                                                                                     k0_value),
+                                                                                                                     framework::dataset::make("ExportRhsToCLImage", { false })),
+                                                                                                             framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeNoTranspose, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeRhsTranspose, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { false })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_nt),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposed, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { false })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_nt),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+FIXTURE_DATA_TEST_CASE(RunLargeLhsTransposedRhsTransposed, CLMatMulNativeMMULKernelFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulMMULDataset(),
+                                                                               framework::dataset::make("TransposeA", { true })),
+                                                                       framework::dataset::make("TransposeB", { true })),
+                                                               m0_values_nightly_lhs_t),
+                                                       n0_values_nightly_rhs_t),
+                                               k0_value),
+                                       framework::dataset::make("ExportRhsToCLImage", { false })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    // Validate output
+    if(_device_supports_mmul)
+    {
+        validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16);
+    }
+}
+TEST_SUITE_END() // Buffer
+
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // MatMulNativeMMULKernel
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/MaxUnpoolingLayer.cpp b/tests/validation/CL/MaxUnpoolingLayer.cpp
index 6cba8b8bd5..cf4fcdda70 100644
--- a/tests/validation/CL/MaxUnpoolingLayer.cpp
+++ b/tests/validation/CL/MaxUnpoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2020 Arm Limited.
+ * Copyright (c) 2020-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -51,20 +51,19 @@ const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(framework::datase
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
                                                                                                                    framework::dataset::make("DataType", DataType::F32))),
                                                                                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })
 
                                                                                                                   ))
 {
-    printf("validate\n");
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(MaxUnpooling, CLMaxUnpoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
                                                                                                                   framework::dataset::make("DataType", DataType::F16))),
                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })
 
diff --git a/tests/validation/CL/MeanStdDev.cpp b/tests/validation/CL/MeanStdDev.cpp
deleted file mode 100644
index 0e5135ec44..0000000000
--- a/tests/validation/CL/MeanStdDev.cpp
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMeanStdDev.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/MeanStdDevFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-RelativeTolerance<float> tolerance_rel_high_error(0.05f);
-RelativeTolerance<float> tolerance_rel_low_error(0.0005f);
-AbsoluteTolerance<float> tolerance_rel_high_error_f32(0.01f);
-AbsoluteTolerance<float> tolerance_rel_low_error_f32(0.00001f);
-AbsoluteTolerance<float> tolerance_rel_high_error_f16(0.1f);
-AbsoluteTolerance<float> tolerance_rel_low_error_f16(0.01f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(MeanStdDev)
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U), 1, DataType::F32),    // Wrong input data type
-                                                       TensorInfo(TensorShape(16U, 5U, 16U), 1, DataType::U8), // Invalid shape
-                                                       TensorInfo(TensorShape(16U, 16U), 1, DataType::U8),     // Valid
-                                                     }),
-               framework::dataset::make("Expected", { false, false, true })),
-               input_info, expected)
-{
-    ARM_COMPUTE_EXPECT(bool(CLMeanStdDev::validate(&input_info.clone()->set_is_resizable(false), nullptr, nullptr)) == expected, framework::LogLevel::ERRORS);
-}
-// clang-format on
-// *INDENT-ON*
-
-template <typename T>
-using CLMeanStdDevFixture = MeanStdDevValidationFixture<CLTensor, CLAccessor, CLMeanStdDev, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                        DataType::U8)))
-{
-    // Validate mean output
-    validate(_target.first, _reference.first);
-
-    // Validate std_dev output
-    validate(_target.second, _reference.second, tolerance_rel_high_error);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                        DataType::U8)))
-{
-    // Validate mean output
-    validate(_target.first, _reference.first, tolerance_rel_low_error);
-
-    // Validate std_dev output
-    validate(_target.second, _reference.second, tolerance_rel_high_error);
-}
-TEST_SUITE_END() // U8
-
-TEST_SUITE(F16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                     DataType::F16)))
-{
-    // Validate mean output
-    validate(_target.first, _reference.first, tolerance_rel_low_error_f16);
-
-    // Validate std_dev output
-    validate(_target.second, _reference.second, tolerance_rel_high_error_f16);
-}
-TEST_SUITE_END() // F16
-
-TEST_SUITE(F32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                      DataType::F32)))
-{
-    // Validate mean output
-    validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
-
-    // Validate std_dev output
-    validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMeanStdDevFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                      DataType::F32)))
-{
-    // Validate mean output
-    validate(_target.first, _reference.first, tolerance_rel_low_error_f32);
-
-    // Validate std_dev output
-    validate(_target.second, _reference.second, tolerance_rel_high_error_f32);
-}
-TEST_SUITE_END() // F32
-
-TEST_SUITE_END() // MeanStdDev
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp
index e77a21ed7f..cdeb622130 100644
--- a/tests/validation/CL/MeanStdDevNormalizationLayer.cpp
+++ b/tests/validation/CL/MeanStdDevNormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -78,7 +78,7 @@ TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLMeanStdDevNormalizationLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small2DShapes(),
                        framework::dataset::make("DataType", DataType::F16)),
                        framework::dataset::make("InPlace", { false, true })),
-                       framework::dataset::make("Epsilon", { 1e-8 })))
+                       framework::dataset::make("Epsilon", { 1e-3 })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
diff --git a/tests/validation/CL/Median3x3.cpp b/tests/validation/CL/Median3x3.cpp
deleted file mode 100644
index 9a09ae5be5..0000000000
--- a/tests/validation/CL/Median3x3.cpp
+++ /dev/null
@@ -1,74 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLMedian3x3.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/Median3x3Fixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr unsigned int filter_size = 3;              /* Size of the kernel/filter in number of elements. */
-constexpr BorderSize   border_size(filter_size / 2); /* Border size of the kernel/filter around its central element. */
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Median3x3)
-template <typename T>
-using CLMedian3x3Fixture = Median3x3ValidationFixture<CLTensor, CLAccessor, CLMedian3x3, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMedian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                               DataType::U8)),
-                                                                                                       datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMedian3x3Fixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                               DataType::U8)),
-                                                                                                       datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/MinMaxLocation.cpp b/tests/validation/CL/MinMaxLocation.cpp
deleted file mode 100644
index 1ad863d90e..0000000000
--- a/tests/validation/CL/MinMaxLocation.cpp
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLMinMaxLocation.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/MinMaxLocationFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(MinMaxLocation)
-
-template <typename T>
-using CLMinMaxLocationFixture = MinMaxLocationValidationFixture<CLTensor, CLAccessor, CLArray<Coordinates2D>, CLArrayAccessor<Coordinates2D>, CLMinMaxLocation, T>;
-
-TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMinMaxLocationFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::U8)))
-{
-    validate_min_max_loc(_target, _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMinMaxLocationFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::U8)))
-{
-    validate_min_max_loc(_target, _reference);
-}
-
-TEST_SUITE_END() // U8
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMinMaxLocationFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::S16)))
-{
-    validate_min_max_loc(_target, _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMinMaxLocationFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                            DataType::S16)))
-{
-    validate_min_max_loc(_target, _reference);
-}
-
-TEST_SUITE_END() // S16
-
-TEST_SUITE(Float)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMinMaxLocationFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Small2DShapes(), framework::dataset::make("DataType",
-                                                                                                          DataType::F32)))
-{
-    validate_min_max_loc(_target, _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLMinMaxLocationFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large2DShapes(), framework::dataset::make("DataType",
-                                                                                                          DataType::F32)))
-{
-    validate_min_max_loc(_target, _reference);
-}
-
-TEST_SUITE_END() // F32
-
-TEST_SUITE_END() // MinMaxLocation
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/NegLayer.cpp b/tests/validation/CL/NegLayer.cpp
index 690b8f44fb..c93e31dca9 100644
--- a/tests/validation/CL/NegLayer.cpp
+++ b/tests/validation/CL/NegLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/CL/CLAccessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ElementWiseUnaryFixture.h"
+#include "tests/validation/fixtures/ElementwiseUnaryFixture.h"
 
 namespace arm_compute
 {
@@ -50,6 +50,15 @@ TEST_SUITE(NegLayer)
 template <typename T>
 using CLNegLayerFixture = NegValidationFixture<CLTensor, CLAccessor, CLNegLayer, T>;
 
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLNegLayerFixture<int>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                    DataType::S32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
+
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLNegLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), framework::dataset::make("DataType",
diff --git a/tests/validation/CL/NonLinearFilter.cpp b/tests/validation/CL/NonLinearFilter.cpp
deleted file mode 100644
index 3fd9d5cd73..0000000000
--- a/tests/validation/CL/NonLinearFilter.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLNonLinearFilter.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/MatrixPatternDataset.h"
-#include "tests/datasets/NonLinearFilterFunctionDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/NonLinearFilterFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(NonLinearFilter)
-
-template <typename T>
-using CLNonLinearFilterFixture = NonLinearFilterValidationFixture<CLTensor, CLAccessor, CLNonLinearFilter, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLNonLinearFilterFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                                                                                                                     datasets::NonLinearFilterFunctions()),
-                                                                                                                     framework::dataset::make("MaskSize", { 3U, 5U })),
-                                                                                                                     datasets::MatrixPatterns()),
-                                                                                                                     datasets::BorderModes()),
-                                                                                                             framework::dataset::make("DataType", DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), _border_size));
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLNonLinearFilterFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(datasets::LargeShapes(),
-                                                                                                                     datasets::NonLinearFilterFunctions()),
-                                                                                                                     framework::dataset::make("MaskSize", { 3U, 5U })),
-                                                                                                                     datasets::MatrixPatterns()),
-                                                                                                                     datasets::BorderModes()),
-                                                                                                             framework::dataset::make("DataType", DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, shape_to_valid_region(_reference.shape(), (_border_mode == BorderMode::UNDEFINED), _border_size));
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/NormalizationLayer.cpp b/tests/validation/CL/NormalizationLayer.cpp
index 1aed2786ff..8c1890842b 100644
--- a/tests/validation/CL/NormalizationLayer.cpp
+++ b/tests/validation/CL/NormalizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -62,33 +62,28 @@ const auto NormalizationDatasetFP32 = combine(combine(combine(framework::dataset
 TEST_SUITE(CL)
 TEST_SUITE(NormalizationLayer)
 
-//TODO(COMPMID-415): Missing configuration?
-
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data type input/output
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Even normalization
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Non implemented IN_MAP_2D
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink
+                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Windows shrinking for NCHW
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(27U, 11U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
                                                      })),
                framework::dataset::make("NormInfo",  { NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
                                                        NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
                                                        NormalizationLayerInfo(NormType::IN_MAP_1D, 4),
                                                        NormalizationLayerInfo(NormType::IN_MAP_2D, 5),
-                                                       NormalizationLayerInfo(NormType::IN_MAP_1D, 5),
                                                        NormalizationLayerInfo(NormType::CROSS_MAP, 5),
                                                       })),
-               framework::dataset::make("Expected", { false, false, false, false, false, true })),
+               framework::dataset::make("Expected", { false, false, false, false, true })),
                input_info, output_info, norm_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLNormalizationLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), norm_info)) == expected, framework::LogLevel::ERRORS);
diff --git a/tests/validation/CL/OpticalFlow.cpp b/tests/validation/CL/OpticalFlow.cpp
deleted file mode 100644
index 3636a8f847..0000000000
--- a/tests/validation/CL/OpticalFlow.cpp
+++ /dev/null
@@ -1,93 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLArray.h"
-#include "arm_compute/runtime/CL/CLPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLGaussianPyramid.h"
-#include "arm_compute/runtime/CL/functions/CLOpticalFlow.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLArrayAccessor.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/OpticalFlowDataset.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/OpticalFlowFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(OpticalFlow)
-
-// *INDENT-OFF*
-// clang-format off
-using CLOpticalFlowFixture = OpticalFlowValidationFixture<CLTensor,
-                                                          CLAccessor,
-                                                          CLKeyPointArray,
-                                                          CLArrayAccessor<KeyPoint>,
-                                                          CLOpticalFlow,
-                                                          CLPyramid,
-                                                          CLGaussianPyramidHalf,
-                                                          uint8_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLOpticalFlowFixture, framework::DatasetMode::NIGHTLY, combine(combine(
-                       datasets::SmallOpticalFlowDataset(),
-                       framework::dataset::make("Format", Format::U8)),
-                       datasets::BorderModes()))
-{
-    // Validate output
-    CLArrayAccessor<KeyPoint> array(_target);
-
-    validate_keypoints(array.buffer(),
-                       array.buffer() + array.num_values(),
-                       _reference.begin(),
-                       _reference.end());
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLOpticalFlowFixture, framework::DatasetMode::NIGHTLY, combine(combine(
-                       datasets::LargeOpticalFlowDataset(),
-                       framework::dataset::make("Format", Format::U8)),
-                       datasets::BorderModes()))
-{
-    // Validate output
-    CLArrayAccessor<KeyPoint> array(_target);
-
-    validate_keypoints(array.buffer(),
-                       array.buffer() + array.num_values(),
-                       _reference.begin(),
-                       _reference.end());
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/PReluLayer.cpp b/tests/validation/CL/PReluLayer.cpp
index 82436a9671..f3f1c8b1b8 100644
--- a/tests/validation/CL/PReluLayer.cpp
+++ b/tests/validation/CL/PReluLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,7 +56,7 @@ const auto PReluLayerQASYMM8Dataset = combine(combine(framework::dataset::make("
 const auto PReluLayerQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
                                                     framework::dataset::make("DataType",
                                                                              DataType::QASYMM8_SIGNED));
-const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
+const auto PReluLayerS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
                                           framework::dataset::make("DataType", DataType::S16));
 const auto PReluLayerFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
                                            framework::dataset::make("DataType", DataType::F16));
@@ -71,21 +71,18 @@ TEST_SUITE(PReluLayer)
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                       }),
                framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
-                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Expected", { true, true, false, false})),
+               framework::dataset::make("Expected", { true, false, false})),
                input1_info, input2_info, output_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLPReluLayer::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS);
@@ -93,6 +90,63 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 // clang-format on
 // *INDENT-ON*
 
+TEST_SUITE(InPlace)
+TEST_CASE(Validate, framework::DatasetMode::ALL)
+{
+    // PRelu operaotr should be able to take nullptr as output and do the in-place computation.
+    // Shape and data type are selected randomly since they shouldn't matter
+    const auto tensor_info = TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32);
+    const auto result      = arm_compute::CLPReluLayer::validate(&tensor_info, &tensor_info, nullptr);
+    ARM_COMPUTE_EXPECT(bool(result) == true, framework::LogLevel::ERRORS);
+}
+
+SimpleTensor<float> compute_float_reference(const TensorInfo &tensor_info)
+{
+    SimpleTensor<float> ref_src1{ tensor_info.tensor_shape(), tensor_info.data_type() };
+    SimpleTensor<float> ref_src2{ tensor_info.tensor_shape(), tensor_info.data_type() };
+    SimpleTensor<float> ref_dst{ tensor_info.tensor_shape(), tensor_info.data_type() };
+
+    library->fill_tensor_uniform(ref_src1, 0);
+    library->fill_tensor_uniform(ref_src2, 1);
+
+    return reference::arithmetic_operation<float>(ArithmeticOperation::PRELU, ref_src1, ref_src2, ref_dst);
+}
+
+void compute_float_target_in_place(CLTensor &src1, CLTensor &src2, bool use_nullptr_output)
+{
+    auto fn = arm_compute::CLPReluLayer{};
+    fn.configure(&src1, &src2, use_nullptr_output ? nullptr : &src1);
+
+    src1.allocator()->allocate();
+    src2.allocator()->allocate();
+
+    library->fill_tensor_uniform(CLAccessor(src1), 0);
+    library->fill_tensor_uniform(CLAccessor(src2), 1);
+
+    fn.run();
+}
+
+TEST_CASE(ComputeWithNullPtr, framework::DatasetMode::ALL)
+{
+    const auto tensor_info = TensorInfo(TensorShape(33U, 13U, 2U), 1, DataType::F32);
+
+    auto src1 = create_tensor<CLTensor>(tensor_info);
+    auto src2 = create_tensor<CLTensor>(tensor_info);
+    compute_float_target_in_place(src1, src2, true);
+    validate(CLAccessor(src1), compute_float_reference(tensor_info));
+}
+
+TEST_CASE(ComputeWithSameTensor, framework::DatasetMode::ALL)
+{
+    const auto tensor_info = TensorInfo(TensorShape(33U, 13U, 2U), 1, DataType::F32);
+
+    auto src1 = create_tensor<CLTensor>(tensor_info);
+    auto src2 = create_tensor<CLTensor>(tensor_info);
+    compute_float_target_in_place(src1, src2, false);
+    validate(CLAccessor(src1), compute_float_reference(tensor_info));
+}
+TEST_SUITE_END() // InPlace
+
 template <typename T>
 using CLPReluLayerFixture = PReluLayerValidationFixture<CLTensor, CLAccessor, CLPReluLayer, T>;
 
@@ -143,6 +197,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPReluLayerFixture<int16_t>, framework::Datase
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+FIXTURE_DATA_TEST_CASE(RunOneDimensional, CLPReluLayerFixture<int16_t>, framework::DatasetMode::ALL, combine(framework::dataset::make("Shape", TensorShape(1U, 16U)), PReluLayerS16Dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
 TEST_SUITE_END()
 
 TEST_SUITE(Float)
diff --git a/tests/validation/CL/PadLayer.cpp b/tests/validation/CL/PadLayer.cpp
index 370195b078..ea0cb32785 100644
--- a/tests/validation/CL/PadLayer.cpp
+++ b/tests/validation/CL/PadLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include "arm_compute/graph/Utils.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/functions/CLPadLayer.h"
+#include "src/graph/mutators/MutatorUtils.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
 #include "tests/datasets/ShapeDatasets.h"
@@ -110,6 +112,63 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
     ARM_COMPUTE_EXPECT(bool(CLPadLayer::validate(&input_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), padding, PixelValue(), mode)) == expected, framework::LogLevel::ERRORS);
 }
 
+DATA_TEST_CASE(CheckFusingWithConvolution, framework::DatasetMode::ALL, zip(zip(
+                framework::dataset::make("DataLayout",  { DataLayout::NCHW,
+                                                          DataLayout::NCHW,
+                                                          DataLayout::NCHW,
+                                                          DataLayout::NCHW,
+                                                          DataLayout::NCHW,
+                                                          DataLayout::NCHW,
+                                                          DataLayout::NCHW,
+                                                          DataLayout::NCHW,
+                                                          DataLayout::NHWC,
+                                                          DataLayout::NHWC,
+                                                          DataLayout::NHWC,
+                                                          DataLayout::NHWC,
+                                                          DataLayout::NHWC,
+                                                          DataLayout::NHWC,
+                                                          DataLayout::NHWC,
+                                                          DataLayout::UNKNOWN
+                                                        }),
+                framework::dataset::make("PaddingList", { PaddingList({{0, 0}, {1, 1}, {1, 1}}),          // nchw
+                                                          PaddingList({{1, 1}, {1, 1}, {0, 0}, {0, 0}}),
+                                                          PaddingList({{1, 1}, {1, 1}}),
+                                                          PaddingList({}),
+                                                          PaddingList({{0, 0}}),
+                                                          PaddingList({{0, 0}, {0, 0}, {0, 0}, {0, 0}}),
+                                                          PaddingList({{0, 0}, {0, 0}, {0, 0}, {1, 0}}),
+                                                          PaddingList({{0, 1}}),
+                                                          PaddingList({{0, 0}, {1, 1}, {1, 1}}),          // nhwc
+                                                          PaddingList({{0, 0}, {0, 0}, {1, 1}, {1, 1}}),
+                                                          PaddingList({{0, 0}, {1, 0}, {1, 1}, {0, 0}}),
+                                                          PaddingList({}),
+                                                          PaddingList({{0, 0}}),
+                                                          PaddingList({{0, 1}}),
+                                                          PaddingList({{0, 0}, {1, 1}}),
+                                                          PaddingList({{0, 0}})
+                                                        })),                           // unknown
+                framework::dataset::make("Expected",    { false,    // nchw
+                                                          true,
+                                                          true,
+                                                          true,
+                                                          true,
+                                                          true,
+                                                          false,
+                                                          true,
+                                                          true,     // nhwc
+                                                          false,
+                                                          true,
+                                                          true,
+                                                          true,
+                                                          false,
+                                                          true,
+                                                          false     // unknown
+                                                        })),
+                data_layout, padding_list, expected)
+{
+    ARM_COMPUTE_EXPECT(expected == arm_compute::graph::is_padding_in_height_or_width(data_layout, padding_list), framework::LogLevel::ERRORS);
+}
+
 // clang-format on
 // *INDENT-ON*
 
diff --git a/tests/validation/CL/Phase.cpp b/tests/validation/CL/Phase.cpp
deleted file mode 100644
index be7f9df2be..0000000000
--- a/tests/validation/CL/Phase.cpp
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLPhase.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/PhaseFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(Phase)
-
-template <typename T>
-using CLPhaseFixture = PhaseValidationFixture<CLTensor, CLAccessor, CLPhase, T>;
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPhaseFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S16)),
-                                                                                                   framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
-    // Validate output
-    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPhaseFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S16)),
-                                                                                                   framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
-    // Validate output
-    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-TEST_SUITE_END() // S16
-
-TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPhaseFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), framework::dataset::make("Format", Format::S32)),
-                                                                                                   framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
-    // Validate output
-    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLPhaseFixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::Large2DShapes(), framework::dataset::make("Format", Format::S32)),
-                                                                                                   framework::dataset::make("PhaseType", { PhaseType::SIGNED, PhaseType::UNSIGNED })))
-{
-    // Validate output
-    validate_wrap(CLAccessor(_target), _reference, tolerance_value, 0);
-}
-TEST_SUITE_END() // S32
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
index 70e618efa1..62ff15a37f 100644
--- a/tests/validation/CL/PixelWiseMultiplication.cpp
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -36,6 +36,9 @@ namespace test
 {
 namespace validation
 {
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Mul.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
 namespace
 {
 namespace
@@ -50,9 +53,6 @@ const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.75f, 0.25f),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.75f, 0.25f)
 });
-// Since in-place computation on CL-side hasn't been intended to be implemented, they are not tested.
-// However, this dataset is required for the shared fixture and it would make extension easier when
-// CL-side also starts supporting in-place computation.
 const auto InPlaceDataSet = framework::dataset::make("InPlace", { false });
 } //namespace
 // *INDENT-OFF*
@@ -81,7 +81,9 @@ using CLPixelWiseMultiplicationToF16Fixture = PixelWiseMultiplicationValidationF
 template <typename T>
 using CLPixelWiseMultiplicationToF32Fixture = PixelWiseMultiplicationValidationFloatFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>;
 template <typename T>
-using CLPixelWiseMultiplicationBroadcastFixture = PixelWiseMultiplicationBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>;
+using CLPixelWiseMultiplicationToF32BroadcastFixture = PixelWiseMultiplicationBroadcastValidationFloatFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, float>;
+template <typename T>
+using CLPixelWiseMultiplicationIntegerFixture = PixelWiseMultiplicationValidationIntegerFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, int>;
 
 TEST_SUITE(CL)
 TEST_SUITE(PixelWiseMultiplication)
@@ -91,27 +93,24 @@ TEST_SUITE(PixelWiseMultiplication)
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid scale
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                                                         TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
                                                       }),
                framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                      })),
-               framework::dataset::make("Scale",{  2.f, 2.f, 2.f, -1.f, 1.f, 1.f})),
-               framework::dataset::make("Expected", { true, true, false, false, false, false})),
+               framework::dataset::make("Scale",{  2.f, 2.f, -1.f, 1.f, 1.f})),
+               framework::dataset::make("Expected", { true, true, false, false, false})),
                input1_info, input2_info, output_info, scale, expected)
 {
     bool has_error = bool(CLPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO));
@@ -119,6 +118,33 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
 }
 // clang-format on
 // *INDENT-ON*
+TEST_SUITE(INT32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationIntegerFixture<int>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(combine(
+                                                                           datasets::SmallShapes(),
+                                                                           framework::dataset::make("DataType1", DataType::S32)),
+                                                                       framework::dataset::make("DataType2", DataType::S32)),
+                                                               framework::dataset::make("Scale", { 1.f })),
+                                                       datasets::ConvertPolicies()),
+                                               framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)),
+                                       EmptyActivationFunctionsDataset),
+                               InPlaceDataSet))
+{
+    validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationIntegerFixture<int>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapes(),
+                                                                               framework::dataset::make("DataType1", DataType::S32)),
+                                                                       framework::dataset::make("DataType2", DataType::S32)),
+                                                               framework::dataset::make("Scale", { 1.f })),
+                                                       datasets::ConvertPolicies()),
+                                               framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)),
+                                       EmptyActivationFunctionsDataset),
+                               framework::dataset::make("InPlace", { true })))
+{
+    validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END()
 
 TEST_SUITE(F16toF16)
 TEST_SUITE(Scale255)
@@ -133,18 +159,36 @@ TEST_SUITE(F32toF32)
 TEST_SUITE(Scale255)
 PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToF32Fixture<float>, PRECOMMIT, SmallShapes(), F32, F32, scale_255, TO_NEAREST_UP, EmptyActivationFunctionsDataset, VALIDATE(float, 1.f))
 PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunWithActivation, ToF32Fixture<float>, ALL, TinyShapes(), F32, F32, scale_255, TO_NEAREST_UP, ActivationFunctionsDataset, VALIDATE(float, 1.f))
+FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationToF32Fixture<float>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapes(),
+                                                                               framework::dataset::make("DataTypeIn1", DataType::F32)),
+                                                                       framework::dataset::make("DataTypeIn2", DataType::F32)),
+                                                               framework::dataset::make("Scale", { scale_255 })),
+                                                       datasets::ConvertPolicies()),
+                                               framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_UP)),
+                                       EmptyActivationFunctionsDataset),
+                               framework::dataset::make("InPlace", { true })))
+{
+    // Validate output
+    VALIDATE(float, 1.f)
+}
 TEST_SUITE_END() // Scale255
 TEST_SUITE_END() // F32toF32
 
-PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, BroadcastFixture<float>, PRECOMMIT, SmallShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP, EmptyActivationFunctionsDataset,
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, ToF32BroadcastFixture<float>, PRECOMMIT, SmallShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP,
+                                                 EmptyActivationFunctionsDataset,
                                                  VALIDATE(float, 1.f))
-PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunWithActivationSmallBroadcast, BroadcastFixture<float>, ALL, TinyShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP, ActivationFunctionsDataset,
+PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunWithActivationSmallBroadcast, ToF32BroadcastFixture<float>, ALL, TinyShapesBroadcast(), F32, F32, scale_255, TO_NEAREST_UP,
+                                                 ActivationFunctionsDataset,
                                                  VALIDATE(float, 1.f))
 
 template <typename T>
 using CLPixelWiseMultiplicationQuantizedFixture   = PixelWiseMultiplicationValidationQuantizedFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, T>;
 using CLPixelWiseMultiplicationQSYMM16ToS32Fxture = PixelWiseMultiplicationValidationQuantizedFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, int16_t, int16_t, int32_t>;
 
+template <typename T>
+using CLPixelWiseMultiplicationQuantizedBroadcastFixture = PixelWiseMultiplicationBroadcastValidationQuantizedFixture<CLTensor, CLAccessor, CLPixelWiseMultiplication, T, T>;
+
 TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
@@ -163,6 +207,41 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture<uint8
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, CLPixelWiseMultiplicationQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapesBroadcast(),
+                                                                                                       framework::dataset::make("DataTypeIn1", DataType::QASYMM8)),
+                                                                                               framework::dataset::make("DataTypeIn2", DataType::QASYMM8)),
+                                                                                       framework::dataset::make("DataTypeOut", DataType::QASYMM8)),
+                                                                               framework::dataset::make("Scale", { 1.f, 2.f })),
+                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                                               framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)),
+                                                       framework::dataset::make("Src0QInfo", { QuantizationInfo(5.f / 255.f, 20) })),
+                                               framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                                       framework::dataset::make("OUtQInfo", { QuantizationInfo(1.f / 255.f, 5) })),
+                               InPlaceDataSet))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+FIXTURE_DATA_TEST_CASE(RunInplace, CLPixelWiseMultiplicationQuantizedBroadcastFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+                       combine(combine(combine(combine(combine(combine(combine(combine(combine(combine(datasets::TinyShapesBroadcastInplace(),
+                                                                                                       framework::dataset::make("DataTypeIn1", DataType::QASYMM8)),
+                                                                                               framework::dataset::make("DataTypeIn2", DataType::QASYMM8)),
+                                                                                       framework::dataset::make("DataTypeOut", DataType::QASYMM8)),
+                                                                               framework::dataset::make("Scale", { 1.f, 2.f })),
+                                                                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                                               framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)),
+                                                       framework::dataset::make("Src0QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                                               framework::dataset::make("Src1QInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                                       framework::dataset::make("OUtQInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+                               framework::dataset::make("InPlace", { true })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
 TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
diff --git a/tests/validation/CL/Pooling3dLayer.cpp b/tests/validation/CL/Pooling3dLayer.cpp
new file mode 100644
index 0000000000..84d630e6cf
--- /dev/null
+++ b/tests/validation/CL/Pooling3dLayer.cpp
@@ -0,0 +1,345 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/core/TensorShape.h"
+#include "tests/framework/datasets/Datasets.h"
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/CLTensorAllocator.h"
+#include "arm_compute/runtime/CL/functions/CLPooling3dLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/Pooling3dLayerDataset.h"
+#include "tests/datasets/PoolingTypesDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/Pooling3dLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Input data sets for floating-point data types */
+const auto Pooling3dLayerDatasetFP = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })),
+                                                             framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(2, 2, 1) })),
+                                                     framework::dataset::make("Padding", { Padding3D(0, 1, 0), Padding3D(1, 1, 1) })),
+                                             framework::dataset::make("ExcludePadding", { true, false }));
+
+const auto Pooling3dLayerDatasetFPSmall = combine(combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { Size3D(2, 2, 2), Size3D(3, 3, 3) })),
+                                                                  framework::dataset::make("Stride", { Size3D(2, 2, 2), Size3D(2, 1, 1) })),
+                                                          framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
+                                                  framework::dataset::make("ExcludePadding", { true, false }));
+
+const auto Pooling3DLayerDatasetQuantized = combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+                                                                            framework::dataset::make("PoolingSize", { Size3D(2, 3, 2) })),
+                                                                    framework::dataset::make("Stride", { Size3D(1, 1, 1), Size3D(2, 1, 1), Size3D(1, 2, 1), Size3D(1, 1, 2), Size3D(2, 2, 1)})),
+                                                            framework::dataset::make("Padding", { Padding3D(0, 0, 0), Padding3D(1, 1, 1), Padding3D(1, 0, 0) })),
+                                                    framework::dataset::make("ExcludePadding", { true }));
+
+using ShapeDataset = framework::dataset::ContainerDataset<std::vector<TensorShape>>;
+
+constexpr AbsoluteTolerance<float>   tolerance_f32(0.001f);       /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */
+constexpr AbsoluteTolerance<float>   tolerance_f16(0.1f);         /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
+constexpr AbsoluteTolerance<int8_t>  tolerance_qasymm8_signed(1); /**< Tolerance value for comparing reference's output against implementation's output for QASYMM8_SIGNED integer datatype*/
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);        /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
+
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(Pooling3dLayer)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Mismatching data type
+                                                       TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination
+                                                       TensorInfo(TensorShape(2U, 27U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid pad/size combination
+                                                       TensorInfo(TensorShape(2U, 27U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output shape
+                                                       TensorInfo(TensorShape(5U, 13U, 15U, 2U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global Pooling
+                                                       TensorInfo(TensorShape(13U,13U, 5U, 1U, 2U), 1, DataType::F32, DataLayout::NDHWC),  // Invalid output Global Pooling
+                                                       TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Invalid data type
+                                                       TensorInfo(TensorShape(5U, 13U, 13U, 4U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 13U, 13U, 5U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(1U, 16U,  1U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 13U, 13U, 4U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 13U, 13U, 4U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+                                                     }),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(2U, 25U, 11U, 3U, 3U), 1, DataType::F16, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(2U, 30U, 11U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(2U, 25U, 16U, 3U, 2U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(2U, 27U, 13U, 3U, 3U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U,  1U,  1U, 1U, 3U), 1, DataType::F32, DataLayout::NDHWC), // Global pooling applied
+                                                       TensorInfo(TensorShape(5U,  2U,  2U, 2U, 2U), 1, DataType::F32, DataLayout::NDHWC), // Invalid output Global Pooling
+                                                       TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 12U, 12U, 3U, 4U), 1, DataType::QASYMM8, DataLayout::NDHWC), // Invalid data type
+                                                       TensorInfo(TensorShape(5U,  1U, 1U, 1U, 4U), 1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(1U, 15U, 1U, 2U, 4U), 1, DataType::F32, DataLayout::NDHWC), // Output width larger than input
+                                                       TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U),  1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 6U, 6U, 2U, 2U),  1, DataType::F32, DataLayout::NDHWC), 
+                                                       TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U),  1, DataType::F32, DataLayout::NDHWC),
+                                                       TensorInfo(TensorShape(5U, 6U, 6U, 2U, 3U),  1, DataType::F32, DataLayout::NDHWC),
+                                                     })),
+               framework::dataset::make("PoolInfo",  { Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(2, 0, 0)),
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+                                                       Pooling3dLayerInfo(PoolingType::L2,  3, Size3D(1, 1, 1), Padding3D(0, 0, 0)),
+                                                       Pooling3dLayerInfo(PoolingType::AVG),
+                                                       Pooling3dLayerInfo(PoolingType::MAX),
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(), Padding3D(), false),
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(1U, 1U, 1U), Padding3D(), false),
+                                                       Pooling3dLayerInfo(PoolingType::AVG),
+                                                       Pooling3dLayerInfo(PoolingType::MAX, 2, Size3D(1, 1, 2), Padding3D(0, 0, 0), false),
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 2, Size3D(2U, 2U, 2U), Padding3D(), false),
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), true), // Pool size is smaller than the padding size with padding excluded
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 1, Size3D(2U, 2U, 2U), Padding3D(2, 2, 2), false), // Pool size is smaller than the padding size with padding included
+                                                       Pooling3dLayerInfo(PoolingType::AVG, 3, Size3D(2U, 2U, 2U), Padding3D(2,1,2,2,1,2), false, false, DimensionRoundingType::CEIL), // CEIL with asymmetric Padding
+                                                      })),
+               framework::dataset::make("Expected", { false, false, false, false, true, false, false, false, true , false, true, false, false, false})),
+               input_info, output_info, pool_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLPooling3dLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS);
+}
+
+
+template <typename T>
+using CLPooling3dLayerFixture = Pooling3dLayerValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+template <typename T>
+using CLSpecialPooling3dLayerFixture = SpecialPooling3dLayerValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+template <typename T>
+using CLPooling3dLayerGlobalFixture = Pooling3dLayerGlobalValidationFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+template <typename T>
+using CLPooling3dLayerQuantizedFixture = Pooling3dLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPooling3dLayer, T>;
+
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE(QUANTIZED)
+
+TEST_SUITE(QASYMM8)
+// Small Dataset Quantized Dataset
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+                                                                                                                       combine(Pooling3DLayerDatasetQuantized,
+                                                                                                                               framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, 10), QuantizationInfo(1.f / 127.f, 10) })),
+                                                                                                                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, 5), QuantizationInfo(1.f / 127.f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+// Large Dataset Quantized Dataset
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(),
+                                                                                                                       combine(Pooling3DLayerDatasetQuantized,
+                                                                                                                               framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, 10), QuantizationInfo(1.f / 127.f, 10) })),
+                                                                                                                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, 5), QuantizationInfo(1.f / 127.f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+// Large Dataset Quantized Dataset Signed
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::Small5dShapes(),
+                                                                                                                      combine(Pooling3DLayerDatasetQuantized,
+                                                                                                                              framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+                                                                                                                      framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10), QuantizationInfo(1.f / 127.f, -10) })),
+                                                                                                                      framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -5), QuantizationInfo(1.f / 127.f, -10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed);
+}
+
+// Large Dataset Quantized pooling test
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerQuantizedFixture<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large5dShapes(),
+                                                                                                                    combine(Pooling3DLayerDatasetQuantized,
+                                                                                                                            framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+                                                                                                                    framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10), QuantizationInfo(1.f / 127.f, -10) })),
+                                                                                                                    framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -5), QuantizationInfo(1.f / 127.f, -10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8_signed);
+}
+
+TEST_SUITE_END()
+TEST_SUITE_END()
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+
+FIXTURE_DATA_TEST_CASE(RunSpecial, CLSpecialPooling3dLayerFixture<float>, framework::DatasetMode::ALL, datasets::Pooling3dLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5dShapes(), combine(Pooling3dLayerDatasetFPSmall,
+                                                                                                            framework::dataset::make("DataType", DataType::F32))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP,
+                                                                                                          framework::dataset::make("DataType", DataType::F32))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U),
+                                                                             TensorShape(4U, 27U, 13U, 4U, 2U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })),
+                                    framework::dataset::make("Strides",  Size3D(1, 1, 1))),
+                                    framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallGlobal, CLPooling3dLayerGlobalFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U),
+                                                                             TensorShape(27U, 13U, 4U, 4U, 2U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U),
+                                                                             TensorShape(4U, 79U, 37U, 11U, 2U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })),
+                                    framework::dataset::make("Strides",  Size3D(1, 1, 1))),
+                                    framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(datasets::Small5x5Shapes(), combine(Pooling3dLayerDatasetFPSmall,
+                                                                                                           framework::dataset::make("DataType", DataType::F16))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(datasets::Large5dShapes(), combine(Pooling3dLayerDatasetFP,
+                                                                                                         framework::dataset::make("DataType", DataType::F16))))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPooling3dLayerFixture<half>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(3U, 27U, 13U, 4U),
+                                                                             TensorShape(4U, 27U, 13U, 4U, 2U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size3D(27, 13, 4) })),
+                                    framework::dataset::make("Strides",  Size3D(1, 1, 1))),
+                                    framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallGlobal, CLPooling3dLayerGlobalFixture<half>, framework::DatasetMode::ALL,
+                       combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(27U, 13U, 4U, 3U),
+                                                                             TensorShape(27U, 13U, 4U, 4U, 2U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPooling3dLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(4U, 79U, 37U, 11U),
+                                                                             TensorShape(4U, 79U, 37U, 11U, 2U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size3D(79, 37, 11) })),
+                                    framework::dataset::make("Strides",  Size3D(1, 1, 1))),
+                                    framework::dataset::make("Paddings", Padding3D(0, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+TEST_SUITE_END() // Pooling3dLayer
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index c79775e1e2..9fe28c7acf 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -101,6 +101,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32),     // Invalid output Global Pooling
                                                        TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::QASYMM8),
                                                        TensorInfo(TensorShape(13U, 13U, 5U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(1U, 16U, 1U),  1, DataType::F32),
                                                      }),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16),
                                                        TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32),
@@ -110,6 +111,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        TensorInfo(TensorShape(2U, 2U, 5U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(12U, 12U, 5U), 1, DataType::QASYMM8),
                                                        TensorInfo(TensorShape(1U, 1U, 5U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(1U, 15U, 1U), 1, DataType::F32),
                                                      })),
                framework::dataset::make("PoolInfo",  { PoolingLayerInfo(PoolingType::AVG, 3, DataLayout::NCHW, PadStrideInfo(1, 1, 0, 0)),
                                                        PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NCHW, PadStrideInfo(1, 1, 2, 0)),
@@ -119,8 +121,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                        PoolingLayerInfo(PoolingType::MAX, DataLayout::NCHW),
                                                        PoolingLayerInfo(PoolingType::AVG, 2, DataLayout::NHWC, PadStrideInfo(), false),
                                                        PoolingLayerInfo(PoolingType::AVG, DataLayout::NCHW),
+                                                       PoolingLayerInfo(PoolingType::MAX, 2, DataLayout::NHWC, PadStrideInfo(1, 1, 0, 0), false),
                                                       })),
-               framework::dataset::make("Expected", { false, false, false, false, true, false, true, true })),
+               framework::dataset::make("Expected", { false, false, false, false, true, false, true, true , false})),
                input_info, output_info, pool_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS);
@@ -131,6 +134,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
 
 template <typename T>
 using CLPoolingLayerFixture = PoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
+template <typename T>
+using CLPoolingLayerMixedDataLayoutFixture = PoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T, true>;
 
 template <typename T>
 using CLSpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
@@ -148,7 +153,7 @@ FIXTURE_DATA_TEST_CASE(RunSpecial, CLSpecialPoolingLayerFixture<float>, framewor
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerDatasetFPSmall,
                                                                                                                   framework::dataset::make("DataType",
                                                                                                                           DataType::F32))),
                                                                                                           pool_data_layout_dataset))
@@ -156,6 +161,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::Datase
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(),
+                       combine(combine(combine(combine(datasets::PoolingTypes(),
+                                                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
+                                               framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0) })),
+                                       framework::dataset::make("ExcludePadding", { false })),
+                               framework::dataset::make("DataType", DataType::F32))),
+                       pool_data_layout_dataset))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetFP,
                                                                                                                 framework::dataset::make("DataType",
                                                                                                                         DataType::F32))),
@@ -165,21 +181,61 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::Datase
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPIndicesSmall,
-                                                                                                                        framework::dataset::make("DataType",
-                                                                                                                                DataType::F32))),
-                                                                                                                        pool_data_layout_dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+                                                                                                                        combine(PoolingLayerDatasetFPIndicesSmall,
+                                                                                                                                framework::dataset::make("DataType",
+                                                                                                                                        DataType::F32))),
+                                                                                                                        pool_data_layout_dataset),framework::dataset::make("UseKernelIndices", { false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
     validate(CLAccessor(_target_indices), _ref_indices);
 }
 
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U),
+                                                                             TensorShape(27U, 13U, 2U, 4U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size2D(27, 13) })),
+                                    framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F32)),
+                                    framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U),
+                                                                             TensorShape(79U, 37U, 11U, 4U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size2D(79, 37) })),
+                                    framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F32)),
+                                    framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLMixedPrecesionPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPSmall,
-                                                                                                                       framework::dataset::make("DataType", DataType::F16))),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLMixedPrecesionPoolingLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+                                                                                                                       combine(PoolingLayerDatasetFPSmall,
+                                                                                                                               framework::dataset::make("DataType", DataType::F16))),
                                                                                                                        pool_data_layout_dataset),
                                                                                                                        pool_fp_mixed_precision_dataset))
 {
@@ -194,15 +250,55 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLMixedPrecesionPoolingLayerFixture<half>, fram
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
-FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFPIndicesSmall,
-                                                                                                                       framework::dataset::make("DataType",
-                                                                                                                               DataType::F16))),
-                                                                                                                       pool_data_layout_dataset))
+FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+                                                                                                                       combine(PoolingLayerDatasetFPIndicesSmall,
+                                                                                                                               framework::dataset::make("DataType",
+                                                                                                                                       DataType::F16))),
+                                                                                                                       pool_data_layout_dataset), framework::dataset::make("UseKernelIndices", { false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
     validate(CLAccessor(_target_indices), _ref_indices);
 }
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<half>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U),
+                                                                             TensorShape(27U, 13U, 2U, 4U)
+                                                                            }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size2D(27, 13) })),
+                                    framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F16)),
+                                    framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(
+                                    framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U),
+                                                                             TensorShape(79U, 37U, 11U, 4U)
+                                                                           }),
+                                    framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                                    framework::dataset::make("PoolingSize", { Size2D(79, 37) })),
+                                    framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                                    framework::dataset::make("ExcludePadding", false)),
+                                    framework::dataset::make("DataType", DataType::F16)),
+                                    framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
 
@@ -210,11 +306,29 @@ TEST_SUITE(Quantized)
 
 template <typename T>
 using CLPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T>;
+template <typename T>
+using CLPoolingLayerQuantizedMixedDataLayoutFixture = PoolingLayerValidationQuantizedFixture<CLTensor, CLAccessor, CLPoolingLayer, T, true>;
 
 TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small,
-                                                                                                                     framework::dataset::make("DataType", DataType::QASYMM8))),
-                                                                                                                     pool_data_layout_dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
+                                                                                                                     combine(PoolingLayerDatasetQASYMM8Small,
+                                                                                                                             framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                                     pool_data_layout_dataset),
+                                                                                                                     framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 255.f, 10), QuantizationInfo(1.f / 255.f, 10) })),
+                                                                                                                     framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 255.f, 5), QuantizationInfo(1.f / 255.f, 10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
+                       combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+                                                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
+                                               framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
+                                       framework::dataset::make("ExcludePadding", { true })),
+                               framework::dataset::make("DataType", DataType::QASYMM8))),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })),
+                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 255.f, 5) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -222,9 +336,25 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framew
 TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8Small,
-                                                                                                                    framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
-                                                                                                                    pool_data_layout_dataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
+                                                                                                                    combine(PoolingLayerDatasetQASYMM8Small,
+                                                                                                                            framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+                                                                                                                    pool_data_layout_dataset),
+                                                                                                                    framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10), QuantizationInfo(1.f / 127.f, -10) })),
+                                                                                                                    framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -5), QuantizationInfo(1.f / 127.f, -10) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
+}
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallNoneUnitShapes(),
+                       combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+                                                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
+                                               framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
+                                       framework::dataset::make("ExcludePadding", { true })),
+                               framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })),
+                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })),
+                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
diff --git a/tests/validation/CL/ROIPoolingLayer.cpp b/tests/validation/CL/ROIPoolingLayer.cpp
new file mode 100644
index 0000000000..eb16c1baec
--- /dev/null
+++ b/tests/validation/CL/ROIPoolingLayer.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLROIPoolingLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/Globals.h"
+#include "tests/datasets/ROIDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ROIPoolingLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> relative_tolerance_f32(0.01f);
+AbsoluteTolerance<float> absolute_tolerance_f32(0.001f);
+
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);
+} // end namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(RoiPooling)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Successful test
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::QASYMM8), // Successful test (quantized)
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Incorrect rois type
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching data type input/output
+                                                       TensorInfo(TensorShape(250U, 128U, 2U), 1, DataType::F32), // Mismatching depth size input/output
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching number of rois and output batch size
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Invalid number of values per ROIS
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching height and width input/output
+
+                                                     }),
+               framework::dataset::make("RoisInfo", { TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::F16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 10U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(4, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                    })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(5U, 5U, 3U, 4U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("PoolInfo", { ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      })),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, false })),
+               input_info, rois_info, output_info, pool_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLROIPoolingLayer::validate(&input_info.clone()->set_is_resizable(true), &rois_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), pool_info)) == expected, framework::LogLevel::ERRORS);
+}
+
+using CLROIPoolingLayerFloatFixture = ROIPoolingLayerFixture<CLTensor, CLAccessor, CLROIPoolingLayer, float>;
+
+TEST_SUITE(Float)
+FIXTURE_DATA_TEST_CASE(Small, CLROIPoolingLayerFloatFixture, framework::DatasetMode::ALL,
+                       framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(),
+                                                    framework::dataset::make("DataType", { DataType::F32 })),
+                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, relative_tolerance_f32, .02f, absolute_tolerance_f32);
+}
+
+TEST_SUITE_END() // Float test suite end
+
+// Begin quantized tests
+template <typename T>
+using CLROIPoolingLayerQuantizedFixture = ROIPoolingLayerQuantizedFixture<CLTensor, CLAccessor, CLROIPoolingLayer, T>;
+
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(Small, CLROIPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(datasets::SmallROIDataset(),
+                                                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 127) })),
+                               framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(2.f / 255.f, 120) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // end qasymm8 tests
+
+TEST_SUITE_END() // RoiPooling
+TEST_SUITE_END() // NEON
+
+} // validation namespace end
+} // test namespace end
+} // arm_compute namespace end
diff --git a/tests/validation/CL/ReduceMean.cpp b/tests/validation/CL/ReduceMean.cpp
index 947f84af49..8a8fa4aef0 100644
--- a/tests/validation/CL/ReduceMean.cpp
+++ b/tests/validation/CL/ReduceMean.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -45,7 +45,7 @@ constexpr AbsoluteTolerance<float>   tolerance_f32(0.001f); /**< Tolerance value
 constexpr AbsoluteTolerance<float>   tolerance_f16(0.03f);  /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */
 constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);  /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric quantized type */
 
-const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(0, 1, 2, 3) }),
+const auto axis_keep = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1, 0), Coordinates(1, 2), Coordinates(0, 2), Coordinates(1, 3), Coordinates(2, 3), Coordinates(0, 1, 2, 3) }),
                                framework::dataset::make("KeepDims", { true }));
 const auto axis_drop = combine(framework::dataset::make("Axis", { Coordinates(0), Coordinates(1), Coordinates(3), Coordinates(1, 2), Coordinates(2, 1) }), framework::dataset::make("KeepDims", { false }));
 } // namespace
diff --git a/tests/validation/CL/ReductionOperation.cpp b/tests/validation/CL/ReductionOperation.cpp
index 31c5a97925..beb58381ca 100644
--- a/tests/validation/CL/ReductionOperation.cpp
+++ b/tests/validation/CL/ReductionOperation.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,10 +50,11 @@ RelativeTolerance<float> rel_tolerance_f16(0.2f);
 /** Tolerance for quantized operations */
 RelativeTolerance<float> tolerance_qasymm8(1);
 
-const auto ReductionOperationsSumProd = framework::dataset::make("ReductionOperationsSumProd",
+const auto ReductionOperationsSumProdMean = framework::dataset::make("ReductionOperationsSumProdMean",
 {
     ReductionOperation::SUM,
     ReductionOperation::PROD,
+    ReductionOperation::MEAN_SUM
 
 });
 const auto ReductionOperationsMinMax = framework::dataset::make("ReductionMinMax",
@@ -109,15 +110,16 @@ using CLReductionOperationFixture = ReductionOperationFixture<CLTensor, CLAccess
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
 FIXTURE_DATA_TEST_CASE(RunSmall4D, CLReductionOperationFixture<half>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd,
-                                       ReductionOperationsMinMax)),
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
+                                       concat(ReductionOperationsSumProdMean,
+                                              ReductionOperationsMinMax)),
                                KeepDimensions))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd,
+                       combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProdMean,
                                        ReductionOperationsMinMax)),
                                KeepDimensions))
 {
@@ -127,15 +129,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<half>, framework::D
 TEST_SUITE_END() // F16
 TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall4D, CLReductionOperationFixture<float>, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd,
-                                       ReductionOperationsMinMax)),
+                       combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
+                                       concat(ReductionOperationsSumProdMean,
+                                              ReductionOperationsMinMax)),
                                KeepDimensions))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 FIXTURE_DATA_TEST_CASE(RunLarge, CLReductionOperationFixture<float>, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProd,
+                       combine(combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("Axis", { 0, 1, 2, 3 })), concat(ReductionOperationsSumProdMean,
                                        ReductionOperationsMinMax)),
                                KeepDimensions))
 {
@@ -152,7 +155,7 @@ TEST_SUITE(Quantized)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                                               ReductionOperationsSumProd),
+                                               ReductionOperationsSumProdMean),
                                        framework::dataset::make("QuantizationInfo", QuantizationInfo(1.f / 64, 2))),
                                KeepDimensions))
 {
@@ -172,7 +175,7 @@ TEST_SUITE_END() // QASYMM8
 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLReductionOperationQuantizedFixture<int8_t>, framework::DatasetMode::ALL,
                        combine(combine(combine(combine(combine(datasets::Small4DShapes(), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("Axis", { 0, 1, 2, 3 })),
-                                               ReductionOperationsSumProd),
+                                               ReductionOperationsSumProdMean),
                                        framework::dataset::make("QuantizationInfo", QuantizationInfo(1.f / 64, 2))),
                                KeepDimensions))
 {
diff --git a/tests/validation/CL/Reverse.cpp b/tests/validation/CL/Reverse.cpp
index 11df0e7803..82effc2136 100644
--- a/tests/validation/CL/Reverse.cpp
+++ b/tests/validation/CL/Reverse.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -41,9 +41,10 @@ namespace test
 {
 namespace validation
 {
+using framework::dataset::make;
 namespace
 {
-auto run_small_dataset = combine(datasets::SmallShapes(), datasets::Tiny1DShapes());
+auto run_small_dataset = combine(datasets::Small3DShapes(), datasets::Tiny1DShapes());
 auto run_large_dataset = combine(datasets::LargeShapes(), datasets::Tiny1DShapes());
 
 } // namespace
@@ -53,33 +54,34 @@ TEST_SUITE(Reverse)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-        framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype
+        make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Invalid axis datatype
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis shape
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid axis length (> 4)
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                             TensorInfo(TensorShape(2U), 1, DataType::U8),
         }),
-        framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
+        make("OutputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                             TensorInfo(TensorShape(2U, 13U, 2U), 1, DataType::U8),
                                             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                             TensorInfo(TensorShape(2U), 1, DataType::U8),
         })),
-        framework::dataset::make("AxisInfo",{ TensorInfo(TensorShape(3U), 1, DataType::U8),
+        make("AxisInfo",{ TensorInfo(TensorShape(3U), 1, DataType::U8),
                                            TensorInfo(TensorShape(2U, 10U), 1, DataType::U32),
                                            TensorInfo(TensorShape(8U), 1, DataType::U32),
                                            TensorInfo(TensorShape(2U), 1, DataType::U32),
                                            TensorInfo(TensorShape(2U), 1, DataType::U32),
                                            TensorInfo(TensorShape(2U), 1, DataType::U32),
         })),
-        framework::dataset::make("Expected", { false, false, false, false, true, true})),
+        make("Expected", { false, false, false, false, true, true})),
         src_info, dst_info, axis_info, expected)
 {
     Status s = CLReverse::validate(&src_info.clone()->set_is_resizable(false),
                                   &dst_info.clone()->set_is_resizable(false),
-                                  &axis_info.clone()->set_is_resizable(false));
+                                  &axis_info.clone()->set_is_resizable(false),
+                                  false);
     ARM_COMPUTE_EXPECT(bool(s) == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
@@ -93,7 +95,11 @@ TEST_SUITE(F16)
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        CLReverseFixture<half>,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(run_small_dataset, framework::dataset::make("DataType", DataType::F16)))
+                       combine(
+                           run_small_dataset,
+                           make("DataType", DataType::F16),
+                           make("use_negative_axis", { true, false }),
+                           make("use_inverted_axis", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -102,7 +108,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLReverseFixture<half>,
                        framework::DatasetMode::NIGHTLY,
-                       combine(run_large_dataset, framework::dataset::make("DataType", DataType::F16)))
+                       combine(
+                           run_large_dataset,
+                           make("DataType", DataType::F16),
+                           make("use_negative_axis", { true, false }),
+                           make("use_inverted_axis", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -113,7 +123,11 @@ TEST_SUITE(FP32)
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        CLReverseFixture<float>,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(run_small_dataset, framework::dataset::make("DataType", DataType::F32)))
+                       combine(
+                           run_small_dataset,
+                           make("DataType", DataType::F32),
+                           make("use_negative_axis", { true, false }),
+                           make("use_inverted_axis", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -122,7 +136,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLReverseFixture<float>,
                        framework::DatasetMode::NIGHTLY,
-                       combine(run_large_dataset, framework::dataset::make("DataType", DataType::F32)))
+                       combine(
+                           run_large_dataset,
+                           make("DataType", DataType::F32),
+                           make("use_negative_axis", { true, false }),
+                           make("use_inverted_axis", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -135,7 +153,11 @@ TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall,
                        CLReverseFixture<uint8_t>,
                        framework::DatasetMode::PRECOMMIT,
-                       combine(run_small_dataset, framework::dataset::make("DataType", DataType::QASYMM8)))
+                       combine(
+                           run_small_dataset,
+                           make("DataType", DataType::QASYMM8),
+                           make("use_negative_axis", { true, false }),
+                           make("use_inverted_axis", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -144,7 +166,11 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLReverseFixture<uint8_t>,
                        framework::DatasetMode::NIGHTLY,
-                       combine(run_large_dataset, framework::dataset::make("DataType", DataType::QASYMM8)))
+                       combine(
+                           run_large_dataset,
+                           make("DataType", DataType::QASYMM8),
+                           make("use_negative_axis", { true, false }),
+                           make("use_inverted_axis", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/CL/RoundLayer.cpp b/tests/validation/CL/RoundLayer.cpp
index 5aa9ca6b4e..f0c88d1ad3 100644
--- a/tests/validation/CL/RoundLayer.cpp
+++ b/tests/validation/CL/RoundLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/CL/CLAccessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ElementWiseUnaryFixture.h"
+#include "tests/validation/fixtures/ElementwiseUnaryFixture.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/CL/RsqrtLayer.cpp b/tests/validation/CL/RsqrtLayer.cpp
index 29c113b105..2353bda8d3 100644
--- a/tests/validation/CL/RsqrtLayer.cpp
+++ b/tests/validation/CL/RsqrtLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2019, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/CL/CLAccessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ElementWiseUnaryFixture.h"
+#include "tests/validation/fixtures/ElementwiseUnaryFixture.h"
 
 namespace arm_compute
 {
@@ -42,8 +42,11 @@ namespace validation
 {
 namespace
 {
-RelativeTolerance<float> tolerance_fp32(0.000001f);
-RelativeTolerance<float> tolerance_fp16(0.001f);
+RelativeTolerance<float>             tolerance_fp32(0.000001f);
+RelativeTolerance<float>             tolerance_fp16(0.001f);
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);   /**< Tolerance value for comparing reference's output against implementation's output for unsigned 8-bit asymmetric type */
+constexpr AbsoluteTolerance<int8_t>  tolerance_qasymm8_s(1); /**< Tolerance value for comparing reference's output against implementation's output for signed 8-bit asymmetric type */
+
 } // namespace
 TEST_SUITE(CL)
 TEST_SUITE(RsqrtLayer)
@@ -68,6 +71,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
 // *INDENT-ON*
 template <typename T>
 using CLRsqrtLayerFixture = RsqrtValidationFixture<CLTensor, CLAccessor, CLRsqrtLayer, T>;
+template <typename T>
+using CLRsqrtLayerQuantizedFixture = RsqrtQuantizedValidationFixture<CLTensor, CLAccessor, CLRsqrtLayer, T>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP16)
@@ -102,6 +107,30 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLRsqrtLayerFixture<float>, framework::DatasetM
 TEST_SUITE_END() // FP32
 TEST_SUITE_END() // Float
 
+TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLRsqrtLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                  DataType::QASYMM8_SIGNED)),
+                                                                                                                  framework::dataset::make("SrcQInfo", { QuantizationInfo(0.4044, -128) })),
+                                                                                                                  framework::dataset::make("OutQInfo", { QuantizationInfo(0.0027, -128) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
+}
+TEST_SUITE_END() // QASYMM8_SIGNED
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(RunSmall, CLRsqrtLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
+                                                                                                                   DataType::QASYMM8)),
+                                                                                                                   framework::dataset::make("SrcQInfo", { QuantizationInfo(0.4044, 0) })),
+                                                                                                                   framework::dataset::make("OutQInfo", { QuantizationInfo(0.0027, 0) })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // QASYMM8
+TEST_SUITE_END() // Quantized
+
 TEST_SUITE_END() // RsqrtLayer
 TEST_SUITE_END() // CL
 } // namespace validation
diff --git a/tests/validation/CL/Scale.cpp b/tests/validation/CL/Scale.cpp
index 523b49deb7..10a99ae34f 100644
--- a/tests/validation/CL/Scale.cpp
+++ b/tests/validation/CL/Scale.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -82,6 +82,7 @@ constexpr AbsoluteTolerance<int16_t> tolerance_s16(1);
 constexpr float                      tolerance_f32_absolute(0.001f);
 
 RelativeTolerance<float> tolerance_f32(0.05);
+constexpr float          abs_tolerance_f16(0.1f);
 RelativeTolerance<half>  tolerance_f16(half(0.1));
 
 constexpr float tolerance_num_f32(0.01f);
@@ -186,16 +187,6 @@ TEST_CASE(AlignedCornerNotSupported, framework::DatasetMode::ALL)
     ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
 }
 
-TEST_CASE(WindowShrink, framework::DatasetMode::ALL)
-{
-    const auto input  = TensorInfo{ TensorShape(37U, 37U, 2U), 1, DataType::F32 };
-    const auto output = TensorInfo{ TensorShape(39U, 55U, 2U), 1, DataType::F32 };
-    Status     result{};
-
-    result = CLScale::validate(&input.clone()->set_is_resizable(false), &output.clone()->set_is_resizable(false), ScaleKernelInfo{ default_interpolation_policy, default_border_mode });
-    ARM_COMPUTE_EXPECT(bool(result) == false, framework::LogLevel::ERRORS);
-}
-
 TEST_CASE(IncorrectScaleFactor, framework::DatasetMode::ALL)
 {
     const auto     input                = TensorInfo{ TensorShape(28U, 33U, 2U), 1, DataType::F32 };
@@ -210,6 +201,8 @@ TEST_SUITE_END() // Validate
 
 template <typename T>
 using CLScaleFixture = ScaleValidationFixture<CLTensor, CLAccessor, CLScale, T>;
+template <typename T>
+using CLScaleMixedDataLayoutFixture = ScaleValidationFixture<CLTensor, CLAccessor, CLScale, T, true>;
 
 TEST_SUITE(Float)
 TEST_SUITE(FP32)
@@ -223,6 +216,15 @@ FIXTURE_DATA_TEST_CASE(Run, CLScaleFixture<float>, framework::DatasetMode::ALL,
     // Validate output
     validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute);
 }
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLScaleMixedDataLayoutFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f32_shape, ScaleSamplingPolicySet))
+{
+    //Create valid region
+    TensorInfo        src_info(_shape, 1, _data_type);
+    const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
+
+    // Validate output
+    validate(CLAccessor(_target), _reference, valid_region, tolerance_f32, tolerance_num_f32, tolerance_f32_absolute);
+}
 FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<float>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f32_shape, ScaleAlignCornersSamplingPolicySet))
 {
     //Create valid region
@@ -261,7 +263,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLScaleFixture<half>, framework::DatasetMode::ALL, A
     const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
 
     // Validate output
-    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<half>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(f16_shape, ScaleAlignCornersSamplingPolicySet))
 {
@@ -270,7 +272,7 @@ FIXTURE_DATA_TEST_CASE(RunAlignCorners, CLScaleFixture<half>, framework::Dataset
     const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
 
     // Validate output
-    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
 }
 const auto f16_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector<half>())), framework::dataset::make("DataType", DataType::F16));
 FIXTURE_DATA_TEST_CASE(RunNightly, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, ASSEMBLE_DATASET(f16_nightly_shape, ScaleSamplingPolicySet))
@@ -280,7 +282,7 @@ FIXTURE_DATA_TEST_CASE(RunNightly, CLScaleFixture<half>, framework::DatasetMode:
     const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
 
     // Validate output
-    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
 }
 FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, CLScaleFixture<half>, framework::DatasetMode::NIGHTLY, ASSEMBLE_DATASET(f16_nightly_shape, ScaleAlignCornersSamplingPolicySet))
 {
@@ -289,7 +291,7 @@ FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, CLScaleFixture<half>, framework::
     const ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED));
 
     // Validate output
-    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16);
+    validate(CLAccessor(_target), _reference, valid_region, tolerance_f16, 0.0f, abs_tolerance_f16);
 }
 TEST_SUITE_END() // FP16
 TEST_SUITE_END() // Float
diff --git a/tests/validation/CL/ScatterLayer.cpp b/tests/validation/CL/ScatterLayer.cpp
new file mode 100644
index 0000000000..b1531eb64a
--- /dev/null
+++ b/tests/validation/CL/ScatterLayer.cpp
@@ -0,0 +1,298 @@
+/*
+ * Copyright (c) 2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLScatter.h"
+#include "tests/validation/fixtures/ScatterLayerFixture.h"
+#include "tests/datasets/ScatterDataset.h"
+#include "tests/CL/CLAccessor.h"
+#include "arm_compute/function_info/ScatterInfo.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+RelativeTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for fp32 data type */
+RelativeTolerance<float> tolerance_f16(0.02f); /**< Tolerance value for comparing reference's output against implementation's output for fp16 data type */
+RelativeTolerance<int32_t> tolerance_int(0); /**< Tolerance value for comparing reference's output against implementation's output for integer data types */
+} // namespace
+
+template <typename T>
+using CLScatterLayerFixture = ScatterValidationFixture<CLTensor, CLAccessor, CLScatter, T>;
+
+using framework::dataset::make;
+
+TEST_SUITE(CL)
+TEST_SUITE(Scatter)
+DATA_TEST_CASE(Validate, framework::DatasetMode::PRECOMMIT, zip(
+    make("InputInfo", { TensorInfo(TensorShape(9U), 1, DataType::F32),    // Mismatching data types
+                        TensorInfo(TensorShape(15U), 1, DataType::F32),   // Valid
+                        TensorInfo(TensorShape(15U), 1, DataType::U8),   // Valid
+                        TensorInfo(TensorShape(8U), 1, DataType::F32),
+                        TensorInfo(TensorShape(217U), 1, DataType::F32),    // Mismatch input/output dims.
+                        TensorInfo(TensorShape(217U), 1, DataType::F32),    // Updates dim higher than Input/Output dims.
+                        TensorInfo(TensorShape(12U), 1, DataType::F32),     // Indices wrong datatype.
+                        TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32), // Number of updates != number of indices
+                        TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32), // index_len != (dst_dims - upt_dims + 1)
+                        TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32), // index_len > 5
+    }),
+    make("UpdatesInfo",{TensorInfo(TensorShape(3U), 1, DataType::F16),
+                        TensorInfo(TensorShape(15U), 1, DataType::F32),
+                        TensorInfo(TensorShape(15U), 1, DataType::U8),
+                        TensorInfo(TensorShape(2U), 1, DataType::F32),
+                        TensorInfo(TensorShape(217U), 1, DataType::F32),
+                        TensorInfo(TensorShape(217U, 3U), 1, DataType::F32),
+                        TensorInfo(TensorShape(2U), 1, DataType::F32),
+                        TensorInfo(TensorShape(9U, 3U, 2U), 1, DataType::F32),
+                        TensorInfo(TensorShape(17U, 3U, 2U), 1, DataType::F32),
+                        TensorInfo(TensorShape(1U), 1, DataType::F32),
+    }),
+    make("IndicesInfo",{TensorInfo(TensorShape(1U, 3U), 1, DataType::S32),
+                        TensorInfo(TensorShape(1U, 15U), 1, DataType::S32),
+                        TensorInfo(TensorShape(1U, 15U), 1, DataType::S32),
+                        TensorInfo(TensorShape(1U, 2U), 1, DataType::S32),
+                        TensorInfo(TensorShape(1U, 271U), 1, DataType::S32),
+                        TensorInfo(TensorShape(1U, 271U), 1, DataType::S32),
+                        TensorInfo(TensorShape(1U, 2U), 1 , DataType::F32),
+                        TensorInfo(TensorShape(1U, 4U), 1, DataType::S32),
+                        TensorInfo(TensorShape(3U, 2U), 1, DataType::S32),
+                        TensorInfo(TensorShape(6U, 2U), 1, DataType::S32),
+    }),
+    make("OutputInfo",{TensorInfo(TensorShape(9U), 1, DataType::F16),
+                       TensorInfo(TensorShape(15U), 1, DataType::F32),
+                       TensorInfo(TensorShape(15U), 1, DataType::U8),
+                       TensorInfo(TensorShape(8U), 1, DataType::F32),
+                       TensorInfo(TensorShape(271U, 3U), 1, DataType::F32),
+                       TensorInfo(TensorShape(271U), 1, DataType::F32),
+                       TensorInfo(TensorShape(12U), 1, DataType::F32),
+                       TensorInfo(TensorShape(9U, 3U, 4U), 1, DataType::F32),
+                       TensorInfo(TensorShape(17U, 3U, 3U, 2U), 1, DataType::F32),
+                       TensorInfo(TensorShape(17U, 3U, 3U, 2U, 2U, 2U), 1, DataType::F32),
+    }),
+    make("ScatterInfo",{ ScatterInfo(ScatterFunction::Add, false),
+                         ScatterInfo(ScatterFunction::Max, false),
+                         ScatterInfo(ScatterFunction::Max, false),
+                         ScatterInfo(ScatterFunction::Min, false),
+                         ScatterInfo(ScatterFunction::Add, false),
+                         ScatterInfo(ScatterFunction::Update, false),
+                         ScatterInfo(ScatterFunction::Sub, false),
+                         ScatterInfo(ScatterFunction::Sub, false),
+                         ScatterInfo(ScatterFunction::Update, false),
+                         ScatterInfo(ScatterFunction::Update, false),
+    }),
+    make("Expected", { false, true, true, true, false, false, false, false, false, false })),
+    input_info, updates_info, indices_info, output_info, scatter_info, expected)
+{
+    const Status status = CLScatter::validate(&input_info, &updates_info, &indices_info, &output_info, scatter_info);
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+
+const auto allScatterFunctions = make("ScatterFunction",
+    {ScatterFunction::Update, ScatterFunction::Add, ScatterFunction::Sub, ScatterFunction::Min, ScatterFunction::Max });
+
+TEST_SUITE(Float)
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::Small1DScatterDataset(),
+        make("DataType", {DataType::F32}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {true})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// With this test, src should be passed as nullptr.
+FIXTURE_DATA_TEST_CASE(RunSmallZeroInit, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::Small1DScatterDataset(),
+        make("DataType", {DataType::F32}),
+        make("ScatterFunction", {ScatterFunction::Add}),
+        make("ZeroInit", {true}),
+        make("Inplace", {false}),
+        make("Padding", {true})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// Updates/src/dst have same no. dims.
+FIXTURE_DATA_TEST_CASE(RunSmallMultiDim, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMultiDimDataset(),
+        make("DataType", {DataType::F32}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {true})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// m+1-D to m+n-D cases
+FIXTURE_DATA_TEST_CASE(RunSmallMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMultiIndicesDataset(),
+        make("DataType", {DataType::F32}),
+        make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add }),
+        make("ZeroInit", {false}),
+        make("Inplace", {false, true}),
+        make("Padding", {true})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// m+k, k-1-D m+n-D case
+FIXTURE_DATA_TEST_CASE(RunSmallBatchedMultiIndices, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterBatchedDataset(),
+        make("DataType", {DataType::F32}),
+        make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}),
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {true})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+// m+k, k-1-D m+n-D case
+FIXTURE_DATA_TEST_CASE(RunSmallScatterScalar, CLScatterLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterScalarDataset(),
+        make("DataType", {DataType::F32}),
+        make("ScatterFunction", {ScatterFunction::Update, ScatterFunction::Add}),
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false}))) // NOTE: Padding not supported in this datset
+{
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+TEST_SUITE_END() // FP32
+
+
+// NOTE: Padding is disabled for the SmallScatterMixedDataset due certain shapes not supporting padding.
+//       Padding is well tested in F32 Datatype test cases.
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<half>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMixedDataset(),
+        make("DataType", {DataType::F16}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // FP16
+TEST_SUITE_END() // Float
+
+TEST_SUITE(Integer)
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int32_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMixedDataset(),
+        make("DataType", {DataType::S32}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int16_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMixedDataset(),
+        make("DataType", {DataType::S16}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(S8)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<int8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMixedDataset(),
+        make("DataType", {DataType::S8}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // S8
+
+TEST_SUITE(U32)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint32_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMixedDataset(),
+        make("DataType", {DataType::U32}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // U32
+
+TEST_SUITE(U16)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint16_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMixedDataset(),
+        make("DataType", {DataType::U16}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // U16
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmallMixed, CLScatterLayerFixture<uint8_t>, framework::DatasetMode::PRECOMMIT,
+    combine(datasets::SmallScatterMixedDataset(),
+        make("DataType", {DataType::U8}),
+        allScatterFunctions,
+        make("ZeroInit", {false}),
+        make("Inplace", {false}),
+        make("Padding", {false})))
+{
+    validate(CLAccessor(_target), _reference, tolerance_int);
+}
+TEST_SUITE_END() // U8
+TEST_SUITE_END() // Integer
+
+TEST_SUITE_END() // Scatter
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/Scharr.cpp b/tests/validation/CL/Scharr.cpp
deleted file mode 100644
index ed1fec82fd..0000000000
--- a/tests/validation/CL/Scharr.cpp
+++ /dev/null
@@ -1,76 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLScharr3x3.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/GradientDimensionDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ScharrFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Scharr)
-
-TEST_SUITE(W3x3)
-using CLScharr3x3Fixture = ScharrValidationFixture<CLTensor, CLAccessor, CLScharr3x3, uint8_t, int16_t>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                      Format::U8)),
-                                                                                              datasets::GradientDimensions()))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLScharr3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                      Format::U8)),
-                                                                                              datasets::GradientDimensions()))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Select.cpp b/tests/validation/CL/Select.cpp
index 3d7c61aab5..d3540cae48 100644
--- a/tests/validation/CL/Select.cpp
+++ b/tests/validation/CL/Select.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -107,6 +107,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(CLAccessor(_target), _reference);
 }
 
+FIXTURE_DATA_TEST_CASE(RunOneDim,
+                       CLSelectFixture<half>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(framework::dataset::make("Shape", TensorShape(1U, 16U)),
+                                       framework::dataset::make("has_same_rank", { false, true })),
+                               framework::dataset::make("DataType", DataType::F16)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLSelectFixture<half>,
                        framework::DatasetMode::NIGHTLY,
@@ -127,6 +138,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(CLAccessor(_target), _reference);
 }
 
+FIXTURE_DATA_TEST_CASE(RunOneDim,
+                       CLSelectFixture<float>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(framework::dataset::make("Shape", TensorShape(1U, 16U)),
+                                       framework::dataset::make("has_same_rank", { false, true })),
+                               framework::dataset::make("DataType", DataType::F32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLSelectFixture<float>,
                        framework::DatasetMode::NIGHTLY,
@@ -149,6 +171,17 @@ FIXTURE_DATA_TEST_CASE(RunSmall,
     validate(CLAccessor(_target), _reference);
 }
 
+FIXTURE_DATA_TEST_CASE(RunOneDim,
+                       CLSelectFixture<uint8_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(combine(framework::dataset::make("Shape", TensorShape(1U, 16U)),
+                                       framework::dataset::make("has_same_rank", { false, true })),
+                               framework::dataset::make("DataType", DataType::QASYMM8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
+
 FIXTURE_DATA_TEST_CASE(RunLarge,
                        CLSelectFixture<uint8_t>,
                        framework::DatasetMode::NIGHTLY,
diff --git a/tests/validation/CL/SinLayer.cpp b/tests/validation/CL/SinLayer.cpp
index e40c990db6..f0cb4c314e 100644
--- a/tests/validation/CL/SinLayer.cpp
+++ b/tests/validation/CL/SinLayer.cpp
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLElementWiseUnaryLayer.h"
+#include "arm_compute/runtime/CL/functions/CLElementwiseUnaryLayer.h"
 #include "arm_compute/runtime/Tensor.h"
 #include "arm_compute/runtime/TensorAllocator.h"
 #include "tests/CL/CLAccessor.h"
@@ -32,7 +32,7 @@
 #include "tests/framework/Macros.h"
 #include "tests/framework/datasets/Datasets.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ElementWiseUnaryFixture.h"
+#include "tests/validation/fixtures/ElementwiseUnaryFixture.h"
 
 namespace arm_compute
 {
diff --git a/tests/validation/CL/Sobel.cpp b/tests/validation/CL/Sobel.cpp
deleted file mode 100644
index 3aee0fe2e1..0000000000
--- a/tests/validation/CL/Sobel.cpp
+++ /dev/null
@@ -1,259 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLSobel3x3.h"
-#include "arm_compute/runtime/CL/functions/CLSobel5x5.h"
-#include "arm_compute/runtime/CL/functions/CLSobel7x7.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/SobelFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Sobel)
-
-TEST_SUITE(W3x3)
-using CLSobel3x3Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel3x3, uint8_t, int16_t>;
-
-TEST_SUITE(X)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(Y)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
-    // Validate output
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
-    // Validate output
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(XY)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel3x3Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(1));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(W5x5)
-using CLSobel5x5Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel5x5, uint8_t, int16_t>;
-
-TEST_SUITE(X)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-TEST_SUITE_END()
-TEST_SUITE(Y)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
-    // Validate output
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
-    // Validate output
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE(XY)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel5x5Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(2));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE(W7x7)
-using CLSobel7x7Fixture = SobelValidationFixture<CLTensor, CLAccessor, CLSobel7x7, uint8_t, int32_t>;
-
-TEST_SUITE(X)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_X)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-}
-TEST_SUITE_END()
-TEST_SUITE(Y)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
-    // Validate output
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_Y)))
-{
-    // Validate output
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE(XY)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Small2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLSobel7x7Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::Large2DShapes(), datasets::BorderModes()), framework::dataset::make("Format",
-                                                                                                     Format::U8)),
-                                                                                             framework::dataset::make("GradientDimension", GradientDimension::GRAD_XY)))
-{
-    // Validate output
-    ValidRegion valid_region_x = shape_to_valid_region(_reference.first.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.first), _reference.first, valid_region_x);
-
-    ValidRegion valid_region_y = shape_to_valid_region(_reference.second.shape(), (_border_mode == BorderMode::UNDEFINED), BorderSize(3));
-    validate(CLAccessor(_target.second), _reference.second, valid_region_y);
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/SoftmaxLayer.cpp b/tests/validation/CL/SoftmaxLayer.cpp
index 396e274e0b..eb47b7f666 100644
--- a/tests/validation/CL/SoftmaxLayer.cpp
+++ b/tests/validation/CL/SoftmaxLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -34,6 +34,12 @@
 #include "tests/validation/Validation.h"
 #include "tests/validation/fixtures/SoftmaxLayerFixture.h"
 
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/CL/CLBufferAllocator.h"
+#include "arm_compute/runtime/BlobMemoryPool.h"
+
 namespace arm_compute
 {
 namespace test
@@ -62,6 +68,47 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
 TEST_SUITE(CL)
 TEST_SUITE(SoftmaxLayer)
 
+TEST_CASE(SimpleMemoryManaged, framework::DatasetMode::ALL)
+{
+    // The purpose of this test is to test if the function can
+    // run correctly even with the given memory manager from its caller
+    // (Similar scenario when the library is integrated into other software)
+    // especially when working with workspace() method of
+    // @ref arm_compute::opencl::ClSoftmax.
+    const auto shape = TensorShape{4,2};    // Random shape, not important
+    constexpr auto dt = DataType::F32;      // Random data type, not important
+
+    // Create a memory manager
+    auto lm = std::make_shared<BlobLifetimeManager>();
+    auto pm = std::make_shared<arm_compute::PoolManager>();
+    auto alloc = std::make_unique<CLBufferAllocator>();
+    auto mm = std::make_shared<MemoryManagerOnDemand>(lm, pm);
+
+    auto src = create_tensor<CLTensor>(shape, dt);
+    auto dst = create_tensor<CLTensor>(shape, dt);
+    src.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    // Create the function with the memory manager
+    CLSoftmaxLayer smx(mm);
+    smx.configure(&src, &dst);
+
+    // Populate the memory, acquire() will happen in run()
+    mm->populate(*alloc.get(), 1);
+
+    std::vector<float> input_vals{0.0f, 1.0f, 0.0f, 0.0f, 0.5f, 0.0f, 0.0f, 0.0f,};
+    library->fill_static_values(CLAccessor(src), input_vals);
+
+    smx.run();
+
+    // Compute reference to compare
+    SimpleTensor<float> ref_src{shape, dt};
+    library->fill_static_values(ref_src, input_vals);
+    auto ref_dst = reference::softmax_layer<float>(ref_src, 1., 0, false);
+
+    validate(CLAccessor(dst), ref_dst);
+}
+
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
diff --git a/tests/validation/CL/TableLookup.cpp b/tests/validation/CL/TableLookup.cpp
deleted file mode 100644
index 415b91c42d..0000000000
--- a/tests/validation/CL/TableLookup.cpp
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLTableLookup.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/CL/CLLutAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Helpers.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/TableLookupFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(TableLookup)
-
-template <typename T>
-using CLTableLookupFixture = TableLookupValidationFixture<CLTensor, CLAccessor, CLTableLookup, CLLutAccessor<T>, CLLut, T>;
-TEST_SUITE(U8)
-
-FIXTURE_DATA_TEST_CASE(RunSmallU8, CLTableLookupFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeU8, CLTableLookupFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmallS16, CLTableLookupFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::S16)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLargeS16, CLTableLookupFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::S16)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-TEST_SUITE_END()
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Threshold.cpp b/tests/validation/CL/Threshold.cpp
deleted file mode 100644
index be26245b7e..0000000000
--- a/tests/validation/CL/Threshold.cpp
+++ /dev/null
@@ -1,65 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLThreshold.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/datasets/ThresholdDataset.h"
-#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/ThresholdFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(Threshold)
-
-template <typename T>
-using CLThresholdFixture = ThresholdValidationFixture<CLTensor, CLAccessor, CLThreshold, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLThresholdFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::SmallShapes(), datasets::MixedThresholdDataset()),
-                                                                                                       framework::dataset::make("DataType",
-                                                                                                               DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLThresholdFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), datasets::MixedThresholdDataset()),
-                                                                                                       framework::dataset::make("DataType",
-                                                                                                               DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/Tile.cpp b/tests/validation/CL/Tile.cpp
index a06c05744f..f243780c00 100644
--- a/tests/validation/CL/Tile.cpp
+++ b/tests/validation/CL/Tile.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2020, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -42,6 +42,7 @@ namespace validation
 namespace
 {
 const auto MultiplesDataset = framework::dataset::make("Multiples", { Multiples{ 3 },
+                                                                      Multiples{ 7 },
                                                                       Multiples{ 2, 2 },
                                                                       Multiples{ 1, 1, 3, 4 },
                                                                       Multiples{ 2, 1, 2, 2 },
diff --git a/tests/validation/CL/Transpose.cpp b/tests/validation/CL/Transpose.cpp
index 876bf29dd5..6cf5fe8537 100644
--- a/tests/validation/CL/Transpose.cpp
+++ b/tests/validation/CL/Transpose.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,19 +47,17 @@ TEST_SUITE(Transpose)
 // *INDENT-OFF*
 // clang-format off
 DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
-    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::U8),  // Input not a multiple of 8
-                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::U16), // Invalid shape
-                                            TensorInfo(TensorShape(20U, 13U), 1, DataType::U32), // Window shrink
+    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::U16), // Invalid shape
                                             TensorInfo(TensorShape(20U, 13U), 1, DataType::U8),  // Wrong data type
                                             TensorInfo(TensorShape(20U, 16U), 1, DataType::U32), // Valid
+                                            TensorInfo(TensorShape(20U, 16U, 3U, 3U), 1, DataType::U16), // Transpose only first two dimensions
                                           }),
-    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(13U, 21U), 1, DataType::U8),
-                                            TensorInfo(TensorShape(21U, 13U), 1, DataType::U16),
-                                            TensorInfo(TensorShape(13U, 20U), 1, DataType::U32),
+    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(13U, 20U), 1, DataType::U32),
                                             TensorInfo(TensorShape(31U, 20U), 1, DataType::U16),
                                             TensorInfo(TensorShape(16U, 20U), 1, DataType::U32),
+                                            TensorInfo(TensorShape(16U, 20U, 3U, 3U), 1, DataType::U16),
                                            })),
-    framework::dataset::make("Expected", { false, false, false, false, true })),
+    framework::dataset::make("Expected", { false, false, true, true })),
     a_info, output_info, expected)
 {
     // Lock tensors
@@ -84,6 +82,16 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint8_t>, framework::Dataset
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+FIXTURE_DATA_TEST_CASE(RunLargeHighDimensional,
+                       CLTransposeFixture<uint8_t>,
+                       framework::DatasetMode::NIGHTLY,
+                       combine(concat(concat(datasets::Large3DShapes(), datasets::Large4DShapes()),
+                                      datasets::Large5dShapes()),
+                               framework::dataset::make("DataType", DataType::U8)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
 TEST_SUITE_END() // U8
 
 TEST_SUITE(U16)
@@ -102,12 +110,23 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint16_t>, framework::Datase
 TEST_SUITE_END() // U16
 
 TEST_SUITE(U32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(concat(datasets::Small1DShapes(), datasets::Small2DShapes()),
+FIXTURE_DATA_TEST_CASE(RunSmall, CLTransposeFixture<uint32_t>, framework::DatasetMode::PRECOMMIT, combine(concat(concat(framework::dataset::make("Shape", { TensorShape{ 1U, 5U }, TensorShape{ 4U, 5U }, TensorShape{ 3, 12 } }),
+                                                                                                                        datasets::Small1DShapes()),
+                                                                                                                 datasets::Small2DShapes()),
                                                                                                           framework::dataset::make("DataType", DataType::U32)))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
+FIXTURE_DATA_TEST_CASE(RunSmallHighDimensional,
+                       CLTransposeFixture<uint32_t>,
+                       framework::DatasetMode::PRECOMMIT,
+                       combine(concat(datasets::Small3DShapes(), datasets::Small4DShapes()),
+                               framework::dataset::make("DataType", DataType::U32)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference);
+}
 FIXTURE_DATA_TEST_CASE(RunLarge, CLTransposeFixture<uint32_t>, framework::DatasetMode::NIGHTLY, combine(concat(datasets::Large1DShapes(), datasets::Large2DShapes()),
                                                                                                         framework::dataset::make("DataType", DataType::U32)))
 {
diff --git a/tests/validation/CL/UNIT/CompileContext.cpp b/tests/validation/CL/UNIT/CompileContext.cpp
index 06f7eae88e..a0541b6a99 100644
--- a/tests/validation/CL/UNIT/CompileContext.cpp
+++ b/tests/validation/CL/UNIT/CompileContext.cpp
@@ -56,6 +56,7 @@ TEST_CASE(CompileContextCache, framework::DatasetMode::ALL)
     std::set<std::string> build_opts;
     build_opts.emplace("-DDATA_TYPE=float");
     build_opts.emplace("-DVEC_SIZE=16");
+    build_opts.emplace("-DVEC_SIZE_LEFTOVER=0");
     compile_context.create_kernel(kernel_name, program_name, kernel_src.first, kernel_path, build_opts, kernel_src.second);
 
     // Check if the program is stored in the cache
diff --git a/tests/validation/CL/UNIT/DynamicTensor.cpp b/tests/validation/CL/UNIT/DynamicTensor.cpp
index 833256039e..ac433721d8 100644
--- a/tests/validation/CL/UNIT/DynamicTensor.cpp
+++ b/tests/validation/CL/UNIT/DynamicTensor.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2020 Arm Limited.
+ * Copyright (c) 2019-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,11 +29,8 @@
 #include "arm_compute/runtime/MemoryManagerOnDemand.h"
 #include "arm_compute/runtime/PoolManager.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLIm2ColKernel.h"
 #include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
 #include "src/core/CL/kernels/CLReductionOperationKernel.h"
-#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
diff --git a/tests/validation/CL/IntegralImage.cpp b/tests/validation/CL/UNIT/Extensions.cpp
index e3b728ab9c..8119290d4b 100644
--- a/tests/validation/CL/IntegralImage.cpp
+++ b/tests/validation/CL/UNIT/Extensions.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,13 +21,10 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#include "arm_compute/runtime/CL/functions/CLIntegralImage.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
+#include "arm_compute/AclVersion.h"
+#include "tests/framework/Asserts.h"
 #include "tests/framework/Macros.h"
 #include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/IntegralImageFixture.h"
 
 namespace arm_compute
 {
@@ -36,25 +33,17 @@ namespace test
 namespace validation
 {
 TEST_SUITE(CL)
-TEST_SUITE(IntegralImage)
-template <typename T>
-using CLIntegralImageFixture = IntegralImageValidationFixture<CLTensor, CLAccessor, CLIntegralImage, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLIntegralImageFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                           DataType::U8)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLIntegralImageFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                           DataType::U8)))
+TEST_SUITE(UNIT)
+TEST_CASE(GetClContext, framework::DatasetMode::ALL)
 {
-    // Validate output
-    validate(CLAccessor(_target), _reference);
+    const auto ver = AclVersionInfo();
+    ARM_COMPUTE_EXPECT(ver->major == ARM_COMPUTE_LIBRARY_VERSION_MAJOR, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(ver->minor == ARM_COMPUTE_LIBRARY_VERSION_MINOR, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(ver->patch == ARM_COMPUTE_LIBRARY_VERSION_PATCH, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(!std::string(ver->build_info).empty(), framework::LogLevel::ERRORS);
 }
-
-TEST_SUITE_END()
-TEST_SUITE_END()
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/MLGOHeuristics.cpp b/tests/validation/CL/UNIT/MLGOHeuristics.cpp
new file mode 100644
index 0000000000..e26464f9f7
--- /dev/null
+++ b/tests/validation/CL/UNIT/MLGOHeuristics.cpp
@@ -0,0 +1,473 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "src/runtime/CL/mlgo/MLGOHeuristics.h"
+#include "src/runtime/CL/mlgo/Utils.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+
+using namespace arm_compute::mlgo;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(MLGOHeuristics)
+TEST_CASE(CorrectDotMLGOShouldLoadCorrectly, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+
+        <header>
+
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]
+
+        1, g71 , 8, f16, best-performance, static, gemm-config-reshaped-only-rhs, [m,n,k,n]
+        2, g76 , 8, f16, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b , 0, var, m, ==, num, 10., 1, 2
+        l , 1, gemm-type, reshaped
+        b , 2, var, r_mn, >=, num, 2., 3, 6
+
+        b , 3, var, n, >=, num, 200., 4, 5
+        l, 4,                          gemm-type, reshaped-only-rhs
+        l , 5, gemm-type, reshaped
+        l , 6, gemm-type, reshaped-only-rhs
+        </heuristic>
+        <heuristic, 1>
+        b ,0,var, n, >, num, 100., 1, 4
+        b ,1,var, r_mnk, <=, num, 20., 2, 3
+
+
+        l ,2,gemm-config-reshaped-only-rhs, [4, 4,4,2,1,0,1]
+        l ,3,gemm-config-reshaped-only-rhs,[ 2, 2,4,2,1,1, 1 ]
+        b ,4,var, n, >=, num, 199.12, 5, 6
+        l ,5,gemm-config-reshaped-only-rhs, [1, 4,3,4,0,0,0]
+        l ,6,gemm-config-reshaped-only-rhs, [5, 4,4,5,1,1,0]
+        </heuristic>
+
+        <heuristic, 2>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+
+        </heuristic>
+
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    heuristics.reload_from_stream(ss);
+
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 10, 1024, 20, 1 }).second == GEMMType::RESHAPED, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 400, 201, 5, 1 }).second == GEMMType::RESHAPED_ONLY_RHS, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 400, 200, 199, 16 }).second == GEMMType::RESHAPED_ONLY_RHS, framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT(heuristics.query_gemm_type(Query{ "g76", DataType::F32, 400, 199, 512, 4 }).second == GEMMType::RESHAPED, framework::LogLevel::ERRORS);
+
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 100, 1024, 20, 32 }).second == GEMMConfigReshapedOnlyRHS{ 4, 4, 4, 2, true, false, true }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 100, 1024, 20, 32 }).second == GEMMConfigReshapedOnlyRHS{ 4, 4, 4, 2, true, false, true }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 128, 101, 20, 1 }).second == GEMMConfigReshapedOnlyRHS{ 2, 2, 4, 2, true, true, true }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 400, 100, 512, 1 }).second == GEMMConfigReshapedOnlyRHS{ 5, 4, 4, 5, true, true, false }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g71", DataType::F16, 400, 100, 512, 1 }).second == GEMMConfigReshapedOnlyRHS{ 5, 4, 4, 5, true, true, false }),
+                       framework::LogLevel::ERRORS);
+
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped(Query{ "g76", DataType::F16, 100, 100, 20, 32 }).second == GEMMConfigReshaped{ 4, 2, 4, 2, 8, true, false, true, false }),
+                       framework::LogLevel::ERRORS);
+    ARM_COMPUTE_EXPECT((heuristics.query_gemm_config_reshaped(Query{ "g76", DataType::F16, 128, 512, 1024, 1 }).second == GEMMConfigReshaped{ 4, 2, 4, 2, 8, true, false, true, false }),
+                       framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(InvalidDotmlgoSyntaxShouldReturnInvalidStatus, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,pu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+
+        </heurist
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_SUITE(InvalidDotmlgoSemanticsShouldReturnInvalidStatus)
+// If the semantics errors are local to some trees instead of the entire heuristics, an alternative is to simply
+// ignore/remove those invalid trees. However the reason why we choose to throw, thus invalidating the entire
+// heuristics is that if there are some invalid trees, the quality of the dotmlgo is called into question even if
+// the rest of the trees are semantically valid, and they could severely degrade the performance of GEMM. Therefore
+// this "all or nothing" approach when it comes to dotmlgo correctness is safer and more defensive.
+
+// Also note that the semantic error of the tree only refers to those that obstruct its evaluation and thus query,
+// (e.g. invalid tree structure, unsupported features etc.) instead of those affecting the desired outcome
+// (usually in terms of final GEMM performance, e.g. the effectiveness of the decision tree)
+
+// In the future we might want to check the content of the exceptions as well. But right now it suffices to only
+// know that it throws exactly when it needs to.
+TEST_CASE(MismatchesBetweenHeuristicsTableEntriesAndHeuristicTrees, framework::DatasetMode::ALL)
+{
+    {
+        // Mismatching number of entries 1
+        std::string       mlgo_str = R"_(
+            <header>
+            gemm-version, [1,2,1]
+            ip-type,gpu
+            </header>
+            <heuristics-table>
+
+            0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+
+            </heuristics-table>
+        )_";
+        std::stringstream ss(mlgo_str);
+        MLGOHeuristics    heuristics;
+        // NOTE: This case might throw an internal error as the tree inserted by the heuristics-table cannot not be checked
+        ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+    }
+
+    {
+        // Mismatching number of entries 2
+        std::string       mlgo_str = R"_(
+            <header>
+            gemm-version, [1,2,1]
+            ip-type,gpu
+            </header>
+            <heuristics-table>
+            </heuristics-table>
+            <heuristic, 1>
+            l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+            </heuristic>
+        )_";
+        std::stringstream ss(mlgo_str);
+        MLGOHeuristics    heuristics;
+        ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+    }
+
+    {
+        // Mismatching info
+        std::string       mlgo_str = R"_(
+            <header>
+            gemm-version, [1,2,1]
+            ip-type,gpu
+            </header>
+            <heuristics-table>
+            0, g76 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]
+            </heuristics-table>
+            <heuristic, 0>
+            l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+            </heuristic>
+        )_";
+        std::stringstream ss(mlgo_str);
+        MLGOHeuristics    heuristics;
+        ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+    }
+}
+
+TEST_CASE(RepeatedHeuristicsTableEntriesId, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        0, g71 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+        <heuristic, 1>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(RepeatedHeuristicsTableEntriesIndex, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        1, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+        <heuristic, 1>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(RepeatedHeuristicTreesId, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        1, g71 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+        <heuristic, 0>
+        l ,0,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(EmptyTree, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+
+TEST_CASE(InvalidTreeMissingRoot, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,2, var, m, ==, num, 10., 3, 4
+        l ,3,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,4,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeMissingNodes, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeRepeatedNodeIds, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,1,gemm-config-reshaped,[1,2,4,2,8,1,0,1,0]
+        l ,2,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeDisjointNodes, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,2,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+
+        b ,4, var, n, ==, num, 10., 5, 6
+        l ,5,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,6,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+
+        l ,7,gemm-config-reshaped,[2,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeLoop, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 0, 1
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeCycle, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, m, ==, num, 10., 1, 5
+        b ,1, var, n, ==, num, 10., 2, 3
+        l ,2,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        b ,3, var, k, ==, num, 10., 0, 4
+        l ,4,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,5,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_CASE(InvalidTreeInvalidFeatures, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-config-reshaped, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b ,0, var, magic_feature, ==, num, 10., 1, 2
+        l ,1,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        l ,2,gemm-config-reshaped,[4,2,4,2,8,1,0,1,0]
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(!heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // InvalidDotmlgoSemanticsShouldReturnInvalidStatus
+
+TEST_CASE(InvalidUsageOfHeuristicsShouldReturnInvalidStatus, framework::DatasetMode::ALL)
+{
+    std::string       mlgo_str = R"_(
+        <header>
+        gemm-version, [1,2,1]
+        ip-type,gpu
+        </header>
+        <heuristics-table>
+        0, g76 , 8, f32, best-performance, static, gemm-type, [m,n,k,n]
+        </heuristics-table>
+        <heuristic, 0>
+        b , 0, var, m, ==, num, 10., 1, 2
+        l , 1, gemm-type, reshaped
+        b , 2, var, r_mn, >=, num, 2., 3, 6
+        b , 3, var, n, >=, num, 200., 4, 5
+        l , 4, gemm-type, reshaped-only-rhs
+        l , 5, gemm-type, reshaped
+        l , 6, gemm-type, reshaped-only-rhs
+        </heuristic>
+    )_";
+    std::stringstream ss(mlgo_str);
+    MLGOHeuristics    heuristics;
+    ARM_COMPUTE_EXPECT(heuristics.reload_from_stream(ss), framework::LogLevel::ERRORS);
+
+    // Querying unavailable heuristic type should return invalid Status
+    ARM_COMPUTE_EXPECT(!heuristics.query_gemm_config_reshaped(Query{ "g76", DataType::F32, 1024, 1024, 100, 3 }).first, framework::LogLevel::ERRORS);
+    // Querying unavailable ip target should return invalid Status
+    ARM_COMPUTE_EXPECT(!heuristics.query_gemm_type(Query{ "g77", DataType::F32, 1024, 1024, 100, 3 }).first, framework::LogLevel::ERRORS);
+    // Querying unavailable data type should return invalid Status
+    ARM_COMPUTE_EXPECT(!heuristics.query_gemm_config_reshaped_only_rhs(Query{ "g76", DataType::QASYMM8, 1024, 1024, 100, 3 }).first, framework::LogLevel::ERRORS);
+}
+TEST_SUITE_END() // MLGOHeuristics
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/Multithreaded.cpp b/tests/validation/CL/UNIT/Multithreaded.cpp
new file mode 100644
index 0000000000..5c75df709d
--- /dev/null
+++ b/tests/validation/CL/UNIT/Multithreaded.cpp
@@ -0,0 +1,113 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/RuntimeContext.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/ParametersLibrary.h"
+#include "tests/validation/Validation.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLPixelWiseMultiplication.h"
+#include "tests/validation/reference/ActivationLayer.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+#include <thread>
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+TEST_SUITE(CL)
+TEST_SUITE(UNIT)
+TEST_SUITE(RuntimeContext)
+// This test tries scheduling work concurrently from two independent threads
+TEST_CASE(MultipleThreadedScheduller, framework::DatasetMode::ALL)
+{
+    constexpr auto num_threads(16u);
+    std::array<CLActivationLayer, num_threads>         func{};
+    std::array<CLPixelWiseMultiplication, num_threads> pmul{};
+    std::array<CLTensor, num_threads>                  s0{};
+    std::array<CLTensor, num_threads>                  s1{};
+
+    std::array<CLTensor, num_threads> st{};
+    std::array<CLTensor, num_threads> dt{};
+
+    const TensorShape         tensor_shape(128u, 4u, 5u);
+    const ActivationLayerInfo ainfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.5f, 1.f);
+    std::array<std::thread, num_threads> threads;
+    auto ctx = parameters->get_ctx<CLTensor>();
+
+    for(auto i = 0u; i < num_threads; ++i)
+    {
+        s0[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        s1[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        st[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        dt[i]   = create_tensor<CLTensor>(tensor_shape, DataType::F32, 1);
+        func[i] = CLActivationLayer(ctx);
+        pmul[i] = CLPixelWiseMultiplication();
+        threads[i] =
+            std::thread([&,i]
+        {
+            auto &s  = st[i];
+            auto &t  = dt[i];
+            auto &p0 = s0[i];
+            auto &p1 = s1[i];
+            pmul[i].configure(&p0, &p1, &s, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP);
+            func[i].configure(&s, &t, ainfo);
+            s.allocator()->allocate();
+            t.allocator()->allocate();
+            p0.allocator()->allocate();
+            p1.allocator()->allocate();
+            library->fill_tensor_uniform(CLAccessor(p0), 0, -1.f, 1.f);
+            library->fill_tensor_uniform(CLAccessor(p1), 0, -1.f, 1.f);
+            pmul[i].run();
+            func[i].run();
+        });
+    }
+
+    for(auto &t : threads)
+    {
+        t.join();
+    }
+
+    SimpleTensor<float> rs{ tensor_shape, DataType::F32, 1 };
+    SimpleTensor<float> ra{ tensor_shape, DataType::F32, 1 };
+    SimpleTensor<float> rb{ tensor_shape, DataType::F32, 1 };
+    library->fill_tensor_uniform(ra, 0, -1.f, 1.f);
+    library->fill_tensor_uniform(rb, 0, -1.f, 1.f);
+    const auto mul    = reference::pixel_wise_multiplication<float, float, float>(ra, rb, 1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_NEAREST_UP, DataType::F32);
+    const auto golden = reference::activation_layer<float>(mul, ainfo);
+    for(auto &d : dt)
+    {
+        validate(CLAccessor(d), golden);
+    }
+}
+
+TEST_SUITE_END() // MultipleThreadedScheduller
+TEST_SUITE_END() // UNIT
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/TensorAllocator.cpp b/tests/validation/CL/UNIT/TensorAllocator.cpp
index 9db98fb534..559f47e16c 100644
--- a/tests/validation/CL/UNIT/TensorAllocator.cpp
+++ b/tests/validation/CL/UNIT/TensorAllocator.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -24,9 +24,14 @@
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 
 #include "arm_compute/core/utils/misc/MMappedFile.h"
+#include "arm_compute/runtime/BlobLifetimeManager.h"
+#include "arm_compute/runtime/CL/CLBufferAllocator.h"
 #include "arm_compute/runtime/CL/CLScheduler.h"
 #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/MemoryManagerOnDemand.h"
+#include "arm_compute/runtime/PoolManager.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
 #include "tests/framework/Asserts.h"
@@ -60,12 +65,108 @@ cl_mem import_malloc_memory_helper(void *ptr, size_t size)
 
     return buf;
 }
+
+class DummyAllocator final : public IAllocator
+{
+public:
+    DummyAllocator() = default;
+
+    void *allocate(size_t size, size_t alignment) override
+    {
+        ++_n_calls;
+        return _backend_allocator.allocate(size, alignment);
+    }
+    void free(void *ptr) override
+    {
+        return _backend_allocator.free(ptr);
+    }
+    std::unique_ptr<IMemoryRegion> make_region(size_t size, size_t alignment) override
+    {
+        // Needs to be implemented as is the one that is used internally by the CLTensorAllocator
+        ++_n_calls;
+        return _backend_allocator.make_region(size, alignment);
+    }
+    int get_n_calls() const
+    {
+        return _n_calls;
+    }
+
+private:
+    int               _n_calls{};
+    CLBufferAllocator _backend_allocator{};
+};
+
+void run_conv2d(std::shared_ptr<IMemoryManager> mm, IAllocator &mm_allocator)
+{
+    // Create tensors
+    CLTensor src, weights, bias, dst;
+    src.allocator()->init(TensorInfo(TensorShape(16U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+    weights.allocator()->init(TensorInfo(TensorShape(16U, 3U, 3U, 32U), 1, DataType::F32, DataLayout::NHWC));
+    bias.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32, DataLayout::NHWC));
+    dst.allocator()->init(TensorInfo(TensorShape(32U, 32U, 32U, 2U), 1, DataType::F32, DataLayout::NHWC));
+
+    // Create and configure function
+    CLGEMMConvolutionLayer conv(mm);
+    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1U, 1U, 1U, 1U));
+
+    // Allocate tensors
+    src.allocator()->allocate();
+    weights.allocator()->allocate();
+    bias.allocator()->allocate();
+    dst.allocator()->allocate();
+
+    // Finalize memory manager
+    if(mm != nullptr)
+    {
+        mm->populate(mm_allocator, 1 /* num_pools */);
+        ARM_COMPUTE_EXPECT(mm->lifetime_manager()->are_all_finalized(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_EXPECT(mm->pool_manager()->num_pools() == 1, framework::LogLevel::ERRORS);
+    }
+
+    conv.run();
+}
 } // namespace
 
 TEST_SUITE(CL)
 TEST_SUITE(UNIT)
 TEST_SUITE(TensorAllocator)
 
+/* Validate that an external global allocator can be used for all internal allocations */
+TEST_CASE(ExternalGlobalAllocator, framework::DatasetMode::ALL)
+{
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution
+    run_conv2d(nullptr /* mm */, global_tensor_alloc);
+
+    // Check that allocator has been called multiple times > 4
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Nullify global allocator
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
+/* Validate that an external global allocator can be used for the pool manager */
+TEST_CASE(ExternalGlobalAllocatorMemoryPool, framework::DatasetMode::ALL)
+{
+    auto lifetime_mgr = std::make_shared<BlobLifetimeManager>();
+    auto pool_mgr     = std::make_shared<PoolManager>();
+    auto mm           = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr, pool_mgr);
+
+    DummyAllocator global_tensor_alloc;
+    CLTensorAllocator::set_global_allocator(&global_tensor_alloc);
+
+    // Run a convolution
+    run_conv2d(mm, global_tensor_alloc);
+
+    // Check that allocator has been called multiple times > 4
+    ARM_COMPUTE_EXPECT(global_tensor_alloc.get_n_calls() > 4, framework::LogLevel::ERRORS);
+
+    // Nullify global allocator
+    CLTensorAllocator::set_global_allocator(nullptr);
+}
+
 /** Validates import memory interface when importing cl buffer objects */
 TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
 {
@@ -79,31 +180,31 @@ TEST_CASE(ImportMemoryBuffer, framework::DatasetMode::ALL)
     // Negative case : Import nullptr
     CLTensor t1;
     t1.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(!bool(t1.allocator()->import_memory(cl::Buffer())), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t1.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t1.allocator()->import_memory(cl::Buffer())));
+    ARM_COMPUTE_ASSERT(t1.info()->is_resizable());
 
     // Negative case : Import memory to a tensor that is memory managed
     CLTensor    t2;
     MemoryGroup mg;
     t2.allocator()->set_associated_memory_group(&mg);
-    ARM_COMPUTE_EXPECT(!bool(t2.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t2.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t2.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(t2.info()->is_resizable());
 
     // Negative case : Invalid buffer size
     CLTensor         t3;
     const TensorInfo info_neg(TensorShape(32U, 16U, 3U), 1, DataType::F32);
     t3.allocator()->init(info_neg);
-    ARM_COMPUTE_EXPECT(!bool(t3.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(t3.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(!bool(t3.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(t3.info()->is_resizable());
 
     // Positive case : Set raw pointer
     CLTensor t4;
     t4.allocator()->init(info);
-    ARM_COMPUTE_EXPECT(bool(t4.allocator()->import_memory(buf)), framework::LogLevel::ERRORS);
-    ARM_COMPUTE_EXPECT(!t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(bool(t4.allocator()->import_memory(buf)));
+    ARM_COMPUTE_ASSERT(!t4.info()->is_resizable());
     ARM_COMPUTE_EXPECT(t4.cl_buffer().get() == buf.get(), framework::LogLevel::ERRORS);
     t4.allocator()->free();
-    ARM_COMPUTE_EXPECT(t4.info()->is_resizable(), framework::LogLevel::ERRORS);
+    ARM_COMPUTE_ASSERT(t4.info()->is_resizable());
     ARM_COMPUTE_EXPECT(t4.cl_buffer().get() != buf.get(), framework::LogLevel::ERRORS);
 }
 
@@ -135,14 +236,14 @@ TEST_CASE(ImportMemoryMalloc, framework::DatasetMode::ALL)
         const size_t total_size_in_bytes = tensor.info()->total_size();
         const size_t alignment           = CLKernelLibrary::get().get_device().getInfo<CL_DEVICE_GLOBAL_MEM_CACHELINE_SIZE>();
         size_t       space               = total_size_in_bytes + alignment;
-        auto         raw_data            = support::cpp14::make_unique<uint8_t[]>(space);
+        auto         raw_data            = std::make_unique<uint8_t[]>(space);
 
         void *aligned_ptr = raw_data.get();
-        support::cpp11::align(alignment, total_size_in_bytes, aligned_ptr, space);
+        std::align(alignment, total_size_in_bytes, aligned_ptr, space);
 
         cl::Buffer wrapped_buffer(import_malloc_memory_helper(aligned_ptr, total_size_in_bytes));
-        ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
+        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
 
         // Fill tensor
         std::uniform_real_distribution<float> distribution(-5.f, 5.f);
@@ -205,12 +306,12 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
 
         // Map file
         utils::mmap_io::MMappedFile mmapped_file("test_mmap_import.bin", 0 /** Whole file */, 0);
-        ARM_COMPUTE_EXPECT(mmapped_file.is_mapped(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(mmapped_file.is_mapped());
         unsigned char *data = mmapped_file.data();
 
         cl::Buffer wrapped_buffer(import_malloc_memory_helper(data, total_size_in_bytes));
-        ARM_COMPUTE_EXPECT(bool(tensor.allocator()->import_memory(wrapped_buffer)), framework::LogLevel::ERRORS);
-        ARM_COMPUTE_EXPECT(!tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(bool(tensor.allocator()->import_memory(wrapped_buffer)));
+        ARM_COMPUTE_ASSERT(!tensor.info()->is_resizable());
 
         // Fill tensor
         std::uniform_real_distribution<float> distribution(-5.f, 5.f);
@@ -233,7 +334,7 @@ TEST_CASE(ImportMemoryMappedFile, framework::DatasetMode::ALL)
 
         // Release resources
         tensor.allocator()->free();
-        ARM_COMPUTE_EXPECT(tensor.info()->is_resizable(), framework::LogLevel::ERRORS);
+        ARM_COMPUTE_ASSERT(tensor.info()->is_resizable());
     }
 }
 #endif // !defined(BARE_METAL)
diff --git a/tests/validation/CL/UNIT/Tuner.cpp b/tests/validation/CL/UNIT/Tuner.cpp
deleted file mode 100644
index cf2513bf2c..0000000000
--- a/tests/validation/CL/UNIT/Tuner.cpp
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/CLScheduler.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/tuners/BifrostTuner.h"
-#include "src/core/CL/kernels/CLDirectConvolutionLayerKernel.h"
-#include "tests/Utils.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-TEST_SUITE(CL)
-TEST_SUITE(UNIT)
-TEST_SUITE(Tuner)
-
-/** Validates static tuning of Bifrost tuner */
-TEST_CASE(BifrostTunerSimple, framework::DatasetMode::ALL)
-{
-    // Create tuner
-    tuners::BifrostTuner tuner;
-
-    // Create tensors
-    auto src     = create_tensor<CLTensor>(TensorShape(13U, 13U, 16U), DataType::F32);
-    auto weights = create_tensor<CLTensor>(TensorShape(3U, 3U, 16U, 3U), DataType::F32);
-    auto bias    = create_tensor<CLTensor>(TensorShape(3U), DataType::F32);
-    auto dst     = create_tensor<CLTensor>(TensorShape(13U, 13U, 3U), DataType::F32);
-
-    // Create kernel
-    cl::NDRange                    fake_lws(2000);
-    CLDirectConvolutionLayerKernel conv;
-    conv.set_target(GPUTarget::G72);
-
-    // Configure
-    conv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1));
-
-    // Hard-wire lws to kernel and validate lws
-    conv.set_lws_hint(fake_lws);
-    ARM_COMPUTE_EXPECT(conv.lws_hint()[0] == 2000, framework::LogLevel::ERRORS);
-
-    // Tune kernel and validate
-    tuner.tune_kernel_static(conv);
-    ARM_COMPUTE_EXPECT(conv.lws_hint()[0] != 2000, framework::LogLevel::ERRORS);
-
-    // Clear tuner
-    CLScheduler::get().default_init();
-}
-TEST_SUITE_END()
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/UNIT/WeightsRetention.cpp b/tests/validation/CL/UNIT/WeightsRetention.cpp
index acf795e48b..357c88af10 100644
--- a/tests/validation/CL/UNIT/WeightsRetention.cpp
+++ b/tests/validation/CL/UNIT/WeightsRetention.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,18 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
-#include "src/core/CL/kernels/CLDepthConvertLayerKernel.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
-#include "src/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h"
-#include "src/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
-#include "src/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
 #include "tests/AssetsLibrary.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/Globals.h"
diff --git a/tests/validation/CL/UpsampleLayer.cpp b/tests/validation/CL/UpsampleLayer.cpp
deleted file mode 100644
index eff51a487c..0000000000
--- a/tests/validation/CL/UpsampleLayer.cpp
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLUpsampleLayer.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/UpsampleLayerFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<float> tolerance(0.001f);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(UpsampleLayer)
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
-    framework::dataset::make("InputInfo", { TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Mismatching data type
-                                            TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Invalid output shape
-                                            TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Invalid stride
-                                            TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32), // Invalid policy
-                                            TensorInfo(TensorShape(10U, 10U, 2U), 1, DataType::F32),
-                                          }),
-    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F16),
-                                            TensorInfo(TensorShape(20U, 10U, 2U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F32),
-                                            TensorInfo(TensorShape(20U, 20U, 2U), 1, DataType::F32),
-                                          })),
-    framework::dataset::make("PadInfo", { Size2D(2, 2),
-                                          Size2D(2, 2),
-                                          Size2D(1, 1),
-                                          Size2D(2, 2),
-                                          Size2D(2, 2),
-                                           })),
-   framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR,
-                                                  InterpolationPolicy::NEAREST_NEIGHBOR,
-                                                  InterpolationPolicy::NEAREST_NEIGHBOR,
-                                                  InterpolationPolicy::BILINEAR,
-                                                  InterpolationPolicy::NEAREST_NEIGHBOR,
-                                                })),
-    framework::dataset::make("Expected", { false, false, false, false, true })),
-    input_info, output_info, pad_info, upsampling_policy, expected)
-{
-    bool is_valid = bool(CLUpsampleLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pad_info, upsampling_policy));
-    ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS);
-}
-// clang-format on
-// *INDENT-ON*
-
-TEST_SUITE(Float)
-template <typename T>
-using CLUpsampleLayerFixture = UpsampleLayerFixture<CLTensor, CLAccessor, CLUpsampleLayer, T>;
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLUpsampleLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
-                                                                                                                   framework::dataset::make("DataType", DataType::F32)),
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                   framework::dataset::make("PadInfo", { Size2D(2, 2) })),
-                                                                                                           framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-TEST_SUITE_END() // FP32
-
-TEST_SUITE(FP16)
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLUpsampleLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
-                                                                                                                  framework::dataset::make("DataType",
-                                                                                                                          DataType::F16)),
-                                                                                                                  framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                  framework::dataset::make("PadInfo", { Size2D(2, 2) })),
-                                                                                                          framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // Float
-
-TEST_SUITE(Quantized)
-template <typename T>
-using CLUpsampleLayerQuantizedFixture = UpsampleLayerQuantizedFixture<CLTensor, CLAccessor, CLUpsampleLayer, T>;
-TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLUpsampleLayerQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                                                                                                                      framework::dataset::make("DataType", DataType::QASYMM8)),
-                                                                                                                      framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                      framework::dataset::make("PadInfo", { Size2D(2, 2) })),
-                                                                                                                      framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })),
-                                                                                                                      framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-TEST_SUITE_END() // QASYMM8
-TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLUpsampleLayerQuantizedFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(datasets::SmallShapes(),
-                                                                                                                     framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
-                                                                                                                     framework::dataset::make("PadInfo", { Size2D(2, 2) })),
-                                                                                                                     framework::dataset::make("UpsamplingPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR })),
-                                                                                                                     framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 10) })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance);
-}
-TEST_SUITE_END() // QASYMM8_SIGNED
-TEST_SUITE_END() // Quantized
-
-TEST_SUITE_END() // UpsampleLayer
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/WarpAffine.cpp b/tests/validation/CL/WarpAffine.cpp
deleted file mode 100644
index d10ba7f502..0000000000
--- a/tests/validation/CL/WarpAffine.cpp
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Utils.h"
-#include "arm_compute/runtime/CL/functions/CLWarpAffine.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/AssetsLibrary.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/InterpolationPolicyDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/WarpAffineFixture.h"
-#include "tests/validation/reference/Utils.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-/** Tolerance */
-constexpr AbsoluteTolerance<uint8_t> tolerance(1);
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(WarpAffine)
-
-template <typename T>
-using CLWarpAffineFixture = WarpAffineValidationFixture<CLTensor, CLAccessor, CLWarpAffine, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWarpAffineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType", DataType::U8)),
-                                                                                                                framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                        datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, _valid_mask, tolerance, 0.02f);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWarpAffineFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType", DataType::U8)),
-                                                                                                                framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                        datasets::BorderModes()))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, _valid_mask, tolerance, 0.02f);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/WarpPerspective.cpp b/tests/validation/CL/WarpPerspective.cpp
deleted file mode 100644
index dd05059bed..0000000000
--- a/tests/validation/CL/WarpPerspective.cpp
+++ /dev/null
@@ -1,75 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/functions/CLWarpPerspective.h"
-#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/BorderModeDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/WarpPerspectiveFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<uint8_t> tolerance_value(1);
-constexpr float                      tolerance_number = 0.2f;
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(WarpPerspective)
-
-template <typename T>
-using CLWarpPerspectiveFixture = WarpPerspectiveValidationFixture<CLTensor, CLAccessor, CLWarpPerspective, T>;
-
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWarpPerspectiveFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataType",
-                                                                                                                     DataType::U8)),
-                                                                                                                     framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                             datasets::BorderModes()))
-{
-    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWarpPerspectiveFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), framework::dataset::make("DataType",
-                                                                                                                     DataType::U8)),
-                                                                                                                     framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::NEAREST_NEIGHBOR, InterpolationPolicy::BILINEAR })),
-                                                                                                             datasets::BorderModes()))
-{
-    validate(CLAccessor(_target), _reference, _valid_mask, tolerance_value, tolerance_number);
-}
-
-TEST_SUITE_END()
-TEST_SUITE_END()
-} // namespace validation
-} // namespace test
-} // namespace arm_compute
diff --git a/tests/validation/CL/WeightsReshape.cpp b/tests/validation/CL/WeightsReshape.cpp
index d04c10cee2..4345c4b08a 100644
--- a/tests/validation/CL/WeightsReshape.cpp
+++ b/tests/validation/CL/WeightsReshape.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,7 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/core/Types.h"
-#include "src/core/CL/kernels/CLWeightsReshapeKernel.h"
+#include "src/gpu/cl/kernels/ClWeightsReshapeKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/datasets/ShapeDatasets.h"
@@ -41,7 +41,7 @@ namespace validation
 TEST_SUITE(CL)
 TEST_SUITE(WeightsReshape)
 
-using CLWeightsReshape = CLSynthetizeFunction<CLWeightsReshapeKernel>;
+using ClWeightsReshape = ClSynthetizeOperatorWithBorder<opencl::kernels::ClWeightsReshapeKernel>;
 
 /** Validate tests
  *
@@ -87,15 +87,15 @@ framework::dataset::make("NumGroups", { 1, 1, 1, 2, 1, 2 })),
 framework::dataset::make("Expected", { false, false, false, false, false, false })),
 input_info, biases_info, output_info, num_groups, expected)
 {
-    bool status = bool(CLWeightsReshape::validate(&input_info, &biases_info, &output_info, num_groups));
+    bool status = bool(opencl::kernels::ClWeightsReshapeKernel::validate(&input_info, &biases_info, &output_info, num_groups));
     ARM_COMPUTE_EXPECT(status == expected, framework::LogLevel::ERRORS);
 }
 
 template <typename T>
-using CLWeightsReshapeFixture = WeightsReshapeValidationFixture<CLTensor, CLAccessor, CLWeightsReshape, T>;
+using ClWeightsReshapeFixture = WeightsReshapeOpValidationFixture<CLTensor, CLAccessor, ClWeightsReshape, T>;
 
 TEST_SUITE(Float)
-FIXTURE_DATA_TEST_CASE(FP32, CLWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(3U, 3U, 48U, 120U) }),
+FIXTURE_DATA_TEST_CASE(FP32, ClWeightsReshapeFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(3U, 3U, 48U, 120U) }),
                                                                                                                   framework::dataset::make("DataType", DataType::F32)),
                                                                                                           framework::dataset::make("HasBias", { true, false })),
                                                                                                   framework::dataset::make("NumGroups", { 1, 2 })))
@@ -104,7 +104,7 @@ FIXTURE_DATA_TEST_CASE(FP32, CLWeightsReshapeFixture<float>, framework::DatasetM
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(FP16, CLWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(13U, 13U, 96U, 240U) }),
+FIXTURE_DATA_TEST_CASE(FP16, ClWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(13U, 13U, 96U, 240U) }),
                                                                                                                  framework::dataset::make("DataType", DataType::F16)),
                                                                                                          framework::dataset::make("HasBias", { true, false })),
                                                                                                  framework::dataset::make("NumGroups", { 3, 4 })))
@@ -113,7 +113,7 @@ FIXTURE_DATA_TEST_CASE(FP16, CLWeightsReshapeFixture<half>, framework::DatasetMo
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(BFloat16, CLWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(9U, 9U, 96U, 240U) }),
+FIXTURE_DATA_TEST_CASE(BFloat16, ClWeightsReshapeFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(9U, 9U, 96U, 240U) }),
                                                                                                                      framework::dataset::make("DataType", DataType::BFLOAT16)),
                                                                                                              framework::dataset::make("HasBias", { false })),
                                                                                                      framework::dataset::make("NumGroups", { 3, 4 })))
@@ -125,7 +125,7 @@ FIXTURE_DATA_TEST_CASE(BFloat16, CLWeightsReshapeFixture<half>, framework::Datas
 TEST_SUITE_END()
 
 TEST_SUITE(Quantized)
-FIXTURE_DATA_TEST_CASE(QASYMM8, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
+FIXTURE_DATA_TEST_CASE(QASYMM8, ClWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
                                                                                                                        framework::dataset::make("DataType", DataType::QASYMM8)),
                                                                                                                framework::dataset::make("HasBias", { false })),
                                                                                                        framework::dataset::make("NumGroups", { 1, 2 })))
@@ -134,7 +134,7 @@ FIXTURE_DATA_TEST_CASE(QASYMM8, CLWeightsReshapeFixture<uint8_t>, framework::Dat
     validate(CLAccessor(_target), _reference);
 }
 
-FIXTURE_DATA_TEST_CASE(QASYMM8_SIGNED, CLWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
+FIXTURE_DATA_TEST_CASE(QASYMM8_SIGNED, ClWeightsReshapeFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("InputShape", { TensorShape(5U, 5U, 48U, 120U) }),
                                                                                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
                                                                                                                       framework::dataset::make("HasBias", { false })),
                                                                                                               framework::dataset::make("NumGroups", { 1, 2 })))
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index 750799ace2..196e7edb8c 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2020 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,12 +27,10 @@
 #include "arm_compute/runtime/CL/CLTensor.h"
 #include "arm_compute/runtime/CL/CLTensorAllocator.h"
 #include "arm_compute/runtime/CL/functions/CLWinogradConvolutionLayer.h"
-#include "arm_compute/runtime/CL/functions/CLWinogradInputTransform.h"
-#include "src/core/CL/kernels/CLWinogradFilterTransformKernel.h"
-#include "src/core/CL/kernels/CLWinogradOutputTransformKernel.h"
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/PaddingCalculator.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
 #include "tests/datasets/LargeConvolutionLayerDataset.h"
 #include "tests/datasets/ShapeDatasets.h"
 #include "tests/datasets/SmallConvolutionLayerDataset.h"
@@ -50,631 +48,379 @@ namespace test
 {
 namespace validation
 {
+using framework::dataset::make;
 namespace
 {
 // *INDENT-OFF*
 // clang-format off
-constexpr AbsoluteTolerance<float> tolerance_f32(0.002f);
-const AbsoluteTolerance<half> tolerance_f16(half(0.5f));
+const AbsoluteTolerance<half> tolerance_f16(half(1.f));
 constexpr AbsoluteTolerance<float> tolerance_convolution_layer_f32(0.1f);
 const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
 RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */
 constexpr float                     tolerance_num   = 0.05f;  /**< Tolerance number */
 constexpr float                     abs_tolerance_convolution_layer_f16   = 2.5f;  /**< Tolerance number */
-constexpr float                      tolerance_num_f16 = 0.15f;                 /**< Tolerance number */
-
-// Input transform
-const auto SmallWinogradInputTransformDatasetNCHW =
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x2_3x3(),
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_3x1(),
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x2_1x3(),
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(),
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_3x1(),
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(),
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_5x5(),
-           framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(),
-                                      datasets::SmallWinogradInputTransformDataset1x4_1x5()))))))));
-
-const auto SmallWinogradInputTransformDatasetNHWC = framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_3x3(),
-                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_3x1(),
-                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x3(),
-                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x4_5x5(),
-                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset4x1_5x1(),
-                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset1x4_1x5(),
-                                                    framework::dataset::concat(datasets::SmallWinogradInputTransformDataset2x1_7x1(),
-                                                                               datasets::SmallWinogradInputTransformDataset1x2_1x7())))))));
-
-const auto SmallWinogradInputTransformDatasetNHWC_FP32 = framework::dataset::concat(SmallWinogradInputTransformDatasetNHWC,
-                                                                                    datasets::SmallWinogradInputTransformDataset2x2_7x7());
-
-const auto LargeWinogradInputTransformDatasetNCHW =
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x2_3x3(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset2x1_3x1(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x2_1x3(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_3x1(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset1x4_1x3(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_5x5(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(),
-                                               datasets::LargeWinogradInputTransformDataset1x4_1x5()))))))));
-
-const auto LargeWinogradInputTransformDatasetNHWC =
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_3x3(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x4_5x5(),
-           framework::dataset::concat(datasets::LargeWinogradInputTransformDataset4x1_5x1(),
-                                      datasets::LargeWinogradInputTransformDataset1x4_1x5())));
-
-const auto LargeWinogradInputTransformDatasetNHWC_FP32 =
-           framework::dataset::concat(LargeWinogradInputTransformDatasetNHWC,
-                                      (datasets::LargeWinogradInputTransformDataset2x2_7x7()));
-
-// Filter transform
-const auto SmallWinogradFilterTransformDatasetNCHW =
-           framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Small3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })),
-           framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })),
-           framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-                                      combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
-
-const auto SmallWinogradFilterTransformDatasetNHWC_F16 =
-           framework::dataset::concat(combine(datasets::Small3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Small3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-           framework::dataset::concat(combine(datasets::Small1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
-           framework::dataset::concat(combine(datasets::Small5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Small5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-           framework::dataset::concat(combine(datasets::Small1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
-           framework::dataset::concat(combine(datasets::Small1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })),
-                                      combine(datasets::Small7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })))))))));
-
-const auto SmallWinogradFilterTransformDatasetNHWC_F32 =
-           framework::dataset::concat(SmallWinogradFilterTransformDatasetNHWC_F16,
-                                      combine(datasets::Small7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })));
-
-const auto LargeWinogradFilterTransformDatasetNCHW =
-           framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U), Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Large3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U), Size2D(4U, 1U) })),
-           framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U), Size2D(1U, 4U) })),
-           framework::dataset::concat(combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-                                      combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })))))));
-
-const auto LargeWinogradFilterTransformDatasetNHWC_F16 =
-           framework::dataset::concat(combine(datasets::Large3x3Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Large3x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-           framework::dataset::concat(combine(datasets::Large1x3Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
-           framework::dataset::concat(combine(datasets::Large5x5Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 4U) })),
-           framework::dataset::concat(combine(datasets::Large5x1Shapes(), framework::dataset::make("OutputTile", { Size2D(4U, 1U) })),
-           framework::dataset::concat(combine(datasets::Large1x5Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 4U) })),
-           framework::dataset::concat(combine(datasets::Large7x1Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 1U) })),
-                                      combine(datasets::Large1x7Shapes(), framework::dataset::make("OutputTile", { Size2D(1U, 2U) })))))))));
-
-const auto LargeWinogradFilterTransformDatasetNHWC_F32 =
-           framework::dataset::concat(LargeWinogradFilterTransformDatasetNHWC_F16,
-                                      combine(datasets::Large7x7Shapes(), framework::dataset::make("OutputTile", { Size2D(2U, 2U) })));
-
-// Output transform
-const auto SmallWinogradOutputTransformDatasetNCHW = datasets::SmallWinogradOutputTransformDatasetNCHW();
-
-const auto SmallWinogradOutputTransformDatasetNHWC_F16 = datasets::SmallWinogradOutputTransformDatasetNHWC_F16();
-
-const auto SmallWinogradOutputTransformDatasetNHWC_F32 = datasets::SmallWinogradOutputTransformDatasetNHWC_F32();
-
-const auto LargeWinogradOutputTransformDatasetNCHW = datasets::LargeWinogradOutputTransformDatasetNCHW();
-
-const auto LargeWinogradOutputTransformDatasetNHWC_F16 = datasets::LargeWinogradOutputTransformDatasetNHWC_F16();
-
-const auto LargeWinogradOutputTransformDatasetNHWC_F32 = datasets::LargeWinogradOutputTransformDatasetNHWC_F32();
-
-//Activation Functions
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+constexpr float                     tolerance_num_f16 = 0.15f;                 /**< Tolerance number */
+
+const auto ActivationFunctionsDataset = make("ActivationInfo",
 {
-    ActivationLayerInfo(),
     ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.8f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
 });
-const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
 {
     ActivationLayerInfo(),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU),
-    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU)
+    ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f)
 });
 
 } // namespace
 
 using namespace arm_compute::misc::shape_calculator;
 
+/*
+    Testing Strategy of CL Winograd:
+        - For nchw and nhwc and for each kernel size, we have a dedicated OpenCL kernel.
+          (except 1xN and Nx1 uses NxN under the hood). Therefore, test cases should be
+          stressed for each of these configurations.
+        - Fp32 and Fp16 kernels are the same. Only the DATA_TYPE build option changes
+          between these two. Because the same kernel is stressed thoroughly for both
+          small and large shapes for Fp32 data type, Fp16 kernels are run on a subset
+          of the shapes, because we get diminishing returns by exhaustively testing the
+          same kernel.
+        - Activations only affect the output stage and it's calculated on the output tile.
+          Exhaustively testing all activations with all the shapes does not provide much
+          value but increases the testing time quite significantly. Therefore, all activations
+          are tested in a subset of the shapes, and for all MxM kernels and data layouts as
+          they represent different OpenCL kernels. (1xM and Mx1 kernels use MxM under the hood).
+*/
 TEST_SUITE(CL)
 TEST_SUITE(Winograd)
 
-TEST_SUITE(InputTransform)
-
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-                                                framework::dataset::make("InputInfo",{
-                                                                                        TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F16),     // F16 not supported
-                                                                                        TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::QASYMM8), // QASYMM8 not supported
-                                                                                        TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32),     // Kernel size not supported
-                                                                                        TensorInfo(TensorShape(53U, 21U, 5U, 3U), 1, DataType::F32),     // Strides not supported
-                                                                                        TensorInfo(TensorShape(53U, 33U, 4U), 1, DataType::F32),         // Padding needed
-                                                                                        TensorInfo(TensorShape(34U, 42U, 7U, 3U), 1, DataType::F32),     // Padding needed
-                                                                                        TensorInfo(TensorShape(31U, 37U, 37U), 1, DataType::F32)         // Padding needed
-                                                                                    }),
-                                                framework::dataset::make("OutputInfo", {
-                                                                                        TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::F16),
-                                                                                        TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::QASYMM8),
-                                                                                        TensorInfo(TensorShape(5U, 5U, 16U, 3U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(5U, 1U, 16U, 3U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(4U, 442U, 16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(7U, 320U, 16U, 3U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(37U, 304U, 16U), 1, DataType::F32)
-                                                                                    })),
-                                                framework::dataset::make("WinogradInfo", {
-                                                                                        WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 1, 0), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 21U), PadStrideInfo(2, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(53U, 33U), PadStrideInfo(1, 1, 0, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(34U, 42U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2, 2), Size2D(3, 3), Size2D(31U, 37U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)
-                                                                                    })),
-                                                framework::dataset::make("Expected", { false, false, false, false, false, false, false })),
-                                            input_info, output_info, winograd_info, expected)
-{
-    ARM_COMPUTE_EXPECT(bool(CLWinogradInputTransform::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS);
-}
-
-using CLWinogradInputTransformFixtureFP32 = WinogradInputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradInputTransform, float>;
-using CLWinogradInputTransformFixtureFP16 = WinogradInputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradInputTransform, half>;
-
-TEST_SUITE(NCHW)
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNCHW,
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                                                                                                     framework::dataset::make("DataType", { DataType::F32 })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNCHW,
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                                                                                                   framework::dataset::make("DataType", { DataType::F32 })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END() // FP32
-
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNCHW,
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                                                                                                     framework::dataset::make("DataType", { DataType::F16 })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNCHW,
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                                                                                                   framework::dataset::make("DataType", { DataType::F16 })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // NCHW
-
-TEST_SUITE(NHWC)
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC,
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                                                                                                     framework::dataset::make("DataType", { DataType::F16 })))
+TEST_SUITE(ConvolutionLayer)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+    make("InputInfo", {
+        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16),     // Insufficient padding
+        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),     // Datatype mismatch
+        TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
+        TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32),     // Padding needed
+        TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)  // Kernel size not supported
+        }),
+    make("WeightsInfo", {
+        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
+        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
+        TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+        TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
+        TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
+        }),
+    make("BiasesInfo", {
+        TensorInfo(TensorShape(19U), 1, DataType::F16),
+        TensorInfo(TensorShape(19U), 1, DataType::F32),
+        TensorInfo(TensorShape(21U), 1, DataType::F32),
+        TensorInfo(TensorShape(16U), 1, DataType::F32),
+        TensorInfo(TensorShape(16U), 1, DataType::F32)
+        }),
+    make("OutputInfo", {
+        TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16),
+        TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+        TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
+        TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32),
+        TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
+        }),
+    make("ConvInfo", {
+        PadStrideInfo(1, 1, 1, 1),
+        PadStrideInfo(1, 1, 1, 1),
+        PadStrideInfo(1, 2, 0, 0),
+        PadStrideInfo(1, 1, 1, 1),
+        PadStrideInfo(1, 1, 1, 0)
+    }),
+    make("Expected", { false, false, false, false, false })),
+    input_info, weights_info, bias_info, output_info, conv_info, expected)
 {
-    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
+    ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP16, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC,
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                                                                                                   framework::dataset::make("DataType", { DataType::F16 })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
-}
-TEST_SUITE_END() // FP16
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::PRECOMMIT, combine(combine(SmallWinogradInputTransformDatasetNHWC_FP32,
-                                                                                                                     framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                                                                                                     framework::dataset::make("DataType", { DataType::F32 })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
+DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip(
+    make("WeightsInfo", {
+        // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted
+
+        // Fp32/16, NCHW
+        // 3x1, 1x3, 3x3 --> all TRUE
+        TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+        TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+        TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+        // 5x1, 1x5, 5x5 --> all TRUE
+        TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+        TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+        TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+        // 7x1, 1x7, 7x7
+        // nchw does not support kernels with size 7 --> all FALSE
+        TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+        TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+        TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+        // unsupported kernel sizes
+        TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+        TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+        TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+        // Fp32/16, NHWC
+        // 7x1, 1x7, 7x7 --> all TRUE
+        TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+        TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+        TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+        // 3x1, 1x3, 3x3 --> all TRUE
+        TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+        TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+        TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+        // 5x1, 1x5, 5x5 --> all TRUE
+        TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+        TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+        TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+
+        // unsupported kernel sizes
+        TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+        TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+        TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+        }),
+    make("Expected", {
+        true, true, true,     // nchw, 3x3, 1x3, 3x1
+        true, true, true,     // nchw, 5x5, 1x5, 5x1
+        false, false, false,  // nchw, 7x7, 1x7, 7x1
+        false, false, false,  // nchw, random unsupported kernels
+        true, true, true,     // nhwc, 7x7, 1x7, 7x1
+        true, true, true,     // nhwc, 3x3, 1x3, 3x1
+        true, true, true,     // nhwc, 5x5, 1x5, 5x1
+        false, false, false,  // nchw, random unsupported kernels
+    })),
+    weights_info_const, expected)
+{
+    DataType data_type = weights_info_const.data_type();
+    DataLayout data_layout = weights_info_const.data_layout();
+
+    TensorInfo input_info = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type);
+    TensorInfo bias_info = TensorInfo(TensorShape(8U), 1, data_type);
+    TensorInfo weights_info = weights_info_const;
+
+    if(data_layout == DataLayout::NHWC)
+    {
+        // Convert to NHWC
+        PermutationVector perm = PermutationVector(2U, 0U, 1U);
+
+        TensorShape input_shape = input_info.tensor_shape();
+        TensorShape weights_shape = weights_info.tensor_shape();
+        permute(input_shape, perm);
+        permute(weights_shape, perm);
+
+        input_info.set_tensor_shape(input_shape);
+        weights_info.set_tensor_shape(weights_shape);
+
+        input_info.set_data_layout(data_layout);
+        weights_info.set_data_layout(data_layout);
+        bias_info.set_data_layout(data_layout);
+    }
+
+    PadStrideInfo conv_info(1, 1, 0, 0);
+
+    TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info);
+    TensorInfo output_info = TensorInfo(output_shape, 1, data_type, data_layout);
+
+    Status status = CLWinogradConvolutionLayer::validate(
+        &input_info,
+        &weights_info,
+        &bias_info,
+        &output_info,
+        conv_info,
+        ActivationLayerInfo(),
+        true /* fast math */);
+
+    ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradInputTransformFixtureFP32, framework::DatasetMode::NIGHTLY, combine(combine(LargeWinogradInputTransformDatasetNHWC_FP32,
-                                                                                                                   framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                                                                                                   framework::dataset::make("DataType", { DataType::F32 })))
-{
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END() // FP32
-TEST_SUITE_END() // NHWC
-TEST_SUITE_END() // InputTransform
-
-TEST_SUITE(FilterTransform)
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-                                                framework::dataset::make("InputInfo",{
-                                                                                        TensorInfo(TensorShape(3U, 3U, 5U, 3U), 1, DataType::F16),     // F16 supported
-                                                                                        TensorInfo(TensorShape(3U, 3U, 5U, 3U), 1, DataType::QASYMM8), // QASYMM8 not supported
-                                                                                        TensorInfo(TensorShape(5U, 5U, 5U, 3U), 1, DataType::F32),     // Kernel size not supported
-                                                                                        TensorInfo(TensorShape(3U, 3U), 1, DataType::F32),             // Output tile not supported
-                                                                                        TensorInfo(TensorShape(3U, 3U, 5U, 3U), 1, DataType::F32),     // valid
-                                                                                        TensorInfo(TensorShape(3U, 3U, 37U, 2U), 1, DataType::F32),    // valid
-                                                                                        TensorInfo(TensorShape(3U, 3U, 37U, 22U), 1, DataType::F32)    // valid
-                                                                                    }),
-                                                framework::dataset::make("OutputInfo", {
-                                                                                        TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::F16),
-                                                                                        TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::QASYMM8),
-                                                                                        TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(1U, 1U, 16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(3U, 5U, 16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(2U, 37U, 16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(22U, 37U, 36U), 1, DataType::F32)
-                                                                                    })),
-                                                framework::dataset::make("WinogradInfo", {
-                                                                                          WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW  /* Not needed */ ),
-                                                                                          WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW  /* Not needed */ ),
-                                                                                          WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW  /* Not needed */ ),
-                                                                                          WinogradInfo(Size2D(3U, 3U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW  /* Not needed */ ),
-                                                                                          WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW  /* Not needed */ ),
-                                                                                          WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW  /* Not needed */ ),
-                                                                                          WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D() /* Not needed */, PadStrideInfo() /* Not needed */, DataLayout::NCHW  /* Not needed */ )
-                                                                                         })),
-                                                framework::dataset::make("Expected", { true, false, false, false, true, true, true })),
-                                            input_info, output_info, winograd_info, expected)
-{
-    ARM_COMPUTE_EXPECT(bool(CLWinogradFilterTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS);
-}
-
-using CLWinogradFilterTransform        = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradFilterTransformKernel, 0>;
-using CLWinogradFilterTransformFixtureFP32 = WinogradFilterTransformValidationFixture<CLTensor, CLAccessor, CLWinogradFilterTransform, float>;
-using CLWinogradFilterTransformFixtureFP16 = WinogradFilterTransformValidationFixture<CLTensor, CLAccessor, CLWinogradFilterTransform, half>;
-
-TEST_SUITE(NCHW)
 TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(SmallWinogradFilterTransformDatasetNCHW,
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                       framework::dataset::make("DataType", { DataType::F32 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradFilterTransformDatasetNCHW,
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                       framework::dataset::make("DataType", { DataType::F32 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END() // FP32
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(SmallWinogradFilterTransformDatasetNCHW,
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                       framework::dataset::make("DataType", { DataType::F16 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradFilterTransformDatasetNCHW,
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW })),
-                                       framework::dataset::make("DataType", { DataType::F16 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // NCHW
-
-TEST_SUITE(NHWC)
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(SmallWinogradFilterTransformDatasetNHWC_F16,
-                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                       framework::dataset::make("DataType", { DataType::F16 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradFilterTransformDatasetNHWC_F16,
-                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                       framework::dataset::make("DataType", { DataType::F16 })))
+using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>;
+using CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float, float, true, true>;
+TEST_SUITE(Conv3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+                                               make("DataType", { DataType::F32 }),
+                                               ActivationFunctionsSmallDataset,
+                                               make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
-TEST_SUITE_END() // FP16
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(SmallWinogradFilterTransformDatasetNHWC_F32,
-                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                       framework::dataset::make("DataType", { DataType::F32 })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradFilterTransformFixtureFP32, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradFilterTransformDatasetNHWC_F32,
-                                       framework::dataset::make("DataLayout", { DataLayout::NHWC })),
-                                       framework::dataset::make("DataType", { DataType::F32 })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END() // FP32
-TEST_SUITE_END() // NHWC
-TEST_SUITE_END() // FilterTransform
-
-TEST_SUITE(OutputTransform)
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
-                                                framework::dataset::make("InputInfo",{
-                                                                                        TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F16),      // F16 supported
-                                                                                        TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::QASYMM8),  // QASYMM8 not supported
-                                                                                        TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F32),      // Kernel size not supported
-                                                                                        TensorInfo(TensorShape(512U, 49U, 16U, 5U), 1, DataType::F32),      // Valid
-                                                                                        TensorInfo(TensorShape(13U, 108U, 16U, 4U), 1, DataType::F32),      // Padding needed
-                                                                                        TensorInfo(TensorShape(7U, 20U, 16U, 7U), 1, DataType::F32),        // Valid
-                                                                                        TensorInfo(TensorShape(7U, 20U, 16U, 7U), 1, DataType::F32),        // Wrong WinogradInfo
-                                                                                        TensorInfo(TensorShape(7U, 256U, 36U, 3U), 1, DataType::F32),       // Valid
-                                                                                        TensorInfo(TensorShape(7U, 256U, 16U, 3U), 1, DataType::F32)        // Wrong number of batches
-                                                                                    }),
-                                                framework::dataset::make("BiasInfo", {
-                                                                                        TensorInfo(TensorShape(512U), 1, DataType::F16),
-                                                                                        TensorInfo(TensorShape(512U), 1, DataType::QASYMM8),
-                                                                                        TensorInfo(TensorShape(512U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(512U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(13U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(7U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(7U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(7U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(7U), 1, DataType::F32)
-                                                                                    })),
-                                                framework::dataset::make("OutputInfo", {
-                                                                                        TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F16),
-                                                                                        TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::QASYMM8),
-                                                                                        TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(14U, 14U, 512U, 5U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(17U, 23U, 13U, 4U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(8U, 10U, 7U, 7U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(7U, 9U, 7U, 7U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(64U, 64U, 7U, 3U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(64U, 64U, 7U, 3U), 1, DataType::F32)
-                                                                                    })),
-                                                framework::dataset::make("WinogradInfo", {
-                                                                                        WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2U, 2U), Size2D(5U, 5U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(14U, 14U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(17U, 23U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2U, 2U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(2U, 3U), Size2D(3U, 3U), Size2D(8U, 10U), PadStrideInfo(1, 1, 0, 0), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(64U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW),
-                                                                                        WinogradInfo(Size2D(4U, 4U), Size2D(3U, 3U), Size2D(64U, 64U), PadStrideInfo(1, 1, 1, 1), DataLayout::NCHW)
-                                                                                    })),
-                                                framework::dataset::make("Expected", { true, false, false, true, false, true, false, true, false })),
-                                            input_info, bias_info, output_info, winograd_info, expected)
-{
-    ARM_COMPUTE_EXPECT(bool(CLWinogradOutputTransformKernel::validate(&input_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), winograd_info)) == expected, framework::LogLevel::ERRORS);
-}
-
-using CLWinogradOutputTransform        = CLSynthetizeFunctionWithZeroConstantBorder<CLWinogradOutputTransformKernel, 0>;
-using CLWinogradOutputTransformFixtureFP32 = WinogradOutputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradOutputTransform, float>;
-using CLWinogradOutputTransformFixtureFP16 = WinogradOutputTransformValidationFixture<CLTensor, CLAccessor, CLWinogradOutputTransform, half>;
-
-TEST_SUITE(NCHW)
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::ALL,
-                       combine(combine(SmallWinogradOutputTransformDatasetNCHW,
-                               framework::dataset::make("DataType", { DataType::F16 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(
+                            make("Input", TensorShape(8U, 8U, 32U)),
+                            make("Weight", TensorShape(3U, 3U, 32U, 4U)),
+                            make("Bias", TensorShape(4U)),
+                            make("Output", TensorShape(6U, 6U, 4U)),
+                            make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                            make("Dilation", Size2D(1U, 1U)),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
+TEST_SUITE_END() // Conv3x3
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradOutputTransformDatasetNCHW,
-                               framework::dataset::make("DataType", { DataType::F16 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-TEST_SUITE_END() // FP16
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::ALL,
-                       combine(combine(SmallWinogradOutputTransformDatasetNCHW,
-                               framework::dataset::make("DataType", { DataType::F32 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
+TEST_SUITE(Conv3x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradOutputTransformDatasetNCHW,
-                               framework::dataset::make("DataType", { DataType::F32 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
-TEST_SUITE_END() // FP32
-TEST_SUITE_END() // NCHW
+TEST_SUITE_END() // Conv3x1
 
-TEST_SUITE(NHWC)
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::ALL,
-                       combine(combine(SmallWinogradOutputTransformDatasetNHWC_F16,
-                               framework::dataset::make("DataType", { DataType::F16 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
+TEST_SUITE(Conv1x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradOutputTransformDatasetNHWC_F16,
-                               framework::dataset::make("DataType", { DataType::F16 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT,
+                       combine(
+                            make("Input", TensorShape(8U, 8U, 32U)),
+                            make("Weight", TensorShape(1U, 3U, 32U, 1U)),
+                            make("Bias", TensorShape(1U)),
+                            make("Output", TensorShape(8U, 6U, 1U)),
+                            make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                            make("Dilation", Size2D(1U, 1U)),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num_f16);
-}
-TEST_SUITE_END() // FP16
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::ALL,
-                       combine(combine(SmallWinogradOutputTransformDatasetNHWC_F32,
-                               framework::dataset::make("DataType", { DataType::F32 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradOutputTransformFixtureFP32, framework::DatasetMode::NIGHTLY,
-                       combine(combine(LargeWinogradOutputTransformDatasetNHWC_F32,
-                               framework::dataset::make("DataType", { DataType::F32 })),
-                               framework::dataset::make("ActivationInfo",{ ActivationLayerInfo() }) ))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END() // FP32
-TEST_SUITE_END() // NHWC
-TEST_SUITE_END() // OutputTransform
-
-TEST_SUITE(ConvolutionLayer)
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
-                                                framework::dataset::make("InputInfo", {
-                                                                                        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16),     // Insufficient padding
-                                                                                        TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32),     // Datatype mismatch
-                                                                                        TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
-                                                                                        TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32),     // Padding needed
-                                                                                        TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)  // Kernel size not supported
-                                                                                      }),
-                                                framework::dataset::make("WeightsInfo", {
-                                                                                        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
-                                                                                        TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
-                                                                                        TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
-                                                                                        })),
-                                                framework::dataset::make("BiasesInfo", {
-                                                                                        TensorInfo(TensorShape(19U), 1, DataType::F16),
-                                                                                        TensorInfo(TensorShape(19U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(21U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(16U), 1, DataType::F32)
-                                                                                       })),
-                                                framework::dataset::make("OutputInfo", {
-                                                                                        TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16),
-                                                                                        TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32),
-                                                                                        TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
-                                                                                       })),
-                                                framework::dataset::make("ConvInfo", {
-                                                                                        PadStrideInfo(1, 1, 1, 1),
-                                                                                        PadStrideInfo(1, 1, 1, 1),
-                                                                                        PadStrideInfo(1, 2, 0, 0),
-                                                                                        PadStrideInfo(1, 1, 1, 1),
-                                                                                        PadStrideInfo(1, 1, 1, 0)
-                                                                                                                 })),
-                                                framework::dataset::make("Expected", { false, false, false, false, false })),
-               input_info, weights_info, bias_info, output_info, conv_info, expected)
-{
-    ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS);
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
+TEST_SUITE_END() // Conv1x3
 
-TEST_SUITE(FP32)
-using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>;
-TEST_SUITE(Conv3x3)
+TEST_SUITE(Conv5x5)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
-}
-TEST_SUITE_END() // Conv3x3
+                       combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
-TEST_SUITE(Conv3x1)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsSmallDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(
+                            make("Input", TensorShape(13U, 13U, 32U)),
+                            make("Weight", TensorShape(5U, 5U, 32U, 4U)),
+                            make("Bias", TensorShape(4U)),
+                            make("Output", TensorShape(9U, 9U, 4U)),
+                            make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                            make("Dilation", Size2D(1U, 1U)),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
-TEST_SUITE_END() // Conv3x1
+TEST_SUITE_END() // Conv5x5
 
-TEST_SUITE(Conv1x3)
+TEST_SUITE(Conv5x1)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsSmallDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F32 })),
-                                       ActivationFunctionsDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
-TEST_SUITE_END() // Conv1x3
+TEST_SUITE_END() // Conv5x1
 
-TEST_SUITE(Conv5x5)
+TEST_SUITE(Conv1x5)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsSmallDataset ),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
 {
     // Validate output
@@ -682,64 +428,63 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsDataset ),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
-TEST_SUITE_END() // Conv5x5
+TEST_SUITE_END() // Conv1x5
 
-TEST_SUITE(Conv5x1)
+TEST_SUITE(Conv1x7)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NHWC })))
 
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
-
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+                       combine(
+                            make("Input", TensorShape(13U, 13U, 32U)),
+                            make("Weight", TensorShape(1U, 7U, 32U, 4U)),
+                            make("Bias", TensorShape(4U)),
+                            make("Output", TensorShape(13U, 11U, 4U)),
+                            make("PadStrideInfo", PadStrideInfo(1, 1, 0, 2)),
+                            make("Dilation", Size2D(1U, 1U)),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsDataset,
+                            make("DataLayout", { DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
-TEST_SUITE_END() // Conv5x1
+TEST_SUITE_END() // Conv1x7
 
-TEST_SUITE(Conv1x5)
+TEST_SUITE(Conv7x1)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+                            make("DataType", { DataType::F32 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NHWC })))
 
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
 }
+TEST_SUITE_END() // Conv7x1
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F32 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+/** @note: Although 7x7 is in the kernels, reference implementation
+ *  does not support it. So, it remains as a "test gap".
+ */
 
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
-}
-TEST_SUITE_END() // Conv1x5
 TEST_SUITE_END() // FP32
 
 
@@ -748,20 +493,36 @@ TEST_SUITE(FP16)
 using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, half, float>;
 TEST_SUITE(Conv3x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                               ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer3x3DatasetFp16Subset(),
+                            make("DataType", { DataType::F16 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+                       combine(
+                            make("Input", TensorShape(8U, 8U, 32U)),
+                            make("Weight", TensorShape(3U, 3U, 32U, 6U)),
+                            make("Bias", TensorShape(6U)),
+                            make("Output", TensorShape(6U, 6U, 6U)),
+                            make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                            make("Dilation", Size2D(1U, 1U)),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -770,20 +531,20 @@ TEST_SUITE_END() // Conv3x3
 
 TEST_SUITE(Conv3x1)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer3x1DatasetFp16Subset(),
+                            make("DataType", { DataType::F16 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -792,20 +553,20 @@ TEST_SUITE_END() // Conv3x1
 
 TEST_SUITE(Conv1x3)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
-                                       framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsDataset),
-                                       framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer1x3DatasetFp16Subset(),
+                            make("DataType", { DataType::F16 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -814,10 +575,10 @@ TEST_SUITE_END() // Conv1x3
 
 TEST_SUITE(Conv5x5)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
 {
     // Validate output
@@ -825,11 +586,27 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer5x5DatasetFp16Subset(),
+                            make("DataType", { DataType::F16 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
 
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+                       combine(
+                            make("Input", TensorShape(13U, 13U, 32U)),
+                            make("Weight", TensorShape(5U, 5U, 32U, 6U)),
+                            make("Bias", TensorShape(6U)),
+                            make("Output", TensorShape(9U, 9U, 6U)),
+                            make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                            make("Dilation", Size2D(1U, 1U)),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -838,10 +615,10 @@ TEST_SUITE_END() // Conv5x5
 
 TEST_SUITE(Conv5x1)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
 {
     // Validate output
@@ -849,10 +626,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer5x1DatasetFp16Subset(),
+                            make("DataType", { DataType::F16 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
 {
     // Validate output
@@ -862,10 +639,10 @@ TEST_SUITE_END() // Conv5x1
 
 TEST_SUITE(Conv1x5)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
 {
     // Validate output
@@ -873,10 +650,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer1x5DatasetFp16Subset(),
+                            make("DataType", { DataType::F16 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
 
 {
     // Validate output
@@ -886,10 +663,10 @@ TEST_SUITE_END() // Conv1x5
 
 TEST_SUITE(Conv1x7)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
-                       combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                       ActivationFunctionsSmallDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+                       combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NHWC })))
 
 {
     // Validate output
@@ -897,19 +674,47 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
 }
 
 FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
-                       combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x7Dataset(),
-                                               framework::dataset::make("DataType", { DataType::F16 })),
-                                               ActivationFunctionsDataset),
-                                               framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+                       combine(datasets::LargeWinogradConvolutionLayer1x7DatasetFp16Subset(),
+                            make("DataType", { DataType::F16 }),
+                            make("ActivationInfo", { ActivationLayerInfo() }),
+                            make("DataLayout", { DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
 
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+                       combine(
+                            make("Input", TensorShape(13U, 13U, 32U)),
+                            make("Weight", TensorShape(1U, 7U, 32U, 6U)),
+                            make("Bias", TensorShape(6U)),
+                            make("Output", TensorShape(13U, 7U, 6U)),
+                            make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+                            make("Dilation", Size2D(1U, 1U)),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsDataset,
+                            make("DataLayout", { DataLayout::NHWC })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
 }
 TEST_SUITE_END() // Conv1x7
 
-TEST_SUITE_END() // FP16
+TEST_SUITE(Conv7x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
+                       combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+                            make("DataType", { DataType::F16 }),
+                            ActivationFunctionsSmallDataset,
+                            make("DataLayout", { DataLayout::NHWC })))
+
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
+}
+TEST_SUITE_END() // Conv7x1
 
+TEST_SUITE_END() // FP16
 TEST_SUITE_END() // ConvolutionLayer
 TEST_SUITE_END() // Winograd
 TEST_SUITE_END() // CL
diff --git a/tests/validation/CL/YOLOLayer.cpp b/tests/validation/CL/YOLOLayer.cpp
deleted file mode 100644
index 95c18d3d95..0000000000
--- a/tests/validation/CL/YOLOLayer.cpp
+++ /dev/null
@@ -1,140 +0,0 @@
-/*
- * Copyright (c) 2018-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/CLTensorAllocator.h"
-#include "arm_compute/runtime/CL/functions/CLYOLOLayer.h"
-#include "tests/CL/CLAccessor.h"
-#include "tests/PaddingCalculator.h"
-#include "tests/datasets/ActivationFunctionsDataset.h"
-#include "tests/datasets/ShapeDatasets.h"
-#include "tests/framework/Asserts.h"
-#include "tests/framework/Macros.h"
-#include "tests/framework/datasets/Datasets.h"
-#include "tests/validation/Validation.h"
-#include "tests/validation/fixtures/YOLOLayerFixture.h"
-
-namespace arm_compute
-{
-namespace test
-{
-namespace validation
-{
-namespace
-{
-constexpr AbsoluteTolerance<float> tolerance_f32(1e-6f);
-constexpr RelativeTolerance<float> tolerance_f16(0.01f);
-
-/** Floating point data sets. */
-const auto YOLODataset = combine(combine(combine(combine(framework::dataset::make("InPlace", { false, true }), framework::dataset::make("ActivationFunction",
-                                                         ActivationLayerInfo::ActivationFunction::LOGISTIC)),
-                                                 framework::dataset::make("AlphaBeta", { 0.5f, 1.f })),
-                                         framework::dataset::make("Classes", 40)),
-                                 framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }));
-} // namespace
-
-TEST_SUITE(CL)
-TEST_SUITE(YOLOLayer)
-
-// *INDENT-OFF*
-// clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
-               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::U8),  // Wrong input data type
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),  // Invalid activation info
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),  // Wrong output data type
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),  // wrong number of classes
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),  // Mismatching shapes
-                                                       TensorInfo(TensorShape(17U, 16U, 6U), 1, DataType::F32),  // Shrink window
-                                                       TensorInfo(TensorShape(17U, 16U, 7U), 1, DataType::F32),  // Channels not multiple of (num_classes + 5)
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),  // Valid
-                                                     }),
-               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::U16),
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 11U, 6U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 7U), 1, DataType::F32),
-                                                       TensorInfo(TensorShape(16U, 16U, 6U), 1, DataType::F32),
-                                                     })),
-               framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
-                                                            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
-                                                     })),
-               framework::dataset::make("Numclasses", { 1, 1, 1, 0, 1, 1, 1, 1
-                                                     })),
-               framework::dataset::make("Expected", { false, false, false, false, false, false, false, true})),
-               input_info, output_info, act_info, num_classes, expected)
-{
-    ARM_COMPUTE_EXPECT(bool(CLYOLOLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), act_info, num_classes)) == expected, framework::LogLevel::ERRORS);
-}
-// clang-format on
-// *INDENT-ON*
-
-template <typename T>
-using CLYOLOLayerFixture = YOLOValidationFixture<CLTensor, CLAccessor, CLYOLOLayer, T>;
-
-TEST_SUITE(Float)
-TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLYOLOLayerFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallYOLOShapes(), YOLODataset), framework::dataset::make("DataType",
-                                                                                                       DataType::F32)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-
-FIXTURE_DATA_TEST_CASE(RunLarge, CLYOLOLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeYOLOShapes(), YOLODataset), framework::dataset::make("DataType",
-                                                                                                     DataType::F32)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f32);
-}
-TEST_SUITE_END() // FP32
-
-TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLYOLOLayerFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallYOLOShapes(), YOLODataset), framework::dataset::make("DataType",
-                                                                                                      DataType::F16)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLYOLOLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeYOLOShapes(), YOLODataset), framework::dataset::make("DataType",
-                                                                                                    DataType::F16)))
-{
-    // Validate output
-    validate(CLAccessor(_target), _reference, tolerance_f16);
-}
-TEST_SUITE_END() // FP16
-TEST_SUITE_END() // Float
-
-TEST_SUITE_END() // YOLOLayer
-TEST_SUITE_END() // CL
-} // namespace validation
-} // namespace test
-} // namespace arm_compute