From 2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Wed, 17 Jan 2024 16:07:03 +0000 Subject: Use the stable CKW API in the GPU dynamic fusion backend - Refactor all kernels to work with the CKW stable API - Add support for sub-tile in the op_load/op_store CKW operator - Fix mismatch in resize - Add comments in all kernels written with CKW to help developers understand the structure of the code - Add texture image support in depthwise convolution written with CKW - Add support for different block sizes in depthwise convolution - Remove the use of the dynamic fusion helper functions. - Add support for floor in the op_unary() of CKW Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530 Signed-off-by: Gian Marco Iodice Signed-off-by: Gunes Bayir Signed-off-by: Viet-Hoa Do Signed-off-by: Jakub Sujak Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- .../validation/dynamic_fusion/gpu/Integration.cpp | 4 +- tests/validation/dynamic_fusion/gpu/cl/Add.cpp | 4 - tests/validation/dynamic_fusion/gpu/cl/Cast.cpp | 166 +---------- tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp | 169 +++++------- tests/validation/dynamic_fusion/gpu/cl/Mul.cpp | 4 - tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp | 29 +- tests/validation/dynamic_fusion/gpu/cl/Resize.cpp | 306 ++------------------- tests/validation/dynamic_fusion/gpu/cl/Sub.cpp | 12 +- .../dynamic_fusion/gpu/cl/MatMulKernelFixture.h | 2 +- .../fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h | 41 +-- .../dynamic_fusion/operators/CastFixture.h | 2 + 11 files changed, 124 insertions(+), 615 deletions(-) (limited to 'tests/validation') diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index bb9c008f01..80dcaa8f90 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -280,10 +280,10 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) ITensorInfo *out_1_info = context.create_tensor_info(); CastAttributes cast_0_attr; - cast_0_attr.data_type(DataType::S32).convert_policy(ConvertPolicy::SATURATE); + cast_0_attr.data_type(DataType::F16); CastAttributes cast_1_attr; - cast_1_attr.data_type(DataType::F32).convert_policy(ConvertPolicy::SATURATE); + cast_1_attr.data_type(DataType::F32); ITensorInfo *ans_0_info = GpuAdd::create_op(sketch, in_0_info, in_1_info); GpuOutput::create_op(sketch, ans_0_info, out_0_info); diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp index a358d47bdd..9bfdc961fe 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp @@ -22,9 +22,6 @@ * SOFTWARE. 
*/ -// TODO: Fix testing of CKW Elementwise Binary (COMPMID-6530) -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" @@ -265,4 +262,3 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp b/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp index cb6c8c52f6..4ef359e74d 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Cast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,102 +42,33 @@ namespace validation namespace { // Tolerance -constexpr AbsoluteTolerance one_tolerance(1); constexpr AbsoluteTolerance zero_tolerance(0); /** Input data sets **/ -// QASYMM8 -const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32)); - -// U8 -const auto CastU8toS8Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S8)); -const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); -const auto CastU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); -const auto CastU8toU32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U32)); -const auto CastU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); -const auto CastU8toF16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F16)); -const auto CastU8toF32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F32)); - -// S8 -const auto CastS8toU8Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U8)); -const auto CastS8toU16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U16)); -const auto CastS8toS16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S16)); -const auto CastS8toU32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::U32)); -const auto CastS8toS32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::S32)); -const auto CastS8toF16Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F16)); -const auto CastS8toF32Dataset = combine(framework::dataset::make("DataType", DataType::S8), framework::dataset::make("DataType", DataType::F32)); - -// U16 -const auto CastU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); -const auto CastU16toS8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S8)); -const auto CastU16toS16Dataset = 
combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S16)); -const auto CastU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); -const auto CastU16toS32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::S32)); -const auto CastU16toF16Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F16)); -const auto CastU16toF32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::F32)); - -// S16 -const auto CastS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); -const auto CastS16toS8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S8)); -const auto CastS16toU16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U16)); -const auto CastS16toU32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U32)); -const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); -const auto CastS16toF16Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F16)); -const auto CastS16toF32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::F32)); - -// U32 -const auto CastU32toU8Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U8)); -const auto CastU32toS8Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S8)); -const auto CastU32toU16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::U16)); -const auto CastU32toS16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S16)); -const auto CastU32toS32Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::S32)); -const auto CastU32toF16Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::F16)); -const auto CastU32toF32Dataset = combine(framework::dataset::make("DataType", DataType::U32), framework::dataset::make("DataType", DataType::F32)); - -// S32 -const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8)); -const auto CastS32toS8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S8)); -const auto CastS32toU16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U16)); -const auto CastS32toS16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::S16)); -const auto CastS32toU32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", 
DataType::U32)); -const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16)); -const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32)); // F16 -const auto CastF16toU8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U8)); -const auto CastF16toS8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S8)); -const auto CastF16toU16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U16)); -const auto CastF16toS16Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S16)); -const auto CastF16toU32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U32)); -const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32)); const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); // F32 -const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8)); -const auto CastF32toS8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S8)); -const auto CastF32toU16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U16)); -const auto CastF32toS16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S16)); -const auto CastF32toU32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U32)); -const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); + +class DFConvertPolicies final : public framework::dataset::ContainerDataset> +{ +public: + DFConvertPolicies() + : ContainerDataset("ConvertPolicy", + { + ConvertPolicy::WRAP + }) + { + } +}; } // namespace TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) TEST_SUITE(CAST) -template -using DynamicFusionCLCastToU8Fixture = DynamicFusionCastValidationFixture; -template -using DynamicFusionCLCastToS8Fixture = DynamicFusionCastValidationFixture; -template -using DynamicFusionCLCastToU16Fixture = DynamicFusionCastValidationFixture; -template -using DynamicFusionCLCastToS16Fixture = DynamicFusionCastValidationFixture; -template -using DynamicFusionCLCastToU32Fixture = DynamicFusionCastValidationFixture; -template -using DynamicFusionCLCastToS32Fixture = DynamicFusionCastValidationFixture; template using DynamicFusionCLCastToF16Fixture = DynamicFusionCastValidationFixture; template @@ -146,85 +77,16 @@ using DynamicFusionCLCastToF32Fixture = DynamicFusionCastValidationFixture, CastQASYMM8toF32Dataset, zero_tolerance) - -// U8 -CAST_SUITE(U8_to_S8, DataType::U8, DataType::S8, DynamicFusionCLCastToS8Fixture, CastU8toS8Dataset, zero_tolerance) 
-CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, DynamicFusionCLCastToU16Fixture, CastU8toU16Dataset, zero_tolerance) -CAST_SUITE(U8_to_S16, DataType::U8, DataType::S16, DynamicFusionCLCastToS16Fixture, CastU8toS16Dataset, zero_tolerance) -CAST_SUITE(U8_to_U32, DataType::U8, DataType::U32, DynamicFusionCLCastToU32Fixture, CastU8toU32Dataset, zero_tolerance) -CAST_SUITE(U8_to_S32, DataType::U8, DataType::S32, DynamicFusionCLCastToS32Fixture, CastU8toS32Dataset, zero_tolerance) -CAST_SUITE(U8_to_F16, DataType::U8, DataType::F16, DynamicFusionCLCastToF16Fixture, CastU8toF16Dataset, zero_tolerance) -CAST_SUITE(U8_to_F32, DataType::U8, DataType::F32, DynamicFusionCLCastToF32Fixture, CastU8toF32Dataset, zero_tolerance) - -// S8 -CAST_SUITE(S8_to_U8, DataType::S8, DataType::U8, DynamicFusionCLCastToU8Fixture, CastS8toU8Dataset, zero_tolerance) -CAST_SUITE(S8_to_U16, DataType::S8, DataType::U16, DynamicFusionCLCastToU16Fixture, CastS8toU16Dataset, zero_tolerance) -CAST_SUITE(S8_to_S16, DataType::S8, DataType::S16, DynamicFusionCLCastToS16Fixture, CastS8toS16Dataset, zero_tolerance) -CAST_SUITE(S8_to_U32, DataType::S8, DataType::U32, DynamicFusionCLCastToU32Fixture, CastS8toU32Dataset, zero_tolerance) -CAST_SUITE(S8_to_S32, DataType::S8, DataType::S32, DynamicFusionCLCastToS32Fixture, CastS8toS32Dataset, zero_tolerance) -CAST_SUITE(S8_to_F16, DataType::S8, DataType::F16, DynamicFusionCLCastToF16Fixture, CastS8toF16Dataset, zero_tolerance) -CAST_SUITE(S8_to_F32, DataType::S8, DataType::F32, DynamicFusionCLCastToF32Fixture, CastS8toF32Dataset, zero_tolerance) - -// U16 -CAST_SUITE(U16_to_U8, DataType::U16, DataType::U8, DynamicFusionCLCastToU8Fixture, CastU16toU8Dataset, zero_tolerance) -CAST_SUITE(U16_to_S8, DataType::U16, DataType::S8, DynamicFusionCLCastToS8Fixture, CastU16toS8Dataset, zero_tolerance) -CAST_SUITE(U16_to_S16, DataType::U16, DataType::S16, DynamicFusionCLCastToS16Fixture, CastU16toS16Dataset, zero_tolerance) -CAST_SUITE(U16_to_U32, DataType::U16, DataType::U32, DynamicFusionCLCastToU32Fixture, CastU16toU32Dataset, zero_tolerance) -CAST_SUITE(U16_to_S32, DataType::U16, DataType::S32, DynamicFusionCLCastToS32Fixture, CastU16toS32Dataset, zero_tolerance) -CAST_SUITE(U16_to_F16, DataType::U16, DataType::F16, DynamicFusionCLCastToF16Fixture, CastU16toF16Dataset, zero_tolerance) -CAST_SUITE(U16_to_F32, DataType::U16, DataType::F32, DynamicFusionCLCastToF32Fixture, CastU16toF32Dataset, zero_tolerance) - -// S16 -CAST_SUITE(S16_to_U8, DataType::S16, DataType::U8, DynamicFusionCLCastToU8Fixture, CastS16toU8Dataset, zero_tolerance) -CAST_SUITE(S16_to_S8, DataType::S16, DataType::S8, DynamicFusionCLCastToS8Fixture, CastS16toS8Dataset, zero_tolerance) -CAST_SUITE(S16_to_U16, DataType::S16, DataType::U16, DynamicFusionCLCastToU16Fixture, CastS16toU16Dataset, zero_tolerance) -CAST_SUITE(S16_to_U32, DataType::S16, DataType::U32, DynamicFusionCLCastToU32Fixture, CastS16toU32Dataset, zero_tolerance) -CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, DynamicFusionCLCastToS32Fixture, CastS16toS32Dataset, zero_tolerance) -CAST_SUITE(S16_to_F16, DataType::S16, DataType::F16, DynamicFusionCLCastToF16Fixture, CastS16toF16Dataset, zero_tolerance) -CAST_SUITE(S16_to_F32, DataType::S16, DataType::F32, DynamicFusionCLCastToF32Fixture, CastS16toF32Dataset, zero_tolerance) - -// U32 -CAST_SUITE(U32_to_U8, DataType::U32, DataType::U8, DynamicFusionCLCastToU8Fixture, CastU32toU8Dataset, zero_tolerance) -CAST_SUITE(U32_to_S8, DataType::U32, DataType::S8, DynamicFusionCLCastToS8Fixture, CastU32toS8Dataset, 
zero_tolerance) -CAST_SUITE(U32_to_U16, DataType::U32, DataType::U16, DynamicFusionCLCastToU16Fixture, CastU32toU16Dataset, zero_tolerance) -CAST_SUITE(U32_to_S16, DataType::U32, DataType::S16, DynamicFusionCLCastToS16Fixture, CastU32toS16Dataset, zero_tolerance) -CAST_SUITE(U32_to_S32, DataType::U32, DataType::S32, DynamicFusionCLCastToS32Fixture, CastU32toS32Dataset, zero_tolerance) -CAST_SUITE(U32_to_F16, DataType::U32, DataType::F16, DynamicFusionCLCastToF16Fixture, CastU32toF16Dataset, zero_tolerance) -CAST_SUITE(U32_to_F32, DataType::U32, DataType::F32, DynamicFusionCLCastToF32Fixture, CastU32toF32Dataset, zero_tolerance) - -// S32 -CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, DynamicFusionCLCastToU8Fixture, CastS32toU8Dataset, zero_tolerance) -CAST_SUITE(S32_to_S8, DataType::S32, DataType::S8, DynamicFusionCLCastToS8Fixture, CastS32toS8Dataset, zero_tolerance) -CAST_SUITE(S32_to_U16, DataType::S32, DataType::U16, DynamicFusionCLCastToU16Fixture, CastS32toU16Dataset, zero_tolerance) -CAST_SUITE(S32_to_S16, DataType::S32, DataType::S16, DynamicFusionCLCastToS16Fixture, CastS32toS16Dataset, zero_tolerance) -CAST_SUITE(S32_to_U32, DataType::S32, DataType::U32, DynamicFusionCLCastToU32Fixture, CastS32toU32Dataset, zero_tolerance) -CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, DynamicFusionCLCastToF16Fixture, CastS32toF16Dataset, zero_tolerance) -CAST_SUITE(S32_to_F32, DataType::S32, DataType::F32, DynamicFusionCLCastToF32Fixture, CastS32toF32Dataset, zero_tolerance) - // F16 -CAST_SUITE(F16_to_U8, DataType::F16, DataType::U8, DynamicFusionCLCastToU8Fixture, CastF16toU8Dataset, one_tolerance) -CAST_SUITE(F16_to_S8, DataType::F16, DataType::S8, DynamicFusionCLCastToS8Fixture, CastF16toS8Dataset, one_tolerance) -CAST_SUITE(F16_to_U16, DataType::F16, DataType::U16, DynamicFusionCLCastToU16Fixture, CastF16toU16Dataset, one_tolerance) -CAST_SUITE(F16_to_S16, DataType::F16, DataType::S16, DynamicFusionCLCastToS16Fixture, CastF16toS16Dataset, one_tolerance) -CAST_SUITE(F16_to_U32, DataType::F16, DataType::U32, DynamicFusionCLCastToU32Fixture, CastF16toU32Dataset, one_tolerance) -CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, DynamicFusionCLCastToS32Fixture, CastF16toS32Dataset, one_tolerance) CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, DynamicFusionCLCastToF32Fixture, CastF16toF32Dataset, zero_tolerance) // F32 -CAST_SUITE(F32_to_U8, DataType::F32, DataType::U8, DynamicFusionCLCastToU8Fixture, CastF32toU8Dataset, one_tolerance) -CAST_SUITE(F32_to_S8, DataType::F32, DataType::S8, DynamicFusionCLCastToS8Fixture, CastF32toS8Dataset, one_tolerance) -CAST_SUITE(F32_to_U16, DataType::F32, DataType::U16, DynamicFusionCLCastToU16Fixture, CastF32toU16Dataset, one_tolerance) -CAST_SUITE(F32_to_S16, DataType::F32, DataType::S16, DynamicFusionCLCastToS16Fixture, CastF32toS16Dataset, one_tolerance) -CAST_SUITE(F32_to_U32, DataType::F32, DataType::U32, DynamicFusionCLCastToU32Fixture, CastF32toU32Dataset, one_tolerance) -CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, DynamicFusionCLCastToS32Fixture, CastF32toS32Dataset, one_tolerance) CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, DynamicFusionCLCastToF16Fixture, CastF32toF16Dataset, zero_tolerance) TEST_SUITE_END() // CAST diff --git a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp index d714a2f70c..96b79679c3 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp @@ -25,6 +25,7 @@ #include 
"tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/LargeMatMulDataset.h" +#include "tests/datasets/MatMulDataset.h" #include "tests/datasets/SmallMatMulDataset.h" #include "tests/framework/datasets/Datasets.h" #include "tests/framework/Fixture.h" @@ -54,27 +55,36 @@ RelativeTolerance tolerance_f16(half( 0.02)); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ } // namespace -/** M0 values to test --precommit*/ -const auto m0_values_precommit = framework::dataset::make("M0", {1, 3}); +/** M0 values to test - precommit */ +const auto m0_values_lhs_nt_precommit = framework::dataset::make("M0", {1, 2, 3}); -/** N0 values to test --precommit*/ -const auto n0_values_precommit = framework::dataset::make("N0", {1, 2, 4}); +/** N0 values to test - precommit */ +const auto n0_values_rhs_t_precommit = framework::dataset::make("N0", {1, 2, 4}); -/** K0 values to test --precommit*/ -const auto k0_values_precommit = framework::dataset::make("K0", {1, 2, 3}); +/** K0 values to test - precommit */ +const auto k0_values_rhs_t_precommit = framework::dataset::make("K0", {1, 2, 4}); -/** M0 values to test --nightly*/ -const auto m0_values_nightly_lhs_nt = framework::dataset::make("M0", {1, 2, 3, 4, 5, 6, 7, 8}); -const auto m0_values_nightly_lhs_t = framework::dataset::make("M0", {1, 2, 3, 4, 8}); +/** M0 values to test - nightly */ +const auto m0_values_lhs_nt_nightly = framework::dataset::make("M0", {1, 2, 3, 4}); -/** N0 values to test --nightly*/ -const auto n0_values_nightly_rhs_nt = framework::dataset::make("N0", {1, 2, 3, 4, 8, 16}); -const auto n0_values_nightly_rhs_t = framework::dataset::make("N0", {1, 2, 3, 4, 8}); +/** N0 values to test - nightly */ +const auto n0_values_rhs_t_nightly = framework::dataset::make("N0", {1, 2, 3, 4, 8}); -/** K0 values to test --nightly*/ -const auto k0_values_nightly_lhs_nt_rhs_nt = framework::dataset::make("K0", {1, 2, 3, 4, 8, 16}); -const auto k0_values_nightly_rhs_t = framework::dataset::make("K0", {1, 2, 3, 4, 8}); -const auto k0_values_nightly_lhs_t_rhs_nt = framework::dataset::make("K0", {1, 2, 3, 4, 5, 6, 7, 8}); +/** K0 values to test - nightly */ +const auto k0_values_rhs_t_nightly = framework::dataset::make("K0", {1, 2, 3, 4, 8}); + +class DFMatMulDataset final : public datasets::MatMulDataset +{ +public: + DFMatMulDataset() + { + // LHS = [K, M], RHS = [N, K], DST = [N, M] + add_config(TensorShape(1U, 1U), TensorShape(1U, 1U), TensorShape(1U, 1U)); + add_config(TensorShape(1U, 2U), TensorShape(2U, 1U), TensorShape(2U, 2U)); + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(32U, 37U), TensorShape(17U, 32U), TensorShape(17U, 37U)); + } +}; TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) @@ -247,70 +257,33 @@ using DynamicFusionGpuMatmulFixture = DynamicFusionGpuMatMulValidationFixture, - framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(datasets::TinyMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE( - RunSmall, - DynamicFusionGpuMatmulFixture, - framework::DatasetMode::ALL, - 
combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); -} - -FIXTURE_DATA_TEST_CASE( - RunLargeRhsTransposed, - DynamicFusionGpuMatmulFixture, - framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_t), - k0_values_nightly_rhs_t), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunPrecommit, + DynamicFusionGpuMatmulFixture, + framework::DatasetMode::ALL, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_precommit, + n0_values_rhs_t_precommit, + k0_values_rhs_t_precommit, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F32))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); } -// Running High Dimensional test is enough for FP32, because we're stressing the number of dimensions, not data type or M0/N0/K0 -FIXTURE_DATA_TEST_CASE( - RunHighDimensional, - DynamicFusionGpuMatmulFixture, - framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(datasets::HighDimensionalMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - framework::dataset::make("M0", {2})), - framework::dataset::make("N0", {2})), - framework::dataset::make("K0", {2})), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionGpuMatmulFixture, + framework::DatasetMode::NIGHTLY, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_nightly, + n0_values_rhs_t_nightly, + k0_values_rhs_t_nightly, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F32))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, 0.f, abs_tolerance_f32); @@ -319,35 +292,33 @@ TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE( - RunSmall, - DynamicFusionGpuMatmulFixture, - framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_precommit), - n0_values_precommit), - k0_values_precommit), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunPrecommit, + DynamicFusionGpuMatmulFixture, + framework::DatasetMode::ALL, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + 
m0_values_lhs_nt_precommit, + n0_values_rhs_t_precommit, + k0_values_rhs_t_precommit, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F16))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); } -FIXTURE_DATA_TEST_CASE( - RunLargeRhsTransposed, - DynamicFusionGpuMatmulFixture, - framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(combine(combine(combine(datasets::LargeMatMulDataset(), - framework::dataset::make("TransposeA", {false})), - framework::dataset::make("TransposeB", {true})), - m0_values_nightly_lhs_nt), - n0_values_nightly_rhs_t), - k0_values_nightly_rhs_t), - framework::dataset::make("ExportRhsToCLImage", {false})), - framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(RunNightly, + DynamicFusionGpuMatmulFixture, + framework::DatasetMode::NIGHTLY, + combine(DFMatMulDataset(), + framework::dataset::make("TransposeA", {false}), + framework::dataset::make("TransposeB", {true}), + m0_values_lhs_nt_nightly, + n0_values_rhs_t_nightly, + k0_values_rhs_t_nightly, + framework::dataset::make("ExportRhsToCLImage", {false}), + framework::dataset::make("DataType", DataType::F16))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, 0.f, abs_tolerance_f16); diff --git a/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp index c11bffe459..af02ce3eaa 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp @@ -22,9 +22,6 @@ * SOFTWARE. */ -// TODO: Fix testing of CKW Elementwise Binary (COMPMID-6530) -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" @@ -222,4 +219,3 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp index f894ce3cf1..e537826c71 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp @@ -55,17 +55,11 @@ const auto PoolingLayerDatasetFP = framework::dataset::make("Stride", {Size2D(1, 1), Size2D(2, 1), Size2D(5, 7)})), framework::dataset::make("ExcludePadding", {true})); -const auto pool_fp_mixed_precision_dataset = framework::dataset::make("FpMixedPrecision", {true, false}); - template using DynamicFusionGpuPool2dFixture = DynamicFusionGpuPool2dValidationFixture; template using DFSpecialGpuPool2dFixture = DynamicFusionGpuPool2dSpecialValidationFixture; - -template -using DFPoolMixedPrecisionFixture = - DynamicFusionGpuPool2dMixedPrecisionValidationFixture; // *INDENT-OFF* // clang-format off @@ -92,7 +86,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( GpuWorkloadSketch sketch{ &context }; // Declare GpuPool2d settings - const GpuPool2dSettings &settings = GpuPool2dSettings().mixed_precision(false); + const GpuPool2dSettings &settings = GpuPool2dSettings(); // Validate Pool2d Configuration auto src_info = context.create_tensor_info(input_info); @@ -175,27 +169,6 @@ TEST_SUITE_END() // GlobalPooling TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, - DFPoolMixedPrecisionFixture, - framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallNoneUnitShapes(), 
PoolingLayerDatasetFP), - framework::dataset::make("DataType", DataType::F16)), - pool_fp_mixed_precision_dataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); -} -FIXTURE_DATA_TEST_CASE(RunLarge, - DFPoolMixedPrecisionFixture, - framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeShapes(), PoolingLayerDatasetFP), - framework::dataset::make("DataType", DataType::F16)), - pool_fp_mixed_precision_dataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_f16); -} - TEST_SUITE(GlobalPooling) FIXTURE_DATA_TEST_CASE( RunSmall, diff --git a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp index 10915acfaa..a6bcf4ae26 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Resize.cpp @@ -64,10 +64,7 @@ const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo", }); /** Tolerance */ -constexpr AbsoluteTolerance tolerance_q8(1); -constexpr AbsoluteTolerance tolerance_qs8(1); -constexpr AbsoluteTolerance tolerance_s16(1); -constexpr float tolerance_f32_absolute(0.001f); +constexpr float tolerance_f32_absolute(0.001f); RelativeTolerance tolerance_f32(0.05); constexpr float abs_tolerance_f16(0.1f); @@ -105,26 +102,27 @@ TEST_CASE(NullPtr, framework::DatasetMode::ALL) TEST_CASE(SupportDataType, framework::DatasetMode::ALL) { - const std::map supported_data_types = { - {DataType::U8, true}, - {DataType::S8, false}, - {DataType::QSYMM8, false}, - {DataType::QASYMM8, true}, - {DataType::QASYMM8_SIGNED, true}, - {DataType::QSYMM8_PER_CHANNEL, false}, - {DataType::U16, false}, - {DataType::S16, true}, - {DataType::QSYMM16, false}, - {DataType::QASYMM16, false}, - {DataType::U32, false}, - {DataType::S32, false}, - {DataType::U64, false}, - {DataType::S64, false}, - {DataType::BFLOAT16, false}, - {DataType::F16, true}, - {DataType::F32, true}, - {DataType::F64, false}, - {DataType::SIZET, false}, + const std::map supported_data_types = + { + { DataType::U8, false }, + { DataType::S8, false }, + { DataType::QSYMM8, false }, + { DataType::QASYMM8, false }, + { DataType::QASYMM8_SIGNED, false }, + { DataType::QSYMM8_PER_CHANNEL, false }, + { DataType::U16, false }, + { DataType::S16, false }, + { DataType::QSYMM16, false }, + { DataType::QASYMM16, false }, + { DataType::U32, false }, + { DataType::S32, false }, + { DataType::U64, false }, + { DataType::S64, false }, + { DataType::BFLOAT16, false }, + { DataType::F16, true }, + { DataType::F32, true }, + { DataType::F64, false }, + { DataType::SIZET, false }, }; for (auto &kv : supported_data_types) @@ -352,266 +350,6 @@ FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float -TEST_SUITE(Integer) -TEST_SUITE(U8) -const auto u8_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::U8)); -FIXTURE_DATA_TEST_CASE(Run, - DynamicFusionResizeFixture, - framework::DatasetMode::ALL, - ASSEMBLE_DATASET_DYNAMIC_FUSION(u8_shape, ScaleSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -FIXTURE_DATA_TEST_CASE(RunAlignCorners, - DynamicFusionResizeFixture, - 
framework::DatasetMode::ALL, - ASSEMBLE_DATASET_DYNAMIC_FUSION(u8_shape, ScaleAlignCornersSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -const auto u8_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::U8)); -FIXTURE_DATA_TEST_CASE(RunNightly, - DynamicFusionResizeFixture, - framework::DatasetMode::NIGHTLY, - ASSEMBLE_DATASET_DYNAMIC_FUSION(u8_nightly_shape, ScaleSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, - DynamicFusionResizeFixture, - framework::DatasetMode::NIGHTLY, - ASSEMBLE_DATASET_DYNAMIC_FUSION(u8_nightly_shape, ScaleAlignCornersSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -TEST_SUITE_END() // U8 - -TEST_SUITE(S16) -const auto s16_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::S16)); -FIXTURE_DATA_TEST_CASE(Run, - DynamicFusionResizeFixture, - framework::DatasetMode::ALL, - ASSEMBLE_DATASET_DYNAMIC_FUSION(s16_shape, ScaleSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_s16); -} -FIXTURE_DATA_TEST_CASE(RunAlignCorners, - DynamicFusionResizeFixture, - framework::DatasetMode::ALL, - ASSEMBLE_DATASET_DYNAMIC_FUSION(s16_shape, ScaleAlignCornersSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_s16); -} -const auto s16_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::S16)); -FIXTURE_DATA_TEST_CASE(RunNightly, - DynamicFusionResizeFixture, - framework::DatasetMode::NIGHTLY, - ASSEMBLE_DATASET_DYNAMIC_FUSION(s16_nightly_shape, ScaleSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_s16); -} -FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, - DynamicFusionResizeFixture, - framework::DatasetMode::NIGHTLY, - 
ASSEMBLE_DATASET_DYNAMIC_FUSION(s16_nightly_shape, ScaleAlignCornersSamplingPolicySet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_s16); -} -TEST_SUITE_END() // S16 -TEST_SUITE_END() // Integer - -template -using DynamicFusionResizeQuantizedFixture = - DynamicFusionResizeQuantizedValidationFixture; -TEST_SUITE(Quantized) -TEST_SUITE(QASYMM8) -const auto qasymm8_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::QASYMM8)); -FIXTURE_DATA_TEST_CASE(Run, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::ALL, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_shape, - ScaleSamplingPolicySet, - QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -FIXTURE_DATA_TEST_CASE(RunAlignCorners, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::ALL, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_shape, - ScaleAlignCornersSamplingPolicySet, - QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -const auto qasymm8_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::QASYMM8)); -FIXTURE_DATA_TEST_CASE(RunNightly, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::NIGHTLY, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_nightly_shape, - ScaleSamplingPolicySet, - QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::NIGHTLY, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_nightly_shape, - ScaleAlignCornersSamplingPolicySet, - QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_q8); -} -TEST_SUITE_END() // QASYMM8 - -TEST_SUITE(QASYMM8_SIGNED) -const auto qasymm8_signed_shape = combine((SCALE_PRECOMMIT_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); -FIXTURE_DATA_TEST_CASE(Run, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::ALL, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_signed_shape, - ScaleSamplingPolicySet, - 
QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_qs8); -} -FIXTURE_DATA_TEST_CASE(RunAlignCorners, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::ALL, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_signed_shape, - ScaleAlignCornersSamplingPolicySet, - QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_qs8); -} -const auto qasymm8_signed_nightly_shape = combine((SCALE_NIGHTLY_SHAPE_DATASET(num_elements_per_vector())), - framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); -FIXTURE_DATA_TEST_CASE(RunNightly, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::NIGHTLY, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_signed_nightly_shape, - ScaleSamplingPolicySet, - QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_qs8); -} -FIXTURE_DATA_TEST_CASE(RunNightlyAlignCorners, - DynamicFusionResizeQuantizedFixture, - framework::DatasetMode::NIGHTLY, - ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(qasymm8_signed_nightly_shape, - ScaleAlignCornersSamplingPolicySet, - QuantizationInfoSet)) -{ - //Create valid region - TensorInfo src_info(_shape, 1, _data_type); - const ValidRegion valid_region = - calculate_valid_region_scale(src_info, _reference.shape(), _interpolation_policy, _sampling_policy, false); - - // Validate output - validate(CLAccessor(_target), _reference, valid_region, tolerance_qs8); -} -TEST_SUITE_END() // QASYMM8_SIGNED - -TEST_SUITE_END() // Quantized - TEST_SUITE_END() // RESIZE TEST_SUITE_END() // DYNAMIC_FUSION TEST_SUITE_END() // CL diff --git a/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp index ef9f75b1c0..c7ab1e717c 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp @@ -22,9 +22,6 @@ * SOFTWARE. 
*/ -// TODO: Fix testing of CKW Elementwise Binary (COMPMID-6530) -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h" @@ -63,13 +60,13 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Sub - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Sub + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed - TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Broadcast Z dimension is not allowed + TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed }), framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), @@ -86,7 +83,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, false, false, false, false, true, true, false, true, true, false, false, true })), + framework::dataset::make("Expected", { true, false, false, false, false, false, false, false, true, true, false, false, true })), input1_info, input2_info, expected) { // Create a new workload sketch @@ -263,4 +260,3 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h index 65a3363e24..4c1cc94d3d 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h @@ -203,7 +203,7 @@ protected: bool pretranspose_b, DataType data_type) { - // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D + // We collapse dimensions > 3 onto dimension 3, i.e. 
5D+ tensors will look like 3D // This is necessary unless we choose to extend gemm reference for 5D+ tensors TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ); TensorShape shape_a_collapsed = shape_a.collapsed_from(Window::DimZ); diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h index dd3519b549..b0c7143d91 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h @@ -51,11 +51,11 @@ template -class DynamicFusionGpuPool2dMixedPrecisionValidationFixture - : public DynamicFusionGpuPool2dValidationGenericFixture -{ -public: - void setup(TensorShape input_shape, - PoolingType pool_type, - Size2D pool_size, - Padding2D pad, - Size2D stride, - bool exclude_padding, - DataType data_type, - bool mixed_precision) - { - DynamicFusionGpuPool2dValidationGenericFixture::setup( - input_shape, - Pool2dAttributes().pool_type(pool_type).pool_size(pool_size).pad(pad).stride(stride).exclude_padding( - exclude_padding), - data_type, mixed_precision); + data_type); } }; @@ -202,7 +177,7 @@ public: void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type) { DynamicFusionGpuPool2dValidationGenericFixture::setup( - input_shape, pool_attr, data_type, false); + input_shape, pool_attr, data_type); } }; diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h index edf0dff54b..08fffb305b 100644 --- a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h @@ -120,6 +120,8 @@ protected: GpuWorkloadSketch sketch{&context}; // Create sketch tensors + // Here, we use DataLayout::NCHW just for the test. However, the optimal data layout to + // be used with dynamic fusion is NHWC ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, dt_in, DataLayout::NCHW)); // layout is not important ITensorInfo *dst_info = context.create_tensor_info(); -- cgit v1.2.1
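
Note on the sketch API exercised by these tests: the Integration.cpp hunk above shows only the middle of the updated Add_Output_Add_Cast_Cast_Output test, so the overall build pattern (record operators on a GpuWorkloadSketch, fuse an elementwise Add with a Cast, and expose the result through GpuOutput) is sketched below for reference. This is a minimal illustration assembled from the test code visible in this patch, not part of the change itself; the GpuWorkloadContext construction, the CastAttributes/GpuCast/GpuOutput header paths and the exact GpuCast::create_op signature are assumptions.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"   // assumed path
    #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"      // assumed path
    #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
    #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"       // assumed path
    #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"     // assumed path

    using namespace arm_compute;
    using namespace arm_compute::experimental::dynamic_fusion;

    // Record an Add fused with a Cast-to-F16 on a sketch and mark the result as a workload
    // output. The caller owns the GpuWorkloadContext; how it is created (e.g. from a
    // CLCompileContext) follows the existing tests and is not shown here.
    void build_add_cast_sketch(GpuWorkloadContext &context)
    {
        GpuWorkloadSketch sketch{ &context };

        // Tensor infos are created and owned by the context. NHWC is the preferred layout
        // for dynamic fusion (see the comment added to CastFixture.h above).
        ITensorInfo *in_0_info = context.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, DataLayout::NHWC));
        ITensorInfo *in_1_info = context.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32, DataLayout::NHWC));
        ITensorInfo *out_info  = context.create_tensor_info(); // shape/type inferred at fusion time

        // As in the updated Integration.cpp test, only the destination data type is set on
        // the Cast attributes.
        CastAttributes cast_attr;
        cast_attr.data_type(DataType::F16);

        ITensorInfo *ans_info = GpuAdd::create_op(sketch, in_0_info, in_1_info);
        ans_info              = GpuCast::create_op(sketch, ans_info, cast_attr); // assumed signature
        GpuOutput::create_op(sketch, ans_info, out_info);
    }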