12 files changed, 2785 insertions, 0 deletions
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h
new file mode 100644
index 0000000000..ca4de11a15
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DepthwiseConv2dFixture.h
@@ -0,0 +1,246 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/reference/DepthwiseConvolutionLayer.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Validation fixture for the dynamic fusion depthwise conv2d operator (NHWC data layout only).
+ *
+ *  setup() derives the weights, bias and output shapes from its arguments, then stores the sketch-based
+ *  result in _target and the output of reference::depthwise_convolution in _reference.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuDepthwiseConv2dValidationGenericFixture : public framework::Fixture
+{
+public:
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value ||
+                                                std::is_same<typename std::decay<T>::type, int8_t>::value,
+                                            int32_t,
+                                            T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T
+
+    void setup(TensorShape          input_shape,
+               Size2D               kernel_size,
+               const PadStrideInfo &pad_stride,
+               const Size2D        &dilation,
+               const unsigned int   depth_multiplier,
+               const DataType       data_type,
+               const DataLayout     data_layout)
+    {
+        ARM_COMPUTE_ERROR_ON(data_layout !=
+                             DataLayout::NHWC); // Dynamic fusion depthwise conv2d only supports NHWC layout
+
+        DepthwiseConv2dAttributes dwc_conv2d_attr;
+        const Padding2D           padding_2d(pad_stride.pad_left(), pad_stride.pad_right(), pad_stride.pad_top(),
+                                             pad_stride.pad_bottom());
+        dwc_conv2d_attr.pad(padding_2d)
+            .stride(Size2D(pad_stride.stride().first, pad_stride.stride().second))
+            .dilation(dilation)
+            .depth_multiplier(depth_multiplier)
+            .dimension_rounding_type(pad_stride.round());
+
+        // Calculate Output and Weight Shapes (the weights start as a 2D kernel_size.width x height shape)
+        TensorShape weights_shape = TensorShape(kernel_size.width, kernel_size.height);
+
+        const TensorInfo in_info(input_shape, 1, data_type);
+        const TensorInfo we_info(weights_shape, 1, data_type);
+
+        const ConvolutionInfo info{pad_stride, depth_multiplier, ActivationLayerInfo(), dilation};
+        const TensorShape     output_shape =
+            misc::shape_calculator::compute_depthwise_convolution_shape(in_info, we_info, info);
+
+        weights_shape.set(2, output_shape.z()); // third weights dimension follows the output's z dimension
+        const TensorShape bias_shape = TensorShape(weights_shape[2]);
+
+        _data_type   = data_type;
+        _data_layout = data_layout;
+        _target      = compute_target(input_shape, weights_shape, bias_shape, dwc_conv2d_attr);
+        _reference   = compute_reference(input_shape, weights_shape, bias_shape, output_shape, dwc_conv2d_attr);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        switch (tensor.data_type())
+        {
+            case DataType::F16:
+            {
+                arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-1.0f, 1.0f};
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
+                library->fill(tensor, distribution, i);
+                break;
+            }
+            default:
+                library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    // Runs the operator through a sketch; shapes arrive in NCHW order and are permuted to NHWC first
+    TensorType compute_target(TensorShape                     input_shape,
+                              TensorShape                     weights_shape,
+                              const TensorShape              &bias_shape,
+                              const DepthwiseConv2dAttributes dwc_conv2d_attr)
+    {
+        ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC);
+
+        // Our test shapes are assumed in NCHW data layout, thus the permutation
+        permute(input_shape, PermutationVector(2U, 0U, 1U));
+        permute(weights_shape, PermutationVector(2U, 0U, 1U));
+
+        // Create a new workload sketch
+        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        auto              context        = GpuWorkloadContext{&cl_compile_ctx};
+        GpuWorkloadSketch sketch{&context};
+
+        // Create sketch tensors
+        ITensorInfo *input_info  = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout));
+        ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout));
+        ITensorInfo *bias_info   = context.create_tensor_info(TensorInfo(bias_shape, 1, _data_type, _data_layout));
+        ITensorInfo *dst_info    = context.create_tensor_info();
+
+        ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, dwc_conv2d_attr);
+        GpuOutput::create_op(sketch, ans_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &data : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+
+        // Construct user tensors
+        TensorType t_input{};
+        TensorType t_weight{};
+        TensorType t_bias{};
+        TensorType t_dst{};
+
+        // Initialize user tensors
+        t_input.allocator()->init(*input_info);
+        t_weight.allocator()->init(*weight_info);
+        t_bias.allocator()->init(*bias_info);
+        t_dst.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        t_input.allocator()->allocate();
+        t_weight.allocator()->allocate();
+        t_bias.allocator()->allocate();
+        t_dst.allocator()->allocate();
+
+        fill(AccessorType(t_input), 0);
+        fill(AccessorType(t_weight), 1);
+        fill(AccessorType(t_bias), 2);
+
+        // Run runtime
+        runtime.run({&t_input, &t_weight, &t_bias, &t_dst});
+        return t_dst;
+    }
+
+    // Reference path: tensors are created from the unpermuted shapes and filled with the same indices
+    SimpleTensor<T> compute_reference(const TensorShape        &input_shape,
+                                      const TensorShape        &weights_shape,
+                                      const TensorShape        &bias_shape,
+                                      const TensorShape        &output_shape,
+                                      DepthwiseConv2dAttributes dwc_conv2d_attr)
+    {
+        // Create reference
+        SimpleTensor<T>     src{input_shape, _data_type, 1};
+        SimpleTensor<T>     weight{weights_shape, _data_type, 1};
+        SimpleTensor<TBias> bias{bias_shape, _data_type, 1};
+
+        fill(src, 0);
+        fill(weight, 1);
+        fill(bias, 2);
+
+        // Rebuild a PadStrideInfo from the attributes; the rounding type is left value-initialised
+        PadStrideInfo legacy_pad_stride(dwc_conv2d_attr.stride().x(), dwc_conv2d_attr.stride().y(),
+                                        dwc_conv2d_attr.pad().left, dwc_conv2d_attr.pad().right,
+                                        dwc_conv2d_attr.pad().top, dwc_conv2d_attr.pad().bottom,
+                                        DimensionRoundingType{});
+        return reference::depthwise_convolution(src, weight, bias, output_shape, legacy_pad_stride,
+                                                dwc_conv2d_attr.depth_multiplier(), dwc_conv2d_attr.dilation());
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    DataType        _data_type{};
+    DataLayout      _data_layout{};
+};
+
+/** Depthwise conv2d fixture whose setup() hands every argument, unchanged, to the generic fixture. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuDepthwiseConv2dValidationFixture
+    : public DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+    using GenericFixture =
+        DynamicFusionGpuDepthwiseConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+public:
+    void setup(TensorShape input_shape, Size2D kernel_size, const PadStrideInfo &info, const Size2D &dilation,
+               const unsigned int depth_multiplier, DataType data_type, DataLayout data_layout)
+    {
+        // The base setup() is named explicitly because it lives in a dependent base class
+        GenericFixture::setup(input_shape, kernel_size, info, dilation, depth_multiplier, data_type,
+                              data_layout);
+    }
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DEPTHWISECONV2DFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h
new file mode 100644
index 0000000000..1f4e223b93
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/DirectConv2dFixture.h
@@ -0,0 +1,411 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/reference/ConvolutionLayer.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+/** Randomly fill @p tensor: F16/F32 use a uniform real distribution built with (-1, 1), any other type
+ *  goes through fill_tensor_uniform(); @p i is passed to the library call unchanged. */
+template <typename U>
+void fill(U &&tensor, int i)
+{
+    const auto dt = tensor.data_type();
+    if (dt == DataType::F16)
+    {
+        arm_compute::utils::uniform_real_distribution_16bit<half> dist_f16{-1.0f, 1.0f};
+        library->fill(tensor, dist_f16, i);
+    }
+    else if (dt == DataType::F32)
+    {
+        std::uniform_real_distribution<float> dist_f32(-1.0f, 1.0f);
+        library->fill(tensor, dist_f32, i);
+    }
+    else
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+}
+
+} // namespace
+
+/** General Conv2d fixture: setup() fills _target from a dynamic fusion sketch run and _reference from
+ *  reference::convolution_layer. Adapted from tests/validation/fixtures/ConvolutionLayerFixture.h
+ *  TODO: Parameterize to be fully backend agnostic: COMPMID-5760; remove Gpu from name
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuConv2dValidationGenericFixture : public framework::Fixture
+{
+public:
+    using TBias = typename std::conditional<std::is_same<typename std::decay<T>::type, uint8_t>::value ||
+                                                std::is_same<typename std::decay<T>::type, int8_t>::value,
+                                            int32_t,
+                                            T>::type; // If T: uint8_t or int8_t then TBias: int32_t, otherwise TBias: T
+
+    void setup(TensorShape          input_shape,
+               TensorShape          weights_shape,
+               TensorShape          bias_shape,
+               TensorShape          output_shape,
+               const PadStrideInfo &info,
+               const Size2D        &dilation,
+               DataType             data_type,
+               DataLayout           data_layout,
+               QuantizationInfo     quantization_info,
+               QuantizationInfo     weight_quantization_info)
+    {
+        ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout
+        const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, dilation);
+        _data_type                         = data_type;
+        _data_layout                       = data_layout;
+        _is_quantized                      = is_data_type_quantized_asymmetric(data_type);
+        _quantization_info                 = quantization_info;
+        _weight_quantization_info          = weight_quantization_info;
+        _bias_data_type                    = _is_quantized ? DataType::S32 : data_type;
+        _target                            = compute_target(input_shape, weights_shape, bias_shape, conv2d_attr);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr);
+    }
+
+protected:
+    // Runs the operator through a sketch; shapes arrive in NCHW order and are permuted to NHWC first
+    TensorType compute_target(TensorShape        input_shape,
+                              TensorShape        weights_shape,
+                              const TensorShape &bias_shape,
+                              Conv2dAttributes   conv2d_attr)
+    {
+        ARM_COMPUTE_ERROR_ON(_data_layout != DataLayout::NHWC);
+        permute(input_shape, PermutationVector(2U, 0U, 1U));
+        permute(weights_shape, PermutationVector(2U, 0U, 1U));
+        CLScheduler::get().default_reinit();
+
+        // Create a new workload sketch
+        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        auto              context        = GpuWorkloadContext{&cl_compile_ctx};
+        GpuWorkloadSketch sketch{&context};
+
+        // Create sketch tensors (the bias is tagged with _bias_data_type, like the reference bias)
+        ITensorInfo *input_info  = context.create_tensor_info(TensorInfo(input_shape, 1, _data_type, _data_layout));
+        ITensorInfo *weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, _data_type, _data_layout));
+        ITensorInfo *bias_info   = context.create_tensor_info(TensorInfo(bias_shape, 1, _bias_data_type, _data_layout));
+        ITensorInfo *dst_info    = context.create_tensor_info();
+
+        ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr);
+        GpuOutput::create_op(sketch, ans_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &data : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+        // Construct user tensors
+        TensorType t_input{};
+        TensorType t_weight{};
+        TensorType t_bias{};
+        TensorType t_dst{};
+
+        // Initialize user tensors
+        t_input.allocator()->init(*input_info);
+        t_weight.allocator()->init(*weight_info);
+        t_bias.allocator()->init(*bias_info);
+        t_dst.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        t_input.allocator()->allocate();
+        t_weight.allocator()->allocate();
+        t_bias.allocator()->allocate();
+        t_dst.allocator()->allocate();
+
+        fill(AccessorType(t_input), 0);
+        fill(AccessorType(t_weight), 1);
+        fill(AccessorType(t_bias), 2);
+
+        // Run runtime
+        runtime.run({&t_input, &t_weight, &t_bias, &t_dst});
+        return t_dst;
+    }
+
+    // Computes the expected output with reference::convolution_layer, using tensors created from the
+    // unpermuted shapes and the quantization info stored by setup()
+    SimpleTensor<T> compute_reference(const TensorShape &input_shape,
+                                      const TensorShape &weights_shape,
+                                      const TensorShape &bias_shape,
+                                      const TensorShape &output_shape,
+                                      Conv2dAttributes   conv2d_attr)
+    {
+        // Create reference tensors; the bias is tagged with _bias_data_type (S32 when quantized)
+        SimpleTensor<T>     src{input_shape, _data_type, 1, _quantization_info};
+        SimpleTensor<T>     weight{weights_shape, _data_type, 1, _weight_quantization_info};
+        SimpleTensor<TBias> bias{bias_shape, _bias_data_type, 1, _quantization_info};
+
+        // Same fill indices as compute_target() (0: input, 1: weights, 2: bias)
+        fill(src, 0);
+        fill(weight, 1);
+        fill(bias, 2);
+
+        // Rebuild a PadStrideInfo from the attributes; the rounding type is left value-initialised
+        PadStrideInfo legacy_pad_stride(conv2d_attr.stride().x(), conv2d_attr.stride().y(), conv2d_attr.pad().left,
+                                        conv2d_attr.pad().right, conv2d_attr.pad().top, conv2d_attr.pad().bottom,
+                                        DimensionRoundingType{});
+
+        // Nothing is permuted on the reference side, so the tensors are passed on exactly as created
+        return reference::convolution_layer(src, weight, bias, output_shape, legacy_pad_stride,
+                                            conv2d_attr.dilation());
+    }
+
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+    DataType         _data_type{};
+    DataType         _bias_data_type{};
+    DataLayout       _data_layout{};
+    QuantizationInfo _quantization_info{};
+    QuantizationInfo _weight_quantization_info{};
+    bool             _is_quantized = false;
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuConv2dValidationFixture
+    : public DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(TensorShape          input_shape,
+               TensorShape          weights_shape,
+               TensorShape          bias_shape,   // lands in the generic setup()'s bias_shape slot
+               TensorShape          output_shape, // lands in the generic setup()'s output_shape slot
+               const PadStrideInfo &info,
+               const Size2D        &dilation,
+               DataType             data_type,
+               DataLayout           data_layout,
+               QuantizationInfo     quantization_info) // reused below for both data and weights
+    {
+        DynamicFusionGpuConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(
+            input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type, data_layout,
+            quantization_info, quantization_info);
+    }
+};
+
+/** Direct Conv2d fixture: setup() derives weights/bias/output shapes, then fills _target and _reference.
+ *  Adapted from tests/validation/fixtures/DirectConvolutionLayerFixture.h
+ *  TODO: Parameterize to be fully backend agnostic: COMPMID-5760
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionDirectConv2dValidationGenericFixture : public framework::Fixture
+{
+public:
+    using TBias = // int32_t when T is uint8_t or int8_t, otherwise T
+        typename std::conditional<std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T>::type;
+
+    void setup(TensorShape      input_shape,
+               int              stride_x,
+               int              stride_y,
+               int              pad_x,
+               int              pad_y,
+               unsigned int     kernel_size,
+               unsigned int     num_kernels,
+               DataType         data_type,
+               QuantizationInfo quantization_info,
+               DataLayout       data_layout)
+    {
+        ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion conv2d only supports NHWC layout
+
+        TensorShape         weights_shape(kernel_size, kernel_size, input_shape.z(), num_kernels);
+        const TensorShape   bias_shape(num_kernels);
+        const PadStrideInfo info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR);
+        const DataType      bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type;
+
+        const Conv2dAttributes conv2d_attr = convert_pad_stride_info_to_conv_attr(info, {1U, 1U} /* dilation */);
+
+        TensorInfo input_info   = TensorInfo(input_shape, 1, data_type);
+        TensorInfo weights_info = TensorInfo(weights_shape, 1, data_type);
+
+        const TensorShape output_shape =
+            misc::shape_calculator::compute_deep_convolution_shape(input_info, weights_info, info);
+
+        _target    = compute_target(input_shape, weights_shape, bias_shape, output_shape, conv2d_attr, data_type,
+                                    bias_data_type, quantization_info, data_layout);
+        _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type,
+                                       bias_data_type, quantization_info);
+    }
+
+protected:
+    TensorType compute_target(TensorShape             input_shape,
+                              TensorShape             weights_shape,
+                              const TensorShape      &bias_shape,
+                              TensorShape             output_shape,
+                              const Conv2dAttributes &conv2d_attr,
+                              DataType                data_type,
+                              DataType                bias_data_type,
+                              QuantizationInfo        quantization_info,
+                              const DataLayout       &data_layout)
+    {
+        ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC);
+        ARM_COMPUTE_UNUSED(quantization_info); // the sketch tensor infos below are built without it
+        // Dataset shapes are in NCHW layout, so they are permuted before the NHWC sketch tensors are created
+        permute(input_shape, PermutationVector(2U, 0U, 1U));
+        permute(weights_shape, PermutationVector(2U, 0U, 1U));
+        permute(output_shape, PermutationVector(2U, 0U, 1U)); // NOTE(review): not read again in this function
+
+        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        auto              context        = GpuWorkloadContext{&cl_compile_ctx};
+        GpuWorkloadSketch sketch{&context};
+
+        // Create sketch tensors (dst_info is created empty and passed to GpuOutput::create_op below)
+        auto input_info  = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, data_layout));
+        auto weight_info = context.create_tensor_info(TensorInfo(weights_shape, 1, data_type, data_layout));
+        auto bias_info   = context.create_tensor_info(TensorInfo(bias_shape, 1, bias_data_type, data_layout));
+        auto dst_info    = context.create_tensor_info();
+
+        ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, weight_info, bias_info, conv2d_attr);
+        GpuOutput::create_op(sketch, ans_info, dst_info);
+
+        // Configure runtime from the finished sketch
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        for (auto &data : runtime.get_auxiliary_tensors()) // allocate auxiliary tensors, if there are any
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+        // Construct user tensors
+        TensorType t_input{};
+        TensorType t_weight{};
+        TensorType t_bias{};
+        TensorType t_dst{};
+
+        // Initialize user tensors from the sketch tensor infos
+        t_input.allocator()->init(*input_info);
+        t_weight.allocator()->init(*weight_info);
+        t_bias.allocator()->init(*bias_info);
+        t_dst.allocator()->init(*dst_info);
+
+        ARM_COMPUTE_ASSERT(t_input.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(t_weight.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(t_bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable());
+
+        // Allocate and fill user tensors (is_resizable() is asserted true before and false after allocation)
+        t_input.allocator()->allocate();
+        t_weight.allocator()->allocate();
+        t_bias.allocator()->allocate();
+        t_dst.allocator()->allocate();
+
+        ARM_COMPUTE_ASSERT(!t_input.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!t_weight.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!t_bias.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable());
+
+        fill(AccessorType(t_input), 0);
+        fill(AccessorType(t_weight), 1);
+        fill(AccessorType(t_bias), 2);
+
+        // Run runtime with the user tensors in the order input, weights, bias, destination
+        runtime.run({&t_input, &t_weight, &t_bias, &t_dst});
+        return t_dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape   &input_shape,
+                                      const TensorShape   &weights_shape,
+                                      const TensorShape   &bias_shape,
+                                      const TensorShape   &output_shape,
+                                      const PadStrideInfo &info,
+                                      DataType             data_type,
+                                      DataType             bias_data_type,
+                                      QuantizationInfo     quantization_info)
+    {
+        // Create reference tensors from the unpermuted shapes; all three share quantization_info
+        SimpleTensor<T>     src{input_shape, data_type, 1, quantization_info};
+        SimpleTensor<T>     weights{weights_shape, data_type, 1, quantization_info};
+        SimpleTensor<TBias> bias{bias_shape, bias_data_type, 1, quantization_info};
+
+        // Fill reference with the same indices as the target tensors (0: input, 1: weights, 2: bias)
+        fill(src, 0);
+        fill(weights, 1);
+        fill(bias, 2);
+
+        SimpleTensor<T> dst = reference::convolution_layer<T>(src, weights, bias, output_shape, info);
+        return dst;
+    }
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
+/** Direct Conv2d fixture without a quantization argument: every setup() parameter is forwarded to the
+ *  generic fixture and a default-constructed QuantizationInfo is inserted ahead of data_layout.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionDirectConv2dValidationFixture
+    : public DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+    using GenericFixture =
+        DynamicFusionDirectConv2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+public:
+    void setup(TensorShape input_shape, int stride_x, int stride_y, int pad_x, int pad_y,
+               unsigned int kernel_size, unsigned int num_kernels, DataType data_type, DataLayout data_layout)
+    {
+        // The base setup() is named explicitly because it lives in a dependent base class
+        const QuantizationInfo default_qinfo = QuantizationInfo();
+        GenericFixture::setup(input_shape, stride_x, stride_y, pad_x, pad_y, kernel_size, num_kernels,
+                              data_type, default_qinfo, data_layout);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_DIRECTCONV2DFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
new file mode 100644
index 0000000000..69bd0efbdc
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
@@ -0,0 +1,273 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/reference/ElementwiseOperations.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framework::Fixture
+{
+public:
+    /** Record the test configuration, then compute the target and the reference results (in that order). */
+    void setup(ArithmeticOperation ref_op,
+               const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2,
+               DataType data_type, bool is_inplace, bool fuse_two_ops = false)
+    {
+        // Members are stored first because fill() and the compute_* helpers read them.
+        _fuse       = fuse_two_ops;
+        _data_type  = data_type;
+        _is_inplace = is_inplace;
+        _ref_op     = ref_op;
+
+        // Fusing needs a non-empty third shape and cannot be combined with in-place execution.
+        ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops.");
+        ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet.");
+        _target    = compute_target(shape0, shape1, shape2);
+        _reference = compute_reference(shape0, shape1, shape2);
+    }
+
+protected:
+    /** Fill @p tensor with pseudo-random values seeded by @p i; the fill variant depends on the
+     *  tensor's data type and on the operation under test (_ref_op).
+     */
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        const bool is_div = (_ref_op == ArithmeticOperation::DIV);
+        if (is_data_type_float(tensor.data_type()))
+        {
+            if (is_div)
+            {
+                // NOTE(review): the pair is presumably a range excluded from the fill (keeps divisors away from 0) - confirm
+                library->fill_tensor_uniform_ranged(tensor, i, {std::pair<float, float>(-0.001f, 0.001f)});
+            }
+            else if (_ref_op == ArithmeticOperation::POWER)
+            {
+                library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f);
+            }
+            else
+            {
+                library->fill_tensor_uniform(tensor, i);
+            }
+        }
+        else if (is_div && tensor.data_type() == DataType::S32)
+        {
+            // Plain signed literals: an unsigned -1U would only become -1 via an implicit unsigned-to-signed conversion.
+            library->fill_tensor_uniform_ranged(tensor, i, {std::pair<int32_t, int32_t>(-1, 1)});
+        }
+        else
+        {
+            library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+    {
+        // Build and run a sketch with one FunctionType op (two chained ops when _fuse is set); returns the dst tensor
+        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        auto              context        = GpuWorkloadContext{&cl_compile_ctx};
+        GpuWorkloadSketch sketch{&context};
+
+        // Sketch tensor infos for the first op; dst_info is created empty and handed to GpuOutput::create_op below
+        ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type));
+        ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type));
+        ITensorInfo *dst_info = context.create_tensor_info();
+
+        ITensorInfo *rhs_info_fuse = nullptr; // stays null unless _fuse is set
+
+        ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info);
+
+        if (_fuse)
+        {
+            rhs_info_fuse          = context.create_tensor_info(TensorInfo(shape2, 1, _data_type));
+            ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, rhs_info_fuse); // second op takes the first op's result
+            GpuOutput::create_op(sketch, ans2_info, dst_info);
+        }
+        else
+        {
+            GpuOutput::create_op(sketch, ans_info, dst_info);
+        }
+
+        // Configure runtime from the finished sketch
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &data : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+
+        // Construct user tensors (t_rhs_fuse is only initialized and allocated when _fuse is set)
+        TensorType t_lhs{};
+        TensorType t_rhs{};
+        TensorType t_rhs_fuse{};
+        TensorType t_dst{};
+
+        // Initialize user tensors from the sketch tensor infos
+        t_lhs.allocator()->init(*lhs_info);
+        t_rhs.allocator()->init(*rhs_info);
+        t_dst.allocator()->init(*dst_info);
+        if (_fuse)
+        {
+            t_rhs_fuse.allocator()->init(*rhs_info_fuse);
+        }
+
+        // Allocate and fill user tensors
+        // Instead of using ACL allocator, the user can choose to import memory into the tensors
+        t_lhs.allocator()->allocate();
+        t_rhs.allocator()->allocate();
+        t_dst.allocator()->allocate();
+        if (_fuse)
+        {
+            t_rhs_fuse.allocator()->allocate();
+        }
+
+        fill(AccessorType(t_lhs), 0); // seeds 0, 1 and 2 are the same ones compute_reference() uses
+        fill(AccessorType(t_rhs), 1);
+        if (_fuse)
+        {
+            fill(AccessorType(t_rhs_fuse), 2);
+        }
+
+        // Run runtime; tensors are passed in the order lhs, rhs, [rhs_fuse,] dst
+        if (_fuse)
+        {
+            runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst});
+        }
+        else
+        {
+            runtime.run({&t_lhs, &t_rhs, &t_dst});
+        }
+
+        return t_dst;
+    }
+
+    /** Compute the expected result with the reference implementation, using fill seeds 0, 1 and (when fused) 2. */
+    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+    {
+        const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
+        // The fused op combines the first result with the shape2 operand, so its output shape must be
+        // broadcast against shape2 (not shape1). Without fusion the first op's shape is reused.
+        const TensorShape out_shape_fuse = _fuse ? TensorShape::broadcast_shape(out_shape, shape2) : out_shape;
+        // Create reference
+        SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<T> ref_dst{out_shape, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<T> ref_dst_fuse{out_shape_fuse, _data_type, 1, QuantizationInfo()};
+
+        // Fill reference
+        fill(ref_lhs, 0);
+        fill(ref_rhs, 1);
+        reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP);
+        if (_fuse)
+        {
+            fill(ref_rhs_fuse, 2);
+            reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP);
+        }
+        return _fuse ? ref_dst_fuse : ref_dst;
+    }
+
+    ArithmeticOperation _ref_op{ArithmeticOperation::ADD};
+    TensorType          _target{};
+    SimpleTensor<T>     _reference{};
+    DataType            _data_type{};
+    DataLayout          _data_layout{};
+    bool                _is_inplace{false};
+    bool                _fuse{false};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture
+    : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+    using GenericFixture = DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+public:
+    void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace)
+    {
+        GenericFixture::setup(ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); // shape0 is used for both operands
+    }
+};
+
+/** Single-op fixture taking a separate shape for each operand (shape0, shape1); no third shape is supplied. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture
+    : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+    using GenericFixture = DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+public:
+    void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1,
+               DataType data_type, bool is_inplace)
+    {
+        // fuse_two_ops keeps its default (false), so an empty shape2 is passed.
+        GenericFixture::setup(ref_op, shape0, shape1, TensorShape(), data_type, is_inplace);
+    }
+};
+
+/** Fixture exposing the full generic setup, including the third operand shape
+ *  and the fuse_two_ops flag.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture
+    : public DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+    using GenericFixture = DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+public:
+    void setup(ArithmeticOperation ref_op,
+               const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2,
+               DataType data_type, bool is_inplace, bool fuse_two_ops)
+    {
+        GenericFixture::setup(ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); // plain pass-through
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_ELEMENTWISEBINARYFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h
new file mode 100644
index 0000000000..4c1cc94d3d
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/MatMulKernelFixture.h
@@ -0,0 +1,297 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/MatMulAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/Helpers.h"
+#include "tests/validation/reference/GEMM.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/reference/ReshapeLayer.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+template <typename U>
+void fill(U &&tensor, int i)
+{
+    // F16 and F32 tensors get values drawn from [-1, 1]; every other data type uses the library's uniform fill.
+    const DataType tensor_type = tensor.data_type();
+    if (tensor_type == DataType::F16)
+    {
+        arm_compute::utils::uniform_real_distribution_16bit<half> half_dist{-1.0f, 1.0f};
+        library->fill(tensor, half_dist, i);
+    }
+    else if (tensor_type == DataType::F32)
+    {
+        std::uniform_real_distribution<float> float_dist(-1.0f, 1.0f);
+        library->fill(tensor, float_dist, i);
+    }
+    else
+    {
+        // i is passed through as the seed offset, as in the branches above.
+        library->fill_tensor_uniform(tensor, i);
+    }
+}
+
+} // namespace
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuMatMulValidationGenericFixture : public framework::Fixture
+{
+public:
+    /** Transpose the input shapes where requested, skip export-to-image configurations the device cannot
+     *  run, and otherwise compute the target and reference results.
+     */
+    void setup(TensorShape lhs_shape,
+               TensorShape rhs_shape,
+               TensorShape output_shape,
+               bool transpose_a, bool transpose_b,
+               int M0, int N0, int K0,
+               bool     export_rhs_to_cl_image,
+               DataType data_type)
+    {
+        // Shapes arrive non-transposed for both matrices; swap the first two dimensions of a transposed operand.
+        const PermutationVector swap_xy(1U, 0U);
+        if (transpose_a)
+        {
+            permute(lhs_shape, swap_xy);
+        }
+        if (transpose_b)
+        {
+            permute(rhs_shape, swap_xy);
+        }
+
+        // Skip configurations unsupported by the device.
+        _device_supports_export_to_cl_image = image2d_from_buffer_supported(CLKernelLibrary::get().get_device());
+        const bool must_skip                = export_rhs_to_cl_image && !_device_supports_export_to_cl_image;
+        if (must_skip)
+        {
+            ARM_COMPUTE_TEST_INFO("cl_khr_image2d_from_buffer not supported. TEST skipped");
+            framework::ARM_COMPUTE_PRINT_INFO();
+            return; // Note: Also need to skip the validate in corresponding FIXTURE_DATA_TEST_CASEs.
+        }
+        _target = compute_target(lhs_shape, rhs_shape, transpose_a, transpose_b, M0, N0, K0, export_rhs_to_cl_image, data_type);
+        _reference = compute_reference(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, data_type);
+    }
+
+protected:
+    TensorType compute_target(TensorShape &shape_a,
+                              TensorShape &shape_b,
+                              bool         transpose_a,
+                              bool         transpose_b,
+                              int          M0,
+                              int          N0,
+                              int          K0,
+                              bool         export_rhs_to_cl_image,
+                              DataType     data_type)
+    {
+        ARM_COMPUTE_UNUSED(export_rhs_to_cl_image); // accepted by the signature but not used in this function
+        CLScheduler::get().default_reinit();
+
+        // Build a sketch holding one FunctionType op followed by GpuOutput, run it and return the dst tensor
+        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        auto              context        = GpuWorkloadContext{&cl_compile_ctx};
+        GpuWorkloadSketch sketch{&context};
+
+        // Create sketch tensors; dst_info is created empty and handed to GpuOutput::create_op below
+        ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape_a, 1, data_type));
+        ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape_b, 1, data_type));
+        ITensorInfo *dst_info = context.create_tensor_info();
+
+        MatMulAttributes matmul_attr{}; // carries the two transpose flags
+        matmul_attr.adj_lhs(transpose_a);
+        matmul_attr.adj_rhs(transpose_b);
+
+        GpuMatMulSettings matmul_settings{}; // carries the M0 / N0 / K0 values given by the test
+        matmul_settings.m0(M0);
+        matmul_settings.n0(N0);
+        matmul_settings.k0(K0);
+
+        ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info, matmul_attr, matmul_settings);
+        GpuOutput::create_op(sketch, ans_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &data : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+
+        // Construct user tensors
+        TensorType t_lhs{};
+        TensorType t_rhs{};
+        TensorType t_dst{};
+
+        // Initialize user tensors from the sketch tensor infos
+        t_lhs.allocator()->init(*lhs_info);
+        t_rhs.allocator()->init(*rhs_info);
+        t_dst.allocator()->init(*dst_info);
+
+        ARM_COMPUTE_ASSERT(t_lhs.info()->is_resizable()); // before allocation the infos are expected to be resizable
+        ARM_COMPUTE_ASSERT(t_rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(t_dst.info()->is_resizable());
+
+        // Allocate and fill user tensors
+        t_lhs.allocator()->allocate();
+        t_rhs.allocator()->allocate();
+        t_dst.allocator()->allocate();
+
+        ARM_COMPUTE_ASSERT(!t_lhs.info()->is_resizable()); // after allocation they are expected not to be
+        ARM_COMPUTE_ASSERT(!t_rhs.info()->is_resizable());
+        ARM_COMPUTE_ASSERT(!t_dst.info()->is_resizable());
+
+        fill(AccessorType(t_lhs), 0); // seeds 0 and 1 are the same ones compute_reference() uses
+        fill(AccessorType(t_rhs), 1);
+
+        // Run runtime; tensors are passed in the order lhs, rhs, dst
+        runtime.run({&t_lhs, &t_rhs, &t_dst});
+
+        return t_dst;
+    }
+
+    SimpleTensor<T> compute_reference(const TensorShape &shape_a,
+                                      const TensorShape &shape_b,
+                                      const TensorShape &output_shape,
+                                      bool               pretranspose_a,
+                                      bool               pretranspose_b,
+                                      DataType           data_type)
+    {
+        // Reference result via reference::gemm on tensors filled with seeds 0 and 1.
+        // Shapes go through collapsed_from(Window::DimZ) so 5D+ tensors look like 3D; needed unless the gemm reference is extended to 5D+
+        TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ);
+        TensorShape shape_a_collapsed      = shape_a.collapsed_from(Window::DimZ);
+        TensorShape shape_b_collapsed      = shape_b.collapsed_from(Window::DimZ);
+
+        // Create reference
+        SimpleTensor<T> a{shape_a_collapsed, data_type, 1};
+        SimpleTensor<T> b{shape_b_collapsed, data_type, 1};
+        SimpleTensor<T> c{output_shape_collapsed, data_type, 1};
+
+        // Fill reference (c is left unfilled)
+        fill(a, 0);
+        fill(b, 1);
+
+        /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
+           therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K)
+           in order to be able to call reference implementation that works with (B x M x K) input.
+           Similarly, if pretranspose_B is set to true, then B is assumed to be (B x N x K), B must be pre-transposed before passing it to the fixture. */
+
+        // Define transposed shapes: the x and y extents of each input swapped
+        TensorShape a_transposed_shape(a.shape());
+        a_transposed_shape.set(0, a.shape().y());
+        a_transposed_shape.set(1, a.shape().x());
+
+        TensorShape b_transposed_shape(b.shape());
+        b_transposed_shape.set(0, b.shape().y());
+        b_transposed_shape.set(1, b.shape().x());
+
+        // Define transposed tensors (constructed unconditionally, only used when the matching flag is set)
+        SimpleTensor<T> a_transposed{a_transposed_shape, data_type};
+        SimpleTensor<T> b_transposed{b_transposed_shape, data_type};
+
+        // pretranspose a if necessary
+        if (pretranspose_a)
+        {
+            a_transposed = reference::permute<T>(a, PermutationVector(1U, 0U));
+        }
+
+        // pretranspose b if necessary
+        if (pretranspose_b)
+        {
+            b_transposed = reference::permute<T>(b, PermutationVector(1U, 0U));
+        }
+
+        // Use transposed tensors if boolean enabled else use original tensors
+        SimpleTensor<T> result =
+            reference::gemm<T>((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f);
+
+        // We reshape the gemm output back if the tensor is high dimensional
+        if (output_shape_collapsed != output_shape)
+        {
+            // Collapsing changed the shape, so restore the requested output_shape
+            result = reference::reshape_layer(result, output_shape);
+        }
+
+        return result;
+    }
+
+    TensorType      _target{};    // result of compute_target(); typed like its return value
+    SimpleTensor<T> _reference{}; // result of compute_reference()
+    bool            _device_supports_export_to_cl_image{false}; // set in setup() from image2d_from_buffer_supported()
+    bool            _device_supports_mmul{false}; // never assigned inside this class
+};
+
+/** MatMul fixture that always runs the generic setup with export_rhs_to_cl_image set to false. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuMatMulValidationFixture
+    : public DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+    using GenericFixture = DynamicFusionGpuMatMulValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+public:
+    /** The export_rhs_to_cl_image argument is accepted but ignored; false is forwarded in its place. */
+    void setup(TensorShape lhs_shape,
+               TensorShape rhs_shape,
+               TensorShape output_shape,
+               bool transpose_a, bool transpose_b,
+               int M0, int N0, int K0,
+               bool     export_rhs_to_cl_image,
+               DataType data_type)
+    {
+        ARM_COMPUTE_UNUSED(export_rhs_to_cl_image);
+        constexpr bool export_to_image = false;
+        GenericFixture::setup(lhs_shape, rhs_shape, output_shape, transpose_a, transpose_b, M0, N0, K0, export_to_image, data_type);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_MATMULKERNELFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h
new file mode 100644
index 0000000000..b0c7143d91
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/Pool2dFixture.h
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
+#include "src/dynamic_fusion/utils/Utils.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/PoolingLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuPool2dValidationGenericFixture : public framework::Fixture
+{
+public:
+    void setup(TensorShape input_shape, const Pool2dAttributes &pool_attr, DataType data_type)
+    {
+        _target = compute_target(input_shape, pool_attr, data_type);
+        const auto ref_pool_info = convert_pool_attr_to_pool_info(pool_attr, true); // true: mixed_precision
+        _reference               = compute_reference(input_shape, ref_pool_info, data_type);
+    }
+
+protected:
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        // F16 and F32 tensors get values drawn from [-1, 1]; every other data type uses the library's uniform fill.
+        const DataType tensor_type = tensor.data_type();
+        if (tensor_type == DataType::F16)
+        {
+            arm_compute::utils::uniform_real_distribution_16bit<half> half_dist{-1.0f, 1.0f};
+            library->fill(tensor, half_dist, i);
+        }
+        else if (tensor_type == DataType::F32)
+        {
+            std::uniform_real_distribution<float> float_dist(-1.0f, 1.0f);
+            library->fill(tensor, float_dist, i);
+        }
+        else
+        {
+            // i is passed through as the seed offset, as in the branches above.
+            library->fill_tensor_uniform(tensor, i);
+        }
+    }
+
+    // The input shape is given in NCHW order; compute_target permutes it to NHWC before building the sketch.
+    TensorType compute_target(TensorShape input_shape, const Pool2dAttributes &pool_attr, const DataType data_type)
+    {
+        CLScheduler::get().default_reinit();
+
+        // Change shape due to NHWC data layout, test shapes are NCHW (input_shape is a by-value copy)
+        permute(input_shape, PermutationVector(2U, 0U, 1U));
+
+        // Build a sketch holding one FunctionType op followed by GpuOutput, run it and return the dst tensor
+        auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        auto              context        = GpuWorkloadContext{&cl_compile_ctx};
+        GpuWorkloadSketch sketch{&context};
+
+        // Create sketch tensors; dst_info is created empty and handed to GpuOutput::create_op below
+        auto input_info = context.create_tensor_info(TensorInfo(input_shape, 1, data_type, DataLayout::NHWC));
+        auto dst_info   = context.create_tensor_info();
+
+        // Create Pool2dSettings (default-constructed, no setters are called on it)
+        GpuPool2dSettings pool_settings = GpuPool2dSettings();
+
+        ITensorInfo *ans_info = FunctionType::create_op(sketch, input_info, pool_attr, pool_settings);
+        GpuOutput::create_op(sketch, ans_info, dst_info);
+
+        // Configure runtime from the finished sketch
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &data : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+        // Construct user tensors
+        TensorType t_input{};
+        TensorType t_dst{};
+
+        // Initialize user tensors from the sketch tensor infos
+        t_input.allocator()->init(*input_info);
+        t_dst.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        t_input.allocator()->allocate();
+        t_dst.allocator()->allocate();
+
+        fill(AccessorType(t_input), 0); // seed 0 is the same one compute_reference() uses
+
+        // Run runtime; tensors are passed in the order input, dst
+        runtime.run({&t_input, &t_dst});
+        return t_dst;
+    }
+
+    SimpleTensor<T> compute_reference(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type)
+    {
+        // Reference input: built from the shape as passed in and filled with seed 0.
+        SimpleTensor<T> ref_input(shape, data_type, 1, QuantizationInfo());
+        fill(ref_input, 0);
+        // Run the reference pooling with NCHW layout and a default QuantizationInfo.
+        return reference::pooling_layer<T>(ref_input, pool_info, QuantizationInfo(), nullptr, DataLayout::NCHW);
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+};
+
+/** Pool2d fixture that assembles a Pool2dAttributes object from individual parameters. */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuPool2dValidationFixture
+    : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(TensorShape input_shape, PoolingType pool_type, Size2D pool_size, Padding2D pad, Size2D stride,
+               bool exclude_padding, DataType data_type)
+    {
+        // Set each attribute in the same order as the parameters, then hand over to the generic setup.
+        Pool2dAttributes pool_attr{};
+        pool_attr.pool_type(pool_type);
+        pool_attr.pool_size(pool_size);
+        pool_attr.pad(pad);
+        pool_attr.stride(stride);
+        pool_attr.exclude_padding(exclude_padding);
+
+        DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(input_shape, pool_attr, data_type);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuPool2dSpecialValidationFixture
+    : public DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+    using GenericFixture = DynamicFusionGpuPool2dValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+public:
+    void setup(TensorShape input_shape, Pool2dAttributes pool_attr, DataType data_type)
+    {
+        GenericFixture::setup(input_shape, pool_attr, data_type); // ready-made attributes are passed straight through
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_GPU_CL_POOL2DFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h
new file mode 100644
index 0000000000..c9ffbccbc7
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ActivationFixture.h
@@ -0,0 +1,207 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, typename... TArgs>
+class DynamicFusionActivationValidationFixture : public framework::Fixture
+{
+public:
+    // Stores the test configuration, then computes the target and the reference results.
+    // The members are written first because fill() and compute_target() read them.
+    void setup(TensorShape shape, bool fuse, DataType data_type, ActivationLayerInfo act_info, TArgs... args)
+    {
+        _fuse      = fuse;
+        _data_type = data_type;
+        _function  = act_info.activation();
+        _target    = compute_target(shape, args...);
+        _reference = compute_reference(shape, act_info);
+    }
+
+protected:
+    // Returns test values for the range [min, max], chosen as two partitions (equivalence partitioning):
+    // * Lower partition: min, min + step, lower quarter (nominal), midpoint - step
+    // * Upper partition: midpoint, midpoint + step, upper quarter (nominal), max - step, max
+    // The step is T(0.1f) for floating point data types and T(1) otherwise.
+    std::vector<T> get_boundary_values(T min, T max)
+    {
+        const auto step     = is_data_type_float(_data_type) ? T(0.1f) : T(1);
+        const auto midpoint = (min + max) / 2;
+        const auto lower_q  = (min + midpoint) / 2;
+        const auto upper_q  = (midpoint + max) / 2;
+
+        std::vector<T> values{};
+
+        // Adding or subtracting the step can leave the range, so only candidates inside [min, max] are kept
+        auto keep_in_range = [&values, &min, &max](const std::initializer_list<T> &candidates)
+        {
+            for (auto &candidate : candidates)
+            {
+                const bool in_range = candidate >= min && candidate <= max;
+                if (in_range)
+                {
+                    values.emplace_back(candidate);
+                }
+            }
+        };
+
+        // Lower partition first, then the upper partition
+        keep_in_range({min, static_cast<T>(min + step), static_cast<T>(lower_q), static_cast<T>(midpoint - step)});
+        keep_in_range({static_cast<T>(midpoint), static_cast<T>(midpoint + step), static_cast<T>(upper_q),
+                       static_cast<T>(max - step), max});
+
+        return values;
+    }
+
+    // Fills a tensor with the boundary values of the range that get_activation_layer_test_bounds reports
+    // for the stored activation function and data type.
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        float range_min                = 0;
+        float range_max                = 0;
+        std::tie(range_min, range_max) = get_activation_layer_test_bounds<T>(_function, _data_type);
+        library->fill_static_values(tensor, get_boundary_values(static_cast<T>(range_min), static_cast<T>(range_max)));
+    }
+
+    // Builds a sketch holding one op created by FunctionType (two chained ops when _fuse is set),
+    // runs it on an input filled by fill() and returns the output tensor.
+    TensorType compute_target(const TensorShape &shape, TArgs... args)
+    {
+        // Create a new workload sketch
+        CLCompileContext   cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext context{&cl_compile_ctx};
+        GpuWorkloadSketch  sketch{&context};
+
+        // Create sketch tensors
+        ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+        ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+
+        // When fusing, a second op consumes the result of the first; the last result is routed to dst
+        ITensorInfo *first_info = FunctionType::create_op(sketch, src_info, args...);
+        ITensorInfo *last_info  = _fuse ? FunctionType::create_op(sketch, first_info, args...) : first_info;
+        GpuOutput::create_op(sketch, last_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // User tensors, initialized from the sketch tensor infos
+        TensorType src_tensor{};
+        TensorType dst_tensor{};
+        src_tensor.allocator()->init(*src_info);
+        dst_tensor.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        src_tensor.allocator()->allocate();
+        dst_tensor.allocator()->allocate();
+
+        fill(AccessorType(src_tensor));
+
+        // Run runtime
+        runtime.run({&src_tensor, &dst_tensor});
+
+        return dst_tensor;
+    }
+
+    // Computes the expected result with the reference activation layer:
+    // applied once, or twice in a row when _fuse is set.
+    SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info)
+    {
+        // Reference input, filled through the same fill() as the target input
+        SimpleTensor<T> ref_src{shape, _data_type, 1};
+        fill(ref_src);
+
+        auto once = reference::activation_layer<T>(ref_src, act_info);
+        if (!_fuse)
+        {
+            return once;
+        }
+
+        auto twice = reference::activation_layer<T>(once, act_info);
+        return twice;
+    }
+
+    // Activation function under test, read from act_info in setup()
+    ActivationLayerInfo::ActivationFunction _function{};
+    // Whether a second op is chained onto the first one
+    bool                                    _fuse{false};
+    // Data type of the source and destination tensors
+    DataType                                _data_type{};
+    // Output of the dynamic fusion workload
+    TensorType                              _target{};
+    // Output of the reference implementation
+    SimpleTensor<T>                         _reference{};
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSigmoidValidationFixture
+    : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(TensorShape shape, bool fuse, DataType data_type)
+    {
+        using Base = DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>;
+        const ActivationLayerInfo logistic_info{ActivationLayerInfo::ActivationFunction::LOGISTIC};
+        Base::setup(shape, fuse, data_type, logistic_info); // sigmoid is validated as the LOGISTIC activation
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionTanhValidationFixture
+    : public DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(TensorShape shape, bool fuse, DataType data_type)
+    {
+        using Base = DynamicFusionActivationValidationFixture<TensorType, AccessorType, FunctionType, T>;
+        const ActivationLayerInfo tanh_info{ActivationLayerInfo::ActivationFunction::TANH, 1.0f, 1.0f};
+        Base::setup(shape, fuse, data_type, tanh_info); // TANH with both extra constructor arguments set to 1.0f
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_ACTIVATIONFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h
new file mode 100644
index 0000000000..08fffb305b
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/CastFixture.h
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/DepthConvertLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
+class DynamicFusionCastValidationFixture : public framework::Fixture
+{
+public:
+    // Runs the cast through the dynamic fusion workload and through the reference implementation,
+    // storing both results.
+    void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy)
+    {
+        _target    = compute_target(shape, dt_in, dt_out, policy);
+        _reference = compute_reference(shape, dt_in, dt_out, policy);
+    }
+
+protected:
+    // Fills a tensor through library->fill_tensor_uniform, forwarding i unchanged.
+    // For an F16 destination, U16/S16/U32/S32 inputs are filled with explicit bounds (0..65000 for the
+    // unsigned types, -32000..32000 for the signed ones); every other case uses the call without bounds.
+    template <typename U>
+    void fill(U &&tensor, int i, DataType dt_in, DataType dt_out)
+    {
+        if (dt_out != DataType::F16)
+        {
+            // Only F16 destinations get explicit bounds
+            library->fill_tensor_uniform(tensor, i);
+            return;
+        }
+
+        // Restricting range to avoid inf values
+        constexpr int signed_lo   = -32000;
+        constexpr int signed_hi   = 32000;
+        constexpr int unsigned_lo = 0;
+        constexpr int unsigned_hi = 65000;
+
+        switch (dt_in)
+        {
+            case DataType::U16:
+                library->fill_tensor_uniform(tensor, i, static_cast<uint16_t>(unsigned_lo),
+                                             static_cast<uint16_t>(unsigned_hi));
+                break;
+            case DataType::S16:
+                library->fill_tensor_uniform(tensor, i, static_cast<int16_t>(signed_lo),
+                                             static_cast<int16_t>(signed_hi));
+                break;
+            case DataType::U32:
+                library->fill_tensor_uniform(tensor, i, static_cast<uint32_t>(unsigned_lo),
+                                             static_cast<uint32_t>(unsigned_hi));
+                break;
+            case DataType::S32:
+                library->fill_tensor_uniform(tensor, i, static_cast<int32_t>(signed_lo),
+                                             static_cast<int32_t>(signed_hi));
+                break;
+            case DataType::U8:
+            case DataType::QASYMM8:
+            case DataType::QASYMM8_SIGNED:
+            case DataType::S8:
+            case DataType::F32:
+                // These input types are filled without explicit bounds
+                library->fill_tensor_uniform(tensor, i);
+                break;
+            default:
+                ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+        }
+    }
+
+    // Builds a sketch with a single op created by FunctionType from CastAttributes, runs it on an
+    // input filled by fill() and returns the output tensor.
+    // Given input is in nchw format
+    TensorType
+    compute_target(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy)
+    {
+        // Create a new workload sketch
+        CLCompileContext   cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext context{&cl_compile_ctx};
+        GpuWorkloadSketch  sketch{&context};
+
+        // Create sketch tensors
+        // Here, we use DataLayout::NCHW just for the test. However, the optimal data layout to
+        // be used with dynamic fusion is NHWC
+        ITensorInfo *src_info =
+            context.create_tensor_info(TensorInfo(shape, 1, dt_in, DataLayout::NCHW)); // layout is not important
+        ITensorInfo *dst_info = context.create_tensor_info();
+
+        // Describe the cast, add it to the sketch and route its result to dst
+        CastAttributes cast_attr;
+        cast_attr.convert_policy(policy).data_type(dt_out);
+        ITensorInfo *cast_info = FunctionType::create_op(sketch, src_info, cast_attr);
+        GpuOutput::create_op(sketch, cast_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &aux : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *aux_tensor = std::get<0>(aux);
+            TensorInfo    aux_info   = std::get<1>(aux);
+            AuxMemoryInfo aux_mem    = std::get<2>(aux);
+            aux_tensor->allocator()->init(aux_info, aux_mem.alignment);
+            aux_tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+
+        // User tensors, initialized from the sketch tensor infos
+        TensorType src_tensor{};
+        TensorType dst_tensor{};
+        src_tensor.allocator()->init(*src_info);
+        dst_tensor.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        src_tensor.allocator()->allocate();
+        dst_tensor.allocator()->allocate();
+
+        fill(AccessorType(src_tensor), 0, dt_in, dt_out);
+
+        // Run runtime
+        runtime.run({&src_tensor, &dst_tensor});
+
+        return dst_tensor;
+    }
+
+    // Computes the expected result with reference::depth_convert on an input filled with the same fill() arguments.
+    SimpleTensor<T2>
+    compute_reference(const TensorShape &shape, const DataType dt_in, const DataType dt_out, const ConvertPolicy policy)
+    {
+        // Create reference
+        SimpleTensor<T1> ref_src{shape, dt_in, 1};
+
+        // Fill reference
+        fill(ref_src, 0, dt_in, dt_out);
+
+        return reference::depth_convert<T1, T2>(ref_src, dt_out, policy, 0);
+    }
+
+    // Output of the dynamic fusion workload
+    TensorType       _target{};
+    // Output of the reference implementation
+    SimpleTensor<T2> _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CASTFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h
new file mode 100644
index 0000000000..e8f6f83e42
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h
@@ -0,0 +1,171 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionClampValidationFixture : public framework::Fixture
+{
+public:
+    // Stores the test configuration, then computes the clamp result with the dynamic fusion workload
+    // and the expected result with the reference activation layer.
+    void setup(TensorShape shape, ClampAttributes attributes, bool fuse, DataType data_type)
+    {
+        _fuse       = fuse;
+        _attributes = attributes;
+        _data_type  = data_type;
+
+        // CLAMP is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped.
+        const auto          relu = ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU;
+        ActivationLayerInfo act_info{relu, attributes.max_val(), attributes.min_val()};
+
+        _target    = compute_target(shape, attributes);
+        _reference = compute_reference(shape, act_info);
+    }
+
+protected:
+    // Returns test values for the range [min, max], chosen as two partitions (equivalence partitioning):
+    // * Lower partition: min, min + step, lower quarter (nominal), midpoint - step
+    // * Upper partition: midpoint, midpoint + step, upper quarter (nominal), max - step, max
+    // The step is T(0.1f) for floating point data types and T(1) otherwise.
+    std::vector<T> get_boundary_values(T min, T max)
+    {
+        const auto step     = is_data_type_float(_data_type) ? T(0.1f) : T(1);
+        const auto midpoint = (min + max) / 2;
+        const auto lower_q  = (min + midpoint) / 2;
+        const auto upper_q  = (midpoint + max) / 2;
+
+        std::vector<T> values{};
+
+        // Adding or subtracting the step can leave the range, so only candidates inside [min, max] are kept
+        auto keep_in_range = [&values, &min, &max](const std::initializer_list<T> &candidates)
+        {
+            for (auto &candidate : candidates)
+            {
+                const bool in_range = candidate >= min && candidate <= max;
+                if (in_range)
+                {
+                    values.emplace_back(candidate);
+                }
+            }
+        };
+
+        // Lower partition first, then the upper partition
+        keep_in_range({min, static_cast<T>(min + step), static_cast<T>(lower_q), static_cast<T>(midpoint - step)});
+        keep_in_range({static_cast<T>(midpoint), static_cast<T>(midpoint + step), static_cast<T>(upper_q),
+                       static_cast<T>(max - step), max});
+
+        return values;
+    }
+
+    // Fills a tensor with the boundary values of the range that get_activation_layer_test_bounds reports
+    // for LU_BOUNDED_RELU and the stored data type.
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        float range_min = 0;
+        float range_max = 0;
+        std::tie(range_min, range_max) =
+            get_activation_layer_test_bounds<T>(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, _data_type);
+        library->fill_static_values(tensor, get_boundary_values(static_cast<T>(range_min), static_cast<T>(range_max)));
+    }
+
+    // Builds a sketch holding one op created by FunctionType (two chained ops when _fuse is set),
+    // runs it on an input filled by fill() and returns the output tensor.
+    TensorType compute_target(const TensorShape &shape, ClampAttributes attributes)
+    {
+        // Create a new workload sketch
+        CLCompileContext   cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext context{&cl_compile_ctx};
+        GpuWorkloadSketch  sketch{&context};
+
+        // Create sketch tensors
+        ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+        ITensorInfo *dst_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type));
+
+        // When fusing, a second op consumes the result of the first; the last result is routed to dst
+        ITensorInfo *first_info = FunctionType::create_op(sketch, src_info, attributes);
+        ITensorInfo *last_info  = _fuse ? FunctionType::create_op(sketch, first_info, attributes) : first_info;
+        GpuOutput::create_op(sketch, last_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // User tensors, initialized from the sketch tensor infos
+        TensorType src_tensor{};
+        TensorType dst_tensor{};
+        src_tensor.allocator()->init(*src_info);
+        dst_tensor.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        src_tensor.allocator()->allocate();
+        dst_tensor.allocator()->allocate();
+        fill(AccessorType(src_tensor));
+
+        // Run runtime
+        runtime.run({&src_tensor, &dst_tensor});
+
+        return dst_tensor;
+    }
+
+    // Computes the expected result: the reference activation applied once, whether or not _fuse is set.
+    // NOTE(review): presumably enough because clamping twice with the same bounds changes nothing - confirm.
+    SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info)
+    {
+        SimpleTensor<T> ref_src{shape, _data_type, 1, _quantization_info};
+        fill(ref_src);
+
+        return reference::activation_layer<T>(ref_src, act_info, _quantization_info);
+    }
+
+    QuantizationInfo _quantization_info{}; // Left default-constructed; passed to the reference tensor and activation
+    ClampAttributes  _attributes{};        // Clamp attributes received in setup()
+    bool             _fuse{false};         // Whether a second clamp op is chained onto the first one
+    DataType         _data_type{};         // Data type of the source and destination tensors
+    TensorType       _target{};            // Output of the dynamic fusion workload
+    SimpleTensor<T>  _reference{};         // Output of the reference implementation
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h
new file mode 100644
index 0000000000..f02aa5e36a
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h
@@ -0,0 +1,239 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/Globals.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* We use a separate test fixture for Multiplication op instead of reusing ElementwiseBinaryFixture to avoid exposing
+ * the internal enum ElementwiseOp to the public utils/TypePrinters.h as required by the data test case macros
+ * to print the test data.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulValidationFixture : public framework::Fixture
+{
+public:
+    // Stores the configuration, checks the fusion preconditions and computes the target and reference results.
+    void setup(const TensorShape &shape0,
+               const TensorShape &shape1,
+               const TensorShape &shape2,
+               DataType           data_type,
+               bool               is_inplace,
+               bool               fuse_two_ops = false)
+    {
+        _data_type  = data_type;
+        _is_inplace = is_inplace;
+        _fuse       = fuse_two_ops;
+        ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops.");
+        ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet.");
+        _target    = compute_target(shape0, shape1, shape2);
+        _reference = compute_reference(shape0, shape1, shape2);
+    }
+
+protected:
+    // Fills a tensor through library->fill_tensor_uniform, forwarding i unchanged.
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    // Builds a sketch with one op created by FunctionType (two chained ops when _fuse is set) and runs it.
+    TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+    {
+        // Create a new workload sketch
+        CLCompileContext   cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext context{&cl_compile_ctx};
+        GpuWorkloadSketch  sketch{&context};
+
+        // Sketch tensors of the first multiplication and of the workload output
+        ITensorInfo *lhs_info = context.create_tensor_info(TensorInfo(shape0, 1, _data_type));
+        ITensorInfo *rhs_info = context.create_tensor_info(TensorInfo(shape1, 1, _data_type));
+        ITensorInfo *dst_info = context.create_tensor_info();
+
+        // Fuse first multiplication op
+        ITensorInfo *ans_info = FunctionType::create_op(sketch, lhs_info, rhs_info);
+
+        // When fusing, a second op multiplies the first result by a third operand of shape2
+        ITensorInfo *rhs_info_fuse = nullptr;
+        ITensorInfo *out_info      = ans_info;
+        if (_fuse)
+        {
+            rhs_info_fuse = context.create_tensor_info(TensorInfo(shape2, 1, _data_type));
+            out_info      = FunctionType::create_op(sketch, ans_info, rhs_info_fuse);
+        }
+        GpuOutput::create_op(sketch, out_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &aux : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *aux_tensor = std::get<0>(aux);
+            TensorInfo    aux_info   = std::get<1>(aux);
+            AuxMemoryInfo aux_mem    = std::get<2>(aux);
+            aux_tensor->allocator()->init(aux_info, aux_mem.alignment);
+            aux_tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+
+        // Construct user tensors
+        TensorType t_lhs{};
+        TensorType t_rhs{};
+        TensorType t_rhs_fuse{};
+        TensorType t_dst{};
+
+        // Initialize user tensors
+        t_lhs.allocator()->init(*lhs_info);
+        t_rhs.allocator()->init(*rhs_info);
+        t_dst.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        // Instead of using ACL allocator, the user can choose to import memory into the tensors
+        t_lhs.allocator()->allocate();
+        t_rhs.allocator()->allocate();
+        t_dst.allocator()->allocate();
+        fill(AccessorType(t_lhs), 0);
+        fill(AccessorType(t_rhs), 1);
+
+        // The third operand is only set up when a second op was fused
+        if (_fuse)
+        {
+            t_rhs_fuse.allocator()->init(*rhs_info_fuse);
+            t_rhs_fuse.allocator()->allocate();
+            fill(AccessorType(t_rhs_fuse), 2);
+        }
+
+        // Run runtime
+        if (_fuse)
+        {
+            runtime.run({&t_lhs, &t_rhs, &t_rhs_fuse, &t_dst});
+        }
+        else
+        {
+            runtime.run({&t_lhs, &t_rhs, &t_dst});
+        }
+
+        return t_dst;
+    }
+
+    // Computes the expected result with reference::pixel_wise_multiplication (applied twice when _fuse is set).
+    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+    {
+        // Create reference
+        SimpleTensor<T> ref_lhs{shape0, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<T> ref_rhs{shape1, _data_type, 1, QuantizationInfo()};
+        SimpleTensor<T> ref_rhs_fuse{shape2, _data_type, 1, QuantizationInfo()};
+
+        // Both multiplications use the same scale, convert policy, rounding policy and output data type
+        const auto multiply = [this](const SimpleTensor<T> &lhs, const SimpleTensor<T> &rhs)
+        {
+            return reference::pixel_wise_multiplication<T, T, T>(
+                lhs, rhs, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_NEAREST_UP, _data_type, QuantizationInfo());
+        };
+
+        // Fill reference
+        fill(ref_lhs, 0);
+        fill(ref_rhs, 1);
+        SimpleTensor<T> ref_dst = multiply(ref_lhs, ref_rhs);
+        if (!_fuse)
+        {
+            return ref_dst;
+        }
+
+        fill(ref_rhs_fuse, 2);
+        return multiply(ref_dst, ref_rhs_fuse);
+    }
+
+    TensorType      _target{};          // Output of the dynamic fusion workload
+    SimpleTensor<T> _reference{};       // Output of the reference implementation
+    DataType        _data_type{};       // Data type of all operands and of the result
+    bool            _is_inplace{false}; // Stored from setup(); only read by the precondition check there
+    bool            _fuse{false};       // Whether a second multiplication is fused after the first
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulOneOpValidationFixture
+    : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(const TensorShape &shape0, DataType data_type, bool is_inplace)
+    {
+        using Base = DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>;
+        Base::setup(shape0, shape0, TensorShape(), data_type, is_inplace); // lhs and rhs share shape0
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulBroadcastValidationFixture
+    : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace)
+    {
+        using Base = DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>;
+        Base::setup(shape0, shape1, TensorShape(), data_type, is_inplace); // empty third shape: one op only
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulTwoOpsValidationFixture
+    : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    void setup(const TensorShape &shape0,
+               const TensorShape &shape1,
+               const TensorShape &shape2,
+               DataType           data_type,
+               bool               is_inplace,
+               bool               fuse_two_ops)
+    {
+        using Base = DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>;
+        Base::setup(shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); // all arguments forwarded as given
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h
new file mode 100644
index 0000000000..bde3360940
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ReshapeFixture.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H
+
+#include "arm_compute/core/TensorShape.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/ReshapeAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h"
+
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Fixture.h"
+#include "tests/Globals.h"
+#include "tests/validation/reference/ReshapeLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Validation fixture for a dynamic fusion reshape operator.
+ *
+ * setup() produces two results for the same input/output shape pair: _target, obtained by running
+ * FunctionType inside a GpuWorkloadSketch through ClWorkloadRuntime, and _reference, obtained from
+ * reference::reshape_layer. Both inputs are filled through fill() with the same second argument.
+ *
+ * @tparam TensorType   Type of the user tensors handed to the runtime.
+ * @tparam AccessorType Wrapper constructed from a TensorType so that fill() can write into it.
+ * @tparam FunctionType Operator under test; must expose a static create_op(sketch, src_info, attributes).
+ * @tparam T            Element type of the reference SimpleTensor.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionGpuReshapeLayerValidationFixture : public framework::Fixture
+{
+public:
+    /** Compute the target and the reference result.
+     *
+     * @param[in] input_shape  Shape of the tensor to reshape.
+     * @param[in] output_shape Shape requested from the reshape.
+     * @param[in] data_type    Data type of both tensors.
+     */
+    void setup(TensorShape input_shape, TensorShape output_shape, DataType data_type)
+    {
+        _target    = compute_target(input_shape, output_shape, data_type);
+        _reference = compute_reference(input_shape, output_shape, data_type);
+    }
+
+protected:
+    /** Fill a tensor through the asset library's uniform filler.
+     *
+     * @param[in,out] tensor Tensor (or accessor) to fill.
+     * @param[in]     i      Value passed as second argument of fill_tensor_uniform
+     *                       (presumably a seed offset; confirm against the asset library).
+     */
+    template <typename U>
+    void fill(U &&tensor, int i)
+    {
+        library->fill_tensor_uniform(tensor, i);
+    }
+
+    /** Build, configure and run the workload containing the operator under test.
+     *
+     * @param[in] input_shape  Shape of the source tensor.
+     * @param[in] output_shape Shape of the destination tensor; must hold as many elements as @p input_shape.
+     * @param[in] data_type    Data type of both tensors.
+     *
+     * @return The destination tensor after the runtime has been run.
+     */
+    TensorType compute_target(TensorShape &input_shape, TensorShape &output_shape, DataType data_type)
+    {
+        // The input can only be reshaped to the output if both hold the same number of elements
+        ARM_COMPUTE_ASSERT(input_shape.total_size() == output_shape.total_size());
+
+        // Workload context and the sketch the operators are added to
+        auto               compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext gpu_ctx{&compile_ctx};
+        GpuWorkloadSketch  sketch{&gpu_ctx};
+
+        // Tensor descriptors owned by the context
+        ITensorInfo *const src_info = gpu_ctx.create_tensor_info(TensorInfo(input_shape, 1, data_type));
+        ITensorInfo *const dst_info = gpu_ctx.create_tensor_info(TensorInfo(output_shape, 1, data_type));
+
+        ReshapeAttributes reshape_attr;
+        reshape_attr.shape(output_shape);
+
+        // src -> FunctionType -> GpuOutput -> dst
+        ITensorInfo *const reshaped_info = FunctionType::create_op(sketch, src_info, reshape_attr);
+        GpuOutput::create_op(sketch, reshaped_info, dst_info);
+
+        // Turn the sketch into something runnable
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Auxiliary tensors requested by the runtime, if any, must be backed by memory
+        for (const auto &aux_entry : runtime.get_auxiliary_tensors())
+        {
+            CLTensor            *aux_tensor = std::get<0>(aux_entry);
+            const TensorInfo    &aux_info   = std::get<1>(aux_entry);
+            const AuxMemoryInfo &aux_mem    = std::get<2>(aux_entry);
+
+            auto *aux_allocator = aux_tensor->allocator();
+            aux_allocator->init(aux_info, aux_mem.alignment);
+            aux_allocator->allocate(); // Use ACL allocated memory
+        }
+
+        // User tensors: initialise both from their descriptors, then allocate both
+        TensorType src_tensor{};
+        TensorType dst_tensor{};
+        src_tensor.allocator()->init(*src_info);
+        dst_tensor.allocator()->init(*dst_info);
+        src_tensor.allocator()->allocate();
+        dst_tensor.allocator()->allocate();
+
+        // Only the source needs content
+        fill(AccessorType(src_tensor), 0);
+
+        // Execute
+        runtime.run({&src_tensor, &dst_tensor});
+
+        return dst_tensor;
+    }
+
+    /** Compute the expected result with the reference implementation.
+     *
+     * @param[in] input_shape  Shape of the source tensor.
+     * @param[in] output_shape Shape to reshape to.
+     * @param[in] data_type    Data type of the source tensor.
+     *
+     * @return Result of reference::reshape_layer on a source filled like the target's source.
+     */
+    SimpleTensor<T>
+    compute_reference(const TensorShape &input_shape, const TensorShape &output_shape, DataType data_type)
+    {
+        SimpleTensor<T> ref_src{input_shape, data_type};
+        fill(ref_src, 0); // same second argument as used for the target input
+
+        return reference::reshape_layer<T>(ref_src, output_shape);
+    }
+
+    TensorType      _target{};    /**< Output of the operator under test */
+    SimpleTensor<T> _reference{}; /**< Output of the reference implementation */
+};
+/** [ReshapeLayer fixture] **/
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESHAPEFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h
new file mode 100644
index 0000000000..711767b66f
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ResizeFixture.h
@@ -0,0 +1,272 @@
+/*
+ * Copyright (c) 2022-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/reference/Permute.h"
+#include "tests/validation/reference/Scale.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Generic validation fixture for a dynamic fusion resize operator.
+ *
+ * setup() stores the test parameters, derives an output width/height from a pseudo-random scale factor,
+ * runs FunctionType inside a GpuWorkloadSketch through ClWorkloadRuntime (_target) and evaluates
+ * reference::scale on an identically filled input (_reference).
+ *
+ * @tparam TensorType   Type of the user tensors handed to the runtime.
+ * @tparam AccessorType Wrapper constructed from a TensorType so that fill() can write into it.
+ * @tparam FunctionType Operator under test; must expose a static create_op(sketch, src_info, attributes).
+ * @tparam T            Element type of the reference SimpleTensor.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionResizeGenericValidationFixture : public framework::Fixture
+{
+public:
+    /** Record the parameters, choose the output size and compute target and reference.
+     *
+     * @param[in] shape                    Input shape, given in NCHW dimension order.
+     * @param[in] data_type                Data type of the input tensor.
+     * @param[in] quantization_info        Quantization info applied to the input.
+     * @param[in] data_layout              Data layout of the target tensor; only NHWC is accepted.
+     * @param[in] interpolation_policy     Interpolation policy given to the operator and the reference.
+     * @param[in] sampling_policy          Sampling policy given to the operator and the reference.
+     * @param[in] align_corners            Align-corners flag given to the operator and the reference.
+     * @param[in] output_quantization_info Quantization info passed to the reference for its output.
+     */
+    void setup(TensorShape         shape,
+               DataType            data_type,
+               QuantizationInfo    quantization_info,
+               DataLayout          data_layout,
+               InterpolationPolicy interpolation_policy,
+               SamplingPolicy      sampling_policy,
+               bool                align_corners,
+               QuantizationInfo    output_quantization_info)
+    {
+        _shape                    = shape;
+        _interpolation_policy     = interpolation_policy;
+        _sampling_policy          = sampling_policy;
+        _data_type                = data_type;
+        _input_quantization_info  = quantization_info;
+        _output_quantization_info = output_quantization_info;
+        _align_corners            = align_corners;
+        _data_layout              = data_layout;
+
+        ARM_COMPUTE_ERROR_ON(data_layout != DataLayout::NHWC); // Dynamic fusion resize supports only NHWC layout
+
+        // Must run before compute_target()/compute_reference(): both read _output_width/_output_height
+        generate_scale(shape);
+
+        _target    = compute_target(shape);
+        _reference = compute_reference(shape);
+    }
+
+protected:
+    /** Choose the output width and height for the given input shape.
+     *
+     * For each of width and height a scale factor between 0.25 and 3 is drawn from a generator seeded
+     * with library->seed(); the scaled input extent is clamped to [1, 8192] (width) or [1, 6384] (height)
+     * and truncated to int. Width is drawn first, then height.
+     *
+     * @param[in] shape Input shape in NCHW dimension order.
+     */
+    void generate_scale(const TensorShape &shape)
+    {
+        static constexpr float _min_scale{0.25f};
+        static constexpr float _max_scale{3.f};
+
+        constexpr float max_width{8192.0f};
+        constexpr float max_height{6384.0f};
+        constexpr float min_width{1.f};
+        constexpr float min_height{1.f};
+
+        std::mt19937                          generator(library->seed());
+        std::uniform_real_distribution<float> distribution_float(_min_scale, _max_scale);
+
+        // Each call consumes one value from the generator, so the call order below matters
+        auto generate = [&](size_t input_size, float min_output, float max_output) -> int
+        {
+            const float generated_scale = distribution_float(generator);
+            const int   output_size     = static_cast<int>(
+                utility::clamp(static_cast<float>(input_size) * generated_scale, min_output, max_output));
+            return output_size;
+        };
+
+        // Input shape is always given in NCHW layout. NHWC is dealt by permute in compute_target()
+        const int idx_width  = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH);
+        const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT);
+
+        _output_width  = generate(shape[idx_width], min_width, max_width);
+        _output_height = generate(shape[idx_height], min_height, max_height);
+    }
+
+    /** Fill a tensor with a distribution chosen from its data type.
+     *
+     * F32 and F16 use a real distribution over [-5, 5], quantized types an integer distribution
+     * over [0, 100], and everything else the library's uniform filler. The last argument of every
+     * fill call is 0.
+     *
+     * @param[in,out] tensor Tensor (or accessor) to fill.
+     */
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        if (tensor.data_type() == DataType::F32)
+        {
+            std::uniform_real_distribution<float> distribution(-5.0f, 5.0f);
+            library->fill(tensor, distribution, 0);
+        }
+        else if (tensor.data_type() == DataType::F16)
+        {
+            arm_compute::utils::uniform_real_distribution_16bit<half> distribution{-5.0f, 5.0f};
+            library->fill(tensor, distribution, 0);
+        }
+        else if (is_data_type_quantized(tensor.data_type()))
+        {
+            std::uniform_int_distribution<> distribution(0, 100);
+            library->fill(tensor, distribution, 0);
+        }
+        else
+        {
+            library->fill_tensor_uniform(tensor, 0);
+        }
+    }
+
+    /** Build, configure and run the workload containing the operator under test.
+     *
+     * @param[in] shape Input shape in NCHW dimension order; taken by value because it is permuted in place.
+     *
+     * @return The destination tensor after the runtime has been run.
+     */
+    TensorType compute_target(TensorShape shape)
+    {
+        // Our test shapes are assumed in NCHW data layout, thus the permutation
+        permute(shape, PermutationVector(2U, 0U, 1U));
+
+        // Create a new workload sketch
+        CLCompileContext   cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext context        = GpuWorkloadContext{&cl_compile_ctx};
+        GpuWorkloadSketch  sketch{&context};
+
+        // Create sketch tensors; the destination info is created empty and passed to GpuOutput below
+        ITensorInfo *src_info = context.create_tensor_info(TensorInfo(shape, 1, _data_type, _data_layout));
+        src_info->set_quantization_info(_input_quantization_info);
+        ITensorInfo *dst_info = context.create_tensor_info();
+
+        ResizeAttributes attributes;
+        attributes.align_corners(_align_corners)
+            .sampling_policy(_sampling_policy)
+            .interpolation_policy(_interpolation_policy)
+            .output_width(_output_width)
+            .output_height(_output_height);
+
+        // src -> FunctionType -> GpuOutput -> dst
+        ITensorInfo *scale_result_info = FunctionType::create_op(sketch, src_info, attributes);
+        GpuOutput::create_op(sketch, scale_result_info, dst_info);
+
+        // Configure runtime
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Allocate auxiliary tensor memory if there are any
+        for (auto &data : runtime.get_auxiliary_tensors())
+        {
+            CLTensor     *tensor      = std::get<0>(data);
+            TensorInfo    info        = std::get<1>(data);
+            AuxMemoryInfo aux_mem_req = std::get<2>(data);
+            tensor->allocator()->init(info, aux_mem_req.alignment);
+            tensor->allocator()->allocate(); // Use ACL allocated memory
+        }
+
+        // Construct user tensors
+        TensorType t_src{};
+        TensorType t_dst{};
+
+        // Initialize user tensors
+        t_src.allocator()->init(*src_info);
+        t_dst.allocator()->init(*dst_info);
+
+        // Allocate and fill user tensors
+        t_src.allocator()->allocate();
+        t_dst.allocator()->allocate();
+
+        fill(AccessorType(t_src));
+
+        // Run runtime
+        runtime.run({&t_src, &t_dst});
+
+        return t_dst;
+    }
+
+    /** Compute the expected result with reference::scale.
+     *
+     * The scale factors handed to the reference are the chosen output extent divided by the
+     * corresponding input extent.
+     *
+     * @param[in] shape Input shape in NCHW dimension order.
+     *
+     * @return Result of reference::scale on a source filled like the target's source.
+     */
+    SimpleTensor<T> compute_reference(const TensorShape &shape)
+    {
+        // Create reference
+        SimpleTensor<T> src{shape, _data_type, 1, _input_quantization_info};
+
+        // Reference code is NCHW, so the input shapes are NCHW
+        const int idx_width  = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::WIDTH);
+        const int idx_height = get_data_layout_dimension_index(DataLayout::NCHW, DataLayoutDimension::HEIGHT);
+
+        const float scale_x = static_cast<float>(_output_width) / shape[idx_width];
+        const float scale_y = static_cast<float>(_output_height) / shape[idx_height];
+
+        // Fill reference
+        fill(src);
+
+        return reference::scale<T>(src, scale_x, scale_y, _interpolation_policy, BorderMode::REPLICATE,
+                                   static_cast<T>(0), _sampling_policy, /* ceil_policy_scale */ false, _align_corners,
+                                   _output_quantization_info);
+    }
+
+    TensorType          _target{};                   /**< Output of the operator under test */
+    SimpleTensor<T>     _reference{};                /**< Output of the reference implementation */
+    TensorShape         _shape{};                    /**< Input shape as passed to setup() */
+    InterpolationPolicy _interpolation_policy{};     /**< Interpolation policy under test */
+    SamplingPolicy      _sampling_policy{};          /**< Sampling policy under test */
+    DataType            _data_type{};                /**< Data type of the input tensor */
+    DataLayout          _data_layout{};              /**< Data layout of the target tensor */
+    QuantizationInfo    _input_quantization_info{};  /**< Quantization info of the input */
+    QuantizationInfo    _output_quantization_info{}; /**< Quantization info passed to the reference */
+    bool                _align_corners{false};       /**< Align-corners flag under test */
+    int                 _output_width{0};            /**< Output width chosen by generate_scale() */
+    int                 _output_height{0};           /**< Output height chosen by generate_scale() */
+};
+
+/** Resize validation fixture for tests that do not specify quantization.
+ *
+ * setup() delegates to DynamicFusionResizeGenericValidationFixture::setup(), passing a
+ * default-constructed QuantizationInfo for both the input and the output.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionResizeValidationFixture
+    : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    /** Delegate to the generic fixture.
+     *
+     * @param[in] shape           Input shape, forwarded unchanged.
+     * @param[in] data_type       Data type, forwarded unchanged.
+     * @param[in] data_layout     Data layout, forwarded unchanged.
+     * @param[in] policy          Interpolation policy, forwarded unchanged.
+     * @param[in] sampling_policy Sampling policy, forwarded unchanged.
+     * @param[in] align_corners   Align-corners flag, forwarded unchanged.
+     */
+    void setup(TensorShape         shape,
+               DataType            data_type,
+               DataLayout          data_layout,
+               InterpolationPolicy policy,
+               SamplingPolicy      sampling_policy,
+               bool                align_corners)
+    {
+        using GenericFixture = DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>;
+
+        // Input quantization (3rd argument) and output quantization (last argument) are both defaulted
+        GenericFixture::setup(shape, data_type, QuantizationInfo(), data_layout, policy, sampling_policy,
+                              align_corners, QuantizationInfo());
+    }
+};
+
+/** Resize validation fixture for quantized tests.
+ *
+ * setup() delegates to DynamicFusionResizeGenericValidationFixture::setup(), using the same
+ * QuantizationInfo for the input and for the output.
+ *
+ * @note The mixed_layout template parameter is not referenced anywhere in this class.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false>
+class DynamicFusionResizeQuantizedValidationFixture
+    : public DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    /** Delegate to the generic fixture.
+     *
+     * @param[in] shape             Input shape, forwarded unchanged.
+     * @param[in] data_type         Data type, forwarded unchanged.
+     * @param[in] quantization_info Quantization info used for both input and output.
+     * @param[in] data_layout       Data layout, forwarded unchanged.
+     * @param[in] policy            Interpolation policy, forwarded unchanged.
+     * @param[in] sampling_policy   Sampling policy, forwarded unchanged.
+     * @param[in] align_corners     Align-corners flag, forwarded unchanged.
+     */
+    void setup(TensorShape         shape,
+               DataType            data_type,
+               QuantizationInfo    quantization_info,
+               DataLayout          data_layout,
+               InterpolationPolicy policy,
+               SamplingPolicy      sampling_policy,
+               bool                align_corners)
+    {
+        using GenericFixture = DynamicFusionResizeGenericValidationFixture<TensorType, AccessorType, FunctionType, T>;
+
+        // quantization_info appears twice on purpose: once as input and once as output quantization
+        GenericFixture::setup(shape, data_type, quantization_info, data_layout, policy, sampling_policy,
+                              align_corners, quantization_info);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_RESIZEFIXTURE_H
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h
new file mode 100644
index 0000000000..175d4ff889
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/SoftmaxFixture.h
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2023-2024 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H
+
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/SimpleTensor.h"
+#include "tests/validation/reference/SoftmaxLayer.h"
+#include "tests/validation/Validation.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Generic validation fixture for a dynamic fusion softmax operator.
+ *
+ * setup() first evaluates reference::softmax_layer (_reference) and then runs FunctionType inside a
+ * GpuWorkloadSketch through ClWorkloadRuntime (_target), both on inputs produced by fill().
+ *
+ * @tparam TensorType   Type of the user tensors handed to the runtime.
+ * @tparam AccessorType Wrapper constructed from a TensorType so that fill() can write into it.
+ * @tparam FunctionType Operator under test; must expose a static create_op(sketch, src, dst, attributes).
+ * @tparam T            Element type of the reference SimpleTensor.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSoftmaxValidationGenericFixture : public framework::Fixture
+{
+public:
+    /** Compute the reference and the target result.
+     *
+     * @param[in] shape     Shape of the input (and output) tensor.
+     * @param[in] data_type Data type of the tensors.
+     * @param[in] beta      Beta value given to the operator and the reference.
+     * @param[in] axis      Axis given to the operator and the reference (converted to int32_t on the way).
+     * @param[in] is_log    Log-softmax flag given to the operator and the reference.
+     */
+    void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log)
+    {
+        _reference = compute_reference(shape, data_type, beta, axis, is_log);
+        _target    = compute_target(shape, data_type, beta, axis, is_log);
+    }
+
+protected:
+    /** Fill a tensor with a distribution chosen from its data type.
+     *
+     * F32 and F16 use a real distribution over [-10, 10]. Of the remaining types, the non-quantized
+     * ones use an integer distribution over [0, 100] and the quantized ones the library's uniform
+     * filler. The last argument of every fill call is 0.
+     *
+     * @param[in,out] tensor Tensor (or accessor) to fill.
+     */
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        const DataType dt = tensor.data_type();
+        switch (dt)
+        {
+            case DataType::F32:
+            {
+                std::uniform_real_distribution<float> real_dist(-10.0f, 10.0f);
+                library->fill(tensor, real_dist, 0);
+                break;
+            }
+            case DataType::F16:
+            {
+                arm_compute::utils::uniform_real_distribution_16bit<half> half_dist{-10.0f, 10.0f};
+                library->fill(tensor, half_dist, 0);
+                break;
+            }
+            default:
+            {
+                // NOTE(review): quantized types take the uniform filler here and everything else the
+                // integer distribution; confirm this split is the intended one.
+                if (is_data_type_quantized(dt))
+                {
+                    library->fill_tensor_uniform(tensor, 0);
+                }
+                else
+                {
+                    std::uniform_int_distribution<> int_dist(0, 100);
+                    library->fill(tensor, int_dist, 0);
+                }
+                break;
+            }
+        }
+    }
+
+    /** Build, configure and run the workload containing the operator under test.
+     *
+     * @param[in] shape     Shape of the source and destination tensors.
+     * @param[in] data_type Data type of the tensors.
+     * @param[in] beta      Beta value stored in the softmax attributes.
+     * @param[in] axis      Axis stored in the softmax attributes.
+     * @param[in] is_log    Log-softmax flag stored in the softmax attributes.
+     *
+     * @return The destination tensor after the runtime has been run.
+     */
+    TensorType compute_target(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log)
+    {
+        // Workload context and the sketch the operator is added to
+        CLCompileContext   compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext gpu_ctx{&compile_ctx};
+        GpuWorkloadSketch  sketch{&gpu_ctx};
+
+        // Operator attributes
+        SoftmaxAttributes attributes{};
+        attributes.axis(axis).beta(beta).is_log_softmax(is_log);
+
+        // Source and destination share shape and data type
+        ITensorInfo *const src_info = gpu_ctx.create_tensor_info(shape, 1, data_type);
+        ITensorInfo *const dst_info = gpu_ctx.create_tensor_info(shape, 1, data_type);
+        FunctionType::create_op(sketch, src_info, dst_info, attributes);
+
+        // Turn the sketch into something runnable
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // (Important) Auxiliary tensors requested by the runtime, if any, must be backed by memory.
+        // Instead of using ACL allocated memory, the user can choose to import memory into the tensors
+        for (const auto &aux_entry : runtime.get_auxiliary_tensors())
+        {
+            CLTensor            *aux_tensor = std::get<0>(aux_entry);
+            const TensorInfo    &aux_info   = std::get<1>(aux_entry);
+            const AuxMemoryInfo &aux_mem    = std::get<2>(aux_entry);
+
+            auto *aux_allocator = aux_tensor->allocator();
+            aux_allocator->init(aux_info, aux_mem.alignment);
+            aux_allocator->allocate(); // Use ACL allocated memory
+        }
+
+        // User tensors: initialise both from their descriptors, then allocate both
+        TensorType src_tensor{};
+        TensorType dst_tensor{};
+        src_tensor.allocator()->init(*src_info);
+        dst_tensor.allocator()->init(*dst_info);
+        src_tensor.allocator()->allocate();
+        dst_tensor.allocator()->allocate();
+
+        // Only the source needs content
+        fill(AccessorType(src_tensor));
+
+        // Execute
+        runtime.run({&src_tensor, &dst_tensor});
+
+        return dst_tensor;
+    }
+
+    /** Compute the expected result with the reference implementation.
+     *
+     * @param[in] shape     Shape of the source tensor.
+     * @param[in] data_type Data type of the source tensor.
+     * @param[in] beta      Beta value passed to the reference.
+     * @param[in] axis      Axis passed to the reference.
+     * @param[in] is_log    Log-softmax flag passed to the reference.
+     *
+     * @return Result of reference::softmax_layer on a source filled by fill().
+     */
+    SimpleTensor<T>
+    compute_reference(const TensorShape &shape, DataType data_type, float beta, int32_t axis, bool is_log)
+    {
+        SimpleTensor<T> ref_src{shape, data_type, 1};
+        fill(ref_src);
+
+        return reference::softmax_layer<T>(ref_src, beta, axis, is_log);
+    }
+
+    TensorType      _target{};    /**< Output of the operator under test */
+    SimpleTensor<T> _reference{}; /**< Output of the reference implementation */
+};
+
+/** Softmax validation fixture used directly by the tests.
+ *
+ * setup() has the same parameter list as DynamicFusionSoftmaxValidationGenericFixture::setup() and
+ * passes every argument through unchanged.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionSoftmaxValidationFixture
+    : public DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    /** Delegate to the generic fixture.
+     *
+     * @param[in] shape     Tensor shape, forwarded unchanged.
+     * @param[in] data_type Data type, forwarded unchanged.
+     * @param[in] beta      Beta value, forwarded unchanged.
+     * @param[in] axis      Axis, forwarded unchanged.
+     * @param[in] is_log    Log-softmax flag, forwarded unchanged.
+     */
+    void setup(TensorShape shape, DataType data_type, float beta, size_t axis, bool is_log)
+    {
+        using GenericFixture = DynamicFusionSoftmaxValidationGenericFixture<TensorType, AccessorType, FunctionType, T>;
+
+        GenericFixture::setup(shape, data_type, beta, axis, is_log);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+
+#endif // ACL_TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_SOFTMAXFIXTURE_H