authorGunes Bayir <gunes.bayir@arm.com>2023-09-28 10:30:18 +0100
committerGunes Bayir <gunes.bayir@arm.com>2023-10-02 16:07:22 +0000
commitc2a51bd2cc7c4148d9444e7377af44b2f6c264ba (patch)
treee8f66188d7e048a3f61d660c236ef66b33a0bf35
parenta396da19ee6e5c36ae07c11e4f16a6787e9bc143 (diff)
downloadComputeLibrary-c2a51bd2cc7c4148d9444e7377af44b2f6c264ba.tar.gz
Optimize CL and Neon Winograd tests
Several test optimizations have been introduced into the Winograd tests for the Gpu and Cpu backends. The testing strategy is detailed in a comment header in the test design files. In summary:
- Very large shapes in the nightly suite are made smaller.
- If the underlying kernel is the same for different data types, we only need to stress some key aspects of the kernels (e.g. read/write lengths in the case of fp32/fp16).
- Where the underlying kernel is the same (OpenCL), Fp16 is tested on a subset of the shapes.
- On Cpu, there is no need to test every combination for both NCHW and NHWC, as we just permute the inputs and use the NHWC kernels anyway.
- Not every activation needs to be tested for each and every shape.

Resolves: COMPMID-6464
Change-Id: Ie25fded85c65b9c7386dc21b23f9b695b1e77b07
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10393
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
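Besides the dataset changes, the test hunks below also flatten the framework's dataset composition. A minimal sketch of the pattern, extracted from the hunks that follow (not an exhaustive list of changes):

    // Before: nested combine() calls with fully qualified make()
    combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
                            framework::dataset::make("DataType", { DataType::F32 })),
                    ActivationFunctionsSmallDataset),
            framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))

    // After: one variadic combine(), with `using framework::dataset::make;` in scope
    combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
            make("DataType", { DataType::F32 }),
            ActivationFunctionsSmallDataset,
            make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))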
-rw-r--r--arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h9
-rw-r--r--src/cpu/operators/CpuWinogradConv2d.h20
-rw-r--r--tests/datasets/LargeConvolutionLayerDataset.h211
-rw-r--r--tests/validation/CL/Winograd.cpp599
-rw-r--r--tests/validation/NEON/ConvolutionLayer.cpp451
5 files changed, 913 insertions, 377 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
index 7f4e354362..6caa2aeb59 100644
--- a/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
-#define ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
#include "arm_compute/core/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
@@ -77,7 +77,8 @@ public:
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: F16/F32.
* @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
+ * Supported kernel sizes: (height, width) -> 3x3, 1x3, 3x1, 5x5, 1x5, 5x1 for Fp32
+ * -> 3x3 for Fp16
* @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -117,4 +118,4 @@ private:
std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEWINOGRADCONVOLUTIONLAYER_H */
+#endif // ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEWINOGRADCONVOLUTIONLAYER_H
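A usage sketch of the kernel-size constraints documented in the header above. Assumptions: validate() mirrors configure() with an ITensorInfo per tensor and defaulted activation/fast-math arguments; the shapes and variable names here are illustrative only.

    // Ask whether a 5x1 Fp32 Winograd convolution is supported before configuring.
    // 5x1 kernels are listed as Fp32-only, so the same call with F16 should fail.
    const TensorInfo src(TensorShape(56U, 56U, 64U), 1, DataType::F32);
    const TensorInfo weights(TensorShape(5U, 1U, 64U, 128U), 1, DataType::F32);
    const TensorInfo biases(TensorShape(128U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(52U, 56U, 128U), 1, DataType::F32); // (56-5+1) x (56-1+1)
    const Status status = NEWinogradConvolutionLayer::validate(&src, &weights, &biases, &dst,
                                                               PadStrideInfo(1, 1, 0, 0));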
diff --git a/src/cpu/operators/CpuWinogradConv2d.h b/src/cpu/operators/CpuWinogradConv2d.h
index 7e1d952462..ba9b879431 100644
--- a/src/cpu/operators/CpuWinogradConv2d.h
+++ b/src/cpu/operators/CpuWinogradConv2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,16 +21,16 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_CPU_WINOGRAD_CONV2D_KERNEL_H
-#define ARM_COMPUTE_CPU_WINOGRAD_CONV2D_KERNEL_H
+#ifndef ACL_SRC_CPU_OPERATORS_CPUWINOGRADCONV2D_H
+#define ACL_SRC_CPU_OPERATORS_CPUWINOGRADCONV2D_H
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/FunctionDescriptors.h"
#include "src/core/common/Macros.h"
#include "src/cpu/ICpuOperator.h"
-#include "src/cpu/kernels/assembly/gemm_common.hpp"
#include "src/cpu/kernels/CpuWinogradConv2dKernel.h"
+#include "src/cpu/kernels/assembly/gemm_common.hpp"
#include "src/cpu/operators/CpuActivation.h"
#include "src/cpu/operators/CpuGemm.h"
#include "src/cpu/operators/CpuPermute.h"
@@ -65,7 +65,7 @@ public:
* while every optional dimension from 4 and above represent a batch of inputs.
* Data types supported: F16/F32.
* @param[in] weights Weights tensor Info. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
- * Currently only 3x3 and 5x5 kernels are supported.
+ * For supported kernel sizes, see @ref arm_compute::NEWinogradConvolutionLayer
* @param[in] biases Biases tensor Info. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
* @param[out] dst Destination tensor Info. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
* Data types supported: Same as @p input.
@@ -96,8 +96,8 @@ public:
bool enable_fast_math = false);
// Inherited methods overridden:
- void run(ITensorPack &tensors) override;
- void prepare(ITensorPack &constants) override;
+ void run(ITensorPack &tensors) override;
+ void prepare(ITensorPack &constants) override;
experimental::MemoryRequirements workspace() const override;
private:
@@ -124,9 +124,9 @@ private:
std::unique_ptr<CpuPermute> _permute_input;
std::unique_ptr<CpuPermute> _permute_output;
std::unique_ptr<CpuPermute> _permute_weights;
- experimental::MemoryRequirements _aux_mem{Count};
+ experimental::MemoryRequirements _aux_mem{ Count };
std::unique_ptr<arm_conv::ConvolutionArgs>
- _conv_args; // Make it unique ptr because this type does not have a default constructor
+ _conv_args; // Make it unique ptr because this type does not have a default constructor
arm_conv::winograd::WinogradImpl _winograd_impl;
DataLayout _data_layout;
TensorInfo _winograd_transformed_input;
@@ -143,4 +143,4 @@ private:
} // namespace cpu
} // namespace arm_compute
-#endif /* ARM_COMPUTE_CPU_WINOGRAD_CONV2D_KERNEL_H */
+#endif // ACL_SRC_CPU_OPERATORS_CPUWINOGRADCONV2D_H
diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h
index 1cffc9a221..72f73ba6d9 100644
--- a/tests/datasets/LargeConvolutionLayerDataset.h
+++ b/tests/datasets/LargeConvolutionLayerDataset.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -21,8 +21,8 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET
-#define ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET
+#ifndef ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H
+#define ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H
#include "tests/datasets/ConvolutionLayerDataset.h"
@@ -44,18 +44,31 @@ public:
{
// Kernel size 3
// Batch size 1
- add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 1));
- add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 1));
- add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1));
- add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 256U), TensorShape(256U), TensorShape(51U, 54U, 256U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 3U, 256U, 256U), TensorShape(256U), TensorShape(54U, 54U, 256U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1));
- add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1));
- add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(12U, 12U, 512U), PadStrideInfo(1, 1, 0, 0));
- // Batch size 3, 2 and 4
- add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 1));
- add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(110U, 111U, 128U, 2U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 1));
+ add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1));
+ add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 1));
+ add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1));
+ add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 128U), TensorShape(128U), TensorShape(51U, 54U, 128U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 1, 1));
+
+ // Batch > 1
+ add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 1));
+ }
+};
+
+class LargeWinogradConvolutionLayer3x3DatasetFp16Subset final : public ConvolutionLayerDataset
+{
+public:
+ LargeWinogradConvolutionLayer3x3DatasetFp16Subset()
+ {
+ // Kernel size 3
+ // Batch size 1
+ add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1));
+ add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1));
+ add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0));
+
+ // Batch > 1
+ add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 1));
}
};
@@ -66,18 +79,31 @@ public:
{
// Kernel size 3
// Batch size 1
- add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 0));
- add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 0));
- add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 1U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0));
- add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 256U), TensorShape(256U), TensorShape(51U, 56U, 256U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 1U, 256U, 256U), TensorShape(256U), TensorShape(56U, 56U, 256U), PadStrideInfo(1, 1, 1, 0));
- add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 512U), TensorShape(512U), TensorShape(26U, 28U, 512U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 0));
- add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(12U, 14U, 512U), PadStrideInfo(1, 1, 0, 0));
- // Batch size 3, 2 and 4
- add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 0));
- add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(110U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 0));
+ add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 1U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 0));
+ add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0));
+ add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 1U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0));
+ add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 1U, 128U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 0));
+ add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0));
+
+ // Batch > 1
+ add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0));
+ }
+};
+
+class LargeWinogradConvolutionLayer3x1DatasetFp16Subset final : public ConvolutionLayerDataset
+{
+public:
+ LargeWinogradConvolutionLayer3x1DatasetFp16Subset()
+ {
+ // Kernel size 3
+ // Batch size 1
+ add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0));
+ add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0));
+
+ // Batch > 1
+ add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0));
}
};
@@ -88,18 +114,31 @@ public:
{
// Kernel size 3
// Batch size 1
- add_config(TensorShape(224U, 222U, 64U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 0, 1));
- add_config(TensorShape(112U, 113U, 64U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 0, 1));
- add_config(TensorShape(112U, 112U, 128U), TensorShape(1U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 256U), TensorShape(256U), TensorShape(53U, 56U, 256U), PadStrideInfo(1, 1, 0, 1));
- add_config(TensorShape(56U, 56U, 256U), TensorShape(1U, 3U, 256U, 256U), TensorShape(256U), TensorShape(56U, 54U, 256U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1));
- add_config(TensorShape(28U, 28U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1));
- add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(14U, 12U, 512U), PadStrideInfo(1, 1, 0, 0));
- // Batch size 3, 2 and 4
- add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 0, 1));
- add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 1));
- add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 0, 1));
+ add_config(TensorShape(224U, 222U, 32U), TensorShape(1U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 0, 1));
+ add_config(TensorShape(112U, 113U, 32U), TensorShape(1U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 0, 1));
+ add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1));
+ add_config(TensorShape(56U, 56U, 128U), TensorShape(1U, 3U, 128U, 128U), TensorShape(128U), TensorShape(56U, 54U, 128U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1));
+
+ // Batch > 1
+ add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1));
+ }
+};
+
+class LargeWinogradConvolutionLayer1x3DatasetFp16Subset final : public ConvolutionLayerDataset
+{
+public:
+ LargeWinogradConvolutionLayer1x3DatasetFp16Subset()
+ {
+ // Kernel size 3
+ // Batch size 1
+ add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1));
+ add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1));
+
+ // Batch > 1
+ add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1));
}
};
@@ -110,15 +149,27 @@ public:
{
// Kernel size 5
// Batch size 1
- add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 2));
+ add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 32U), TensorShape(32U), TensorShape(220U, 220U, 32U), PadStrideInfo(1, 1, 0, 0));
add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0));
add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2));
- // Batch size 2, 3 and 4
- add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U, 2U), PadStrideInfo(1, 1, 0, 0));
+ // Batch > 1
+ add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2));
+ }
+};
+
+class LargeWinogradConvolutionLayer5x5DatasetFp16Subset final : public ConvolutionLayerDataset
+{
+public:
+ LargeWinogradConvolutionLayer5x5DatasetFp16Subset()
+ {
+ // Kernel size 5
+ // Batch size 1
+ add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2));
+
+ // Batch > 1
add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2));
- add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0));
}
};
@@ -128,15 +179,26 @@ public:
LargeWinogradConvolutionLayer5x1Dataset()
{
// Batch size 1
- add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 2, 0));
- add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 0));
+ add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 2, 0));
add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 1U, 42U, 100U), TensorShape(100U), TensorShape(177U, 152U, 100U), PadStrideInfo(1, 1, 0, 0));
add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0));
- // Batch size 2, 3 and 4
- add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 2, 0));
+ // Batch > 1
+ add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0));
+ }
+};
+
+class LargeWinogradConvolutionLayer5x1DatasetFp16Subset final : public ConvolutionLayerDataset
+{
+public:
+ LargeWinogradConvolutionLayer5x1DatasetFp16Subset()
+ {
+ // Batch size 1
+ add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 2, 0));
+ add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0));
+
+ // Batch > 1
add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0));
- add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 1U, 42U, 100U), TensorShape(100U), TensorShape(177U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0));
}
};
@@ -146,15 +208,12 @@ public:
LargeWinogradConvolutionLayer7x1Dataset()
{
// Batch size 1
- add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(218U, 224U, 64U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(123U, 134U, 16U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 3, 0));
+ add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 32U), TensorShape(32U), TensorShape(218U, 224U, 32U), PadStrideInfo(1, 1, 0, 0));
add_config(TensorShape(181U, 152U, 42U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U), PadStrideInfo(1, 1, 0, 0));
add_config(TensorShape(200U, 201U, 24U), TensorShape(7U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 3, 0));
- // Batch size 2, 3 and 4
- add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 3, 0));
+ // Batch > 1
add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 3, 0));
- add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0));
}
};
@@ -164,15 +223,26 @@ public:
LargeWinogradConvolutionLayer1x7Dataset()
{
// Batch size 1
- add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 218U, 64U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 0, 3));
+ add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 32U), TensorShape(32U), TensorShape(224U, 218U, 32U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 3));
+
+ // Batch > 1
+ add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3));
+ }
+};
+
+class LargeWinogradConvolutionLayer1x7DatasetFp16Subset final : public ConvolutionLayerDataset
+{
+public:
+ LargeWinogradConvolutionLayer1x7DatasetFp16Subset()
+ {
+ // Batch size 1
add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0));
add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 3));
- // Batch size 2, 3 and 4
- add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 3));
+ // Batch > 1
add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3));
- add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U, 4U), PadStrideInfo(1, 1, 0, 0));
}
};
@@ -182,15 +252,26 @@ public:
LargeWinogradConvolutionLayer1x5Dataset()
{
// Batch size 1
- add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 0, 2));
- add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U), PadStrideInfo(1, 1, 0, 0));
+ add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2));
add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0));
add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 2));
- // Batch size 2, 3 and 4
- add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 2));
+ // Batch size > 1
+ add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0));
+ }
+};
+
+class LargeWinogradConvolutionLayer1x5DatasetFp16Subset final : public ConvolutionLayerDataset
+{
+public:
+ LargeWinogradConvolutionLayer1x5DatasetFp16Subset()
+ {
+ // Batch size 1
+ add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2));
+ add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0));
+
+ // Batch size > 1
add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0));
- add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0));
}
};
@@ -233,4 +314,4 @@ public:
} // namespace datasets
} // namespace test
} // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET */
+#endif // ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H
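Each add_config entry above is internally consistent: the expected output shape follows the standard convolution arithmetic out = (in + 2*pad - kernel) / stride + 1 per spatial dimension. For example, the (53, 56, 125) input with a 3x3 kernel and PadStrideInfo(1, 1, 0, 0) gives:

    out_w = (53 + 2*0 - 3) / 1 + 1 = 51
    out_h = (56 + 2*0 - 3) / 1 + 1 = 54

which matches the TensorShape(51U, 54U, 128U) destination in the 3x3 dataset.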
diff --git a/tests/validation/CL/Winograd.cpp b/tests/validation/CL/Winograd.cpp
index 6ac37d1475..196e7edb8c 100644
--- a/tests/validation/CL/Winograd.cpp
+++ b/tests/validation/CL/Winograd.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2021 Arm Limited.
+ * Copyright (c) 2018-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,6 +30,7 @@
#include "tests/CL/CLAccessor.h"
#include "tests/CL/Helper.h"
#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ActivationFunctionsDataset.h"
#include "tests/datasets/LargeConvolutionLayerDataset.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/datasets/SmallConvolutionLayerDataset.h"
@@ -47,6 +48,7 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
namespace
{
// *INDENT-OFF*
@@ -57,108 +59,232 @@ const AbsoluteTolerance<half> tolerance_convolution_layer_f16(half(0.4f));
RelativeTolerance<half_float::half> rel_tolerance_f16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for FP16 data types */
constexpr float tolerance_num = 0.05f; /**< Tolerance number */
constexpr float abs_tolerance_convolution_layer_f16 = 2.5f; /**< Tolerance number */
-constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */
+constexpr float tolerance_num_f16 = 0.15f; /**< Tolerance number */
-//Activation Functions
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
- ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU)
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.8f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU)
});
-const auto ActivationFunctionsSmallDataset = framework::dataset::make("ActivationInfo",
+
+const auto ActivationFunctionsSmallDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU),
- ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU)
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.8f, -0.5f)
});
} // namespace
using namespace arm_compute::misc::shape_calculator;
+/*
+ Testing Strategy of CL Winograd:
+ - For NCHW and NHWC, and for each kernel size, we have a dedicated OpenCL kernel
+ (except that 1xN and Nx1 use the NxN kernels under the hood). Therefore, each of
+ these configurations should be stressed by the test cases.
+ - The Fp32 and Fp16 kernels are the same; only the DATA_TYPE build option changes
+ between the two. Because the same kernel is stressed thoroughly for both small
+ and large shapes with the Fp32 data type, the Fp16 kernels are run on a subset
+ of the shapes, as exhaustively testing the same kernel gives diminishing returns.
+ - Activations only affect the output stage and are calculated on the output tile.
+ Exhaustively testing all activations with all the shapes does not provide much
+ value but increases the testing time quite significantly. Therefore, all activations
+ are tested on a subset of the shapes, and for all MxM kernels and data layouts, as
+ they represent different OpenCL kernels (1xM and Mx1 kernels use MxM under the hood).
+*/
TEST_SUITE(CL)
TEST_SUITE(Winograd)
TEST_SUITE(ConvolutionLayer)
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", {
- TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // Insufficient padding
- TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch
- TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
- TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed
- TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported
- }),
- framework::dataset::make("WeightsInfo", {
- TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
- TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
- TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
- TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
- })),
- framework::dataset::make("BiasesInfo", {
- TensorInfo(TensorShape(19U), 1, DataType::F16),
- TensorInfo(TensorShape(19U), 1, DataType::F32),
- TensorInfo(TensorShape(21U), 1, DataType::F32),
- TensorInfo(TensorShape(16U), 1, DataType::F32),
- TensorInfo(TensorShape(16U), 1, DataType::F32)
- })),
- framework::dataset::make("OutputInfo", {
- TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16),
- TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
- TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
- TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32),
- TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
- })),
- framework::dataset::make("ConvInfo", {
- PadStrideInfo(1, 1, 1, 1),
- PadStrideInfo(1, 1, 1, 1),
- PadStrideInfo(1, 2, 0, 0),
- PadStrideInfo(1, 1, 1, 1),
- PadStrideInfo(1, 1, 1, 0)
- })),
- framework::dataset::make("Expected", { false, false, false, false, false })),
- input_info, weights_info, bias_info, output_info, conv_info, expected)
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+ make("InputInfo", {
+ TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F16), // Insufficient padding
+ TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32), // Datatype mismatch
+ TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32), // Stride y not supported
+ TensorInfo(TensorShape(16U, 16U, 8U), 1, DataType::F32), // Padding needed
+ TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32) // Kernel size not supported
+ }),
+ make("WeightsInfo", {
+ TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 2U, 19U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(3U, 3U, 8U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
+ }),
+ make("BiasesInfo", {
+ TensorInfo(TensorShape(19U), 1, DataType::F16),
+ TensorInfo(TensorShape(19U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U), 1, DataType::F32)
+ }),
+ make("OutputInfo", {
+ TensorInfo(TensorShape(17U, 31U, 19U), 1, DataType::F16),
+ TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32),
+ TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U, 16U), 1, DataType::F32),
+ TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
+ }),
+ make("ConvInfo", {
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(1, 2, 0, 0),
+ PadStrideInfo(1, 1, 1, 1),
+ PadStrideInfo(1, 1, 1, 0)
+ }),
+ make("Expected", { false, false, false, false, false })),
+ input_info, weights_info, bias_info, output_info, conv_info, expected)
{
ARM_COMPUTE_EXPECT(bool(CLWinogradConvolutionLayer::validate(&input_info.clone()->set_is_resizable(false), &weights_info.clone()->set_is_resizable(false), &bias_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), conv_info)) == expected, framework::LogLevel::ERRORS);
}
+DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip(
+ make("WeightsInfo", {
+ // Shapes are always in NCHW format. When layout is NHWC, the shape is permuted
+
+ // Fp32/16, NCHW
+ // 3x1, 1x3, 3x3 --> all TRUE
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+ // 5x1, 1x5, 5x5 --> all TRUE
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // 7x1, 1x7, 7x7
+ // nchw does not support kernels with size 7 --> all FALSE
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+ // Fp32/16, NHWC
+ // 7x1, 1x7, 7x7 --> all TRUE
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ // 3x1, 1x3, 3x3 --> all TRUE
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ // 5x1, 1x5, 5x5 --> all TRUE
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ }),
+ make("Expected", {
+ true, true, true, // nchw, 3x3, 1x3, 3x1
+ true, true, true, // nchw, 5x5, 1x5, 5x1
+ false, false, false, // nchw, 7x7, 1x7, 7x1
+ false, false, false, // nchw, random unsupported kernels
+ true, true, true, // nhwc, 7x7, 1x7, 7x1
+ true, true, true, // nhwc, 3x3, 1x3, 3x1
+ true, true, true, // nhwc, 5x5, 1x5, 5x1
+ false, false, false, // nhwc, random unsupported kernels
+ })),
+ weights_info_const, expected)
+{
+ DataType data_type = weights_info_const.data_type();
+ DataLayout data_layout = weights_info_const.data_layout();
+
+ TensorInfo input_info = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type);
+ TensorInfo bias_info = TensorInfo(TensorShape(8U), 1, data_type);
+ TensorInfo weights_info = weights_info_const;
+
+ if(data_layout == DataLayout::NHWC)
+ {
+ // Convert to NHWC
+ PermutationVector perm = PermutationVector(2U, 0U, 1U);
+
+ TensorShape input_shape = input_info.tensor_shape();
+ TensorShape weights_shape = weights_info.tensor_shape();
+ permute(input_shape, perm);
+ permute(weights_shape, perm);
+
+ input_info.set_tensor_shape(input_shape);
+ weights_info.set_tensor_shape(weights_shape);
+
+ input_info.set_data_layout(data_layout);
+ weights_info.set_data_layout(data_layout);
+ bias_info.set_data_layout(data_layout);
+ }
+
+ PadStrideInfo conv_info(1, 1, 0, 0);
+
+ TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info);
+ TensorInfo output_info = TensorInfo(output_shape, 1, data_type, data_layout);
+
+ Status status = CLWinogradConvolutionLayer::validate(
+ &input_info,
+ &weights_info,
+ &bias_info,
+ &output_info,
+ conv_info,
+ ActivationLayerInfo(),
+ true /* fast math */);
+
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
+
TEST_SUITE(FP32)
using CLWinogradConvolutionLayerFastMathFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float>;
using CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, float, float, true, true>;
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
-FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
- framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
- framework::dataset::make("Bias", TensorShape(1U))),
- framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1U, 1U))),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(3U, 3U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(6U, 6U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -167,20 +293,20 @@ TEST_SUITE_END() // Conv3x3
TEST_SUITE(Conv3x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -189,20 +315,36 @@ TEST_SUITE_END() // Conv3x1
TEST_SUITE(Conv1x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLWinogradConvolutionLayerFastMathMixedDataLayoutFixture, framework::DatasetMode::PRECOMMIT,
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(1U, 3U, 32U, 1U)),
+ make("Bias", TensorShape(1U)),
+ make("Output", TensorShape(8U, 6U, 1U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -211,10 +353,10 @@ TEST_SUITE_END() // Conv1x3
TEST_SUITE(Conv5x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset ),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -222,11 +364,27 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset ),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(5U, 5U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(9U, 9U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
@@ -235,10 +393,10 @@ TEST_SUITE_END() // Conv5x5
TEST_SUITE(Conv5x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -246,10 +404,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -259,10 +417,10 @@ TEST_SUITE_END() // Conv5x1
TEST_SUITE(Conv1x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -270,16 +428,63 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, fram
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
}
TEST_SUITE_END() // Conv1x5
+
+TEST_SUITE(Conv1x7)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(1U, 7U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(13U, 11U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 2)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv1x7
+
+TEST_SUITE(Conv7x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f32);
+}
+TEST_SUITE_END() // Conv7x1
+
+/** @note: Although 7x7 is among the kernels, the reference implementation
+ * does not support it, so it remains a "test gap".
+ */
+
TEST_SUITE_END() // FP32
@@ -288,20 +493,36 @@ TEST_SUITE(FP16)
using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<CLTensor, CLAccessor, CLWinogradConvolutionLayer, half, float>;
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x3DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(3U, 3U, 32U, 6U)),
+ make("Bias", TensorShape(6U)),
+ make("Output", TensorShape(6U, 6U, 6U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -310,20 +531,20 @@ TEST_SUITE_END() // Conv3x3
TEST_SUITE(Conv3x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x1DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -332,20 +553,20 @@ TEST_SUITE_END() // Conv3x1
TEST_SUITE(Conv1x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x3DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
@@ -354,10 +575,10 @@ TEST_SUITE_END() // Conv1x3
TEST_SUITE(Conv5x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -365,23 +586,39 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x5DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
}
+
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(5U, 5U, 32U, 6U)),
+ make("Bias", TensorShape(6U)),
+ make("Output", TensorShape(9U, 9U, 6U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
TEST_SUITE_END() // Conv5x5
TEST_SUITE(Conv5x1)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -389,10 +626,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x1DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -402,10 +639,10 @@ TEST_SUITE_END() // Conv5x1
TEST_SUITE(Conv1x5)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -413,10 +650,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x5DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -426,10 +663,10 @@ TEST_SUITE_END() // Conv1x5
TEST_SUITE(Conv1x7)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsSmallDataset),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -437,16 +674,46 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x7Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x7DatasetFp16Subset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
+}
+FIXTURE_DATA_TEST_CASE(RunActivations, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
+ combine(
+ make("Input", TensorShape(13U, 13U, 32U)),
+ make("Weight", TensorShape(1U, 7U, 32U, 6U)),
+ make("Bias", TensorShape(6U)),
+ make("Output", TensorShape(13U, 7U, 6U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(CLAccessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_convolution_layer_f16);
}
TEST_SUITE_END() // Conv1x7
+
+TEST_SUITE(Conv7x1)
+FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
+ combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsSmallDataset,
+ make("DataLayout", { DataLayout::NHWC })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_convolution_layer_f16, tolerance_num_f16);
+}
+TEST_SUITE_END() // Conv7x1
+
TEST_SUITE_END() // FP16
TEST_SUITE_END() // ConvolutionLayer
TEST_SUITE_END() // Winograd
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index 06fe9f7803..2f66100fb6 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -28,15 +28,16 @@
#include "arm_compute/runtime/NEON/functions/NEWinogradConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"
#include "arm_compute/runtime/TensorAllocator.h"
+
+#include "src/core/CPP/Validate.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/cpu/operators/CpuGemmConv2d.h"
#include "src/cpu/operators/CpuGemmDirectConv2d.h"
#include "src/cpu/operators/CpuWinogradConv2d.h"
+
#include "tests/NEON/Accessor.h"
-#include "tests/PaddingCalculator.h"
#include "tests/datasets/LargeConvolutionLayerDataset.h"
#include "tests/datasets/SmallConvolutionLayerDataset.h"
-#include "tests/datasets/TinyConvolutionLayerDataset.h"
#include "tests/framework/Asserts.h"
#include "tests/framework/Macros.h"
#include "tests/framework/datasets/Datasets.h"
@@ -50,6 +51,8 @@ namespace test
{
namespace validation
{
+using framework::dataset::make;
+
namespace detail
{
template <>
@@ -85,13 +88,13 @@ constexpr float tolerance_num = 0.07f;
#ifdef ARM_COMPUTE_ENABLE_SME
// TODO(COMPMID-6011): SME kernels and the reference model use different rounding mode.
// Temporarily increase the tolerance for quantized data.
-constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
-#else // ARM_COMPUTE_ENABLE_SME
-constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
-#endif // ARM_COMPUTE_ENABLE_SME
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+#else // ARM_COMPUTE_ENABLE_SME
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+#endif // ARM_COMPUTE_ENABLE_SME
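+// For reference (illustrative): AbsoluteTolerance<float>(t) accepts an output
+// value when |reference - target| <= t, so a tolerance of 1.0 allows off-by-one
+// results from the differing rounding modes mentioned above.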
/** CNN data types */
-const auto CNNDataTypes = framework::dataset::make("DataType",
+const auto CNNDataTypes = make("DataType",
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
DataType::F16,
@@ -99,14 +102,36 @@ const auto CNNDataTypes = framework::dataset::make("DataType",
DataType::F32,
DataType::QASYMM8,
});
-const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo",
+const auto ActivationFunctionsDataset = make("ActivationInfo",
{
ActivationLayerInfo(),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f)
});
-const auto QuantizationData = framework::dataset::make("QuantizationInfo",
+const auto ActivationFunctionsDatasetNightly = make("ActivationInfo",
+{
+ ActivationLayerInfo(),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 0.5f),
+
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 0.5f, -0.5f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SOFT_RELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ELU),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::ABS),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SQUARE),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::HARD_SWISH),
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 2.f, 1.f),
+#ifdef __aarch64__
+ ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::GELU),
+#endif // __aarch64__
+});
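+// Note: combine(...) forms the cartesian product of its datasets. A minimal
+// sketch (values chosen here for illustration only):
+//   combine(make("A", { 1, 2 }), make("B", { 3 }))  // yields (A=1,B=3), (A=2,B=3)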
+
+const auto QuantizationData = make("QuantizationInfo",
{
QuantizationInfo(0.5f, 10),
QuantizationInfo(0.3f, 3),
@@ -121,32 +146,32 @@ TEST_SUITE(ConvolutionLayer)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
+ make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F32),
TensorInfo(TensorShape(23U, 27U, 32U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32),
TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32)
}),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
+ make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 5U, 32U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16)
})),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
+ make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(19U, 23U, 21U, 4U), 1, DataType::F32),
TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32),
TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32)
})),
- framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(1, 1, 0, 0),
PadStrideInfo(2, 1, 0, 0),
PadStrideInfo(3, 2, 1, 0)
})),
- framework::dataset::make("FastMath", { true,
+ make("FastMath", { true,
true,
false,
false
})),
- framework::dataset::make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
+ make("Expected", { ConvolutionMethod::WINOGRAD, ConvolutionMethod::WINOGRAD, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })),
input_info, weights_info, output_info, conv_info, fast_math, expected)
{
ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
@@ -158,6 +183,14 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
// *INDENT-ON*
TEST_SUITE_END() // ConvolutionLayer
+/*
+    Testing Strategy of Neon Winograd:
+    - There is no need to test NCHW cases thoroughly because the Winograd kernels
+      accept NHWC; NCHW tensors are permuted before and after the kernels run.
+    - Apart from relu and bounded relu, testing activations on a single input
+      combination is enough, because the activation is not fused into Winograd
+      and is called separately.
+*/
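+/*
+    Minimal sketch of the permutation referred to above, mirroring what the
+    SupportedKernels test below does explicitly (values illustrative only):
+
+        TensorShape shape(17U, 31U, 2U);               // NCHW storage order: W, H, C
+        permute(shape, PermutationVector(2U, 0U, 1U)); // NHWC storage order: C, W, H
+*/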
TEST_SUITE(WinogradLayer)
template <typename T>
using NEWinogradConvolutionLayerFixture = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, T>;
@@ -269,38 +302,148 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
}
}
+DATA_TEST_CASE(SupportedKernels, framework::DatasetMode::ALL, zip(
+ make("WeightsInfo",
+{
+    // Shapes are always given in NCHW format. When the layout is NHWC, the shape is permuted.
+
+    // Fp32, NCHW/NHWC (layout does not matter, as NCHW is permuted to NHWC internally anyway)
+    // 3x3, 1x3, 3x1 --> all TRUE
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+    // 5x5, 1x5, 5x1 --> all TRUE
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+    // 7x7, 1x7, 7x1
+    // --> 7x7 FALSE; 1x7 and 7x1 TRUE
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F32, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F32, DataLayout::NCHW),
+
+    // Fp16
+    // 3x3, 1x3, 3x1 --> only 3x3 TRUE
+ TensorInfo(TensorShape(3U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(1U, 3U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+    // 5x5, 1x5, 5x1 --> all FALSE
+ TensorInfo(TensorShape(5U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 5U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+
+    // 7x7, 1x7, 7x1
+    // --> all FALSE
+ TensorInfo(TensorShape(7U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+ TensorInfo(TensorShape(1U, 7U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(7U, 1U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+
+ // unsupported kernel sizes
+ TensorInfo(TensorShape(2U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(5U, 2U, 2U, 8U), 1, DataType::F16, DataLayout::NHWC),
+ TensorInfo(TensorShape(3U, 6U, 2U, 8U), 1, DataType::F16, DataLayout::NCHW),
+}),
+make("Expected",
+{
+ // fp32
+ true, true, true, // 3x3, 1x3, 3x1
+ true, true, true, // 5x5, 1x5, 5x1
+ false, true, true, // 7x7, 1x7, 7x1
+ false, false, false, // random unsupported kernels
+
+ // fp16
+ true, false, false, // 3x3, 1x3, 3x1
+ false, false, false, // 5x5, 1x5, 5x1
+ false, false, false, // 7x7, 1x7, 7x1
+ false, false, false, // random unsupported kernels
+})),
+weights_info_const, expected_const)
+{
+ DataType data_type = weights_info_const.data_type();
+ DataLayout data_layout = weights_info_const.data_layout();
+
+ TensorInfo input_info = TensorInfo(TensorShape(17U, 31U, 2U), 1, data_type);
+ TensorInfo bias_info = TensorInfo(TensorShape(8U), 1, data_type);
+ TensorInfo weights_info = weights_info_const;
+
+ if(data_layout == DataLayout::NHWC)
+ {
+ // Convert to NHWC
+ PermutationVector perm = PermutationVector(2U, 0U, 1U);
+
+ TensorShape input_shape = input_info.tensor_shape();
+ TensorShape weights_shape = weights_info.tensor_shape();
+ permute(input_shape, perm);
+ permute(weights_shape, perm);
+
+ input_info.set_tensor_shape(input_shape);
+ weights_info.set_tensor_shape(weights_shape);
+
+ input_info.set_data_layout(data_layout);
+ weights_info.set_data_layout(data_layout);
+ bias_info.set_data_layout(data_layout);
+ }
+
+ PadStrideInfo conv_info(1, 1, 0, 0);
+
+ TensorShape output_shape = compute_deep_convolution_shape(input_info, weights_info, conv_info);
+ TensorInfo output_info = TensorInfo(output_shape, 1, data_type, data_layout);
+
+ Status status = NEWinogradConvolutionLayer::validate(
+ &input_info,
+ &weights_info,
+ &bias_info,
+ &output_info,
+ conv_info,
+ ActivationLayerInfo(),
+ true /* fast math */);
+
+ Status fp16_supported = ::arm_compute::error_on_unsupported_cpu_fp16("N/A", "N/A", 0, &input_info);
+ bool expected = expected_const && static_cast<bool>(fp16_supported);
+
+ ARM_COMPUTE_EXPECT(bool(status) == expected, framework::LogLevel::ERRORS);
+}
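+// For reference, the output shape computed above follows the standard
+// convolution arithmetic (stride 1 and no padding in this test):
+//   out_dim = (in_dim + pad_left + pad_right - kernel_dim) / stride + 1
+//   e.g. a 17x31 input with a 3x3 kernel gives a 15x29 output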
+
TEST_SUITE(FP32)
TEST_SUITE(Conv1x3)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEWinogradConvolutionLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(combine(combine(combine(combine(combine(
- framework::dataset::make("Input", TensorShape(8U, 8U, 32U)),
- framework::dataset::make("Weight", TensorShape(1U, 3U, 32U, 1U))),
- framework::dataset::make("Bias", TensorShape(1U))),
- framework::dataset::make("Output", TensorShape(8U, 6U, 1U))),
- framework::dataset::make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0))),
- framework::dataset::make("Dilation", Size2D(1U, 1U))),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(
+ make("Input", TensorShape(8U, 8U, 32U)),
+ make("Weight", TensorShape(1U, 3U, 32U, 1U)),
+ make("Bias", TensorShape(1U)),
+ make("Output", TensorShape(8U, 6U, 1U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -310,19 +453,19 @@ TEST_SUITE_END() // Conv1x3
TEST_SUITE(Conv3x1)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -332,19 +475,19 @@ TEST_SUITE_END() // Conv3x1
TEST_SUITE(Conv1x5)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer1x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -354,19 +497,19 @@ TEST_SUITE_END() // Conv1x5
TEST_SUITE(Conv5x1)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -376,10 +519,10 @@ TEST_SUITE_END() // Conv5x1
TEST_SUITE(Conv7x1)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
@@ -387,9 +530,9 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, frame
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ make("DataType", { DataType::F32 })),
+ make("ActivationInfo", { ActivationLayerInfo() })),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -398,20 +541,20 @@ TEST_SUITE_END() // Conv7x1
TEST_SUITE(Conv1x7)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer1x7Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer7x1Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_1xN_f32);
@@ -420,20 +563,40 @@ TEST_SUITE_END() // Conv1x7
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, abs_tolerance_f32);
+}
+/// It is enough to run the activations for a single weight/input combination and data type, because
+/// the activation function is called on top of the Winograd output as a separate operator.
+/// TODO: Enable after COMPMID-6573 is resolved
+FIXTURE_DATA_TEST_CASE(RunActivations, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::DISABLED,
+ combine(
+ make("Input", TensorShape(3U, 3U, 32U)),
+ make("Weight", TensorShape(3U, 3U, 32U, 4U)),
+ make("Bias", TensorShape(4U)),
+ make("Output", TensorShape(1U, 1U, 4U)),
+ make("PadStrideInfo", PadStrideInfo(1, 1, 0, 0)),
+ make("Dilation", Size2D(1U, 1U)),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDatasetNightly,
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
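+// A minimal sketch of the behaviour assumed above (illustrative only, not the
+// library's exact internals): the activation runs on the Winograd output as
+// its own step, e.g.
+//   dst = winograd_conv(src, weights, bias);  // convolution first
+//   dst = activation(dst, act_info);          // then activation, separately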
+
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -444,20 +607,20 @@ TEST_SUITE_END() // Conv3x3
TEST_SUITE(Conv5x5)
FIXTURE_DATA_TEST_CASE(RunSmall, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer5x5Dataset(),
+ make("DataType", { DataType::F32 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -467,12 +630,12 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEWinogradConvolutionLayerFixture<float>, frame
TEST_SUITE_END() // Conv5x5
FIXTURE_DATA_TEST_CASE(RunSmallNoBias, NEWinogradConvolutionLayerNoBiasFixture<float>, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(framework::dataset::concat(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- datasets::SmallWinogradConvolutionLayer5x5Dataset()),
- framework::dataset::make("DataType", { DataType::F32 })),
- ActivationFunctionsDataset),
-
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(framework::dataset::concat(
+ datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ datasets::SmallWinogradConvolutionLayer5x5Dataset()),
+ make("DataType", { DataType::F32 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
validate(Accessor(_target), _reference, abs_tolerance_f32);
@@ -484,24 +647,26 @@ TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
using CLWinogradConvolutionLayerFastMathFixture16 = WinogradConvolutionLayerFastMathValidationFixture<Tensor, Accessor, NEWinogradConvolutionLayer, half, float>;
-DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
- framework::dataset::make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
- TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
- }),
- framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
- TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
- })),
- framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
- TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
- })),
- framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
- PadStrideInfo(1, 1, 0, 0)
- })),
- framework::dataset::make("FastMath", { false, // case fp16 and fast_math False then disable Winograd
- true // case fp16 and fast_math True then enable Winograd
- })),
- framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
- input_info, weights_info, output_info, conv_info, fast_math, expected)
+DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(
+ make("InputInfo", { TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16),
+ TensorInfo(TensorShape(18U, 18U, 32U), 1, DataType::F16)
+ }),
+ make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16),
+ TensorInfo(TensorShape(3U, 3U, 32U, 21U), 1, DataType::F16)
+ }),
+ make("OutputInfo", { TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F32),
+ TensorInfo(TensorShape(16U, 16U, 21U), 1, DataType::F16)
+ }),
+ make("ConvInfo", { PadStrideInfo(1, 1, 0, 0),
+ PadStrideInfo(1, 1, 0, 0)
+ }),
+ make("FastMath",
+{
+    false, // fp16 with fast_math disabled: Winograd is not used
+    true   // fp16 with fast_math enabled: Winograd is used
+}),
+make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::WINOGRAD })),
+input_info, weights_info, output_info, conv_info, fast_math, expected)
{
ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(true),
&weights_info.clone()->set_is_resizable(true),
@@ -511,10 +676,10 @@ DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(z
TEST_SUITE(Conv3x3)
FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::PRECOMMIT,
- combine(combine(combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::SmallWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ ActivationFunctionsDataset,
+ make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
{
// Validate output
@@ -522,10 +687,10 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLWinogradConvolutionLayerFastMathFixture16, fr
}
FIXTURE_DATA_TEST_CASE(RunLarge, CLWinogradConvolutionLayerFastMathFixture16, framework::DatasetMode::NIGHTLY,
- combine(combine(combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
- framework::dataset::make("DataType", { DataType::F16 })),
- ActivationFunctionsDataset),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+ combine(datasets::LargeWinogradConvolutionLayer3x3Dataset(),
+ make("DataType", { DataType::F16 }),
+ make("ActivationInfo", { ActivationLayerInfo() }),
+ make("DataLayout", { DataLayout::NHWC })))
{
// Validate output
@@ -968,7 +1133,9 @@ TEST_SUITE(Float)
#if defined(ARM_COMPUTE_ENABLE_BF16)
TEST_SUITE(BFLOAT16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::BFLOAT16)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::BFLOAT16)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
ActivationFunctionsDataset))
{
// Validate output
@@ -980,7 +1147,10 @@ TEST_SUITE_END() // BFLOAT16
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", { DataLayout::NCHW })), ActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::F16)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f16, tolerance_num, abs_tolerance_f16);
@@ -990,7 +1160,9 @@ TEST_SUITE_END() // FP16
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
ActivationFunctionsDataset))
{
// Validate output
@@ -1032,8 +1204,11 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1059,8 +1234,11 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMConvolutionLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1214,7 +1392,10 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL)
TEST_SUITE(Float)
TEST_SUITE(FP32)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NHWC })), ActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::F32)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ ActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, rel_tolerance_f32, 0.f, float(abs_tolerance_f32));
@@ -1238,8 +1419,11 @@ const auto QuantizedActivationFunctionsDataset = framework::dataset::make("Activ
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -1248,8 +1432,11 @@ TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
FIXTURE_DATA_TEST_CASE(RunSmall, NEDirectGEMMConv2dLayerQuantizedFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(datasets::SmallConvolutionLayerDataset(),
- framework::dataset::make("ReshapeWeights", { true })), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataLayout", { DataLayout::NHWC })),
- framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })), QuantizedActivationFunctionsDataset))
+ framework::dataset::make("ReshapeWeights", { true })),
+ framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.01f, -10) })),
+ QuantizedActivationFunctionsDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);