diff options
Diffstat (limited to 'tests/datasets')
26 files changed, 2108 insertions, 290 deletions
diff --git a/tests/datasets/ActivationFunctionsDataset.h b/tests/datasets/ActivationFunctionsDataset.h index 1f3313c476..9b0d775376 100644 --- a/tests/datasets/ActivationFunctionsDataset.h +++ b/tests/datasets/ActivationFunctionsDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -53,7 +53,10 @@ public: ActivationLayerInfo::ActivationFunction::SQRT, ActivationLayerInfo::ActivationFunction::SQUARE, ActivationLayerInfo::ActivationFunction::TANH, - ActivationLayerInfo::ActivationFunction::IDENTITY + ActivationLayerInfo::ActivationFunction::IDENTITY, +#ifdef __aarch64__ + ActivationLayerInfo::ActivationFunction::GELU, +#endif /* __aarch64__ */ }) { } diff --git a/tests/datasets/BatchToSpaceDataset.h b/tests/datasets/BatchToSpaceDataset.h index 1edd457aad..2670af50df 100644 --- a/tests/datasets/BatchToSpaceDataset.h +++ b/tests/datasets/BatchToSpaceDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -38,15 +38,17 @@ namespace datasets class BatchToSpaceLayerDataset { public: - using type = std::tuple<TensorShape, TensorShape, TensorShape>; + using type = std::tuple<TensorShape, std::vector<int32_t>, CropInfo, TensorShape>; struct iterator { - iterator(std::vector<TensorShape>::const_iterator src_it, - std::vector<TensorShape>::const_iterator block_shape_it, - std::vector<TensorShape>::const_iterator dst_it) + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<std::vector<int32_t>>::const_iterator block_shape_it, + std::vector<CropInfo>::const_iterator crop_info_it, + std::vector<TensorShape>::const_iterator dst_it) : _src_it{ std::move(src_it) }, _block_shape_it{ std::move(block_shape_it) }, + _crop_info_it{ std::move(crop_info_it) }, _dst_it{ std::move(dst_it) } { } @@ -56,44 +58,48 @@ public: std::stringstream description; description << "In=" << *_src_it << ":"; description << "BlockShape=" << *_block_shape_it << ":"; + description << "CropInfo=" << *_crop_info_it << ":"; description << "Out=" << *_dst_it; return description.str(); } BatchToSpaceLayerDataset::type operator*() const { - return std::make_tuple(*_src_it, *_block_shape_it, *_dst_it); + return std::make_tuple(*_src_it, *_block_shape_it, *_crop_info_it, *_dst_it); } iterator &operator++() { ++_src_it; ++_block_shape_it; + ++_crop_info_it; ++_dst_it; return *this; } private: - std::vector<TensorShape>::const_iterator _src_it; - std::vector<TensorShape>::const_iterator _block_shape_it; - std::vector<TensorShape>::const_iterator _dst_it; + std::vector<TensorShape>::const_iterator _src_it; + std::vector<std::vector<int32_t>>::const_iterator _block_shape_it; + std::vector<CropInfo>::const_iterator _crop_info_it; + std::vector<TensorShape>::const_iterator _dst_it; }; iterator begin() const { - return iterator(_src_shapes.begin(), _block_shape_shapes.begin(), _dst_shapes.begin()); + return iterator(_src_shapes.begin(), _block_shapes.begin(), 
_crop_infos.begin(), _dst_shapes.begin()); } int size() const { - return std::min(_src_shapes.size(), std::min(_block_shape_shapes.size(), _dst_shapes.size())); + return std::min(std::min(std::min(_src_shapes.size(), _block_shapes.size()), _crop_infos.size()), _dst_shapes.size()); } - void add_config(TensorShape src, TensorShape block_shape, TensorShape dst) + void add_config(const TensorShape &src, const std::vector<int32_t> &block_shape, const CropInfo &crop_info, const TensorShape &dst) { _src_shapes.emplace_back(std::move(src)); - _block_shape_shapes.emplace_back(std::move(block_shape)); + _block_shapes.emplace_back(std::move(block_shape)); + _crop_infos.emplace_back(std::move(crop_info)); _dst_shapes.emplace_back(std::move(dst)); } @@ -102,35 +108,60 @@ protected: BatchToSpaceLayerDataset(BatchToSpaceLayerDataset &&) = default; private: - std::vector<TensorShape> _src_shapes{}; - std::vector<TensorShape> _block_shape_shapes{}; - std::vector<TensorShape> _dst_shapes{}; + std::vector<TensorShape> _src_shapes{}; + std::vector<std::vector<int32_t>> _block_shapes{}; + std::vector<CropInfo> _crop_infos{}; + std::vector<TensorShape> _dst_shapes{}; }; +/** Follow NCHW data layout across all datasets. I.e. 
+ * TensorShape(Width(X), Height(Y), Channel(Z), Batch(W)) + */ + class SmallBatchToSpaceLayerDataset final : public BatchToSpaceLayerDataset { public: SmallBatchToSpaceLayerDataset() { - add_config(TensorShape(1U, 1U, 1U, 4U), TensorShape(2U), TensorShape(2U, 2U, 1U, 1U)); - add_config(TensorShape(3U, 1U, 1U, 4U), TensorShape(2U), TensorShape(6U, 2U, 1U, 1U)); - add_config(TensorShape(1U, 2U, 2U, 4U), TensorShape(2U), TensorShape(2U, 4U, 2U, 1U)); - add_config(TensorShape(1U, 3U, 1U, 8U), TensorShape(2U), TensorShape(2U, 6U, 1U, 2U)); - add_config(TensorShape(3U, 4U, 1U, 4U), TensorShape(2U), TensorShape(6U, 8U, 1U, 1U)); - add_config(TensorShape(1U, 1U, 1U, 8U), TensorShape(4U, 2U), TensorShape(4U, 2U, 1U, 1U)); - add_config(TensorShape(3U, 1U, 1U, 8U), TensorShape(2U, 4U), TensorShape(6U, 4U, 1U, 1U)); + // Block size = 1 (effectively no batch to space) + add_config(TensorShape(1U, 1U, 1U, 4U), { 1U, 1U }, CropInfo(), TensorShape(1U, 1U, 1U, 4U)); + add_config(TensorShape(8U, 2U, 4U, 3U), { 1U, 1U }, CropInfo(), TensorShape(8U, 2U, 4U, 3U)); + // Same block size in both x and y + add_config(TensorShape(3U, 2U, 1U, 4U), { 2U, 2U }, CropInfo(), TensorShape(6U, 4U, 1U, 1U)); + add_config(TensorShape(1U, 3U, 2U, 9U), { 3U, 3U }, CropInfo(), TensorShape(3U, 9U, 2U, 1U)); + // Different block size in x and y + add_config(TensorShape(5U, 7U, 7U, 4U), { 2U, 1U }, CropInfo(), TensorShape(10U, 7U, 7U, 2U)); + add_config(TensorShape(3U, 3U, 1U, 8U), { 1U, 2U }, CropInfo(), TensorShape(3U, 6U, 1U, 4U)); + add_config(TensorShape(5U, 2U, 2U, 6U), { 3U, 2U }, CropInfo(), TensorShape(15U, 4U, 2U, 1U)); } }; +/** Relative small shapes that are still large enough to leave room for testing cropping of the output shape + */ +class SmallBatchToSpaceLayerWithCroppingDataset final : public BatchToSpaceLayerDataset +{ +public: + SmallBatchToSpaceLayerWithCroppingDataset() + { + // Crop in both dims + add_config(TensorShape(5U, 3U, 2U, 8U), { 2U, 2U }, CropInfo(1U, 1U, 2U, 1U), 
TensorShape(8U, 3U, 2U, 2U)); + // Left crop in x dim + add_config(TensorShape(1U, 1U, 1U, 20U), { 4U, 5U }, CropInfo(2U, 1U, 0U, 2U), TensorShape(1U, 3U, 1U, 1U)); + // Left crop in y dim + add_config(TensorShape(3U, 1U, 1U, 8U), { 2U, 4U }, CropInfo(0U, 0U, 2U, 1U), TensorShape(6U, 1U, 1U, 1U)); + } +}; class LargeBatchToSpaceLayerDataset final : public BatchToSpaceLayerDataset { public: LargeBatchToSpaceLayerDataset() { - add_config(TensorShape(64U, 32U, 2U, 4U), TensorShape(2U), TensorShape(128U, 64U, 2U, 1U)); - add_config(TensorShape(128U, 16U, 2U, 16U), TensorShape(2U), TensorShape(512U, 64U, 2U, 1U)); - add_config(TensorShape(16U, 8U, 2U, 8U), TensorShape(4U, 2U), TensorShape(64U, 16U, 2U, 1U)); - add_config(TensorShape(8U, 16U, 2U, 8U), TensorShape(2U, 4U), TensorShape(16U, 64U, 2U, 1U)); + // Same block size in both x and y + add_config(TensorShape(64U, 32U, 2U, 4U), { 2U, 2U }, CropInfo(), TensorShape(128U, 64U, 2U, 1U)); + add_config(TensorShape(128U, 16U, 2U, 18U), { 3U, 3U }, CropInfo(), TensorShape(384U, 48U, 2U, 2U)); + // Different block size in x and y + add_config(TensorShape(16U, 8U, 2U, 8U), { 4U, 1U }, CropInfo(), TensorShape(64U, 8U, 2U, 2U)); + add_config(TensorShape(8U, 16U, 2U, 8U), { 2U, 4U }, CropInfo(), TensorShape(16U, 64U, 2U, 1U)); } }; } // namespace datasets diff --git a/tests/datasets/ChannelShuffleLayerDataset.h b/tests/datasets/ChannelShuffleLayerDataset.h index afab893234..a851480fa1 100644 --- a/tests/datasets/ChannelShuffleLayerDataset.h +++ b/tests/datasets/ChannelShuffleLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 Arm Limited. + * Copyright (c) 2018, 2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -105,6 +105,7 @@ class SmallRandomChannelShuffleLayerDataset final : public ChannelShuffleLayerDa public: SmallRandomChannelShuffleLayerDataset() { + add_config(TensorShape(1U, 1U, 605U, 16U), 5); add_config(TensorShape(15U, 16U, 4U, 12U), 2); add_config(TensorShape(21U, 11U, 12U, 7U), 4); add_config(TensorShape(21U, 11U, 12U, 7U), 6); diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index 4fd461dd9d..17e03368ac 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET -#define ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET +#ifndef ACL_TESTS_DATASETS_DEPTHWISECONVOLUTIONLAYERDATASET_H +#define ACL_TESTS_DATASETS_DEPTHWISECONVOLUTIONLAYERDATASET_H #include "utils/TypePrinter.h" @@ -121,13 +121,13 @@ public: SmallDepthwiseConvolutionLayerDataset() { add_config(TensorShape(7U, 7U, 1U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(23U, 27U, 5U), Size2D(3U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(3U, 3U, 2U), Size2D(2U, 2U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(33U, 27U, 7U), Size2D(7U, 3U), PadStrideInfo(3, 2, 1, 0)); // Asymmetric padding add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); // Ceil rounding - add_config(TensorShape(7U, 8U, 5U, 9U), Size2D(8U, 6U), PadStrideInfo(2, 3, 1, 1, 1, 3, DimensionRoundingType::CEIL), Size2D(1U, 2U)); + 
add_config(TensorShape(7U, 8U, 5U, 9U), Size2D(8U, 6U), PadStrideInfo(2, 3, 1, 1, 1, 3, DimensionRoundingType::CEIL)); } }; @@ -138,20 +138,50 @@ public: LargeDepthwiseConvolutionLayerDataset() { add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(17U, 31U, 2U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(17U, 31U, 2U), Size2D(13U, 9U), PadStrideInfo(1, 2, 1, 1)); add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0)); add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), Size2D(1U, 1U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), Size2D(1U, 1U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 4U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 4U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 4U), PadStrideInfo(2, 1, 1, 1)); + add_config(TensorShape(133U, 127U, 55U), Size2D(1U, 1U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(233U, 109U, 77U), Size2D(1U, 1U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 111U, 22U), Size2D(3U, 4U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 87U, 55U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 79U, 77U), Size2D(3U, 4U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(67U, 211U, 22U), Size2D(3U, 4U), PadStrideInfo(2, 1, 1, 1)); // Asymmetric padding add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 
0, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + // Padding greater than kernel size. + add_config(TensorShape(128, 56, 56), Size2D(4, 4), PadStrideInfo(2, 2, 0, 10, 0, 10, DimensionRoundingType::FLOOR)); + } +}; + +class LargeDepthwiseConvolutionLayerDatasetFp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDatasetFp16Subset() + { + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 4U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 109U, 77U), Size2D(1U, 1U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 111U, 22U), Size2D(3U, 4U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(67U, 211U, 22U), Size2D(3U, 4U), PadStrideInfo(2, 1, 1, 1)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + // Padding greater than kernel size. + add_config(TensorShape(128, 56, 56), Size2D(4, 4), PadStrideInfo(2, 2, 0, 10, 0, 10, DimensionRoundingType::FLOOR)); + } +}; + +/** Dataset containing large kernel size for generic depthwise convolution. 
*/ +class LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeKernelSizeDepthwiseConvolutionLayerNHWCDataset() + { + add_config(TensorShape(6U, 210U, 8U), Size2D(4U, 194U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -186,21 +216,39 @@ class LargeDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvoluti public: LargeDepthwiseConvolutionLayerDataset3x3() { - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 0)); + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 2)); + + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 2)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); - add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1)); - add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 0)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); - // Width and height are a multipile of the processing tile size + 
add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 2, 1)); + + add_config(TensorShape(77U, 209U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(123U, 76U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(133U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 0)); + add_config(TensorShape(77U, 95U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); + + // Width and height are a multiple of the processing tile size + add_config(TensorShape(32U, 21U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1)); + } +}; + +class LargeDepthwiseConvolutionLayerDataset3x3Fp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseConvolutionLayerDataset3x3Fp16Subset() + { + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 2)); + + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 2)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1)); + + add_config(TensorShape(123U, 76U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(77U, 95U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); + + // Width and height are a multiple of the processing tile size add_config(TensorShape(32U, 21U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1)); } }; @@ -220,8 +268,6 @@ public: add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); add_config(TensorShape(9U, 9U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, DimensionRoundingType::CEIL)); - // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported - // add_config(TensorShape(9U, 9U, 1U), Size2D(3U, 3U), PadStrideInfo(2, 2, 2, 2, DimensionRoundingType::CEIL), Size2D(2U, 2U)); } }; /** Dataset containing optimized, 3x3 
depthwise convolution shapes. */ @@ -231,14 +277,14 @@ public: LargeOptimizedDepthwiseConvolutionLayerDataset3x3() { // Stride 1 - add_config(TensorShape(233U, 277U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(233U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(233U, 173U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(133U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(7U, 7U, 21U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(28U, 28U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); add_config(TensorShape(28U, 28U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); // Stride 2 - add_config(TensorShape(233U, 277U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(233U, 277U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(133U, 97U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(153U, 77U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(8U, 8U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(8U, 8U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); add_config(TensorShape(8U, 8U, 33U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); @@ -259,14 +305,31 @@ public: add_config(TensorShape(7U, 7U, 16U), Size2D(5U, 5U), PadStrideInfo(1, 1, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); // Stride 2 add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, 
DimensionRoundingType::CEIL)); - // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported - // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), Size2D(2U, 2U)); add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 2, 2, 2, 2, DimensionRoundingType::CEIL)); - // TODO(COMPMID-2464): Enable once dilated conv with stride 2 is supported - // add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + add_config(TensorShape(9U, 9U, 32U), Size2D(5U, 5U), PadStrideInfo(2, 2, 4, 4, 4, 4, DimensionRoundingType::CEIL), Size2D(2U, 2U)); + } +}; + +/** Dataset containing in-place 1x1 depthwise convolution shapes. + * + * For a depthwise convolution op to be in-place: + * * Output has the same shape as the input; + * * 1x1 filter + * * stride == 1 + * * dilations == 1 + * * No paddings +*/ +class SmallInPlaceDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionLayerDataset +{ +public: + SmallInPlaceDepthwiseConvolutionLayerDataset() + { + add_config(TensorShape(7U, 7U, 1U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(11U, 13U, 16U), Size2D(1U, 1U), PadStrideInfo(1, 1, 0, 0)); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DEPTHWISE_CONVOLUTION_DATASET */
\ No newline at end of file +#endif // ACL_TESTS_DATASETS_DEPTHWISECONVOLUTIONLAYERDATASET_H diff --git a/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h b/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h index 8ddc71cf5a..a58650a5e4 100644 --- a/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DilatedDepthwiseConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET -#define ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET +#ifndef ACL_TESTS_DATASETS_DILATEDDEPTHWISECONVOLUTIONLAYERDATASET_H +#define ACL_TESTS_DATASETS_DILATEDDEPTHWISECONVOLUTIONLAYERDATASET_H #include "utils/TypePrinter.h" @@ -48,6 +48,7 @@ public: add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 2U), PadStrideInfo(1, 1, 0, 0), Size2D(2U, 1U)); add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 2U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 2U), PadStrideInfo(2, 2, 0, 0), Size2D(1U, 2U)); + add_config(TensorShape(7U, 8U, 5U, 9U), Size2D(8U, 6U), PadStrideInfo(2, 3, 1, 1, 1, 3, DimensionRoundingType::CEIL), Size2D(1U, 2U)); add_config(TensorShape(7U, 8U, 1U), Size2D(2U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(2U, 2U)); add_config(TensorShape(23U, 27U, 5U), Size2D(3U, 5U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 1U)); @@ -96,15 +97,16 @@ public: LargeDepthwiseDilatedConvolutionLayerDataset() { add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), Size2D(2U, 1U)); - add_config(TensorShape(17U, 31U, 2U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(1U, 2U)); add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(1U, 3U)); + add_config(TensorShape(17U, 31U, 2U), 
Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(1U, 2U)); add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(3, 2, 1, 0), Size2D(3U, 2U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 2U)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1), Size2D(2U, 2U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 5U)); + + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); + add_config(TensorShape(233U, 177U, 77U), Size2D(3U, 3U), PadStrideInfo(3, 2, 1, 0), Size2D(3U, 2U)); + add_config(TensorShape(77U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 2U)); + add_config(TensorShape(233U, 177U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1), Size2D(2U, 2U)); + add_config(TensorShape(177U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 5U)); // Asymmetric padding add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR), Size2D(3U, 2U)); add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR), Size2D(4U, 4U)); @@ -113,6 +115,24 @@ public: } }; +class LargeDepthwiseDilatedConvolutionLayerDatasetFp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseDilatedConvolutionLayerDatasetFp16Subset() + { + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), 
Size2D(2U, 1U)); + add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(1U, 3U)); + add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + + add_config(TensorShape(77U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 2U)); + add_config(TensorShape(177U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 5U)); + // Asymmetric padding + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR), Size2D(4U, 4U)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR), Size2D(2U, 2U)); + } +}; + /** Dataset containing large, 3x3 depthwise convolution shapes with dilation. */ class LargeDepthwiseDilatedConvolutionLayerDataset3x3 final : public DepthwiseConvolutionLayerDataset { @@ -120,23 +140,44 @@ public: LargeDepthwiseDilatedConvolutionLayerDataset3x3() { add_config(TensorShape(32U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1), Size2D(2U, 1U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1), Size2D(2U, 2U)); - add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 0), Size2D(2U, 2U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), Size2D(2U, 1U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 3U)); - add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 0), Size2D(2U, 1U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1), Size2D(3U, 3U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(2U, 2U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), 
PadStrideInfo(2, 2, 0, 1), Size2D(4U, 4U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1), Size2D(2U, 5U)); + + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 0), Size2D(2U, 2U)); + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 0), Size2D(2U, 1U)); add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 0), Size2D(2U, 2U)); + + add_config(TensorShape(133U, 177U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 5U)); + add_config(TensorShape(233U, 77U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 0), Size2D(4U, 4U)); + add_config(TensorShape(77U, 211U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(4U, 4U)); + add_config(TensorShape(77U, 111U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(3U, 3U)); + } +}; + +class LargeDepthwiseDilatedConvolutionLayerDataset3x3Fp16Subset final : public DepthwiseConvolutionLayerDataset +{ +public: + LargeDepthwiseDilatedConvolutionLayerDataset3x3Fp16Subset() + { + add_config(TensorShape(32U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 1), Size2D(2U, 1U)); + + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1), Size2D(2U, 1U)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 1), Size2D(3U, 3U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1), Size2D(4U, 4U)); add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1), Size2D(2U, 5U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1), Size2D(4U, 4U)); - add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0), Size2D(5U, 5U)); - add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 0), Size2D(4U, 4U)); - add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), 
Size2D(3U, 3U)); + + add_config(TensorShape(21U, 31U, 9U, 10U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 0), Size2D(2U, 2U)); + + add_config(TensorShape(77U, 111U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1), Size2D(3U, 3U)); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET */
\ No newline at end of file +#endif // ACL_TESTS_DATASETS_DILATEDDEPTHWISECONVOLUTIONLAYERDATASET_H diff --git a/tests/datasets/DynamicFusionDataset.h b/tests/datasets/DynamicFusionDataset.h new file mode 100644 index 0000000000..5a1453b9ab --- /dev/null +++ b/tests/datasets/DynamicFusionDataset.h @@ -0,0 +1,126 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef TESTS_DATASETS_DYNAMICFUSIONDATASET +#define TESTS_DATASETS_DYNAMICFUSIONDATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class DynamicFusionThreeInputs +{ +public: + using type = std::tuple<TensorShape, TensorShape, TensorShape>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator shape0_it, + std::vector<TensorShape>::const_iterator shape1_it, + std::vector<TensorShape>::const_iterator shape2_it) + : _shape0_it{ std::move(shape0_it) }, + _shape1_it{ std::move(shape1_it) }, + _shape2_it{ std::move(shape2_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "shape0=" << *_shape0_it << ":"; + description << "shape1=" << *_shape1_it << ":"; + description << "shape2=" << *_shape2_it << ":"; + + return description.str(); + } + + DynamicFusionThreeInputs::type operator*() const + { + return std::make_tuple(*_shape0_it, *_shape1_it, *_shape2_it); + } + + iterator &operator++() + { + ++_shape0_it; + ++_shape1_it; + ++_shape2_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _shape0_it; + std::vector<TensorShape>::const_iterator _shape1_it; + std::vector<TensorShape>::const_iterator _shape2_it; + }; + + iterator begin() const + { + return iterator(_shape0_shapes.begin(), _shape1_shapes.begin(), _shape2_shapes.begin()); + } + + int size() const + { + return std::min(_shape0_shapes.size(), std::min(_shape1_shapes.size(), _shape2_shapes.size())); + } + + void add_config(TensorShape shape0, TensorShape shape1, TensorShape shape2) + { + _shape0_shapes.emplace_back(std::move(shape0)); + _shape1_shapes.emplace_back(std::move(shape1)); + _shape2_shapes.emplace_back(std::move(shape2)); + } + +protected: + DynamicFusionThreeInputs() = default; + DynamicFusionThreeInputs(DynamicFusionThreeInputs &&) = default; + +private: 
+ std::vector<TensorShape> _shape0_shapes{}; + std::vector<TensorShape> _shape1_shapes{}; + std::vector<TensorShape> _shape2_shapes{}; +}; + +class DynamicFusionElementwiseBinaryTwoOpsSmallShapes final : public DynamicFusionThreeInputs +{ +public: + DynamicFusionElementwiseBinaryTwoOpsSmallShapes() + { + add_config(TensorShape{ 9U, 9U, 5U }, TensorShape{ 9U, 9U, 5U }, TensorShape{ 9U, 9U, 5U }); + add_config(TensorShape{ 9U, 9U, 5U }, TensorShape{ 1U, 1U, 1U } /* Broadcast in X, Y, Z*/, TensorShape{ 9U, 9U, 5U }); + add_config(TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 1U, 1U } /* Broadcast in Y and Z*/, TensorShape{ 27U, 13U, 2U }); + add_config(TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 13U, 2U }, TensorShape{ 27U, 1U, 1U } /* Broadcast in Y and Z*/); + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* TESTS_DATASETS_DYNAMICFUSIONDATASET */ diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h index 7ab068c211..b0ad4879ba 100644 --- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h +++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 Arm Limited. + * Copyright (c) 2019-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET -#define ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET +#ifndef ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H +#define ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H #include "utils/TypePrinter.h" @@ -40,21 +40,17 @@ namespace datasets class GEMMLowpFusedOffsetOutputDataset { public: - using type = std::tuple<TensorShape, TensorShape, TensorShape, int32_t, int32_t, GEMMLowpOutputStageInfo>; + using type = std::tuple<TensorShape, TensorShape, TensorShape, GEMMLowpOutputStageType>; struct iterator { iterator(std::vector<TensorShape>::const_iterator a_it, std::vector<TensorShape>::const_iterator b_it, std::vector<TensorShape>::const_iterator c_it, - std::vector<int32_t>::const_iterator a_offset_it, - std::vector<int32_t>::const_iterator b_offset_it, - std::vector<GEMMLowpOutputStageInfo>::const_iterator output_stage_it) + std::vector<GEMMLowpOutputStageType>::const_iterator output_stage_it) : _a_it{ std::move(a_it) }, _b_it{ std::move(b_it) }, _c_it{ std::move(c_it) }, - _a_offset_it{ std::move(a_offset_it) }, - _b_offset_it{ std::move(b_offset_it) }, _output_stage_it{ std::move(output_stage_it) } { } @@ -65,33 +61,14 @@ public: description << "A=" << *_a_it << ":"; description << "B=" << *_b_it << ":"; description << "C=" << *_c_it << ":"; - description << "a_offset=" << *_a_offset_it << ":"; - description << "b_offset=" << *_b_offset_it << ":"; - description << "output_type=" << string_from_gemmlowp_output_stage((*_output_stage_it).type) << ":"; - description << "output_offset=" << (*_output_stage_it).gemmlowp_offset << ":"; - description << "output_multiplier={"; - for(auto it = (*_output_stage_it).gemmlowp_multipliers.begin(); it != (*_output_stage_it).gemmlowp_multipliers.end(); ++it) - { - description << (*it) << ", "; - } - description << "}:"; - description << "output_shift={"; - - for(auto it = (*_output_stage_it).gemmlowp_shifts.begin(); it != (*_output_stage_it).gemmlowp_shifts.end(); ++it) - { - 
description << (*it) << ", "; - } - description << "}:"; - description << "output_min=" << (*_output_stage_it).gemmlowp_min_bound << ":"; - description << "output_max=" << (*_output_stage_it).gemmlowp_max_bound << ":"; - description << "is_quantized_per_channel=" << (*_output_stage_it).is_quantized_per_channel << ":"; + description << "output_type=" << string_from_gemmlowp_output_stage(*_output_stage_it) << ":"; return description.str(); } GEMMLowpFusedOffsetOutputDataset::type operator*() const { - return std::make_tuple(*_a_it, *_b_it, *_c_it, *_a_offset_it, *_b_offset_it, *_output_stage_it); + return std::make_tuple(*_a_it, *_b_it, *_c_it, *_output_stage_it); } iterator &operator++() @@ -99,8 +76,6 @@ public: ++_a_it; ++_b_it; ++_c_it; - ++_a_offset_it; - ++_b_offset_it; ++_output_stage_it; return *this; @@ -110,45 +85,27 @@ public: std::vector<TensorShape>::const_iterator _a_it; std::vector<TensorShape>::const_iterator _b_it; std::vector<TensorShape>::const_iterator _c_it; - std::vector<int32_t>::const_iterator _a_offset_it; - std::vector<int32_t>::const_iterator _b_offset_it; - std::vector<GEMMLowpOutputStageInfo>::const_iterator _output_stage_it; + std::vector<GEMMLowpOutputStageType>::const_iterator _output_stage_it; }; iterator begin() const { - return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _a_offset.begin(), _b_offset.begin(), _output_stage.begin()); + return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _output_stage.begin()); } int size() const { - return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), std::min(_a_offset.size(), std::min(_b_offset.size(), _output_stage.size()))))); + return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), _output_stage.size()))); } - void add_config(TensorShape a, TensorShape b, TensorShape c, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage) + void add_config(TensorShape a, TensorShape 
b, TensorShape c, GEMMLowpOutputStageType output_stage) { _a_shapes.emplace_back(std::move(a)); _b_shapes.emplace_back(std::move(b)); _c_shapes.emplace_back(std::move(c)); - _a_offset.emplace_back(std::move(a_offset)); - _b_offset.emplace_back(std::move(b_offset)); _output_stage.emplace_back(std::move(output_stage)); } - GEMMLowpOutputStageInfo OutputStageInfo(GEMMLowpOutputStageType type, int32_t offset, int32_t multiplier, int32_t shift, int32_t min, int32_t max) - { - GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(); - output_stage.type = type; - output_stage.gemmlowp_offset = offset; - output_stage.gemmlowp_multiplier = multiplier; - output_stage.gemmlowp_shift = shift; - output_stage.gemmlowp_min_bound = min; - output_stage.gemmlowp_max_bound = max; - output_stage.gemmlowp_multipliers.push_back(multiplier); - output_stage.gemmlowp_shifts.push_back(shift); - return output_stage; - } - protected: GEMMLowpFusedOffsetOutputDataset() = default; GEMMLowpFusedOffsetOutputDataset(GEMMLowpFusedOffsetOutputDataset &&) = default; @@ -157,9 +114,7 @@ private: std::vector<TensorShape> _a_shapes{}; std::vector<TensorShape> _b_shapes{}; std::vector<TensorShape> _c_shapes{}; - std::vector<int32_t> _a_offset{}; - std::vector<int32_t> _b_offset{}; - std::vector<GEMMLowpOutputStageInfo> _output_stage{}; + std::vector<GEMMLowpOutputStageType> _output_stage{}; }; class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset @@ -167,45 +122,72 @@ class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff public: SmallGEMMLowpFusedOffsetOutputUint8Dataset() { - add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210)); - 
add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 200, 2, 13, 10, 210)); - add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - - add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, 210)); - add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, 210)); - add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 10, 10, 210)); - add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, 10, 210)); + add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 
72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; -class SmallGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffsetOutputDataset +class SmallGEMMLowpFusedBatchedMatMulDataset final : public GEMMLowpFusedOffsetOutputDataset { public: - SmallGEMMLowpFusedOffsetOutputInt8Dataset() + SmallGEMMLowpFusedBatchedMatMulDataset() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + } +}; + +class SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset +{ +public: + SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset() + { + add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + 
add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + } +}; + +class SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset +{ +public: + SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset() { - add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 13, -10, 110)); - add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -40, 2, 13, -10, 110)); - - add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 10, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 10, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 10, -10, 110)); - add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, 
OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, -10, 110)); + add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; -class SmallGEMMLowpFusedOffsetOutputPerChannelDataset final : public GEMMLowpFusedOffsetOutputDataset +class SmallGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffsetOutputDataset { public: - SmallGEMMLowpFusedOffsetOutputPerChannelDataset() + SmallGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(21U, 1U, 6U), TensorShape(43U, 21U, 6U), TensorShape(43U, 1U, 6U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 13, 10, 210)); - add_config(TensorShape(21U, 13U, 3U), TensorShape(33U, 21U, 3U), TensorShape(33U, 13U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 3U, 2U), TensorShape(72U, 31U, 2U), TensorShape(72U, 3U, 2U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); - add_config(TensorShape(52U, 13U, 7U), TensorShape(33U, 52U, 7U), TensorShape(33U, 13U, 7U), 0, 4, 
OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210)); - add_config(TensorShape(52U, 26U, 8U), TensorShape(33U, 52U, 8U), TensorShape(33U, 26U, 8U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); + add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -214,15 +196,12 @@ class LargeGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff public: LargeGEMMLowpFusedOffsetOutputUint8Dataset() { - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 18, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 18, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 200, 2, 18, 10, 210)); - add_config(TensorShape(941U, 
1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 18, 10, 210)); - - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 15, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 15, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, 10, 210)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 15, 10, 210)); + add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + } }; @@ -231,18 +210,17 @@ class LargeGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs public: LargeGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - add_config(TensorShape(873U, 
7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - - add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 15, -10, 110)); - add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 15, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 15, -10, 110)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, -10, 110)); + add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), 
GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET */ +#endif // ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H diff --git a/tests/datasets/GatherDataset.h b/tests/datasets/GatherDataset.h index 29a99d5239..74ea3b4a06 100644 --- a/tests/datasets/GatherDataset.h +++ b/tests/datasets/GatherDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 Arm Limited. + * Copyright (c) 2018-2019, 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -106,6 +106,64 @@ private: std::vector<int> _axis{}; }; +class SmallGatherMultiDimIndicesDataset final : public GatherDataset +{ +public: + SmallGatherMultiDimIndicesDataset() + { + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 1); + add_config(TensorShape(15U, 15U), TensorShape(3U, 2U, 2U), 1); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 1); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 1); + add_config(TensorShape(1U, 5U, 3U), TensorShape(1U, 7U, 3U), 1); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 0); + add_config(TensorShape(9U), TensorShape(3U, 2U, 4U), 0); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 0); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U), 2); + add_config(TensorShape(8U, 2U, 3U), TensorShape(4U, 2U, 5U), 2); + } +}; + +class CLSmallGatherMultiDimIndicesDataset final : public GatherDataset +{ +public: + CLSmallGatherMultiDimIndicesDataset() + { + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 0); + add_config(TensorShape(15U, 15U), TensorShape(3U, 2U, 2U), 0); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 
0); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 0); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 0); + add_config(TensorShape(9U), TensorShape(3U, 2U, 4U), 0); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 0); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U),0); + + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 1); + add_config(TensorShape(15U, 15U), TensorShape(3U, 2U, 2U), 1); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 1); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 1); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 1); + add_config(TensorShape(9U), TensorShape(3U, 2U, 4U), 1); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 1); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U),1); + + add_config(TensorShape(2U, 6U), TensorShape(4U, 9U), 2); + add_config(TensorShape(15U, 15U), TensorShape(2U, 11U), 2); + add_config(TensorShape(5U, 3U, 4U), TensorShape(2U, 7U), 2); + + add_config(TensorShape(3U, 5U), TensorShape(2U, 3U), 2); + add_config(TensorShape(5U, 3U, 4U), TensorShape(5U, 6U), 2); + + add_config(TensorShape(7U, 4U, 5U), TensorShape(2U, 3U),2); + } +}; + class SmallGatherDataset final : public GatherDataset { public: diff --git a/tests/datasets/LargeConvolutionLayerDataset.h b/tests/datasets/LargeConvolutionLayerDataset.h index 1cffc9a221..c299f2460b 100644 --- a/tests/datasets/LargeConvolutionLayerDataset.h +++ b/tests/datasets/LargeConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET -#define ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET +#ifndef ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H +#define ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H #include "tests/datasets/ConvolutionLayerDataset.h" @@ -44,18 +44,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 256U), TensorShape(256U), TensorShape(51U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 3U, 256U, 256U), TensorShape(256U), TensorShape(54U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U, 512U), TensorShape(512U), TensorShape(12U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 3U, 64U, 128U), TensorShape(128U), TensorShape(110U, 111U, 128U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 128U), 
TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 3U, 125U, 128U), TensorShape(128U), TensorShape(51U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 1, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 1)); + } +}; + +class LargeWinogradConvolutionLayer3x3DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x3DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U, 128U), TensorShape(128U), TensorShape(54U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 
1, 1, 1)); } }; @@ -66,18 +79,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 113U, 64U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(3U, 1U, 128U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 256U), TensorShape(256U), TensorShape(51U, 56U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(3U, 1U, 256U, 256U), TensorShape(256U), TensorShape(56U, 56U, 256U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 512U), TensorShape(512U), TensorShape(26U, 28U, 512U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 1U, 512U, 512U), TensorShape(512U), TensorShape(12U, 14U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(3U, 1U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 1, 0)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(3U, 1U, 64U, 128U), TensorShape(128U), TensorShape(110U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(3U, 1U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 113U, 32U), 
TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 1U, 64U, 129U), TensorShape(129U), TensorShape(112U, 112U, 129U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 1U, 128U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0)); + } +}; + +class LargeWinogradConvolutionLayer3x1DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer3x1DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(112U, 113U, 32U), TensorShape(3U, 1U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 1, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(3U, 1U, 125U, 128U), TensorShape(128U), TensorShape(51U, 56U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(3U, 1U, 257U, 128U), TensorShape(128U), TensorShape(26U, 28U, 128U), PadStrideInfo(1, 1, 0, 0)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(3U, 1U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 1, 0)); } }; @@ -88,18 +114,31 @@ public: { // Kernel size 3 // Batch size 1 - add_config(TensorShape(224U, 222U, 64U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 113U, 64U), 
TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 112U, 128U), TensorShape(1U, 3U, 128U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 256U), TensorShape(256U), TensorShape(53U, 56U, 256U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(56U, 56U, 256U), TensorShape(1U, 3U, 256U, 256U), TensorShape(256U), TensorShape(56U, 54U, 256U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(28U, 28U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(28U, 28U, 512U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(1U, 3U, 512U, 512U), TensorShape(512U), TensorShape(14U, 12U, 512U), PadStrideInfo(1, 1, 0, 0)); - // Batch size 3, 2 and 4 - add_config(TensorShape(224U, 222U, 64U, 3U), TensorShape(1U, 3U, 64U, 64U), TensorShape(64U), TensorShape(224U, 222U, 64U, 3U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(112U, 113U, 64U, 2U), TensorShape(1U, 3U, 64U, 128U), TensorShape(128U), TensorShape(112U, 113U, 128U, 2U), PadStrideInfo(1, 1, 0, 1)); - add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 128U), TensorShape(128U), TensorShape(111U, 112U, 128U, 4U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(224U, 222U, 32U), TensorShape(1U, 3U, 32U, 32U), TensorShape(32U), TensorShape(224U, 222U, 32U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 113U, 32U), TensorShape(1U, 3U, 32U, 64U), TensorShape(64U), TensorShape(112U, 113U, 64U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); + 
add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(1U, 3U, 128U, 128U), TensorShape(128U), TensorShape(56U, 54U, 128U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1)); + } +}; + +class LargeWinogradConvolutionLayer1x3DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x3DatasetFp16Subset() + { + // Kernel size 3 + // Batch size 1 + add_config(TensorShape(112U, 112U, 64U), TensorShape(1U, 3U, 64U, 129U), TensorShape(129U), TensorShape(112U, 110U, 129U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(53U, 56U, 125U), TensorShape(1U, 3U, 125U, 128U), TensorShape(128U), TensorShape(53U, 56U, 128U), PadStrideInfo(1, 1, 0, 1)); + add_config(TensorShape(28U, 28U, 257U), TensorShape(1U, 3U, 257U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(1, 1, 0, 1)); + + // Batch > 1 + add_config(TensorShape(111U, 112U, 127U, 4U), TensorShape(1U, 3U, 127U, 64U), TensorShape(64U), TensorShape(111U, 112U, 64U, 4U), PadStrideInfo(1, 1, 0, 1)); } }; @@ -110,15 +149,27 @@ public: { // Kernel size 5 // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 2)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 5U, 3U, 32U), TensorShape(32U), TensorShape(220U, 220U, 32U), PadStrideInfo(1, 1, 0, 0)); 
add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 5U, 3U, 64U), TensorShape(64U), TensorShape(220U, 220U, 64U, 2U), PadStrideInfo(1, 1, 0, 0)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2)); + } +}; + +class LargeWinogradConvolutionLayer5x5DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer5x5DatasetFp16Subset() + { + // Kernel size 5 + // Batch size 1 + add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 2)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 2)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 5U, 42U, 100U), TensorShape(100U), TensorShape(177U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -128,15 +179,26 @@ public: LargeWinogradConvolutionLayer5x1Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 2, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 2, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 
32U), PadStrideInfo(1, 1, 2, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(5U, 1U, 42U, 100U), TensorShape(100U), TensorShape(177U, 152U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(5U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 2, 0)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0)); + } +}; + +class LargeWinogradConvolutionLayer5x1DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer5x1DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(224U, 224U, 3U), TensorShape(5U, 1U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 2, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(5U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 2, 0)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(5U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 2, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(5U, 1U, 42U, 100U), TensorShape(100U), TensorShape(177U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -146,15 +208,12 @@ public: LargeWinogradConvolutionLayer7x1Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(218U, 224U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 3, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 1U, 3U, 32U), TensorShape(32U), 
TensorShape(218U, 224U, 32U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(7U, 1U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 3, 0)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(7U, 1U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 3, 0)); + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(7U, 1U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 3, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(7U, 1U, 42U, 100U), TensorShape(100U), TensorShape(175U, 152U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -164,15 +223,26 @@ public: LargeWinogradConvolutionLayer1x7Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 218U, 64U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U), PadStrideInfo(1, 1, 0, 3)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 7U, 3U, 32U), TensorShape(32U), TensorShape(224U, 218U, 32U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 3)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 7U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 3)); + // Batch > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), 
TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3)); + } +}; + +class LargeWinogradConvolutionLayer1x7DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x7DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 7U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 3)); + + // Batch > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 7U, 16U, 7U), TensorShape(7U), TensorShape(123U, 134U, 7U, 3U), PadStrideInfo(1, 1, 0, 3)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 7U, 42U, 100U), TensorShape(100U), TensorShape(181U, 146U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -182,15 +252,26 @@ public: LargeWinogradConvolutionLayer1x5Dataset() { // Batch size 1 - add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U), PadStrideInfo(1, 1, 0, 2)); - add_config(TensorShape(123U, 134U, 16U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2)); add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); add_config(TensorShape(200U, 201U, 24U), TensorShape(1U, 5U, 24U, 61), TensorShape(61U), TensorShape(200U, 201U, 61), PadStrideInfo(1, 1, 0, 2)); - // Batch size 2, 3 and 4 - add_config(TensorShape(224U, 224U, 3U, 2U), TensorShape(1U, 5U, 3U, 64U), TensorShape(64U), TensorShape(224U, 224U, 64U, 2U), PadStrideInfo(1, 1, 0, 2)); + // Batch size > 1 + add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 
7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0)); + } +}; + +class LargeWinogradConvolutionLayer1x5DatasetFp16Subset final : public ConvolutionLayerDataset +{ +public: + LargeWinogradConvolutionLayer1x5DatasetFp16Subset() + { + // Batch size 1 + add_config(TensorShape(224U, 224U, 3U), TensorShape(1U, 5U, 3U, 32U), TensorShape(32U), TensorShape(224U, 224U, 32U), PadStrideInfo(1, 1, 0, 2)); + add_config(TensorShape(181U, 152U, 42U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U), PadStrideInfo(1, 1, 0, 0)); + + // Batch size > 1 add_config(TensorShape(123U, 134U, 16U, 3U), TensorShape(1U, 5U, 16U, 7U), TensorShape(7U), TensorShape(123U, 130U, 7U, 3U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(181U, 152U, 42U, 4U), TensorShape(1U, 5U, 42U, 100U), TensorShape(100U), TensorShape(181U, 148U, 100U, 4U), PadStrideInfo(1, 1, 0, 0)); } }; @@ -213,6 +294,16 @@ public: } }; +class VeryLargeConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + VeryLargeConvolutionLayerDataset() + { + // Tensor size > 1e7 bytes && weight dimensions > 7 + add_config(TensorShape(336U, 336U, 32U), TensorShape(9U, 9U, 32U, 64U), TensorShape(64U), TensorShape(168U, 168U, 64U), PadStrideInfo(2, 2, 4, 4)); + } +}; + class LargeGroupedConvolutionLayerDataset final : public ConvolutionLayerDataset { public: @@ -233,4 +324,4 @@ public: } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LARGE_CONVOLUTION_LAYER_DATASET */ +#endif // ACL_TESTS_DATASETS_LARGECONVOLUTIONLAYERDATASET_H diff --git a/tests/datasets/LargeGEMMDataset.h b/tests/datasets/LargeGEMMDataset.h index 6cdff7f559..e45319ef57 100644 --- a/tests/datasets/LargeGEMMDataset.h +++ b/tests/datasets/LargeGEMMDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2019, 2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_TEST_LARGE_GEMM_DATASET -#define ARM_COMPUTE_TEST_LARGE_GEMM_DATASET +#ifndef ACL_TESTS_DATASETS_LARGEGEMMDATASET_H +#define ACL_TESTS_DATASETS_LARGEGEMMDATASET_H #include "tests/datasets/GEMMDataset.h" @@ -79,7 +79,20 @@ public: add_config(TensorShape(1729U, 17U, 10U, 3U), TensorShape(128U, 1729U), TensorShape(128U), TensorShape(128U, 17U, 10U, 3U), 1.0f, 0.3f); } }; + +class LargeAccumulateGEMMDataset final : public GEMMDataset +{ +public: + LargeAccumulateGEMMDataset() + { + add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), TensorShape(871U, 429U), 1.0f, 0.0f); + add_config(TensorShape(1021U, 1U), TensorShape(783U, 1021U), TensorShape(783U, 1U), TensorShape(783U, 1U), 1.0f, 0.0f); + add_config(TensorShape(1021U, 1U), TensorShape(783U, 1021U), TensorShape(783U, 1U), TensorShape(783U, 1U), 1.0f, 0.0f); + add_config(TensorShape(941U, 1U), TensorShape(623U, 941U), TensorShape(623U, 1U), TensorShape(623U, 1U), 1.0f, 0.0f); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_LARGE_GEMM_DATASET */ +#endif // ACL_TESTS_DATASETS_LARGEGEMMDATASET_H diff --git a/tests/datasets/LargeMatMulDataset.h b/tests/datasets/LargeMatMulDataset.h new file mode 100644 index 0000000000..8f6c000d37 --- /dev/null +++ b/tests/datasets/LargeMatMulDataset.h @@ -0,0 +1,82 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_DATASETS_LARGEMATMULDATASET +#define ACL_TESTS_DATASETS_LARGEMATMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class LargeMatMulDataset final : public MatMulDataset +{ +public: + LargeMatMulDataset() + { + add_config(TensorShape(21U, 13U, 3U, 2U), TensorShape(33U, 21U, 3U, 2U), TensorShape(33U, 13U, 3U, 2U)); + add_config(TensorShape(38U, 12U, 1U, 5U), TensorShape(21U, 38U, 1U, 5U), TensorShape(21U, 12U, 1U, 5U)); + add_config(TensorShape(45U, 38U, 3U, 2U), TensorShape(21U, 45U, 3U, 2U), TensorShape(21U, 38U, 3U, 2U)); + } +}; + +class HighDimensionalMatMulDataset final : public MatMulDataset +{ +public: + HighDimensionalMatMulDataset() + { + add_config(TensorShape(5U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U)); // 6D tensor + } +}; + +class LargeMatMulDatasetRhsExportToCLImageRhsNT final : public MatMulDataset +{ +public: + // For shape choices, please refer to the explanations given in SmallMatMulDatasetRhsExportToCLImageRhsNT + LargeMatMulDatasetRhsExportToCLImageRhsNT() + { + add_config(TensorShape(21U, 13U, 3U, 2U), TensorShape(32U, 21U, 3U, 2U), TensorShape(32U, 13U, 3U, 2U)); + add_config(TensorShape(38U, 12U, 1U, 5U, 2U), TensorShape(20U, 38U, 1U, 5U, 2U), TensorShape(20U, 12U, 1U, 5U, 2U)); + add_config(TensorShape(45U, 38U, 3U, 2U, 3U), TensorShape(20U, 45U, 3U, 2U, 3U), TensorShape(20U, 38U, 3U, 2U, 3U)); + } +}; +class LargeMatMulDatasetRhsExportToCLImageRhsT final : public MatMulDataset +{ +public: + // For shape choices, please refer to the explanations given in SmallMatMulDatasetRhsExportToCLImageRhsT + LargeMatMulDatasetRhsExportToCLImageRhsT() + { + add_config(TensorShape(28U, 13U, 3U, 2U), TensorShape(32U, 28U, 3U, 2U), TensorShape(32U, 13U, 3U, 2U)); + add_config(TensorShape(40U, 12U, 1U, 5U, 2U), TensorShape(20U, 
40U, 1U, 5U, 2U), TensorShape(20U, 12U, 1U, 5U, 2U)); + add_config(TensorShape(44U, 38U, 3U, 2U, 3U), TensorShape(20U, 44U, 3U, 2U, 3U), TensorShape(20U, 38U, 3U, 2U, 3U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_LARGEMATMULDATASET */ diff --git a/tests/datasets/LargeMatMulMMULDataset.h b/tests/datasets/LargeMatMulMMULDataset.h new file mode 100644 index 0000000000..23e0b3e5c8 --- /dev/null +++ b/tests/datasets/LargeMatMulMMULDataset.h @@ -0,0 +1,64 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ACL_TESTS_DATASETS_LARGEMATMULMMULDATASET +#define ACL_TESTS_DATASETS_LARGEMATMULMMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** MatMul MMUL shapes are similar to MatMul shapes except that K has to be a multiple of MMUL_K0 which is 4 (e.g. see src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp for the definition) + */ +class LargeMatMulMMULDataset final : public MatMulDataset +{ +public: + LargeMatMulMMULDataset() + { + add_config(TensorShape(24U, 13U, 3U, 2U), TensorShape(33U, 24U, 3U, 2U), TensorShape(33U, 13U, 3U, 2U)); + add_config(TensorShape(36U, 12U, 1U, 5U), TensorShape(21U, 36U, 1U, 5U), TensorShape(21U, 12U, 1U, 5U)); + add_config(TensorShape(44U, 38U, 3U, 2U), TensorShape(21U, 44U, 3U, 2U), TensorShape(21U, 38U, 3U, 2U)); + } +}; + +class HighDimensionalMatMulMMULDataset final : public MatMulDataset +{ +public: + HighDimensionalMatMulMMULDataset() + { + add_config(TensorShape(4U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 4U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U)); // 6D tensor + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute + +#endif /* ACL_TESTS_DATASETS_LARGEMATMULMMULDATASET */ diff --git a/tests/datasets/MatMulDataset.h b/tests/datasets/MatMulDataset.h new file mode 100644 index 0000000000..9c1c5fb05d --- /dev/null +++ b/tests/datasets/MatMulDataset.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_DATASETS_MATMULDATASET +#define ACL_TESTS_DATASETS_MATMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class MatMulDataset +{ +public: + using type = std::tuple<TensorShape, TensorShape, TensorShape>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator a_it, + std::vector<TensorShape>::const_iterator b_it, + std::vector<TensorShape>::const_iterator dst_it) + : _a_it{ std::move(a_it) }, + _b_it{ std::move(b_it) }, + _dst_it{ std::move(dst_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "A=" << *_a_it << ":"; + description << "B=" << *_b_it << ":"; + description << "Out=" << *_dst_it << ":"; + return description.str(); + } + + MatMulDataset::type operator*() const + { + return std::make_tuple(*_a_it, *_b_it, *_dst_it); + } + + iterator &operator++() + { + ++_a_it; + ++_b_it; + ++_dst_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _a_it; + std::vector<TensorShape>::const_iterator _b_it; + std::vector<TensorShape>::const_iterator _dst_it; + }; + + iterator begin() const + { + return iterator(_a_shapes.begin(), _b_shapes.begin(), _dst_shapes.begin()); + } + + int size() const + { + return std::min(_a_shapes.size(), std::min(_b_shapes.size(), _dst_shapes.size())); + } + + void add_config(TensorShape a, TensorShape b, TensorShape dst) + { + _a_shapes.emplace_back(std::move(a)); + _b_shapes.emplace_back(std::move(b)); + _dst_shapes.emplace_back(std::move(dst)); + } + +protected: + MatMulDataset() = default; + MatMulDataset(MatMulDataset &&) = default; + +private: + std::vector<TensorShape> _a_shapes{}; + std::vector<TensorShape> _b_shapes{}; + std::vector<TensorShape> _dst_shapes{}; +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_MATMULDATASET */ diff --git 
a/tests/datasets/MatMulLowpMMULDataset.h b/tests/datasets/MatMulLowpMMULDataset.h new file mode 100644 index 0000000000..1b22e1061f --- /dev/null +++ b/tests/datasets/MatMulLowpMMULDataset.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_DATASETS_MATMULLOWPMMULDATASET_H +#define ACL_TESTS_DATASETS_MATMULLOWPMMULDATASET_H + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** MatMulLowp MMUL shapes are similar to MatMul MMUL shapes except that K has to be a + * multiple of MMUL_K0 which is 16 (e.g. 
see src/gpu/cl/kernels/ClMatMulLowpNativeMMULKernel.cpp for the definition) + */ +class SmallMatMulLowpMMULDataset final : public MatMulDataset +{ +public: + SmallMatMulLowpMMULDataset() + { + add_config(TensorShape(16U, 4U), TensorShape(4U, 16U), TensorShape(4U, 4U)); // same as mmul block + add_config(TensorShape(96U, 1U), TensorShape(1U, 96U), TensorShape(1U, 1U)); // vector x vector + add_config(TensorShape(32U, 4U, 2U), TensorShape(16U, 32U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(48U, 2U), TensorShape(17U, 48U), TensorShape(17U, 2U)); + add_config(TensorShape(32U, 6U), TensorShape(7U, 32U), TensorShape(7U, 6U)); + } +}; + +// This dataset is for smaller number of tests that will still use small shapes +// e.g. not repeating everything for QASYMM8 while we're already testing for QASYMM8_SIGNED +class SmallMatMulLowpMMULDatasetSubset final : public MatMulDataset +{ +public: + SmallMatMulLowpMMULDatasetSubset() + { + add_config(TensorShape(32U, 4U, 2U), TensorShape(16U, 32U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 6U), TensorShape(7U, 32U), TensorShape(7U, 6U)); + } +}; + +class SmallMatMulLowpMMULWithBiasDataset final : public MatMulDataset +{ +public: + SmallMatMulLowpMMULWithBiasDataset() + { + add_config(TensorShape(32U, 4U, 2U, 2U), TensorShape(16U, 32U, 2U, 2U), TensorShape(16U, 4U, 2U, 2U)); + } +}; + +class LargeMatMulLowpMMULDataset final : public MatMulDataset +{ +public: + LargeMatMulLowpMMULDataset() + { + add_config(TensorShape(192U, 38U, 3U, 2U), TensorShape(21U, 192U, 3U, 2U), TensorShape(21U, 38U, 3U, 2U)); + } +}; + +class HighDimensionalMatMulLowpMMULDataset final : public MatMulDataset +{ +public: + HighDimensionalMatMulLowpMMULDataset() + { + add_config(TensorShape(16U, 5U, 2U, 2U, 2U, 2U), TensorShape(5U, 16U, 2U, 2U, 2U, 2U), TensorShape(5U, 5U, 2U, 2U, 2U, 2U)); // 6D tensor + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute + +#endif // 
ACL_TESTS_DATASETS_MATMULLOWPMMULDATASET_H diff --git a/tests/datasets/Pooling3dLayerDataset.h b/tests/datasets/Pooling3dLayerDataset.h new file mode 100644 index 0000000000..cfe970e8be --- /dev/null +++ b/tests/datasets/Pooling3dLayerDataset.h @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2022 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_POOLING_3D_LAYER_DATASET +#define ARM_COMPUTE_TEST_POOLING_3D_LAYER_DATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class Pooling3dLayerDataset +{ +public: + using type = std::tuple<TensorShape, Pooling3dLayerInfo>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<Pooling3dLayerInfo>::const_iterator infos_it) + : _src_it{ std::move(src_it) }, + _infos_it{ std::move(infos_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_src_it << ":"; + description << "Info=" << *_infos_it << ":"; + return description.str(); + } + + Pooling3dLayerDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_infos_it); + } + + iterator &operator++() + { + ++_src_it; + ++_infos_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _src_it; + std::vector<Pooling3dLayerInfo>::const_iterator _infos_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _infos.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), _infos.size()); + } + + void add_config(TensorShape src, Pooling3dLayerInfo info) + { + _src_shapes.emplace_back(std::move(src)); + _infos.emplace_back(std::move(info)); + } + +protected: + Pooling3dLayerDataset() = default; + Pooling3dLayerDataset(Pooling3dLayerDataset &&) = default; + +private: + std::vector<TensorShape> _src_shapes{}; + std::vector<Pooling3dLayerInfo> _infos{}; +}; + +// Special pooling dataset +class Pooling3dLayerDatasetSpecial final : public Pooling3dLayerDataset +{ +public: + Pooling3dLayerDatasetSpecial() + { + // Special cases + add_config(TensorShape(2U, 3U, 4U, 2U, 4U), Pooling3dLayerInfo(PoolingType::AVG, /*pool size*/ Size3D(2, 2, 1), /*pool strides*/ Size3D(3, 3, 1), /*pool 
padding*/ Padding3D(0, 0, 0), true)); + add_config(TensorShape(20U, 22U, 10U, 2U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(100, 100, 100), Size3D(5, 5, 5), Padding3D(50, 50, 50), true)); + add_config(TensorShape(10U, 20U, 32U, 3U, 2U), Pooling3dLayerInfo(PoolingType::MAX, /*pool size*/ 3, /*pool strides*/ Size3D(2, 2, 2), Padding3D(1, 1, 1, 1, 1, 1), false, false, + DimensionRoundingType::FLOOR)); + add_config(TensorShape(14U, 10U, 10U, 3U, 5U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(3, 3, 3), /*pool strides*/ Size3D(3, 3, 3), Padding3D(2, 1, 2), true, false, DimensionRoundingType::CEIL)); + add_config(TensorShape(14U, 10U, 10U, 2U, 4U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(3, 3, 3), /*pool strides*/ Size3D(3, 3, 3), Padding3D(2, 1, 2), false, false, DimensionRoundingType::CEIL)); + add_config(TensorShape(15U, 13U, 13U, 3U, 5U), Pooling3dLayerInfo(PoolingType::AVG, Size3D(4, 4, 4), /*pool strides*/ Size3D(2, 2, 2), Padding3D(2, 2, 2), true, false, DimensionRoundingType::CEIL)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_POOLING_3D_LAYER_DATASET */ diff --git a/tests/datasets/ReorderLayerDataset.h b/tests/datasets/ReorderLayerDataset.h new file mode 100644 index 0000000000..8e1a8422b2 --- /dev/null +++ b/tests/datasets/ReorderLayerDataset.h @@ -0,0 +1,158 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_DATASETS_REORDERLAYERDATASET +#define ACL_TESTS_DATASETS_REORDERLAYERDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** [ReorderLayer datasets] **/ +class ReorderLayerDataset +{ +public: + using type = std::tuple<TensorShape, TensorShape, WeightFormat, WeightFormat>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator in_it, + std::vector<TensorShape>::const_iterator out_it, + std::vector<WeightFormat>::const_iterator _wf_in_it, + std::vector<WeightFormat>::const_iterator _wf_out_it) + : _in_it{ std::move(in_it) }, + _out_it{ std::move(out_it) }, + _wf_in_it{ std::move(_wf_in_it) }, + _wf_out_it{ std::move(_wf_out_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_in_it << ":"; + description << "Out=" << *_out_it << ":"; + description << "Wf_In=" << *_wf_in_it << ":"; + description << "Wf_Out=" << *_wf_out_it; + return description.str(); + } + + ReorderLayerDataset::type operator*() const + { + return std::make_tuple(*_in_it, *_out_it, *_wf_in_it, *_wf_out_it); + } + + iterator &operator++() + { + ++_in_it; + ++_out_it; + ++_wf_in_it; + ++_wf_out_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _in_it; + std::vector<TensorShape>::const_iterator _out_it; + std::vector<WeightFormat>::const_iterator _wf_in_it; + std::vector<WeightFormat>::const_iterator _wf_out_it; + }; + + iterator begin() const + { + return iterator(_in_shapes.begin(), _out_shapes.begin(), _in_wfs.begin(), _out_wfs.begin()); + } + + int size() const + { + return std::min(_in_shapes.size(), std::min(_out_shapes.size(), std::min(_in_wfs.size(), _out_wfs.size()))); + } + + void add_config(TensorShape in, TensorShape out, WeightFormat in_wf, WeightFormat out_wf) + { + _in_shapes.emplace_back(std::move(in)); + 
_out_shapes.emplace_back(std::move(out)); + _in_wfs.emplace_back(std::move(in_wf)); + _out_wfs.emplace_back(std::move(out_wf)); + } + + // protected: + ReorderLayerDataset() = default; + ReorderLayerDataset(ReorderLayerDataset &&) = default; + + private: + std::vector<TensorShape> _in_shapes{}; + std::vector<TensorShape> _out_shapes{}; + std::vector<WeightFormat> _in_wfs{}; + std::vector<WeightFormat> _out_wfs{}; +}; + +/** [ReorderLayer datasets] **/ + +class ReorderLayerDatasetBlock4 final : public ReorderLayerDataset +{ + public: + ReorderLayerDatasetBlock4() + { + add_config(TensorShape(10U, 9U), TensorShape(10U, 12U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(16U, 16U), TensorShape(16U, 16U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(10U, 511U), TensorShape(10U, 512U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(234U, 301U), TensorShape(234U, 304U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(1024U, 1024U), TensorShape(1024U, 1024U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(10U, 9U, 1U, 1U), TensorShape(10U, 12U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(16U, 16U, 1U, 1U), TensorShape(16U, 16U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(10U, 511U, 1U, 1U), TensorShape(10U, 512U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(234U, 301U, 1U, 1U), TensorShape(234U, 304U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + add_config(TensorShape(1024U, 1024U, 1U, 1U), TensorShape(1024U, 1024U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo4); + } +}; + +class ReorderLayerDatasetBlock8 final : public ReorderLayerDataset +{ + public: + ReorderLayerDatasetBlock8() + { + add_config(TensorShape(10U, 9U), TensorShape(10U, 16U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(16U, 16U), TensorShape(16U, 16U), 
WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(10U, 511U), TensorShape(10U, 512U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(234U, 301U), TensorShape(234U, 304U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(1024U, 1024U), TensorShape(1024U, 1024U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(10U, 9U, 1U, 1U), TensorShape(10U, 16U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(16U, 16U, 1U, 1U), TensorShape(16U, 16U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(10U, 511U, 1U, 1U), TensorShape(10U, 512U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(234U, 301U, 1U, 1U), TensorShape(234U, 304U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + add_config(TensorShape(1024U, 1024U, 1U, 1U), TensorShape(1024U, 1024U, 1U, 1U), WeightFormat::OHWI, WeightFormat::OHWIo8); + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_REORDERLAYERDATASET */ diff --git a/tests/datasets/ReshapeLayerDataset.h b/tests/datasets/ReshapeLayerDataset.h index d1a1667683..015f9157aa 100644 --- a/tests/datasets/ReshapeLayerDataset.h +++ b/tests/datasets/ReshapeLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 Arm Limited. + * Copyright (c) 2017-2018, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_RESHAPE_LAYER_DATASET -#define ARM_COMPUTE_TEST_RESHAPE_LAYER_DATASET +#ifndef ACL_TESTS_DATASETS_RESHAPELAYERDATASET_H +#define ACL_TESTS_DATASETS_RESHAPELAYERDATASET_H #include "utils/TypePrinter.h" @@ -111,9 +111,10 @@ public: add_config(TensorShape(17U, 3U, 12U), TensorShape(1U, 1U, 612U)); add_config(TensorShape(26U, 26U, 32U), TensorShape(13U, 13U, 128U)); add_config(TensorShape(31U, 23U, 4U, 7U), TensorShape(2U, 14U, 713U)); + add_config(TensorShape(8U, 8U, 8U), TensorShape(8U, 64U)); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_RESHAPE_LAYER_DATASET */ +#endif // ACL_TESTS_DATASETS_RESHAPELAYERDATASET_H diff --git a/tests/datasets/ScaleValidationDataset.h b/tests/datasets/ScaleValidationDataset.h index c0073f93f5..8987c3a1c1 100644 --- a/tests/datasets/ScaleValidationDataset.h +++ b/tests/datasets/ScaleValidationDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,15 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET -#define ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET +#ifndef TESTS_DATASETS_SCALEVALIDATIONDATASET +#define TESTS_DATASETS_SCALEVALIDATIONDATASET -#include "utils/TypePrinter.h" - -#include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "tests/datasets/BorderModeDataset.h" -#include "tests/datasets/InterpolationPolicyDataset.h" #include "tests/datasets/SamplingPolicyDataset.h" #include "tests/datasets/ShapeDatasets.h" @@ -147,9 +143,9 @@ framework::dataset::make("AlignCorners", { true })); */ #define SCALE_SHAPE_DATASET(element_per_iteration) \ concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(), \ - ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()), \ + ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 2>()), \ ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 1>()), \ - ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>()) + ScaleShapesBaseDataSet<40, 3, (element_per_iteration), 0>()) // To prevent long precommit time for OpenCL, shape set for OpenCL is separated into below two parts. /** Generated shapes for precommits to achieve essential coverage. 
Used by CL precommit and nightly @@ -166,17 +162,34 @@ framework::dataset::make("AlignCorners", { true })); */ #define SCALE_NIGHTLY_SHAPE_DATASET(element_per_iteration) \ concat(concat(concat(ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 0>(), \ - ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 1>()), \ + ScaleShapesBaseDataSet<1, 1, (element_per_iteration), 1>()), \ ScaleShapesBaseDataSet<3, 1, (element_per_iteration), 0>()), \ ScaleShapesBaseDataSet<3, 3, (element_per_iteration), 0>()) -/** Generating dataset for non-quantized data tyeps with the given shapes */ +/** Generating dataset for non-quantized data types with the given shapes */ #define ASSEMBLE_DATASET(shape, samping_policy_set) \ combine(combine(combine(combine((shape), ScaleDataLayouts), \ ScaleInterpolationPolicySet), \ datasets::BorderModes()), \ samping_policy_set) +#define ASSEMBLE_DATASET_DYNAMIC_FUSION(shape, samping_policy_set) \ + combine(combine(combine((shape), framework::dataset::make("DataLayout", { DataLayout::NHWC })), \ + ScaleInterpolationPolicySet), \ + samping_policy_set) + +#define ASSEMBLE_S8_DATASET(shape, samping_policy_set) \ + combine(combine(combine(combine((shape), framework::dataset::make("DataLayout", DataLayout::NHWC)), \ + framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::BILINEAR })), \ + framework::dataset::make("BorderMode", { BorderMode::REPLICATE })), \ + samping_policy_set) + +#define ASSEMBLE_NHWC_DATASET(shape, samping_policy_set) \ + combine(combine(combine(combine((shape), framework::dataset::make("DataLayout", DataLayout::NHWC)), \ + ScaleInterpolationPolicySet), \ + framework::dataset::make("BorderMode", { BorderMode::CONSTANT, BorderMode::REPLICATE })), \ + samping_policy_set) + /** Generating dataset for quantized data tyeps with the given shapes */ #define ASSEMBLE_QUANTIZED_DATASET(shape, sampling_policy_set, quantization_info_set) \ combine(combine(combine(combine(combine(shape, \ @@ -186,7 +199,24 @@ 
framework::dataset::make("AlignCorners", { true })); datasets::BorderModes()), \ sampling_policy_set) +#define ASSEMBLE_QUANTIZED_DATASET_DYNAMIC_FUSION(shape, sampling_policy_set, quantization_info_set) \ + combine(combine(combine(combine(shape, \ + quantization_info_set), \ + framework::dataset::make("DataLayout", { DataLayout::NHWC })), \ + ScaleInterpolationPolicySet), \ + sampling_policy_set) + +/** Generating dataset for differently quantized data types with the given shapes */ +#define ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(shape, sampling_policy_set, input_quant_info_set, output_quant_info_set) \ + combine(combine(combine(combine(combine(combine(shape, \ + input_quant_info_set), \ + output_quant_info_set), \ + framework::dataset::make("DataLayout", { DataLayout::NHWC })), \ + framework::dataset::make("InterpolationPolicy", { InterpolationPolicy::BILINEAR })), \ + framework::dataset::make("BorderMode", { BorderMode::REPLICATE })), \ + sampling_policy_set) + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SCALE_VALIDATION_DATASET */ +#endif /* TESTS_DATASETS_SCALEVALIDATIONDATASET */ diff --git a/tests/datasets/ScatterDataset.h b/tests/datasets/ScatterDataset.h new file mode 100644 index 0000000000..8fd4448d2d --- /dev/null +++ b/tests/datasets/ScatterDataset.h @@ -0,0 +1,228 @@ +/* + * Copyright (c) 2024 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_DATASETS_SCATTERDATASET_H +#define ACL_TESTS_DATASETS_SCATTERDATASET_H + +#include "arm_compute/core/TensorShape.h" +#include "utils/TypePrinter.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ + +class ScatterDataset +{ +public: + using type = std::tuple<TensorShape, TensorShape, TensorShape, TensorShape>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<TensorShape>::const_iterator updates_it, + std::vector<TensorShape>::const_iterator indices_it, + std::vector<TensorShape>::const_iterator dst_it) + : _src_it{ std::move(src_it) }, + _updates_it{ std::move(updates_it) }, + _indices_it{std::move(indices_it)}, + _dst_it{ std::move(dst_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "A=" << *_src_it << ":"; + description << "B=" << *_updates_it << ":"; + description << "C=" << *_indices_it << ":"; + description << "Out=" << *_dst_it << ":"; + return description.str(); + } + + ScatterDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_updates_it, *_indices_it, *_dst_it); + } + + iterator &operator++() + { + ++_src_it; + ++_updates_it; + ++_indices_it; + ++_dst_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _src_it; + std::vector<TensorShape>::const_iterator _updates_it; + std::vector<TensorShape>::const_iterator _indices_it; + std::vector<TensorShape>::const_iterator _dst_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _update_shapes.begin(), _indices_shapes.begin(), _dst_shapes.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), std::min(_indices_shapes.size(), std::min(_update_shapes.size(), _dst_shapes.size()))); + } + + void add_config(TensorShape a, TensorShape b, TensorShape c, TensorShape dst) + { + _src_shapes.emplace_back(std::move(a)); + _update_shapes.emplace_back(std::move(b)); + 
_indices_shapes.emplace_back(std::move(c)); + _dst_shapes.emplace_back(std::move(dst)); + } + +protected: + ScatterDataset() = default; + ScatterDataset(ScatterDataset &&) = default; + +private: + std::vector<TensorShape> _src_shapes{}; + std::vector<TensorShape> _update_shapes{}; + std::vector<TensorShape> _indices_shapes{}; + std::vector<TensorShape> _dst_shapes{}; +}; + + +// 1D dataset for simple scatter tests. +class Small1DScatterDataset final : public ScatterDataset +{ +public: + Small1DScatterDataset() + { + add_config(TensorShape(6U), TensorShape(6U), TensorShape(1U, 6U), TensorShape(6U)); + add_config(TensorShape(10U), TensorShape(2U), TensorShape(1U, 2U), TensorShape(10U)); + } +}; + +// This dataset represents the (m+1)-D updates/dst case. +class SmallScatterMultiDimDataset final : public ScatterDataset +{ +public: + SmallScatterMultiDimDataset() + { + // NOTE: Config is src, updates, indices, output. + // - In this config, the dim replaced is the final number (largest tensor dimension) + // - Largest "updates" dim should match y-dim of indices. + // - src/updates/dst should all have same number of dims. Indices should be 2D. + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U), TensorShape(1U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(9U, 3U, 4U), TensorShape(9U, 3U, 2U), TensorShape(1U, 2U), TensorShape(9U, 3U, 4U)); + add_config(TensorShape(17U, 3U, 2U, 4U), TensorShape(17U, 3U, 2U, 7U), TensorShape(1U, 7U), TensorShape(17U, 3U, 2U, 4U)); + } +}; + +// This dataset represents the (m+1)-D updates tensor, (m+n)-d output tensor cases +class SmallScatterMultiIndicesDataset final : public ScatterDataset +{ +public: + SmallScatterMultiIndicesDataset() + { + // NOTE: Config is src, updates, indices, output. 
+ // NOTE: indices.shape.x = src.num_dimensions - updates.num_dimensions + 1 + + // index length is 2 + add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 4U), TensorShape(2U, 4U), TensorShape(6U, 5U, 2U)); + add_config(TensorShape(17U, 3U, 3U, 2U), TensorShape(17U, 3U, 2U), TensorShape(2U, 2U), TensorShape(17U, 3U, 3U, 2U)); + add_config(TensorShape(11U, 3U, 3U, 2U, 4U), TensorShape(11U, 3U, 3U, 4U), TensorShape(2U, 4U), TensorShape(11U, 3U, 3U, 2U, 4U)); + add_config(TensorShape(5U, 4U, 3U, 3U, 2U, 4U), TensorShape(5U, 4U, 3U, 3U, 5U), TensorShape(2U, 5U), TensorShape(5U, 4U, 3U, 3U, 2U, 4U)); + + // index length is 3 + add_config(TensorShape(4U, 3U, 2U, 2U), TensorShape(4U, 2U), TensorShape(3U, 2U), TensorShape(4U, 3U, 2U, 2U)); + add_config(TensorShape(17U, 4U, 3U, 2U, 2U), TensorShape(17U, 4U, 4U), TensorShape(3U, 4U), TensorShape(17U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 5U, 3U), TensorShape(3U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + + // index length is 4 + add_config(TensorShape(35U, 4U, 3U, 2U, 2U), TensorShape(35U, 4U), TensorShape(4U, 4U), TensorShape(35U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 4U, 3U), TensorShape(4U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + + // index length is 5 + add_config(TensorShape(10U, 4U, 5U, 3U, 2U, 2U), TensorShape(10U, 3U), TensorShape(5U, 3U), TensorShape(10U, 4U, 5U, 3U, 2U, 2U)); + } +}; + +// This dataset represents the (m+k)-D updates tensor, (k+1)-d indices tensor and (m+n)-d output tensor cases +class SmallScatterBatchedDataset final : public ScatterDataset +{ +public: + SmallScatterBatchedDataset() + { + // NOTE: Config is src, updates, indices, output. + // NOTE: Updates/Indices tensors are now batched. + // NOTE: indices.shape.x = (updates_batched) ? 
(src.num_dimensions - updates.num_dimensions) + 2 : (src.num_dimensions - updates.num_dimensions) + 1 + // k is the number of batch dimensions + // k = 2 + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U, 2U), TensorShape(1U, 2U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 5U, 6U, 2U), TensorShape(3U, 6U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 3 + add_config(TensorShape(6U, 5U), TensorShape(6U, 2U, 2U, 2U), TensorShape(1U, 2U, 2U, 2U), TensorShape(6U, 5U)); + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 5U, 3U, 6U, 2U), TensorShape(3U, 3U, 6U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 4 + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 6U, 2U, 3U, 2U), TensorShape(4U, 6U, 2U, 3U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + + // k = 5 + add_config(TensorShape(5U, 5U, 4U, 2U, 2U), TensorShape(5U, 3U, 4U, 3U, 2U, 2U), TensorShape(4U, 3U, 4U, 3U, 2U, 2U), TensorShape(5U, 5U, 4U, 2U, 2U)); + } +}; + +class SmallScatterScalarDataset final : public ScatterDataset +{ +public: + // batched scalar case + SmallScatterScalarDataset() + { + add_config(TensorShape(6U, 5U), TensorShape(6U), TensorShape(2U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(6U, 5U), TensorShape(6U, 6U), TensorShape(2U, 6U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(3U, 3U, 6U, 5U), TensorShape(6U, 6U), TensorShape(4U, 6U, 6U), TensorShape(3U, 3U, 6U, 5U)); + } +}; + +// This dataset is for data types that does not require full testing. It contains selected tests from the above. 
+class SmallScatterMixedDataset final : public ScatterDataset +{ +public: + SmallScatterMixedDataset() + { + add_config(TensorShape(10U), TensorShape(2U), TensorShape(1U, 2U), TensorShape(10U)); + add_config(TensorShape(9U, 3U, 4U), TensorShape(9U, 3U, 2U), TensorShape(1U, 2U), TensorShape(9U, 3U, 4U)); + add_config(TensorShape(6U, 5U), TensorShape(6U, 6U), TensorShape(2U, 6U, 6U), TensorShape(6U, 5U)); + add_config(TensorShape(35U, 4U, 3U, 2U, 2U), TensorShape(35U, 4U), TensorShape(4U, 4U), TensorShape(35U, 4U, 3U, 2U, 2U)); + add_config(TensorShape(11U, 3U, 3U, 2U, 4U), TensorShape(11U, 3U, 3U, 4U), TensorShape(2U, 4U), TensorShape(11U, 3U, 3U, 2U, 4U)); + add_config(TensorShape(6U, 5U, 2U), TensorShape(6U, 2U, 2U), TensorShape(2U, 2U, 2U), TensorShape(6U, 5U, 2U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif // ACL_TESTS_DATASETS_SCATTERDATASET_H diff --git a/tests/datasets/ShapeDatasets.h b/tests/datasets/ShapeDatasets.h index 37c5f1626d..c1e61444a8 100644 --- a/tests/datasets/ShapeDatasets.h +++ b/tests/datasets/ShapeDatasets.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -135,7 +135,7 @@ public: Tiny4DShapes() : ShapeDataset("Shape", { - TensorShape{ 7U, 7U, 5U, 3U }, + TensorShape{ 2U, 7U, 5U, 3U }, TensorShape{ 17U, 13U, 7U, 2U }, }) { @@ -171,6 +171,26 @@ public: { } }; +/** Data set containing small tensor shapes with none of the dimensions equal to 1 (unit). */ +class SmallNoneUnitShapes final : public ShapeDataset +{ +public: + SmallNoneUnitShapes() + : ShapeDataset("Shape", + { + // Batch size 1 + TensorShape{ 13U, 11U }, + TensorShape{ 16U, 16U }, + TensorShape{ 24U, 26U, 5U }, + TensorShape{ 7U, 7U, 17U, 2U }, + // Batch size 4 + TensorShape{ 27U, 13U, 2U, 4U }, + // Arbitrary batch size + TensorShape{ 8U, 7U, 5U, 5U } + }) + { + } +}; /** Data set containing small tensor shapes. 
*/ class SmallShapes final : public ShapeDataset { @@ -179,12 +199,12 @@ public: : ShapeDataset("Shape", { // Batch size 1 - TensorShape{ 11U, 11U }, - TensorShape{ 16U, 16U }, + TensorShape{ 3U, 11U }, + TensorShape{ 1U, 16U }, TensorShape{ 27U, 13U, 7U }, TensorShape{ 7U, 7U, 17U, 2U }, - // Batch size 4 - TensorShape{ 27U, 13U, 2U, 4U }, + // Batch size 4 and 2 SIMD iterations + TensorShape{ 33U, 13U, 2U, 4U }, // Arbitrary batch size TensorShape{ 11U, 11U, 3U, 5U } }) @@ -192,6 +212,25 @@ public: } }; +/** Data set containing small tensor shapes. */ +class SmallShapesNoBatches final : public ShapeDataset +{ +public: + SmallShapesNoBatches() + : ShapeDataset("Shape", + { + // Batch size 1 + TensorShape{ 3U, 11U }, + TensorShape{ 1U, 16U }, + TensorShape{ 27U, 13U, 7U }, + TensorShape{ 7U, 7U, 17U }, + TensorShape{ 33U, 13U, 2U }, + TensorShape{ 11U, 11U, 3U } + }) + { + } +}; + /** Data set containing pairs of tiny tensor shapes that are broadcast compatible. */ class TinyShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { @@ -211,6 +250,25 @@ public: { } }; +/** Data set containing pairs of tiny tensor shapes that are broadcast compatible and can do in_place calculation. */ +class TinyShapesBroadcastInplace final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TinyShapesBroadcastInplace() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 9U }, + TensorShape{ 10U, 2U, 14U, 2U }, + }), + ShapeDataset("Shape1", + { + TensorShape{ 9U, 1U, 9U }, + TensorShape{ 10U }, + })) + { + } +}; /** Data set containing pairs of small tensor shapes that are broadcast compatible. 
*/ class SmallShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { @@ -243,6 +301,52 @@ public: } }; +class TemporaryLimitedSmallShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TemporaryLimitedSmallShapesBroadcast() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 1U, 3U, 4U, 2U }, // LHS broadcast X + TensorShape{ 6U, 4U, 2U, 3U }, // RHS broadcast X + TensorShape{ 7U, 1U, 1U, 4U }, // LHS broadcast Y, Z + TensorShape{ 8U, 5U, 6U, 3U }, // RHS broadcast Y, Z + TensorShape{ 1U, 1U, 1U, 2U }, // LHS broadcast X, Y, Z + TensorShape{ 2U, 6U, 4U, 3U }, // RHS broadcast X, Y, Z + }), + ShapeDataset("Shape1", + { + TensorShape{ 5U, 3U, 4U, 2U }, + TensorShape{ 1U, 4U, 2U, 3U }, + TensorShape{ 7U, 2U, 3U, 4U }, + TensorShape{ 8U, 1U, 1U, 3U }, + TensorShape{ 4U, 7U, 3U, 2U }, + TensorShape{ 1U, 1U, 1U, 3U }, + })) + { + } +}; + +class TemporaryLimitedLargeShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> +{ +public: + TemporaryLimitedLargeShapesBroadcast() + : ZipDataset<ShapeDataset, ShapeDataset>( + ShapeDataset("Shape0", + { + TensorShape{ 127U, 25U, 5U }, + TensorShape{ 485, 40U, 10U } + }), + ShapeDataset("Shape1", + { + TensorShape{ 1U, 1U, 1U }, // Broadcast in X, Y, Z + TensorShape{ 485U, 1U, 1U }, // Broadcast in Y, Z + })) + { + } +}; + /** Data set containing medium tensor shapes. */ class MediumShapes final : public ShapeDataset { @@ -320,6 +424,19 @@ public: } }; +/** Data set containing large tensor shapes. */ +class LargeShapesNoBatches final : public ShapeDataset +{ +public: + LargeShapesNoBatches() + : ShapeDataset("Shape", + { + TensorShape{ 582U, 131U, 2U }, + }) + { + } +}; + /** Data set containing pairs of large tensor shapes that are broadcast compatible. 
*/ class LargeShapesBroadcast final : public framework::dataset::ZipDataset<ShapeDataset, ShapeDataset> { @@ -501,6 +618,21 @@ public: } }; +/** Data set containing small 5D tensor shapes. */ +class Small5dShapes final : public ShapeDataset +{ +public: + Small5dShapes() + : ShapeDataset("Shape", + { + TensorShape{ 5U, 5U, 7U, 4U, 3U }, + TensorShape{ 5U, 5U, 4U, 13U, 2U }, + TensorShape{ 5U, 5U, 3U, 5U, 2U }, + }) + { + } +}; + /** Data set containing large 5x5 tensor shapes. */ class Large5x5Shapes final : public ShapeDataset { @@ -514,6 +646,19 @@ public: } }; +/** Data set containing large 5D tensor shapes. */ +class Large5dShapes final : public ShapeDataset +{ +public: + Large5dShapes() + : ShapeDataset("Shape", + { + TensorShape{ 30U, 40U, 30U, 32U, 3U } + }) + { + } +}; + /** Data set containing small 5x1 tensor shapes. */ class Small5x1Shapes final : public ShapeDataset { @@ -651,6 +796,7 @@ public: SmallDeconvolutionShapes() : ShapeDataset("InputShape", { + // Multiple Vector Loops for FP32 TensorShape{ 5U, 4U, 3U, 2U }, TensorShape{ 5U, 5U, 3U }, TensorShape{ 11U, 13U, 4U, 3U } @@ -659,6 +805,19 @@ public: } }; +class SmallDeconvolutionShapesWithLargerChannels final : public ShapeDataset +{ +public: + SmallDeconvolutionShapesWithLargerChannels() + : ShapeDataset("InputShape", + { + // Multiple Vector Loops for all data types + TensorShape{ 5U, 5U, 35U } + }) + { + } +}; + /** Data set containing tiny tensor shapes for direct convolution. */ class TinyDirectConvolutionShapes final : public ShapeDataset { @@ -689,6 +848,23 @@ public: } }; +class SmallDirectConv3DShapes final : public ShapeDataset +{ +public: + SmallDirectConv3DShapes() + : ShapeDataset("InputShape", + { + // Batch size 2 + TensorShape{ 1U, 3U, 4U, 5U, 2U }, + // Batch size 3 + TensorShape{ 7U, 27U, 3U, 6U, 3U }, + // Batch size 1 + TensorShape{ 32U, 37U, 13U, 1U, 1U }, + }) + { + } +}; + /** Data set containing small tensor shapes for direct convolution. 
*/ class SmallDirectConvolutionTensorShiftShapes final : public ShapeDataset { diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index 7d1db5a73e..67eade1e64 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -181,6 +181,17 @@ public: } }; +class SmallConvolutionLayerPrePaddingDataset final : public ConvolutionLayerDataset +{ +public: + SmallConvolutionLayerPrePaddingDataset() + { + // output shape is calculated by accounting pre-padding layer as well -- all the data is in nchw + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(17U, 16U, 19U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(12U, 13U, 16U), PadStrideInfo(3, 2, 2, 0)); + } +}; + class SmallConvolutionLayerReducedDataset final : public ConvolutionLayerDataset { public: diff --git a/tests/datasets/SmallGEMMDataset.h b/tests/datasets/SmallGEMMDataset.h index 7d2b42a0d6..99c7abbf64 100644 --- a/tests/datasets/SmallGEMMDataset.h +++ b/tests/datasets/SmallGEMMDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ARM_COMPUTE_TEST_SMALL_GEMM_DATASET -#define ARM_COMPUTE_TEST_SMALL_GEMM_DATASET +#ifndef ACL_TESTS_DATASETS_SMALLGEMMDATASET_H +#define ACL_TESTS_DATASETS_SMALLGEMMDATASET_H #include "tests/datasets/GEMMDataset.h" @@ -50,6 +50,7 @@ public: add_config(TensorShape(32U, 1U), TensorShape(17U, 32U), TensorShape(17U, 1U), TensorShape(17U, 1U), 0.4f, 0.7f); } }; + class SmallGEMMOutput3DDataset final : public GEMMDataset { public: @@ -77,7 +78,37 @@ public: add_config(TensorShape(16U, 16U, 5U, 3U), TensorShape(8U, 16U), TensorShape(8U), TensorShape(8U, 16U, 5U, 3U), 1.0f, 0.3f); } }; + +class SmallBatchedMatMulDataset final : public GEMMDataset +{ +public: + SmallBatchedMatMulDataset() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U), TensorShape(2U, 3U), 1.0f, 0.0f); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U), TensorShape(7U, 15U), 1.0f, 0.0f); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U), TensorShape(36U, 17U), 1.0f, 0.0f); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U), TensorShape(5U, 4U, 3U), 1.0f, 0.0f); + add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U, 36U), TensorShape(29U), TensorShape(29U, 7U, 36U), 1.0f, 0.0f); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U), TensorShape(5U, 17U, 32U), 1.0f, 0.0f); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U), TensorShape(19U, 256U, 32U), 1.0f, 0.0f); + // Broadcast in RHS's batch dimension + add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U), TensorShape(29U), TensorShape(29U, 7U, 36U), 1.0f, 0.0f); + add_config(TensorShape(15U, 7U, 36U, 2U), TensorShape(29U, 15U), TensorShape(29U), TensorShape(29U, 7U, 36U, 2U), 1.0f, 0.0f); + } +}; + +class SmallAccumulateGEMMDataset final : public GEMMDataset +{ +public: + SmallAccumulateGEMMDataset() + { + add_config(TensorShape(8U, 2U), TensorShape(16U, 8U), 
TensorShape(16U, 2U), TensorShape(16U, 2U), 1.0f, 0.0f); + add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U), TensorShape(23U, 1U), 1.0f, 0.0f); + add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), TensorShape(33U, 13U), 1.0f, 0.0f); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_SMALL_GEMM_DATASET */ +#endif // ACL_TESTS_DATASETS_SMALLGEMMDATASET_H diff --git a/tests/datasets/SmallGEMMLowpDataset.h b/tests/datasets/SmallGEMMLowpDataset.h index 1b6c65307b..929940d2d9 100644 --- a/tests/datasets/SmallGEMMLowpDataset.h +++ b/tests/datasets/SmallGEMMLowpDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -58,11 +58,10 @@ public: SmallGEMMLowpOutput3DDataset() { add_config(TensorShape(21U, 14U), TensorShape(34U, 21U), TensorShape(34U, 7U, 2U), 0, 0); - add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U, 1U), -2, 13); - add_config(TensorShape(38U, 12U), TensorShape(21U, 38U), TensorShape(21U, 4U, 3U), 0, 4); - add_config(TensorShape(32U, 1U), TensorShape(17U, 32U), TensorShape(17U, 1U, 1U), -2, 1); - add_config(TensorShape(16U, 16U), TensorShape(8U, 16U), TensorShape(8U, 8U, 2U), 5, 9); - add_config(TensorShape(16U, 16U, 5U), TensorShape(8U, 16U, 5U), TensorShape(8U, 8U, 2U, 5U), -7, 2); + add_config(TensorShape(31U, 1U), TensorShape(3U, 31U), TensorShape(3U, 1U, 1U), -2, 13); + add_config(TensorShape(38U, 12U), TensorShape(1U, 38U), TensorShape(1U, 4U, 3U), 0, 4); + add_config(TensorShape(16U, 16U), TensorShape(11U, 16U), TensorShape(11U, 8U, 2U), 2, -1); + add_config(TensorShape(16U, 16U, 5U), TensorShape(13U, 16U, 5U), TensorShape(13U, 8U, 2U, 5U), -3, 2); } }; class SmallGEMMLowpInputOutput3DDataset final : public GEMMLowpDataset @@ -71,13 +70,28 @@ public: SmallGEMMLowpInputOutput3DDataset() { add_config(TensorShape(21U, 14U, 
13U), TensorShape(34U, 21U), TensorShape(34U, 14U, 13U), 0, 0); - add_config(TensorShape(31U, 1U, 3U), TensorShape(23U, 31U), TensorShape(23U, 1U, 3U), 0, 0); + add_config(TensorShape(31U, 1U, 3U), TensorShape(1U, 31U), TensorShape(1U, 1U, 3U), 0, 0); add_config(TensorShape(38U, 12U, 2U), TensorShape(21U, 38U), TensorShape(21U, 12U, 2U), -2, 13); - add_config(TensorShape(32U, 1U, 4U, 3U), TensorShape(17U, 32U), TensorShape(17U, 1U, 4U, 3U), 0, 4); - add_config(TensorShape(16U, 16U, 3U, 2U), TensorShape(8U, 16U), TensorShape(8U, 16U, 3U, 2U), -2, 0); + add_config(TensorShape(16U, 16U, 3U, 2U), TensorShape(15U, 16U), TensorShape(15U, 16U, 3U, 2U), -2, 0); add_config(TensorShape(16U, 16U, 5U, 3U), TensorShape(8U, 16U), TensorShape(8U, 16U, 5U, 3U), -9, 1); } }; + +class SmallGEMMLowpBatchedMatMulDataset final : public GEMMLowpDataset +{ +public: + SmallGEMMLowpBatchedMatMulDataset() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), 0, 0); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -2, 0); + add_config(TensorShape(15U, 7U, 36U), TensorShape(29U, 15U, 36U), TensorShape(29U, 7U, 36U), -9, 1); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), -3, 2); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 13); + } +}; + } // namespace datasets } // namespace test } // namespace arm_compute diff --git a/tests/datasets/SmallMatMulDataset.h b/tests/datasets/SmallMatMulDataset.h new file mode 100644 index 0000000000..bb4cdad54b --- /dev/null +++ b/tests/datasets/SmallMatMulDataset.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ACL_TESTS_DATASETS_SMALLMATMULDATASET +#define ACL_TESTS_DATASETS_SMALLMATMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class SmallMatMulDataset final : public MatMulDataset +{ +public: + SmallMatMulDataset() + { + add_config(TensorShape(3U, 4U, 2U, 2U), TensorShape(2U, 3U, 2U, 2U), TensorShape(2U, 4U, 2U, 2U)); + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(31U, 1U), TensorShape(23U, 31U), TensorShape(23U, 1U)); + add_config(TensorShape(8U, 4U, 2U), TensorShape(16U, 8U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); + } +}; + +class SmallerMatMulDataset final : public MatMulDataset +{ +public: + SmallerMatMulDataset() + { + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(8U, 4U, 2U), TensorShape(16U, 8U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); + } +}; + +class TinyMatMulDataset final : public MatMulDataset +{ +public: + TinyMatMulDataset() + { + add_config(TensorShape(1U), TensorShape(1U), TensorShape(1U)); + add_config(TensorShape(2U, 2U), TensorShape(2U, 2U), TensorShape(2U, 2U)); + } +}; + +class SmallMatMulDatasetRhsExportToCLImageRhsT final : public MatMulDataset +{ +public: + // Some considerations: + // 1) K dimension should be a multiple of 4 + // See (2), (3), and (4) in SmallMatMulDatasetRhsExportToCLImageRhsNT + SmallMatMulDatasetRhsExportToCLImageRhsT() + { + add_config(TensorShape(8U /*K*/, 3U /*M*/, 2U, 1U, 2U), TensorShape(20U /*N*/, 8U /*K*/, 2U, 1U, 2U), TensorShape(20U /*N*/, 3U /*M*/, 2U, 1U, 2U)); + } +}; + +class SmallMatMulDatasetRhsExportToCLImageRhsNT final : public MatMulDataset +{ +public: + // Some considerations: + // (1) N 
(Dimension 0 of Rhs matrix) dimension should be a multiple of 4 + // (2) Having N=20 enables us to test all possible N0 values, i.e. 4, 8, 16 + // (3) It's important to have more than one loop iterations in the K dimension + // K has been chosen in accordance with K0 + // (4) The 5-th dimension has been chosen as non-unit because export_to_cl_iamge checks + // were using dim1 * dim2 * dim3 to calculate the CLImage height; however, in our case + // the tensor can be > 4D. To stress that case, the fifth dimension is chosen to be non-unit as well + SmallMatMulDatasetRhsExportToCLImageRhsNT() + { + add_config(TensorShape(7U, 3U, 2U, 1U, 2U), TensorShape(20U, 7U, 2U, 1U, 2U), TensorShape(20U, 3U, 2U, 1U, 2U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ACL_TESTS_DATASETS_SMALLMATMULDATASET */ diff --git a/tests/datasets/SmallMatMulMMULDataset.h b/tests/datasets/SmallMatMulMMULDataset.h new file mode 100644 index 0000000000..9e517488af --- /dev/null +++ b/tests/datasets/SmallMatMulMMULDataset.h @@ -0,0 +1,66 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_TESTS_DATASETS_SMALLMATMULMMULDATASET +#define ACL_TESTS_DATASETS_SMALLMATMULMMULDATASET + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/MatMulDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +/** MatMul MMUL shapes are similar to MatMul shapes except that K has to be a multiple of MMUL_K0 which is 4 (e.g. see src/gpu/cl/kernels/ClMatMulNativeMMULKernel.cpp for the definition) + */ +class SmallMatMulMMULDataset final : public MatMulDataset +{ +public: + SmallMatMulMMULDataset() + { + add_config(TensorShape(8U, 4U, 2U, 2U), TensorShape(2U, 8U, 2U, 2U), TensorShape(2U, 4U, 2U, 2U)); + add_config(TensorShape(28U, 1U), TensorShape(23U, 28U), TensorShape(23U, 1U)); + add_config(TensorShape(8U, 4U, 2U), TensorShape(16U, 8U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); + add_config(TensorShape(8U, 6U), TensorShape(7U, 8U), TensorShape(7U, 6U)); + } +}; + +class TinyMatMulMMULDataset final : public MatMulDataset +{ +public: + TinyMatMulMMULDataset() + { + add_config(TensorShape(4U, 4U), TensorShape(4U, 4U), TensorShape(4U, 4U)); + } +}; + +} // namespace datasets +} // namespace test +} // namespace arm_compute + +#endif /* ACL_TESTS_DATASETS_SMALLMATMULMMULDATASET */ diff --git a/tests/datasets/dynamic_fusion/PoolingLayerDataset.h b/tests/datasets/dynamic_fusion/PoolingLayerDataset.h new file mode 100644 index 0000000000..c4911f4940 --- /dev/null +++ b/tests/datasets/dynamic_fusion/PoolingLayerDataset.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "utils/TypePrinter.h" +#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" + + +using Pool2dAttributes = arm_compute::experimental::dynamic_fusion::Pool2dAttributes; + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ + +class DynamicFusionPoolingLayerDataset +{ +public: + using type = std::tuple<TensorShape, Pool2dAttributes>; + + struct iterator + { + iterator(std::vector<TensorShape>::const_iterator src_it, + std::vector<Pool2dAttributes>::const_iterator infos_it) + : _src_it{ std::move(src_it) }, + _infos_it{ std::move(infos_it) } + { + } + + std::string description() const + { + std::stringstream description; + description << "In=" << *_src_it << ":"; + description << "Info=" << *_infos_it << ":"; + return description.str(); + } + + DynamicFusionPoolingLayerDataset::type operator*() const + { + return std::make_tuple(*_src_it, *_infos_it); + } + + iterator &operator++() + { + ++_src_it; + ++_infos_it; + + return *this; + } + + private: + std::vector<TensorShape>::const_iterator _src_it; + std::vector<Pool2dAttributes>::const_iterator _infos_it; + }; + + iterator begin() const + { + return iterator(_src_shapes.begin(), _infos.begin()); + } + + int size() const + { + return std::min(_src_shapes.size(), _infos.size()); + } + + void add_config(TensorShape src, Pool2dAttributes info) + { + _src_shapes.emplace_back(std::move(src)); + _infos.emplace_back(std::move(info)); + } + +protected: + DynamicFusionPoolingLayerDataset() = default; + DynamicFusionPoolingLayerDataset(DynamicFusionPoolingLayerDataset &&) = default; + +private: + std::vector<TensorShape> _src_shapes{}; + std::vector<Pool2dAttributes> _infos{}; +}; + +// Special pooling dataset +class PoolingLayerDatasetSpecialDynamicFusion final : public DynamicFusionPoolingLayerDataset +{ +public: + PoolingLayerDatasetSpecialDynamicFusion() + { + // NCHW DataLayout + // Special 
cases + add_config(TensorShape(2U, 3U, 4U, 1U), Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(2,2)).stride(Size2D(3,3))); + add_config(TensorShape(60U, 52U, 3U, 2U), Pool2dAttributes().pool_type(PoolingType::AVG).pool_size(Size2D(100,100)).stride(Size2D(5,5)).pad(Padding2D(50,50,50,50))); + // Asymmetric padding + add_config(TensorShape(112U, 112U, 32U), Pool2dAttributes().pool_type(PoolingType::MAX).pool_size(Size2D(3,3)).pad(Padding2D(0,1,0,1)).stride(Size2D(2,2))); + add_config(TensorShape(14U, 14U, 832U), Pool2dAttributes().pool_type(PoolingType::MAX).pool_size(Size2D(2,2)).stride(Size2D(1,1)).pad(Padding2D(0,0,0,0))); + + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute
\ No newline at end of file |