5 files changed, 334 insertions, 348 deletions
diff --git a/LICENSE b/LICENSE
index 0d2cb83aaa..781685ab31 100644
--- a/LICENSE
+++ b/LICENSE
@@ -1,6 +1,6 @@
 MIT License
 
-Copyright (c) 2017-2023 Arm Limited
+Copyright (c) 2017-2024 Arm Limited
 
 Permission is hereby granted, free of charge, to any person obtaining a copy
 of this software and associated documentation files (the "Software"), to deal
diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
index 8c90efcbdd..b0ad4879ba 100644
--- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
+++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET
-#define ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET
+#ifndef ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H
+#define ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H
 
 #include "utils/TypePrinter.h"
 
@@ -40,21 +40,17 @@ namespace datasets
 class GEMMLowpFusedOffsetOutputDataset
 {
 public:
-    using type = std::tuple<TensorShape, TensorShape, TensorShape, int32_t, int32_t, GEMMLowpOutputStageInfo>;
+    using type = std::tuple<TensorShape, TensorShape, TensorShape, GEMMLowpOutputStageType>;
 
     struct iterator
     {
         iterator(std::vector<TensorShape>::const_iterator             a_it,
                  std::vector<TensorShape>::const_iterator             b_it,
                  std::vector<TensorShape>::const_iterator             c_it,
-                 std::vector<int32_t>::const_iterator                 a_offset_it,
-                 std::vector<int32_t>::const_iterator                 b_offset_it,
-                 std::vector<GEMMLowpOutputStageInfo>::const_iterator output_stage_it)
+                 std::vector<GEMMLowpOutputStageType>::const_iterator output_stage_it)
             : _a_it{ std::move(a_it) },
               _b_it{ std::move(b_it) },
               _c_it{ std::move(c_it) },
-              _a_offset_it{ std::move(a_offset_it) },
-              _b_offset_it{ std::move(b_offset_it) },
               _output_stage_it{ std::move(output_stage_it) }
         {
         }
@@ -65,33 +61,14 @@ public:
             description << "A=" << *_a_it << ":";
             description << "B=" << *_b_it << ":";
             description << "C=" << *_c_it << ":";
-            description << "a_offset=" << *_a_offset_it << ":";
-            description << "b_offset=" << *_b_offset_it << ":";
-            description << "output_type=" << string_from_gemmlowp_output_stage((*_output_stage_it).type) << ":";
-            description << "output_offset=" << (*_output_stage_it).gemmlowp_offset << ":";
-            description << "output_multiplier={";
-            for(auto it = (*_output_stage_it).gemmlowp_multipliers.begin(); it != (*_output_stage_it).gemmlowp_multipliers.end(); ++it)
-            {
-                description << (*it) << ", ";
-            }
-            description << "}:";
-            description << "output_shift={";
-
-            for(auto it = (*_output_stage_it).gemmlowp_shifts.begin(); it != (*_output_stage_it).gemmlowp_shifts.end(); ++it)
-            {
-                description << (*it) << ", ";
-            }
-            description << "}:";
-            description << "output_min=" << (*_output_stage_it).gemmlowp_min_bound << ":";
-            description << "output_max=" << (*_output_stage_it).gemmlowp_max_bound << ":";
-            description << "is_quantized_per_channel=" << (*_output_stage_it).is_quantized_per_channel << ":";
+            description << "output_type=" << string_from_gemmlowp_output_stage(*_output_stage_it) << ":";
 
             return description.str();
         }
 
         GEMMLowpFusedOffsetOutputDataset::type operator*() const
         {
-            return std::make_tuple(*_a_it, *_b_it, *_c_it, *_a_offset_it, *_b_offset_it, *_output_stage_it);
+            return std::make_tuple(*_a_it, *_b_it, *_c_it, *_output_stage_it);
         }
 
         iterator &operator++()
@@ -99,8 +76,6 @@ public:
             ++_a_it;
             ++_b_it;
             ++_c_it;
-            ++_a_offset_it;
-            ++_b_offset_it;
             ++_output_stage_it;
 
             return *this;
@@ -110,45 +85,27 @@ public:
         std::vector<TensorShape>::const_iterator             _a_it;
         std::vector<TensorShape>::const_iterator             _b_it;
         std::vector<TensorShape>::const_iterator             _c_it;
-        std::vector<int32_t>::const_iterator                 _a_offset_it;
-        std::vector<int32_t>::const_iterator                 _b_offset_it;
-        std::vector<GEMMLowpOutputStageInfo>::const_iterator _output_stage_it;
+        std::vector<GEMMLowpOutputStageType>::const_iterator _output_stage_it;
     };
 
     iterator begin() const
     {
-        return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _a_offset.begin(), _b_offset.begin(), _output_stage.begin());
+        return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _output_stage.begin());
     }
 
     int size() const
     {
-        return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), std::min(_a_offset.size(), std::min(_b_offset.size(), _output_stage.size())))));
+        return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), _output_stage.size())));
     }
 
-    void add_config(TensorShape a, TensorShape b, TensorShape c, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
+    void add_config(TensorShape a, TensorShape b, TensorShape c, GEMMLowpOutputStageType output_stage)
     {
         _a_shapes.emplace_back(std::move(a));
         _b_shapes.emplace_back(std::move(b));
         _c_shapes.emplace_back(std::move(c));
-        _a_offset.emplace_back(std::move(a_offset));
-        _b_offset.emplace_back(std::move(b_offset));
         _output_stage.emplace_back(std::move(output_stage));
     }
 
-    GEMMLowpOutputStageInfo OutputStageInfo(GEMMLowpOutputStageType type, int32_t offset, int32_t multiplier, int32_t shift, int32_t min, int32_t max)
-    {
-        GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
-        output_stage.type                    = type;
-        output_stage.gemmlowp_offset         = offset;
-        output_stage.gemmlowp_multiplier     = multiplier;
-        output_stage.gemmlowp_shift          = shift;
-        output_stage.gemmlowp_min_bound      = min;
-        output_stage.gemmlowp_max_bound      = max;
-        output_stage.gemmlowp_multipliers.push_back(multiplier);
-        output_stage.gemmlowp_shifts.push_back(shift);
-        return output_stage;
-    }
-
 protected:
     GEMMLowpFusedOffsetOutputDataset()                                    = default;
     GEMMLowpFusedOffsetOutputDataset(GEMMLowpFusedOffsetOutputDataset &&) = default;
@@ -157,9 +114,7 @@ private:
     std::vector<TensorShape>             _a_shapes{};
     std::vector<TensorShape>             _b_shapes{};
     std::vector<TensorShape>             _c_shapes{};
-    std::vector<int32_t>                 _a_offset{};
-    std::vector<int32_t>                 _b_offset{};
-    std::vector<GEMMLowpOutputStageInfo> _output_stage{};
+    std::vector<GEMMLowpOutputStageType> _output_stage{};
 };
 
 class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset
@@ -167,47 +122,28 @@ class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff
 public:
     SmallGEMMLowpFusedOffsetOutputUint8Dataset()
     {
-        add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-        add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210));
-        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210));
-        add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-
-        add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, 210));
-        add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, 210));
-        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 10, 10, 210));
-        add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, 10, 210));
+        add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
     }
 };
 
-class SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned final : public GEMMLowpFusedOffsetOutputDataset
+class SmallGEMMLowpFusedBatchedMatMulDataset final : public GEMMLowpFusedOffsetOutputDataset
 {
 public:
-    SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned()
+    SmallGEMMLowpFusedBatchedMatMulDataset()
     {
-        add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, 0, 254));
-        add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, 0, 254));
-        add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, 20, 210));
-        add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, 14, 210));
-        add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, 0, 127));
-        add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, 10, 210));
-        add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), -3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 3, 10, 210));
-        add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, 50, 225));
-    }
-};
-
-class SmallGEMMLowpFusedBatchedMatMulDatasetSigned final : public GEMMLowpFusedOffsetOutputDataset
-{
-public:
-    SmallGEMMLowpFusedBatchedMatMulDatasetSigned()
-    {
-        add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, -128, 127));
-        add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, -128, 127));
-        add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, -108, 127));
-        add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, -98, 107));
-        add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, -127, 64));
-        add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, -64, 127));
-        add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), 3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -127, 110));
-        add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -77, 115));
+        add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
     }
 };
 
@@ -216,14 +152,12 @@ class SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset final : public GEMMLowp
 public:
     SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset()
     {
-        add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-        add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210));
-        add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210));
-        add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-        add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10,
-                                                                                                                               210));
-        add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10,
-                   210));
+        add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
     }
 };
 
@@ -232,14 +166,12 @@ class SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset final : public GEM
 public:
     SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset()
     {
-        add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-        add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210));
-        add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210));
-        add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-        add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10,
-                   210));
-        add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10,
-                   210));
+        add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
     }
 };
 
@@ -248,28 +180,14 @@ class SmallGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs
 public:
     SmallGEMMLowpFusedOffsetOutputInt8Dataset()
     {
-        add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 13, -10, 110));
-        add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110));
-        add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110));
-        add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -40, 2, 13, -10, 110));
-
-        add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 10, -10, 110));
-        add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 10, -10, 110));
-        add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 10, -10, 110));
-        add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, -10, 110));
-    }
-};
-
-class SmallGEMMLowpFusedOffsetOutputPerChannelDataset final : public GEMMLowpFusedOffsetOutputDataset
-{
-public:
-    SmallGEMMLowpFusedOffsetOutputPerChannelDataset()
-    {
-        add_config(TensorShape(21U, 1U, 6U), TensorShape(43U, 21U, 6U), TensorShape(43U, 1U, 6U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 13, 10, 210));
-        add_config(TensorShape(21U, 13U, 3U), TensorShape(33U, 21U, 3U), TensorShape(33U, 13U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210));
-        add_config(TensorShape(31U, 3U, 2U), TensorShape(72U, 31U, 2U), TensorShape(72U, 3U, 2U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210));
-        add_config(TensorShape(52U, 13U, 7U), TensorShape(33U, 52U, 7U), TensorShape(33U, 13U, 7U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210));
-        add_config(TensorShape(52U, 26U, 8U), TensorShape(33U, 52U, 8U), TensorShape(33U, 26U, 8U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210));
+        add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
     }
 };
 
@@ -278,15 +196,12 @@ class LargeGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff
 public:
     LargeGEMMLowpFusedOffsetOutputUint8Dataset()
     {
-        add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 18, 10, 210));
-        add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 18, 10, 210));
-        add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 18, 10, 210));
-        add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 18, 10, 210));
+        add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
 
-        add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 15, 10, 210));
-        add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 15, 10, 210));
-        add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, 10, 210));
-        add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 15, 10, 210));
     }
 };
 
@@ -295,18 +210,17 @@ class LargeGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs
 public:
     LargeGEMMLowpFusedOffsetOutputInt8Dataset()
     {
-        add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110));
-        add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110));
-        add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110));
-        add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110));
-
-        add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 15, -10, 110));
-        add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 15, -10, 110));
-        add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 15, -10, 110));
-        add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, -10, 110));
+        add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), GEMMLowpOutputStageType::QUANTIZE_DOWN);
+        add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
     }
 };
 } // namespace datasets
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET */
+#endif // ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H
diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp
index 0b057b9dce..1ae9e96626 100644
--- a/tests/validation/CL/GEMMLowp.cpp
+++ b/tests/validation/CL/GEMMLowp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,9 @@ namespace test
 {
 namespace validation
 {
+
+using framework::dataset::make;
+
 namespace
 {
 constexpr AbsoluteTolerance<float> tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
@@ -72,9 +75,9 @@ using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
 TEST_SUITE(BatchedMatMul)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
-                       combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(),
-                                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                               framework::dataset::make("bool", { false })))
+    combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { false })))
 {
     validate(CLAccessor(_target), _reference, tolerance_quant);
 }
@@ -84,9 +87,9 @@ using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
-                       combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(),
-                                       framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
-                               framework::dataset::make("bool", { false })))
+    combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+        make("DataType", { DataType::QASYMM8_SIGNED }),
+        make("reshape_b_only_on_first_run", { false })))
 {
     validate(CLAccessor(_target), _reference, tolerance_quant);
 }
@@ -96,9 +99,10 @@ TEST_SUITE_END() // BatchedMatMul
 TEST_SUITE(FusedOffsetOutput)
 TEST_SUITE(QASYMM8)
 using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                       framework::dataset::make("reshape_b_only_on_first_run", { true, false })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -108,9 +112,9 @@ TEST_SUITE(Output3D)
 using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture =
     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, true>;
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture, framework::DatasetMode::ALL,
-                       combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(),
-                                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                               framework::dataset::make("reshape_b_only_on_first_run", { true, false })))
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -121,18 +125,19 @@ TEST_SUITE(InputOutput3D)
 using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture =
     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, true, true>;
 FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture, framework::DatasetMode::ALL,
-                       combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(),
-                                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                               framework::dataset::make("reshape_b_only_on_first_run", { true, false })))
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
 }
 TEST_SUITE_END() // InputOutput3D
 
-FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                       framework::dataset::make("reshape_b_only_on_first_run", { true, false })))
+FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY,
+    combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { true, false })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -141,8 +146,9 @@ TEST_SUITE_END() // QASYMM8
 TEST_SUITE(QASYMM8_SIGNED)
 using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture =
     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture<CLTensor, CLAccessor, CLGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t>;
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(),
+        make("DataType", { DataType::QASYMM8_SIGNED })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_quant);
@@ -185,24 +191,24 @@ TEST_SUITE(QuantizeDownInt32Scale)
 
 TEST_SUITE(QASYMM8)
 
-const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
-                                                      3)
-                                                      * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_uint8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3)
+                                                      * make("min", 0) * make("max", 255) * make("addBias", { false, true });
 
-const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
-                                                           2)
-                                                           * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_uint8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2)
+                                                           * make("result_shift", 2, 3) * make("min", 0, 2) * make("max", 171, 173) * make("addBias", { false, true });
 
 using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
 TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -213,24 +219,24 @@ TEST_SUITE_END() // QASYMM8
 
 TEST_SUITE(QASYMM8_SIGNED)
 
-const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2,
-                                                     3)
-                                                     * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3)
+                                                     * make("min", -128) * make("max", 127) * make("addBias", { false, true });
 
-const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1,
-                                                          2)
-                                                          * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 73) * framework::dataset::make("addBias", { false, true });
+const auto quantize_down_int32_to_int8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2)
+                                                          * make("result_shift", 2, 3) * make("min", -100, -98) * make("max", 71, 73) * make("addBias", { false, true });
 
 using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage>;
 
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
 }
 
 TEST_SUITE(BoundedReLu)
-FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
+FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -247,13 +253,14 @@ using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture =
     GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, uint8_t>;
 
 FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8),
-                                                                       datasets::TinyShapes()),
-                                                               framework::dataset::make("result_real_multiplier", 0.33f)),
-                                                       framework::dataset::make("result_offset", 2, 3)),
-                                               framework::dataset::make("min", 0)),
-                                       framework::dataset::make("max", 255)),
-                               framework::dataset::make("addBias", { false, true })))
+    combine(
+        make("DataType", DataType::QASYMM8),
+        datasets::TinyShapes(),
+        make("result_real_multiplier", 0.33f),
+        make("result_offset", 2, 3),
+        make("min", 0),
+        make("max", 255),
+        make("addBias", { false, true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
@@ -264,13 +271,14 @@ TEST_SUITE(QASYMM8_SIGNED)
 using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed =
     GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture<CLTensor, CLAccessor, CLGEMMLowpOutputStage, int8_t>;
 FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL,
-                       combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED),
-                                                                       datasets::TinyShapes()),
-                                                               framework::dataset::make("result_real_multiplier", 0.33f)),
-                                                       framework::dataset::make("result_offset", 2, 3)),
-                                               framework::dataset::make("min", -128)),
-                                       framework::dataset::make("max", 127)),
-                               framework::dataset::make("addBias", { false, true })))
+    combine(
+        make("DataType", DataType::QASYMM8_SIGNED),
+        datasets::TinyShapes(),
+        make("result_real_multiplier", 0.33f),
+        make("result_offset", 2, 3),
+        make("min", -128),
+        make("max", 127),
+        make("addBias", { false, true })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index 46058bd148..9c4d1741eb 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,9 +50,12 @@ namespace validation
 TEST_SUITE(NEON)
 TEST_SUITE(GEMMLowp)
 TEST_SUITE(MatrixMultiplyCore)
+
 using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore>;
 using NEGEMMLowpBatchedMatMulFixture      = GEMMLowpMatrixMultiplyCoreValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, true>;
 
+using framework::dataset::make;
+
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
                shape_a, shape_b, shape_c, a_offset, b_offset)
 {
@@ -80,26 +83,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c
 
 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
-    framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(
+    make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4
                                              TensorInfo(TensorShape(21U, 13U), 1, DataType::S32),                                 // Mismatching data type
                                              TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
                                              TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions
                                              TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)),
                                           }),
-    framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
+    make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
                                             TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)),
-                                          })),
-    framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
+                                          }),
+    make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
                                             TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
                                             TensorInfo(TensorShape(33U, 13U), 1, DataType::S32),
                                             TensorInfo(TensorShape(8U, 11U), 1, DataType::S32),
                                             TensorInfo(TensorShape(64U, 32U), 1, DataType::S32),
-                                           })),
-    framework::dataset::make("Expected", { true, false, false, false, true })),
+                                           }),
+    make("Expected", { true, false, false, false, true })),
     a_info, b_info, output_info, expected)
 {
     // Lock tensors
@@ -231,9 +234,9 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned =
 TEST_SUITE(BatchedMatMul)
 TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL,
-                       combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(),
-                                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
-                               framework::dataset::make("bool", { false })))
+    combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+        make("DataType", { DataType::QASYMM8 }),
+        make("reshape_b_only_on_first_run", { false })))
 {
     validate(Accessor(_target), _reference, tolerance_batched);
 }
@@ -243,9 +246,9 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned =
     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<Tensor, Accessor, NEGEMMLowpMatrixMultiplyCore, false, false, int8_t, int8_t, true>;
 TEST_SUITE(QASYMM8_SIGNED)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL,
-                       combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(),
-                                       framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })),
-                               framework::dataset::make("bool", { false })))
+    combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(),
+        make("DataType", { DataType::QASYMM8_SIGNED }),
+        make("reshape_b_only_on_first_run", { false })))
 {
     validate(Accessor(_target), _reference, tolerance_batched);
 }
@@ -256,15 +259,17 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMulti
 constexpr AbsoluteTolerance<float> tolerance_quant(1);
 
 TEST_SUITE(FusedOffsetOutput)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8 })))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL,
+    combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_quant);
 }
 
-FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
-                       framework::dataset::make("DataType", { DataType::QASYMM8 })))
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY,
+    combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(),
+        make("DataType", { DataType::QASYMM8 })))
 {
     // Validate output
     validate(Accessor(_target), _reference, tolerance_quant);
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index 1492ac6945..a65a1e6bd8 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2023 Arm Limited.
+ * Copyright (c) 2017-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,14 +21,19 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
-#define ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
 
 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
+#include "tests/validation/Helpers.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/reference/GEMMLowp.h"
 
+#include <cstdint>
+#include <vector>
+
 namespace arm_compute
 {
 namespace test
@@ -37,82 +42,46 @@ namespace validation
 {
 namespace
 {
+
 template <typename U>
 void fill(U &&tensor, int i)
 {
-    switch(tensor.data_type())
-    {
-        case DataType::QSYMM8_PER_CHANNEL:
-        {
-            int min_bound = 128;
-            int max_bound = -127;
-            for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++)
-            {
-                std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
-                if(bounds.first < min_bound)
-                {
-                    min_bound = bounds.first;
-                }
-                if(bounds.second > max_bound)
-                {
-                    max_bound = bounds.second;
-                }
-            }
-            std::uniform_int_distribution<int32_t> distribution(min_bound, max_bound);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::QASYMM8:
-        {
-            std::uniform_int_distribution<uint32_t> distribution(1, 254);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::S32:
-        {
-            std::uniform_int_distribution<int32_t> distribution(-20000, 20000);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::F16:
-        {
-            arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::F32:
-        {
-            std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        default:
-            library->fill_tensor_uniform(tensor, i);
-    }
+    ARM_COMPUTE_ASSERT(is_data_type_quantized(tensor.data_type()));
+    library->fill_tensor_uniform(tensor, i);
 }
 
+template <typename U>
+void fill_bias_s32(U &&tensor, int i, int32_t min, int32_t max)
+{
+    ARM_COMPUTE_ASSERT(tensor.data_type() == DataType::S32);
+    std::uniform_int_distribution<int32_t> distribution(min, max);
+    library->fill(tensor, distribution, i);
+}
+
+/** Information about how to fill tensors */
+struct TensorFillInfo
+{
+    // Inclusive [min_bias, max_bias] range used to fill the S32 bias tensor. The default values are arbitrary.
+    int32_t min_bias {-20000};
+    int32_t max_bias {20000};
+    // Optional offset added to every fill seed to vary the generated tensor contents
+    int32_t hash     {0};
+};
+
 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d, bool reinterpret_output_as_3d, typename OutputType, bool is_fused = false, bool run_twice = false>
-TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                   GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
-                                   QuantizationInfo b_qinfo = QuantizationInfo(), bool reshape_b_only_on_first_run = false)
+TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                   const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
+                                   GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo() )
 {
+    ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+    ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
     // Create tensors
-    DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
+    const DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
 
-    TensorType a      = create_tensor<TensorType>(shape_a, data_type_a, 1);
-    TensorType b      = create_tensor<TensorType>(shape_b, data_type_b, 1); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
-    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1);
+    TensorType a      = create_tensor<TensorType>(shape_a, data_type_a, 1, a_qinfo);
+    TensorType b      = create_tensor<TensorType>(shape_b, data_type_b, 1, b_qinfo); // GEMM output before the output stage mismatches if data_layout_output is passed here; to be investigated
+    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo is ignored when the output stage type is NONE */);
 
-    a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
-
-    if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
-    {
-        b.info()->set_quantization_info(b_qinfo);
-    }
-    else
-    {
-        b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
-    }
     TensorType bias;
     if(is_fused)
     {
@@ -142,26 +111,26 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
     ARM_COMPUTE_ASSERT(!output.info()->is_resizable());
 
     // Fill tensors
-    fill(AccessorType(a), 0);
-    fill(AccessorType(b), 1);
+    fill(AccessorType(a), 0 + finfo.hash);
+    fill(AccessorType(b), 1 + finfo.hash);
 
     if(is_fused)
     {
         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
         bias.allocator()->allocate();
         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
-        fill(AccessorType(bias), 2);
+        fill_bias_s32(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
     }
 
     // Run with variable inputs.
     if(run_twice)
     {
         gemmlowp.run();
-        fill(AccessorType(a), 3); // Fill tensors with new seed after run
-        fill(AccessorType(b), 4);
+        fill(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run
+        fill(AccessorType(b), 4 + finfo.hash);
         if(is_fused)
         {
-            fill(AccessorType(bias), 5);
+            fill_bias_s32(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
         }
     }
 
@@ -171,9 +140,11 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
 }
 
 template <bool reinterpret_input_as_3d, typename TI = uint8_t, typename TW = uint8_t, bool pretranspose_A = false, bool pretranspose_B = false, bool run_twice = false>
-SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                                 DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
+SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                                 DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, const TensorFillInfo& finfo = TensorFillInfo())
 {
+    ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+    ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
     TensorShape shape_a_to_use = shape_a;
     if(reinterpret_input_as_3d)
     {
@@ -182,8 +153,8 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con
     }
 
     // Create reference
-    SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1 };
-    SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
+    SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1, a_qinfo };
+    SimpleTensor<TW> b{ shape_b, data_type_b, 1, b_qinfo };
 
     TensorShape shape_a_to_use_transposed{ shape_a_to_use };
     TensorShape shape_b_transposed{ shape_b };
@@ -193,12 +164,12 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con
     shape_b_transposed.set(0, shape_b[1]);
     shape_b_transposed.set(1, shape_b[0]);
 
-    SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1 };
-    SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
+    SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1, a_qinfo };
+    SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, b_qinfo };
 
     // Fill reference
-    fill(a, 0);
-    fill(b, 1);
+    fill(a, 0 + finfo.hash);
+    fill(b, 1 + finfo.hash);
 
     // Transpose reference if required
     /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
@@ -216,16 +187,18 @@ SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, con
     }
 
     // Run with variable inputs.
+    const int32_t a_offset = a_qinfo.uniform().offset;
+    const int32_t b_offset = b_qinfo.uniform().offset;
     if(run_twice)
     {
         reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
-        fill((pretranspose_A) ? a_transposed : a, 3);
-        fill((pretranspose_B) ? b_transposed : b, 4);
+        fill((pretranspose_A) ? a_transposed : a, 3 + finfo.hash);
+        fill((pretranspose_B) ? b_transposed : b, 4 + finfo.hash);
     }
 
     return reference::gemmlowp_matrix_multiply_core<int32_t, TI, TW>((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
 }
-}
+} // namespace
 
 template <typename TensorType, typename AccessorType, typename FunctionType, bool reinterpret_input_as_3d = false, bool reinterpret_output_as_3d = false, bool run_twice = false>
 class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
@@ -233,20 +206,22 @@ class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
 public:
     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
     {
-        _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset);
-        _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
+        const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
+        const auto b_qinfo = QuantizationInfo(1.0f / 255, b_offset);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
     }
 
 protected:
-    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo)
     {
-        return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_offset,
-                b_offset);
+        const auto output_qinfo = QuantizationInfo(); // No output stage
+        return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, int32_t, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo);
     }
 
-    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo)
     {
-        return compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_offset, b_offset);
+        return compute_gemmlowp_reference<reinterpret_input_as_3d, uint8_t, uint8_t, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
     }
 
     TensorType            _target{};
@@ -257,54 +232,138 @@ template <typename TensorType, typename AccessorType, typename FunctionType, boo
 class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture : public framework::Fixture
 {
 public:
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b,
+    /** Randomly initialize the input quantization info, then derive the output quantization info and bias range from it with saturation awareness
+     */
+    template <typename T>
+    static void setup_quantization(DataType data_type, const TensorShape& shape_a, const TensorShape& shape_b, QuantizationInfo& a_qinfo, QuantizationInfo& b_qinfo, QuantizationInfo& output_qinfo, TensorFillInfo& finfo)
+    {
+        // This hash is used by random generators. There may be hash collisions but
+        // this is intentional, as it's a very easy way to make the current
+        // random generation process differ across most test configurations,
+        // which were all using the same set of values before.
+        finfo.hash = shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1];
+
+        const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+        const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+        std::mt19937                           generator(library->seed() + finfo.hash);
+        std::uniform_real_distribution<float>  distribution_float(-5.0f, 3.0f);
+        std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+        const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+        const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+        const int32_t offset_lhs = distribution_t(generator);
+        const int32_t offset_rhs = distribution_t(generator);
+
+        a_qinfo = QuantizationInfo(scale_lhs, offset_lhs);
+        b_qinfo = QuantizationInfo(scale_rhs, offset_rhs);
+
+        // reinterpret_input_as_3d or reinterpret_output_as_3d can be ignored, as the underlying gemm / matmul computation
+        // is equivalent to a standard 2D one with m-n-k dimensions
+        const int m = shape_a.y();
+        const int n = shape_b.x();
+        const int k = shape_a.x();
+
+        const float bias_fraction = 0.5f; // We enabled is_fused in compute_gemmlowp_target below, thus bias is included
+
+        QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(a_qinfo, b_qinfo, m, n, k, data_type, bias_fraction);
+        output_qinfo            = q_hint.q_info;
+        finfo.min_bias          = q_hint.bias_min;
+        finfo.max_bias          = q_hint.bias_max;
+
+        // Both target and reference implementations use negated offsets, i.e.
+        //      float_val = (int_val + offset) * scale
+        // instead of the usual
+        //      float_val = (int_val - offset) * scale
+        // Therefore, after calculating the output quantization above, we
+        // negate the inputs' offsets.
+        a_qinfo = QuantizationInfo(scale_lhs, -offset_lhs);
+        b_qinfo = QuantizationInfo(scale_rhs, -offset_rhs);
+    }
+
+    /** Initialize output stage info from quantization info */
+    static Status init_gemmlowp_output_stage_info(
+                                        DataType                data_type,
+                                        const QuantizationInfo& a_qinfo,
+                                        const QuantizationInfo& b_qinfo,
+                                        const QuantizationInfo& output_qinfo,
+                                        GEMMLowpOutputStageType type,
+                                        GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(!is_data_type_quantized_asymmetric(data_type));
+
+        const UniformQuantizationInfo aq_unif   = a_qinfo.uniform();
+        const UniformQuantizationInfo bq_unif   = b_qinfo.uniform();
+        const UniformQuantizationInfo oq_unif   = output_qinfo.uniform();
+
+        float   multiplier = (aq_unif.scale * bq_unif.scale) / oq_unif.scale;
+        int32_t int_multiplier;
+        int32_t shift;
+
+        ARM_COMPUTE_RETURN_ON_ERROR(
+            quantization::calculate_quantized_multiplier(multiplier, &int_multiplier, &shift));
+
+        int32_t type_min             = 0;
+        int32_t type_max             = 0;
+        std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(output_qinfo, ActivationLayerInfo(), data_type);
+
+        gemmlowp_output_stage_info.gemmlowp_real_multiplier = multiplier;
+        gemmlowp_output_stage_info.gemmlowp_multiplier = int_multiplier;
+        gemmlowp_output_stage_info.gemmlowp_multipliers = { int_multiplier };
+        gemmlowp_output_stage_info.gemmlowp_shift      = shift;
+        gemmlowp_output_stage_info.gemmlowp_shifts     = { shift };
+        gemmlowp_output_stage_info.gemmlowp_offset     = oq_unif.offset;
+        gemmlowp_output_stage_info.type                = type;
+        gemmlowp_output_stage_info.gemmlowp_min_bound  = type_min;
+        gemmlowp_output_stage_info.gemmlowp_max_bound  = type_max;
+
+        return Status{};
+    }
+
+    /** Currently this fixture only tests the following data type configurations:
+     *
+     * 1. a and b are of the same data type
+     * 2. The data type is quantized asymmetric
+     *
+     */
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type,
                bool reshape_b_only_on_first_run)
     {
-        ARM_COMPUTE_ASSERT(output_stage.type != GEMMLowpOutputStageType::NONE);
-        DataType data_type_a = data_type_b == DataType::QASYMM8_SIGNED ? DataType::QASYMM8_SIGNED : DataType::QASYMM8;
+        ARM_COMPUTE_ASSERT(output_stage_type != GEMMLowpOutputStageType::NONE);
+        ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type));
 
-        if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
-        {
-            output_stage.is_quantized_per_channel              = true;
-            const size_t                          num_channels = shape_b[0];
-            std::vector<float>                    scales(num_channels);
-            std::uniform_real_distribution<float> distribution(0.f, 1.f);
-            library->fill(scales, distribution, 0);
-            output_stage.gemmlowp_multipliers.resize(num_channels);
-            output_stage.gemmlowp_shifts.resize(num_channels);
-            for(size_t i = 0; i < num_channels; ++i)
-            {
-                quantization::calculate_quantized_multiplier(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]);
-            }
+        // Randomized dynamic quantization: randomize quantization info in a way that ensures no result saturation
+        // most of the time
+        QuantizationInfo a_qinfo;
+        QuantizationInfo b_qinfo;
+        QuantizationInfo output_qinfo;
+        TensorFillInfo finfo;
+        setup_quantization<TI>(data_type, shape_a, shape_b, a_qinfo, b_qinfo, output_qinfo, finfo);
 
-            _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales));
-            _target    = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales), reshape_b_only_on_first_run);
-        }
-        else
-        {
-            _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo());
-            _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo(), reshape_b_only_on_first_run);
-        }
+        GEMMLowpOutputStageInfo output_stage;
+        init_gemmlowp_output_stage_info(data_type, a_qinfo, b_qinfo, output_qinfo, output_stage_type, output_stage);
+
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, output_stage, finfo);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, output_stage, reshape_b_only_on_first_run, finfo);
     }
 
 protected:
-    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage,
-                              DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo, bool reshape_b_only_on_first_run = false)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo,
+                              DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo())
     {
-        return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_offset,
-                b_offset,
-                output_stage, data_type_a, data_type_b, b_qinfo, reshape_b_only_on_first_run);
+        return compute_gemmlowp_target<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, qasymm8_t, true, run_twice>(shape_a, shape_b, shape_output, a_qinfo,
+                b_qinfo, output_qinfo, data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo);
     }
 
-    SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                       GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo)
+    SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                       DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, const TensorFillInfo& finfo = TensorFillInfo())
     {
-        SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW, false, false, run_twice>(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b,
-                                                                                                                            b_qinfo);
+        SimpleTensor<int32_t> output = compute_gemmlowp_reference<reinterpret_input_as_3d, TI, TW, false, false, run_twice>(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo);
 
         TensorShape           bias_shape(shape_b[0]);
         SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
-        (run_twice) ? fill(bias, 5) : fill(bias, 2); // Fill bias with same seed as last run of gemmlowp_target
+        (run_twice) ? fill_bias_s32(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill_bias_s32(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target
 
         switch(output_stage.type)
         {
@@ -330,10 +389,10 @@ class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public
     GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW>
 {
 public:
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type)
     {
         GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture<TensorType, AccessorType, FunctionType, reinterpret_input_as_3d, reinterpret_output_as_3d, TI, TW>::setup(shape_a, shape_b,
-                shape_output, a_offset, b_offset, output_stage, data_type_b, false);
+                shape_output, output_stage_type, data_type, false /* reshape_b_only_on_first_run */);
     }
 };
 
@@ -2076,4 +2135,4 @@ protected:
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H