From 11ab45148a69f76e7821fa5e0670e4bacb05c776 Mon Sep 17 00:00:00 2001
From: SiCong Li
Date: Tue, 7 Nov 2023 12:04:59 +0000
Subject: Implement dynamic quantization for GEMMLowp tests

This patch calculates the output quantization info based on the inputs'
quantization information. The previous approach used the same quantization
information for input, weights and output.

Remove the QSYMM8_PER_CHANNEL path from the fixture, as there are no
related tests.

Remove repeated shapes from the dataset, now that the quantization info
has been removed from the dataset.

Combine the signed and unsigned SmallGEMMLowpFusedBatchedMatMulDataset
into one, as they became identical.

Resolves COMPMID-6481, COMPMID-6634

Change-Id: I9f5a20f4bb45c3e5adab388564135ae8a5c0a9ea
Signed-off-by: SiCong Li
Signed-off-by: Gunes Bayir
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10680
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Jakub Sujak
Benchmark: Arm Jenkins
---
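Note (illustrative background, not part of the change itself): the dynamically
derived output stage feeds the standard GEMMLowp requantization, in which every
S32 accumulator is rescaled by an integer multiplier and shift obtained from
the real multiplier (scale_a * scale_b) / scale_out. A minimal sketch of that
decomposition, mirroring what quantization::calculate_quantized_multiplier
computes for multipliers below 1 (the helper name and edge handling here are
illustrative only):

    #include <cassert>
    #include <cmath>
    #include <cstdint>

    // Split a real requantization multiplier (scale_a * scale_b / scale_out)
    // into a Q0.31 fixed-point multiplier and a right shift.
    void decompose_multiplier(float multiplier, int32_t *quant_mult, int32_t *right_shift)
    {
        assert(multiplier > 0.f && multiplier < 1.f);
        int exponent = 0;
        // multiplier = significand * 2^exponent, with significand in [0.5, 1)
        const double significand = std::frexp(multiplier, &exponent);
        int64_t q = static_cast<int64_t>(std::llround(significand * (1ll << 31)));
        if(q == (1ll << 31)) // rounding may push the significand up to 1.0
        {
            q /= 2;
            ++exponent;
        }
        *quant_mult  = static_cast<int32_t>(q);
        *right_shift = -exponent; // a value of -1 denotes a left shift by one
    }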
 tests/datasets/GEMMLowpFusedOffsetOutputDataset.h | 210 +++++---------
 tests/validation/CL/GEMMLowp.cpp                  | 110 ++++----
 tests/validation/NEON/GEMMLowp.cpp                |  41 +--
 tests/validation/fixtures/GEMMLowpFixture.h       | 319 +++++++++++++---------
 4 files changed, 333 insertions(+), 347 deletions(-)
(limited to 'tests')

diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
index 8c90efcbdd..b0ad4879ba 100644
--- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
+++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2019-2022 Arm Limited.
+ * Copyright (c) 2019-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET
-#define ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET
+#ifndef ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H
+#define ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H

 #include "utils/TypePrinter.h"

@@ -40,21 +40,17 @@ namespace datasets
 class GEMMLowpFusedOffsetOutputDataset
 {
 public:
-    using type = std::tuple<TensorShape, TensorShape, TensorShape, int32_t, int32_t, GEMMLowpOutputStageInfo>;
+    using type = std::tuple<TensorShape, TensorShape, TensorShape, GEMMLowpOutputStageType>;

     struct iterator
     {
         iterator(std::vector<TensorShape>::const_iterator             a_it,
                  std::vector<TensorShape>::const_iterator             b_it,
                  std::vector<TensorShape>::const_iterator             c_it,
-                 std::vector<int32_t>::const_iterator                 a_offset_it,
-                 std::vector<int32_t>::const_iterator                 b_offset_it,
-                 std::vector<GEMMLowpOutputStageInfo>::const_iterator output_stage_it)
+                 std::vector<GEMMLowpOutputStageType>::const_iterator output_stage_it)
             : _a_it{ std::move(a_it) },
               _b_it{ std::move(b_it) },
               _c_it{ std::move(c_it) },
-              _a_offset_it{ std::move(a_offset_it) },
-              _b_offset_it{ std::move(b_offset_it) },
               _output_stage_it{ std::move(output_stage_it) }
         {
         }

@@ -65,33 +61,14 @@ public:
             description << "A=" << *_a_it << ":";
             description << "B=" << *_b_it << ":";
             description << "C=" << *_c_it << ":";
-            description << "a_offset=" << *_a_offset_it << ":";
-            description << "b_offset=" << *_b_offset_it << ":";
-            description << "output_type=" << string_from_gemmlowp_output_stage((*_output_stage_it).type) << ":";
-            description << "output_offset=" << (*_output_stage_it).gemmlowp_offset << ":";
-            description << "output_multiplier={";
-            for(auto it = (*_output_stage_it).gemmlowp_multipliers.begin(); it != (*_output_stage_it).gemmlowp_multipliers.end(); ++it)
-            {
-                description << (*it) << ", ";
-            }
-            description << "}:";
-            description << "output_shift={";
-
-            for(auto it = (*_output_stage_it).gemmlowp_shifts.begin(); it != (*_output_stage_it).gemmlowp_shifts.end(); ++it)
-            {
-                description << (*it) << ", ";
-            }
-            description << "}:";
-            description << "output_min=" << (*_output_stage_it).gemmlowp_min_bound << ":";
-            description << "output_max=" << (*_output_stage_it).gemmlowp_max_bound << ":";
-            description << "is_quantized_per_channel=" << (*_output_stage_it).is_quantized_per_channel << ":";
+            description << "output_type=" << string_from_gemmlowp_output_stage(*_output_stage_it) << ":";

             return description.str();
         }

         GEMMLowpFusedOffsetOutputDataset::type operator*() const
         {
-            return std::make_tuple(*_a_it, *_b_it, *_c_it, *_a_offset_it, *_b_offset_it, *_output_stage_it);
+            return std::make_tuple(*_a_it, *_b_it, *_c_it, *_output_stage_it);
         }

         iterator &operator++()
@@ -99,8 +76,6 @@ public:
             ++_a_it;
             ++_b_it;
             ++_c_it;
-            ++_a_offset_it;
-            ++_b_offset_it;
             ++_output_stage_it;

             return *this;
@@ -110,45 +85,27 @@ public:
         std::vector<TensorShape>::const_iterator             _a_it;
         std::vector<TensorShape>::const_iterator             _b_it;
         std::vector<TensorShape>::const_iterator             _c_it;
-        std::vector<int32_t>::const_iterator                 _a_offset_it;
-        std::vector<int32_t>::const_iterator                 _b_offset_it;
-        std::vector<GEMMLowpOutputStageInfo>::const_iterator _output_stage_it;
+        std::vector<GEMMLowpOutputStageType>::const_iterator _output_stage_it;
     };

     iterator begin() const
     {
-        return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _a_offset.begin(), _b_offset.begin(), _output_stage.begin());
+        return iterator(_a_shapes.begin(), _b_shapes.begin(), _c_shapes.begin(), _output_stage.begin());
     }

     int size() const
     {
-        return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), std::min(_a_offset.size(), std::min(_b_offset.size(), _output_stage.size())))));
+        return std::min(_a_shapes.size(), std::min(_b_shapes.size(), std::min(_c_shapes.size(), _output_stage.size())));
     }
-    void add_config(TensorShape a, TensorShape b, TensorShape c, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage)
+    void add_config(TensorShape a, TensorShape b, TensorShape c, GEMMLowpOutputStageType output_stage)
     {
         _a_shapes.emplace_back(std::move(a));
         _b_shapes.emplace_back(std::move(b));
         _c_shapes.emplace_back(std::move(c));
-        _a_offset.emplace_back(std::move(a_offset));
-        _b_offset.emplace_back(std::move(b_offset));
         _output_stage.emplace_back(std::move(output_stage));
     }

-    GEMMLowpOutputStageInfo OutputStageInfo(GEMMLowpOutputStageType type, int32_t offset, int32_t multiplier, int32_t shift, int32_t min, int32_t max)
-    {
-        GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo();
-        output_stage.type                    = type;
-        output_stage.gemmlowp_offset         = offset;
-        output_stage.gemmlowp_multiplier     = multiplier;
-        output_stage.gemmlowp_shift          = shift;
-        output_stage.gemmlowp_min_bound      = min;
-        output_stage.gemmlowp_max_bound      = max;
-        output_stage.gemmlowp_multipliers.push_back(multiplier);
-        output_stage.gemmlowp_shifts.push_back(shift);
-        return output_stage;
-    }
-
protected:
    GEMMLowpFusedOffsetOutputDataset()                                    = default;
    GEMMLowpFusedOffsetOutputDataset(GEMMLowpFusedOffsetOutputDataset &&) = default;
@@ -157,9 +114,7 @@ private:
    std::vector<TensorShape> _a_shapes{};
    std::vector<TensorShape> _b_shapes{};
    std::vector<TensorShape> _c_shapes{};
-    std::vector<int32_t> _a_offset{};
-    std::vector<int32_t> _b_offset{};
-    std::vector<GEMMLowpOutputStageInfo> _output_stage{};
+    std::vector<GEMMLowpOutputStageType> _output_stage{};
 };

class SmallGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff
 public:
    SmallGEMMLowpFusedOffsetOutputUint8Dataset()
    {
-        add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-        add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210));
-        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210));
-        add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210));
-
-        add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, 210));
-        add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, 210));
-        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 10, 10, 210));
-        add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, 10, 210));
+        add_config(TensorShape(21U, 13U), TensorShape(1U, 21U), TensorShape(1U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(52U, 13U), TensorShape(33U, 52U), TensorShape(33U, 13U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(31U, 27U), TensorShape(23U, 31U), TensorShape(23U, 27U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT);
+        add_config(TensorShape(32U, 72U), TensorShape(16U, 32U), TensorShape(16U,
72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(21U, 1U), TensorShape(43U, 21U), TensorShape(43U, 1U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; -class SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned final : public GEMMLowpFusedOffsetOutputDataset +class SmallGEMMLowpFusedBatchedMatMulDataset final : public GEMMLowpFusedOffsetOutputDataset { public: - SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned() + SmallGEMMLowpFusedBatchedMatMulDataset() { - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, 0, 254)); - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, 0, 254)); - add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, 20, 210)); - add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, 14, 210)); - add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, 0, 127)); - add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, 10, 210)); - add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), -3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 3, 10, 210)); - add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, 50, 225)); - } -}; - -class SmallGEMMLowpFusedBatchedMatMulDatasetSigned final : public GEMMLowpFusedOffsetOutputDataset -{ -public: - SmallGEMMLowpFusedBatchedMatMulDatasetSigned() - { - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, -128, 127)); - add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, -128, 127)); - add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, -108, 127)); - add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, -98, 107)); - add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, -127, 64)); - add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, -64, 127)); - 
add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), 3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -127, 110)); - add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -77, 115)); + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -216,14 +152,12 @@ class SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset final : public GEMMLowp public: SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset() { - add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210)); - add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210)); - add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, - 210)); - add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, - 210)); + add_config(TensorShape(21U, 1421U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 1200U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 1600U, 113U), TensorShape(8U, 16U), 
TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -232,14 +166,12 @@ class SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset final : public GEM public: SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset() { - add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 13, 10, 210)); - add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), 18, 23, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 13, 10, 210)); - add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 13, 10, 210)); - add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 10, 10, - 210)); - add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 10, 10, - 210)); + add_config(TensorShape(21U, 7U, 203U, 33U), TensorShape(34U, 21U), TensorShape(34U, 7U, 203U, 33U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(31U, 1U, 102U, 55U), TensorShape(23U, 31U), TensorShape(23U, 1U, 102U, 55U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 4U, 300U, 77U), TensorShape(21U, 38U), TensorShape(21U, 4U, 300U, 77U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 1U, 103U, 99U), TensorShape(17U, 32U), TensorShape(17U, 1U, 103U, 99U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 111U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 111U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(16U, 8U, 200U, 113U), TensorShape(8U, 16U), TensorShape(8U, 8U, 200U, 113U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -248,28 +180,14 @@ class SmallGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs public: SmallGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 13, -10, 110)); - add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -40, 2, 13, -10, 110)); - - add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 10, -10, 110)); - add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), -2, 0, 
OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 10, -10, 110)); - add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 10, -10, 110)); - add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 10, -10, 110)); - } -}; - -class SmallGEMMLowpFusedOffsetOutputPerChannelDataset final : public GEMMLowpFusedOffsetOutputDataset -{ -public: - SmallGEMMLowpFusedOffsetOutputPerChannelDataset() - { - add_config(TensorShape(21U, 1U, 6U), TensorShape(43U, 21U, 6U), TensorShape(43U, 1U, 6U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -200, 2, 13, 10, 210)); - add_config(TensorShape(21U, 13U, 3U), TensorShape(33U, 21U, 3U), TensorShape(33U, 13U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -100, 2, 13, 10, 210)); - add_config(TensorShape(31U, 3U, 2U), TensorShape(72U, 31U, 2U), TensorShape(72U, 3U, 2U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); - add_config(TensorShape(52U, 13U, 7U), TensorShape(33U, 52U, 7U), TensorShape(33U, 13U, 7U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 100, 2, 13, 10, 210)); - add_config(TensorShape(52U, 26U, 8U), TensorShape(33U, 52U, 8U), TensorShape(33U, 26U, 8U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 13, 10, 210)); + add_config(TensorShape(21U, 1U), TensorShape(1U, 21U), TensorShape(1U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(31U, 3U), TensorShape(72U, 31U), TensorShape(72U, 3U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(21U, 13U), TensorShape(33U, 21U), TensorShape(33U, 13U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(52U, 26U), TensorShape(33U, 52U), TensorShape(33U, 26U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(38U, 43U), TensorShape(21U, 38U), TensorShape(21U, 43U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(32U, 72U), TensorShape(17U, 32U), TensorShape(17U, 72U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; @@ -278,15 +196,12 @@ class LargeGEMMLowpFusedOffsetOutputUint8Dataset final : public GEMMLowpFusedOff public: LargeGEMMLowpFusedOffsetOutputUint8Dataset() { - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 18, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 2, 18, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 200, 2, 18, 10, 210)); - add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U), -9, 1, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -100, 2, 18, 10, 210)); + add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 
429U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(941U, 1011U), TensorShape(623U, 941U), TensorShape(623U, 1011U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U),GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); - add_config(TensorShape(923U, 429U), TensorShape(871U, 923U), TensorShape(871U, 429U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601600, 15, 10, 210)); - add_config(TensorShape(873U, 513U), TensorShape(784U, 873U), TensorShape(784U, 513U), 0, 4, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 1, 254601600, 15, 10, 210)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, 10, 210)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -1, 254601602, 15, 10, 210)); } }; @@ -295,18 +210,17 @@ class LargeGEMMLowpFusedOffsetOutputInt8Dataset final : public GEMMLowpFusedOffs public: LargeGEMMLowpFusedOffsetOutputInt8Dataset() { - add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, 0, 2, 18, -10, 110)); - add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), -3, -2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN, -50, 2, 18, -10, 110)); - - add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601600, 15, -10, 110)); - add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), -1, 3, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 254601600, 15, -10, 110)); - add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), -2, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 2, 254601602, 15, -10, 110)); - add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), 5, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -2, 254601602, 15, -10, 110)); + add_config(TensorShape(923U, 1U, 15U), TensorShape(871U, 923U, 15U), TensorShape(871U, 1U, 15U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + add_config(TensorShape(681U, 1023U), TensorShape(213U, 681U), TensorShape(213U, 1023U), GEMMLowpOutputStageType::QUANTIZE_DOWN); + 
add_config(TensorShape(923U, 1U), TensorShape(871U, 923U), TensorShape(871U, 1U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(873U, 7U), TensorShape(784U, 873U), TensorShape(784U, 7U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(697U, 872U), TensorShape(563U, 697U), TensorShape(563U, 872U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); + add_config(TensorShape(1021U, 973U), TensorShape(783U, 1021U), TensorShape(783U, 973U), GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT); } }; } // namespace datasets } // namespace test } // namespace arm_compute -#endif /* ARM_COMPUTE_TEST_GEMMLOWPOUTPUT_DATASET */ +#endif // ACL_TESTS_DATASETS_GEMMLOWPFUSEDOFFSETOUTPUTDATASET_H diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index 0b057b9dce..1ae9e96626 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,6 +44,9 @@ namespace test { namespace validation { + +using framework::dataset::make; + namespace { constexpr AbsoluteTolerance tolerance_quant(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ @@ -72,9 +75,9 @@ using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { validate(CLAccessor(_target), _reference, tolerance_quant); } @@ -84,9 +87,9 @@ using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) { validate(CLAccessor(_target), _reference, tolerance_quant); } @@ -96,9 +99,10 @@ TEST_SUITE_END() // BatchedMatMul TEST_SUITE(FusedOffsetOutput) TEST_SUITE(QASYMM8) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { 
DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -108,9 +112,9 @@ TEST_SUITE(Output3D) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputOutput3DUint8Fixture, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) + combine(datasets::SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -121,18 +125,19 @@ TEST_SUITE(InputOutput3D) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInputOutput3DUint8Fixture, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) + combine(datasets::SmallGEMMLowpFusedOffsetOutputInputOutput3DUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); } TEST_SUITE_END() // InputOutput3D -FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("reshape_b_only_on_first_run", { true, false }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { true, false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -141,8 +146,9 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED }))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputInt8Fixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputInt8Dataset(), + make("DataType", { DataType::QASYMM8_SIGNED }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_quant); @@ -185,24 +191,24 @@ TEST_SUITE(QuantizeDownInt32Scale) TEST_SUITE(QASYMM8) -const auto quantize_down_int32_to_uint8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * 
framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", 0) * framework::dataset::make("max", 255) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_uint8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3) + * make("min", 0) * make("max", 255) * make("addBias", { false, true }); -const auto quantize_down_int32_to_uint8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", 0, 2) * framework::dataset::make("max", 171, 173) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_uint8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) + * make("result_shift", 2, 3) * make("min", 0, 2) * make("max", 171, 173) * make("addBias", { false, true }); using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToUint8ScaleValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_cases)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_uint8_scale_relu_cases)) { // Validate output validate(CLAccessor(_target), _reference); @@ -213,24 +219,24 @@ TEST_SUITE_END() // QASYMM8 TEST_SUITE(QASYMM8_SIGNED) -const auto quantize_down_int32_to_int8_scale_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, 2) * framework::dataset::make("result_shift", 2, - 3) - * framework::dataset::make("min", -128) * framework::dataset::make("max", 127) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int8_scale_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) * make("result_shift", 2, 3) + * make("min", -128) * make("max", 127) * make("addBias", { false, true }); -const auto quantize_down_int32_to_int8_scale_relu_cases = framework::dataset::make("result_offset", -2, 1) * framework::dataset::make("result_mult_int", 1, - 2) - * framework::dataset::make("result_shift", 2, 3) * framework::dataset::make("min", -100, -98) * framework::dataset::make("max", 71, 73) * framework::dataset::make("addBias", { false, true }); +const auto quantize_down_int32_to_int8_scale_relu_cases = make("result_offset", -2, 1) * make("result_mult_int", 1, 2) + * make("result_shift", 2, 3) * make("min", -100, -98) * make("max", 71, 73) * make("addBias", { false, true }); using CLGEMMLowpQuantizeDownInt32ScaleFixture = GEMMLowpQuantizeDownInt32ToInt8ScaleValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, 
CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_cases)) { // Validate output validate(CLAccessor(_target), _reference); } TEST_SUITE(BoundedReLu) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases)) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpQuantizeDownInt32ScaleFixture, framework::DatasetMode::ALL, + combine(datasets::SmallShapes(), quantize_down_int32_to_int8_scale_relu_cases)) { // Validate output validate(CLAccessor(_target), _reference); @@ -247,13 +253,14 @@ using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture = GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture; FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), - datasets::TinyShapes()), - framework::dataset::make("result_real_multiplier", 0.33f)), - framework::dataset::make("result_offset", 2, 3)), - framework::dataset::make("min", 0)), - framework::dataset::make("max", 255)), - framework::dataset::make("addBias", { false, true }))) + combine( + make("DataType", DataType::QASYMM8), + datasets::TinyShapes(), + make("result_real_multiplier", 0.33f), + make("result_offset", 2, 3), + make("min", 0), + make("max", 255), + make("addBias", { false, true }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -264,13 +271,14 @@ TEST_SUITE(QASYMM8_SIGNED) using CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed = GEMMLowpQuantizeDownInt32ScaleByFloatValidationFixture; FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMLowpQuantizeDownInt32ScaleByFloatFixture_Signed, framework::DatasetMode::ALL, - combine(combine(combine(combine(combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), - datasets::TinyShapes()), - framework::dataset::make("result_real_multiplier", 0.33f)), - framework::dataset::make("result_offset", 2, 3)), - framework::dataset::make("min", -128)), - framework::dataset::make("max", 127)), - framework::dataset::make("addBias", { false, true }))) + combine( + make("DataType", DataType::QASYMM8_SIGNED), + datasets::TinyShapes(), + make("result_real_multiplier", 0.33f), + make("result_offset", 2, 3), + make("min", -128), + make("max", 127), + make("addBias", { false, true }))) { // Validate output validate(CLAccessor(_target), _reference); diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 46058bd148..9c4d1741eb 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -50,9 +50,12 @@ namespace validation TEST_SUITE(NEON) TEST_SUITE(GEMMLowp) TEST_SUITE(MatrixMultiplyCore) + using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture; using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture; +using framework::dataset::make; + DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()), shape_a, shape_b, shape_c, a_offset, b_offset) { @@ -80,26 +83,26 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::c // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4 +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip( + make("InputAInfo", { TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Input not a multiple of 4 TensorInfo(TensorShape(21U, 13U), 1, DataType::S32), // Mismatching data type TensorInfo(TensorShape(20U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(21U, 13U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), // Invalid dimensions TensorInfo(TensorShape(16U, 32U), 1, DataType::QASYMM8, QuantizationInfo(1.f/255, 10)), }), - framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), + make("InputBInfo",{ TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(33U, 21U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), TensorInfo(TensorShape(64U, 16U), 1, DataType::QASYMM8, QuantizationInfo(1.f/256, 10)), - })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), + }), + make("OutputInfo",{ TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(33U, 13U), 1, DataType::S32), TensorInfo(TensorShape(8U, 11U), 1, DataType::S32), TensorInfo(TensorShape(64U, 32U), 1, DataType::S32), - })), - framework::dataset::make("Expected", { true, false, false, false, true })), + }), + make("Expected", { true, false, false, false, true })), a_info, b_info, output_info, expected) { // Lock tensors @@ -231,9 +234,9 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = TEST_SUITE(BatchedMatMul) TEST_SUITE(QASYMM8) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(), - framework::dataset::make("DataType", { DataType::QASYMM8 })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8 }), + make("reshape_b_only_on_first_run", { false }))) { validate(Accessor(_target), _reference, tolerance_batched); } @@ -243,9 +246,9 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; 
TEST_SUITE(QASYMM8_SIGNED) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, - combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(), - framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })), - framework::dataset::make("bool", { false }))) + combine(datasets::SmallGEMMLowpFusedBatchedMatMulDataset(), + make("DataType", { DataType::QASYMM8_SIGNED }), + make("reshape_b_only_on_first_run", { false }))) { validate(Accessor(_target), _reference, tolerance_batched); } @@ -256,15 +259,17 @@ using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMulti constexpr AbsoluteTolerance tolerance_quant(1); TEST_SUITE(FusedOffsetOutput) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, + combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), - framework::dataset::make("DataType", { DataType::QASYMM8 }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, + combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), + make("DataType", { DataType::QASYMM8 }))) { // Validate output validate(Accessor(_target), _reference, tolerance_quant); diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 1492ac6945..a65a1e6bd8 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,14 +21,19 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
 */
-#ifndef ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
-#define ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H

 #include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+#include "src/core/utils/quantization/AsymmHelpers.h"
+#include "tests/validation/Helpers.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/Validation.h"
 #include "tests/validation/reference/GEMMLowp.h"

+#include <cstdint>
+#include <random>
+
 namespace arm_compute
 {
 namespace test
 {
@@ -37,82 +42,46 @@ namespace validation
 {
 namespace
 {
+
 template <typename U>
 void fill(U &&tensor, int i)
 {
-    switch(tensor.data_type())
-    {
-        case DataType::QSYMM8_PER_CHANNEL:
-        {
-            int min_bound = 128;
-            int max_bound = -127;
-            for(size_t j = 0; j < tensor.quantization_info().scale().size(); j++)
-            {
-                std::pair<int, int> bounds = get_symm_quantized_per_channel_bounds(tensor.quantization_info(), -1.0f, 1.0f, i);
-                if(bounds.first < min_bound)
-                {
-                    min_bound = bounds.first;
-                }
-                if(bounds.second > max_bound)
-                {
-                    max_bound = bounds.second;
-                }
-            }
-            std::uniform_int_distribution<int8_t> distribution(min_bound, max_bound);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::QASYMM8:
-        {
-            std::uniform_int_distribution<uint8_t> distribution(1, 254);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::S32:
-        {
-            std::uniform_int_distribution<int32_t> distribution(-20000, 20000);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::F16:
-        {
-            arm_compute::utils::uniform_real_distribution_16bit<half> distribution{ -1.0f, 1.0f };
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        case DataType::F32:
-        {
-            std::uniform_real_distribution<float> distribution(-1.0f, 1.0f);
-            library->fill(tensor, distribution, i);
-            break;
-        }
-        default:
-            library->fill_tensor_uniform(tensor, i);
-    }
+    ARM_COMPUTE_ASSERT(is_data_type_quantized(tensor.data_type()));
+    library->fill_tensor_uniform(tensor, i);
 }

+template <typename U>
+void fill_bias_s32(U &&tensor, int i, int32_t min, int32_t max)
+{
+    ARM_COMPUTE_ASSERT(tensor.data_type() == DataType::S32);
+    std::uniform_int_distribution<int32_t> distribution(min, max);
+    library->fill(tensor, distribution, i);
+}
+
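The S32 bias range is now a parameter instead of the fixed [-20000, 20000] of
the old switch, so each test can pick bounds the accumulator tolerates. For
illustration only (the fixture itself goes through library->fill), a
standalone equivalent of the bounded bias fill might look like:

    #include <cstdint>
    #include <random>
    #include <vector>

    // Draw 'len' bias values uniformly from [min, max]. The seed stands in for
    // the fixture's (i + finfo.hash) argument: a different seed per tensor and
    // per test configuration decorrelates the generated data.
    std::vector<int32_t> make_bias(std::size_t len, std::uint32_t seed, int32_t min, int32_t max)
    {
        std::mt19937                           gen(seed);
        std::uniform_int_distribution<int32_t> dist(min, max);
        std::vector<int32_t>                   bias(len);
        for(auto &v : bias)
        {
            v = dist(gen);
        }
        return bias;
    }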
+/** Information about how to fill tensors */
+struct TensorFillInfo
+{
+    // Bias fill range. Default values are arbitrary
+    int32_t min_bias {-20000};
+    int32_t max_bias {20000};
+    // Optional extra hash to randomize tensor filling
+    int32_t hash {0};
+};
+
 template 
-TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                   GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
-                                   QuantizationInfo b_qinfo = QuantizationInfo(), bool reshape_b_only_on_first_run = false)
+TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                   const QuantizationInfo& output_qinfo, DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8,
+                                   GEMMLowpOutputStageInfo output_stage = GEMMLowpOutputStageInfo(), bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo() )
 {
+    ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+    ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
     // Create tensors
-    DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;
+    const DataType data_type_output = output_stage.type == GEMMLowpOutputStageType::NONE ? DataType::S32 : data_type_a;

-    TensorType a      = create_tensor<TensorType>(shape_a, data_type_a, 1);
-    TensorType b      = create_tensor<TensorType>(shape_b, data_type_b, 1); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
-    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1);
+    TensorType a      = create_tensor<TensorType>(shape_a, data_type_a, 1, a_qinfo);
+    TensorType b      = create_tensor<TensorType>(shape_b, data_type_b, 1, b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
+    TensorType output = create_tensor<TensorType>(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);

-    a.info()->set_quantization_info(QuantizationInfo(1.0f / 255, a_offset));
-
-    if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
-    {
-        b.info()->set_quantization_info(b_qinfo);
-    }
-    else
-    {
-        b.info()->set_quantization_info(QuantizationInfo(1.0f / 255, b_offset));
-    }
     TensorType bias;
     if(is_fused)
     {
@@ -142,26 +111,26 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
     ARM_COMPUTE_ASSERT(!output.info()->is_resizable());

     // Fill tensors
-    fill(AccessorType(a), 0);
-    fill(AccessorType(b), 1);
+    fill(AccessorType(a), 0 + finfo.hash);
+    fill(AccessorType(b), 1 + finfo.hash);

     if(is_fused)
     {
         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
         bias.allocator()->allocate();
         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
-        fill(AccessorType(bias), 2);
+        fill_bias_s32(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
     }

     // Run with variable inputs.
     if(run_twice)
     {
         gemmlowp.run();
-        fill(AccessorType(a), 3); // Fill tensors with new seed after run
-        fill(AccessorType(b), 4);
+        fill(AccessorType(a), 3 + finfo.hash); // Fill tensors with new seed after run
+        fill(AccessorType(b), 4 + finfo.hash);
         if(is_fused)
         {
-            fill(AccessorType(bias), 5);
+            fill_bias_s32(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
         }
     }
@@ -171,9 +140,11 @@
 }

 template 
-SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                                 DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, QuantizationInfo b_qinfo = QuantizationInfo())
+SimpleTensor<int32_t> compute_gemmlowp_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                                 DataType data_type_a = DataType::QASYMM8, DataType data_type_b = DataType::QASYMM8, const TensorFillInfo& finfo = TensorFillInfo())
 {
+    ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type_a));
+    ARM_COMPUTE_ASSERT(data_type_a == data_type_b);
     TensorShape shape_a_to_use = shape_a;
     if(reinterpret_input_as_3d)
     {
@@ -182,8 +153,8 @@
     }

     // Create reference
-    SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1 };
-    SimpleTensor<TW> b{ shape_b, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
+    SimpleTensor<TI> a{ shape_a_to_use, data_type_a, 1, a_qinfo };
+    SimpleTensor<TW> b{ shape_b, data_type_b, 1, b_qinfo };

     TensorShape shape_a_to_use_transposed{ shape_a_to_use };
     TensorShape shape_b_transposed{ shape_b };
@@ -193,12 +164,12 @@
     shape_b_transposed.set(0, shape_b[1]);
     shape_b_transposed.set(1, shape_b[0]);

-    SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1 };
-    SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, data_type_b == DataType::QSYMM8_PER_CHANNEL ? b_qinfo : QuantizationInfo(1.0f / 255, b_offset) };
+    SimpleTensor<TI> a_transposed{ shape_a_to_use_transposed, data_type_a, 1, a_qinfo };
+    SimpleTensor<TW> b_transposed{ shape_b_transposed, data_type_b, 1, b_qinfo };

     // Fill reference
-    fill(a, 0);
-    fill(b, 1);
+    fill(a, 0 + finfo.hash);
+    fill(b, 1 + finfo.hash);

     // Transpose reference if required
     /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
@@ -216,16 +187,18 @@
     }

     // Run with variable inputs.
+    const int32_t a_offset = a_qinfo.uniform().offset;
+    const int32_t b_offset = b_qinfo.uniform().offset;
     if(run_twice)
     {
         reference::gemmlowp_matrix_multiply_core((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
-        fill((pretranspose_A) ? a_transposed : a, 3);
-        fill((pretranspose_B) ? b_transposed : b, 4);
+        fill((pretranspose_A) ? a_transposed : a, 3 + finfo.hash);
+        fill((pretranspose_B) ? b_transposed : b, 4 + finfo.hash);
     }

     return reference::gemmlowp_matrix_multiply_core((pretranspose_A ? a_transposed : a), (pretranspose_B ? b_transposed : b), shape_output, a_offset, b_offset);
 }
-}
+} // namespace

 template 
 class GEMMLowpMatrixMultiplyCoreValidationFixture : public framework::Fixture
@@ -233,20 +206,22 @@ public:
     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
     {
-        _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset);
-        _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
+        const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
+        const auto b_qinfo = QuantizationInfo(1.0f / 255, b_offset);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
     }

 protected:
-    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo)
     {
-        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset, b_offset);
+        const auto output_qinfo = QuantizationInfo(); // No output stage
+        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo);
     }

-    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset)
+    SimpleTensor<int32_t> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo)
     {
-        return compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset);
+        return compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo);
     }

     TensorType _target{};
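The fused fixture below replaces the hand-picked offsets and output stages of
the old dataset with randomized quantization info chosen so that results
rarely saturate. A rough bound shows what the randomization has to respect
(illustrative arithmetic only, not the exact heuristic inside
suggest_matmul_dst_q_info_and_bias):

    #include <cstdint>
    #include <cstdlib>

    // Worst-case magnitude of a single GEMMLowp S32 accumulator for a K-deep
    // dot product of uint8 data: each term (a + a_off) * (b + b_off) is bounded
    // by (255 + |a_off|) * (255 + |b_off|), and a bias value is added on top.
    int64_t worst_case_accumulator(int k, int32_t a_off, int32_t b_off, int32_t max_abs_bias)
    {
        const int64_t term = (255 + static_cast<int64_t>(std::abs(a_off))) *
                             (255 + static_cast<int64_t>(std::abs(b_off)));
        return static_cast<int64_t>(k) * term + max_abs_bias;
    }

Keeping this value inside S32, and the requantized result inside the output
type's [min, max] interval, is what constrains the scales, offsets and bias
bounds generated below.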
@@ -257,54 +232,138 @@ template
+    static void setup_quantization(DataType data_type, const TensorShape& shape_a, const TensorShape& shape_b, QuantizationInfo& a_qinfo, QuantizationInfo& b_qinfo, QuantizationInfo& output_qinfo, TensorFillInfo& finfo)
+    {
+        // This hash is used by the random generators. There may be hash collisions, but
+        // this is intentional: it is an easy way to make the current random generation
+        // process produce different values for many test configurations, which
+        // previously all used the same set of values.
+        finfo.hash = shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1];
+
+        const int32_t t_max = static_cast<int32_t>(std::numeric_limits<TI>::max());
+        const int32_t t_min = static_cast<int32_t>(std::numeric_limits<TI>::min());
+
+        std::mt19937                           generator(library->seed() + finfo.hash);
+        std::uniform_real_distribution<float>  distribution_float(-5.0f, 3.0f);
+        std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+        const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+        const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+        const int32_t offset_lhs = distribution_t(generator);
+        const int32_t offset_rhs = distribution_t(generator);
+
+        a_qinfo = QuantizationInfo(scale_lhs, offset_lhs);
+        b_qinfo = QuantizationInfo(scale_rhs, offset_rhs);
+
+        // reinterpret_input_as_3d or reinterpret_output_as_3d can be ignored, as the underlying gemm / matmul computation
+        // is equivalent to a standard 2D one with m-n-k dimensions
+        const int m = shape_a.y();
+        const int n = shape_b.x();
+        const int k = shape_a.x();
+
+        const float bias_fraction = 0.5f; // is_fused is enabled in compute_gemmlowp_target below, so the bias is included
+
+        QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(a_qinfo, b_qinfo, m, n, k, data_type, bias_fraction);
+        output_qinfo   = q_hint.q_info;
+        finfo.min_bias = q_hint.bias_min;
+        finfo.max_bias = q_hint.bias_max;
+
+        // Both target and reference implementations use negated offsets, i.e.
+        //      float_val = (int_val + offset) * scale
+        // instead of the usual
+        //      float_val = (int_val - offset) * scale
+        // Therefore, after calculating the output quantization above, we negate
+        // the inputs' offsets.
+        a_qinfo = QuantizationInfo(scale_lhs, -offset_lhs);
+        b_qinfo = QuantizationInfo(scale_rhs, -offset_rhs);
+    }
+
+    /** Initialize output stage info from quantization info */
+    static Status init_gemmlowp_output_stage_info(
+        DataType                 data_type,
+        const QuantizationInfo&  a_qinfo,
+        const QuantizationInfo&  b_qinfo,
+        const QuantizationInfo&  output_qinfo,
+        GEMMLowpOutputStageType  type,
+        GEMMLowpOutputStageInfo &gemmlowp_output_stage_info)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON(!is_data_type_quantized_asymmetric(data_type));
+
+        const UniformQuantizationInfo aq_unif = a_qinfo.uniform();
+        const UniformQuantizationInfo bq_unif = b_qinfo.uniform();
+        const UniformQuantizationInfo oq_unif = output_qinfo.uniform();
+
+        float   multiplier = (aq_unif.scale * bq_unif.scale) / oq_unif.scale;
+        int32_t int_multiplier;
+        int32_t shift;
+
+        ARM_COMPUTE_RETURN_ON_ERROR(
+            quantization::calculate_quantized_multiplier(multiplier, &int_multiplier, &shift));
+
+        int32_t type_min = 0;
+        int32_t type_max = 0;
+        std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(output_qinfo, ActivationLayerInfo(), data_type);
+
+        gemmlowp_output_stage_info.gemmlowp_real_multiplier = multiplier;
+        gemmlowp_output_stage_info.gemmlowp_multiplier      = int_multiplier;
+        gemmlowp_output_stage_info.gemmlowp_multipliers     = { int_multiplier };
+        gemmlowp_output_stage_info.gemmlowp_shift           = shift;
+        gemmlowp_output_stage_info.gemmlowp_shifts          = { shift };
+        gemmlowp_output_stage_info.gemmlowp_offset          = oq_unif.offset;
+        gemmlowp_output_stage_info.type                     = type;
+        gemmlowp_output_stage_info.gemmlowp_min_bound       = type_min;
+        gemmlowp_output_stage_info.gemmlowp_max_bound       = type_max;
+
+        return Status{};
+    }
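Note: the real multiplier computed above, (a_scale * b_scale) / out_scale, is decomposed by quantization::calculate_quantized_multiplier() into a Q0.31 integer multiplier plus a shift, so the requantization can run in integer arithmetic. A rough sketch of that decomposition under simplified rounding; the library handles saturation and corner cases more carefully:

    #include <cmath>
    #include <cstdint>

    void decompose_multiplier_sketch(float multiplier, int32_t &quantized_multiplier, int32_t &shift)
    {
        // Normalize the multiplier into [0.5, 1.0) and record the exponent
        int exponent = 0;
        const float significand = std::frexp(multiplier, &exponent);
        shift = -exponent; // right shift applied after the integer multiply (negative means left shift)

        // Scale the significand to Q0.31
        int64_t q = static_cast<int64_t>(std::llround(significand * (1ll << 31)));
        if(q == (1ll << 31)) // handle rounding up to exactly 1.0
        {
            q /= 2;
            --shift;
        }
        quantized_multiplier = static_cast<int32_t>(q);
    }

With this decomposition, multiplier ~= quantized_multiplier * 2^(-31 - shift), which is what the gemmlowp_multiplier / gemmlowp_shift pair stored above encodes.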
+
+    /** Currently this fixture only tests the following data type configurations:
+     *
+     *  1. a and b are of the same data type
+     *  2. The data type is quantized asymmetric
+     *
+     */
+
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b, bool reshape_b_only_on_first_run)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type, bool reshape_b_only_on_first_run)
     {
-        ARM_COMPUTE_ASSERT(output_stage.type != GEMMLowpOutputStageType::NONE);
-        DataType data_type_a = data_type_b == DataType::QASYMM8_SIGNED ? DataType::QASYMM8_SIGNED : DataType::QASYMM8;
+        ARM_COMPUTE_ASSERT(output_stage_type != GEMMLowpOutputStageType::NONE);
+        ARM_COMPUTE_ASSERT(is_data_type_quantized_asymmetric(data_type));
 
-        if(data_type_b == DataType::QSYMM8_PER_CHANNEL)
-        {
-            output_stage.is_quantized_per_channel = true;
-            const size_t num_channels = shape_b[0];
-            std::vector<float> scales(num_channels);
-            std::uniform_real_distribution<float> distribution(0.f, 1.f);
-            library->fill(scales, distribution, 0);
-            output_stage.gemmlowp_multipliers.resize(num_channels);
-            output_stage.gemmlowp_shifts.resize(num_channels);
-            for(size_t i = 0; i < num_channels; ++i)
-            {
-                quantization::calculate_quantized_multiplier(scales[i], &output_stage.gemmlowp_multipliers[i], &output_stage.gemmlowp_shifts[i]);
-            }
+        // Randomized dynamic quantization: randomize quantization info in a way that ensures no result saturation
+        // most of the time
+        QuantizationInfo a_qinfo;
+        QuantizationInfo b_qinfo;
+        QuantizationInfo output_qinfo;
+        TensorFillInfo   finfo;
+        setup_quantization(data_type, shape_a, shape_b, a_qinfo, b_qinfo, output_qinfo, finfo);
 
-            _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales));
-            _target    = compute_target(shape_a, shape_b, shape_output, a_offset, 0, output_stage, data_type_a, data_type_b, QuantizationInfo(scales), reshape_b_only_on_first_run);
-        }
-        else
-        {
-            _reference = compute_reference(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo());
-            _target    = compute_target(shape_a, shape_b, shape_output, a_offset, b_offset, output_stage, data_type_a, data_type_b, QuantizationInfo(), reshape_b_only_on_first_run);
-        }
+        GEMMLowpOutputStageInfo output_stage;
+        init_gemmlowp_output_stage_info(data_type, a_qinfo, b_qinfo, output_qinfo, output_stage_type, output_stage);
+
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type, data_type, output_stage, finfo);
+        _target    = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, data_type, data_type, output_stage, reshape_b_only_on_first_run, finfo);
     }
 
 protected:
-    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage,
-                              DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo, bool reshape_b_only_on_first_run = false)
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const QuantizationInfo& output_qinfo,
+                              DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, bool reshape_b_only_on_first_run = false, const TensorFillInfo& finfo = TensorFillInfo())
     {
-        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset,
-                                       b_offset,
-                                       output_stage, data_type_a, data_type_b, b_qinfo, reshape_b_only_on_first_run);
+        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_qinfo,
                                        b_qinfo, output_qinfo,
+                                       data_type_a, data_type_b, output_stage, reshape_b_only_on_first_run, finfo);
     }
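Note: with the output stage info built by init_gemmlowp_output_stage_info() above, the fused QUANTIZE_DOWN_FIXEDPOINT stage requantizes each s32 accumulator (plus bias) down to the 8-bit output domain. A simplified sketch of that arithmetic using those fields; rounding behaviour is simplified relative to the library kernels:

    #include <algorithm>
    #include <cstdint>

    uint8_t quantize_down_fixedpoint_sketch(int32_t acc, int32_t bias,
                                            int32_t gemmlowp_multiplier, int32_t gemmlowp_shift,
                                            int32_t gemmlowp_offset, int32_t min_bound, int32_t max_bound)
    {
        int64_t v = static_cast<int64_t>(acc) + bias;
        // Fixed-point multiply by multiplier * 2^-31, then the extra right shift
        v = (v * gemmlowp_multiplier) >> (31 + gemmlowp_shift);
        v += gemmlowp_offset;
        v = std::max<int64_t>(min_bound, std::min<int64_t>(max_bound, v)); // clamp to [min_bound, max_bound]
        return static_cast<uint8_t>(v);
    }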
 
-    SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset,
-                                       GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo)
+    SimpleTensor<TI> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo,
+                                       DataType data_type_a, DataType data_type_b, const GEMMLowpOutputStageInfo& output_stage, const TensorFillInfo& finfo = TensorFillInfo())
     {
-        SimpleTensor<int32_t> output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b,
-                                                                  b_qinfo);
+        SimpleTensor<int32_t> output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, data_type_a, data_type_b, finfo);
 
         TensorShape           bias_shape(shape_b[0]);
         SimpleTensor<int32_t> bias{ bias_shape, DataType::S32, 1 };
-        (run_twice) ? fill(bias, 5) : fill(bias, 2); // Fill bias with same seed as last run of gemmlowp_target
+        (run_twice) ? fill_bias_s32(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill_bias_s32(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with the same seed as the last run of gemmlowp_target
 
         switch(output_stage.type)
         {
@@ -330,10 +389,10 @@ class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture
 {
 public:
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_b)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, GEMMLowpOutputStageType output_stage_type, DataType data_type)
     {
         GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture::setup(shape_a, shape_b,
-            shape_output, a_offset, b_offset, output_stage, data_type_b, false);
+            shape_output, output_stage_type, data_type, false /* reshape_b_only_on_first_run */);
     }
 };
 
@@ -2076,4 +2135,4 @@ protected:
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ARM_COMPUTE_TEST_GEMMLOWP_FIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_GEMMLOWPFIXTURE_H
-- 
cgit v1.2.1