From 532ce2c84dd24cb0c5064a3d2e5c7b4094df0e01 Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Thu, 14 Sep 2023 09:13:49 +0100
Subject: Separate the output quantization calculation logic from matmul

This patch generalizes the suggested output quantization calculation to
any operation that employs a dot product between two vectors, i.e.

    c = sum_k(a_k * b_k) + d

It also considers and suggests min/max boundaries for random S32 bias
generation, depending on the accumulation result.

MatMulKernelFixture is modified to use this interface.

Signed-off-by: Gunes Bayir
Change-Id: Ibb528261bb0310015967e11bd7ccd9ed9cff8479
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10312
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Reviewed-by: SiCong Li
Benchmark: Arm Jenkins
---
 tests/validation/fixtures/MatMulKernelFixture.h | 47 +++++++++++++++----------
 1 file changed, 29 insertions(+), 18 deletions(-)

diff --git a/tests/validation/fixtures/MatMulKernelFixture.h b/tests/validation/fixtures/MatMulKernelFixture.h
index 91ac77d5af..50d194c43a 100644
--- a/tests/validation/fixtures/MatMulKernelFixture.h
+++ b/tests/validation/fixtures/MatMulKernelFixture.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE
-#define ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE
+#ifndef ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H
+#define ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H
 
 #include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Utils.h"
@@ -54,6 +54,12 @@ public:
     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0,
                int N0, int K0, bool export_rhs_to_cl_image, DataType data_type, bool enable_bias)
     {
+        // This hash is used by the random generators. There may be hash collisions, but
+        // this is intentional, as it is a very easy way to make the current random
+        // generation process differ across test configurations, which previously
+        // used the same set of values.
+        _hash = M0 + N0 + K0 + shape_a[0] + shape_a[1] + shape_b[0] + shape_b[1] + enable_bias + export_rhs_to_cl_image;
+
         // Flag to create a bias
         _enable_bias = enable_bias;
 
@@ -67,7 +73,7 @@ public:
             const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
             const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
 
-            std::mt19937                           generator(library->seed());
+            std::mt19937                           generator(library->seed() + _hash);
             std::uniform_real_distribution<float>  distribution_float(-5.0f, 3.0f);
             std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
 
@@ -84,7 +90,12 @@ public:
             const int n = shape_b.x();
             const int k = shape_a.x();
 
-            dst_q_info = calculate_mat_mul_dst_q_info(lhs_q_info, rhs_q_info, m, n, k, data_type);
+            const float bias_fraction = enable_bias ? 0.5f : 0.f;
+
+            QuantizationHint q_hint = suggest_matmul_dst_q_info_and_bias(lhs_q_info, rhs_q_info, m, n, k, data_type, bias_fraction);
+            dst_q_info              = q_hint.q_info;
+            _min_bias               = q_hint.bias_min;
+            _max_bias               = q_hint.bias_max;
         }
 
         if(pretranspose_a)
@@ -142,12 +153,9 @@ protected:
     }
 
     template <typename U>
-    void fill_bias_s32(U &&tensor, int i, const UniformQuantizationInfo &q_info)
+    void fill_bias_s32(U &&tensor, int i, int32_t min, int32_t max)
     {
-        // For quantized cases, fill the S32 bias according to the following to avoid saturation of test cases.
-        // The following code limits size of bias values to within expected range of output quantization.
-        const unsigned int bound = std::abs(q_info.scale * 256); // 256 is size of 8 bit datatype
-        std::uniform_int_distribution<int32_t> distribution(-(bound / 10), (bound / 10));
+        std::uniform_int_distribution<int32_t> distribution(min, max);
         library->fill(tensor, distribution, i);
     }
 
@@ -192,8 +200,8 @@ protected:
         ARM_COMPUTE_ASSERT(!dst.info()->is_resizable());
 
         // Fill tensors
-        fill(CLAccessor(a), 0);
-        fill(CLAccessor(b), 1);
+        fill(CLAccessor(a), _hash + 1);
+        fill(CLAccessor(b), _hash + 2);
 
         // Compute matMul kernel
         ITensorPack tensors_pack({ { ACL_SRC_0, &a },
@@ -207,11 +215,11 @@ protected:
             bias.allocator()->allocate();
             if(is_quantized)
             {
-                fill_bias_s32(CLAccessor(bias), 2, dst_q_info.uniform());
+                fill_bias_s32(CLAccessor(bias), _hash + 3, _min_bias, _max_bias);
             }
             else
             {
-                fill(CLAccessor(bias), 2);
+                fill(CLAccessor(bias), _hash + 3);
             }
             tensors_pack.add_tensor(ACL_SRC_2, &bias);
         }
@@ -236,8 +244,8 @@ protected:
         SimpleTensor<T> c{ output_shape_collapsed, data_type, 1, dst_q_info };
 
         // Fill reference
-        fill(a, 0);
-        fill(b, 1);
+        fill(a, _hash + 1);
+        fill(b, _hash + 2);
 
         /* Note: Assuming the usual batch matmul dimensions A = (B x M x K), B = (B x K x N), if pretranspose_A is set to true, then A is assumed to be (B x K x M),
            therefore, A must be pre-transposed before passing it to the fixture. And, we transpose A again in the fixture to make it (B x M x K)
@@ -288,7 +296,7 @@ protected:
         // of bias tensor from shape [dst.dimension(0)] to [dst.tensor_shape()] in target kernel
         if(_enable_bias)
         {
-            fill(c, 2);
+            fill(c, _hash + 3);
             const int n          = c.shape().x();
             const int other_dims = c.shape().collapsed_from(1)[1];
             for(int i = 1; i < other_dims; ++i) // For all data, copy first n elements into remaining batches
@@ -323,7 +331,7 @@ protected:
         if(_enable_bias)
         {
             // Identical to float implementation, fill and copy values of bias first dimension
-            fill_bias_s32(bias, 2, cq);
+            fill_bias_s32(bias, _hash + 3, _min_bias, _max_bias);
             const int          n          = bias.shape().x();
             const int          other_dims = bias.shape().collapsed_from(1)[1];
             const unsigned int dt_size    = sizeof(int32_t);
@@ -348,6 +356,9 @@ protected:
     bool    _enable_bias{ false };
     bool    _device_supports_export_to_cl_image{ true };
     bool    _device_supports_mmul{ true };
+    int32_t _min_bias{ 0 };
+    int32_t _max_bias{ 0 };
+    int32_t _hash{ 0 };
 };
 
 template <typename T, typename KernelType, bool use_mmul = false>
@@ -374,4 +385,4 @@ public:
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-#endif /* ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE */
+#endif // ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE_H
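
As background for the commit message above, the following stand-alone sketch shows one way a destination quantization and an S32 bias range can be suggested for a dot product c = sum_k(a_k * b_k) + d. The struct and function names (UniformQInfoSketch, QuantizationHintSketch, suggest_dot_product_dst_q_info_and_bias) and the unsigned 8-bit assumption are illustrative only; this is not the implementation of the library helper suggest_matmul_dst_q_info_and_bias that the patch calls, which additionally takes m, n and the data type.

// Illustrative only: assumed struct/function names, not the library's implementation.
// Shows how a destination quantization and an S32 bias range can be derived for
//     c = sum_k(a_k * b_k) + d
// assuming unsigned 8-bit (0..255) inputs and outputs.
#include <algorithm>
#include <cstdint>
#include <iterator>

struct UniformQInfoSketch
{
    float   scale;
    int32_t offset;
};

struct QuantizationHintSketch
{
    UniformQInfoSketch dst;
    int32_t            bias_min;
    int32_t            bias_max;
};

inline QuantizationHintSketch suggest_dot_product_dst_q_info_and_bias(
    UniformQInfoSketch lhs, UniformQInfoSketch rhs, int k, float bias_fraction)
{
    const float q_lo = 0.f, q_hi = 255.f;

    // Real-valued extremes of a single dequantized lhs/rhs element: real = scale * (q - offset).
    const float a_min = lhs.scale * (q_lo - lhs.offset), a_max = lhs.scale * (q_hi - lhs.offset);
    const float b_min = rhs.scale * (q_lo - rhs.offset), b_max = rhs.scale * (q_hi - rhs.offset);

    // Worst-case extremes of one product, then of the k-term accumulation.
    const float products[] = { a_min * b_min, a_min * b_max, a_max * b_min, a_max * b_max };
    float acc_min = k * (*std::min_element(std::begin(products), std::end(products)));
    float acc_max = k * (*std::max_element(std::begin(products), std::end(products)));

    // Let the bias contribute at most a fraction of the accumulation range. The S32 bias
    // lives in the lhs_scale * rhs_scale domain, which yields its quantized min/max.
    const float   bias_range = bias_fraction * (acc_max - acc_min);
    const int32_t bias_max   = static_cast<int32_t>(bias_range / (lhs.scale * rhs.scale));
    const int32_t bias_min   = -bias_max;
    acc_min -= bias_range;
    acc_max += bias_range;

    // Map [acc_min, acc_max] onto the 8-bit output range to obtain the suggested
    // destination scale and zero point.
    const float   dst_scale  = (acc_max - acc_min) / (q_hi - q_lo);
    const int32_t dst_offset = static_cast<int32_t>(q_lo - acc_min / dst_scale);

    return { { dst_scale, dst_offset }, bias_min, bias_max };
}

In the fixture, the hint returned by suggest_matmul_dst_q_info_and_bias provides dst_q_info as well as bias_min/bias_max, which are forwarded to fill_bias_s32 so the randomly generated S32 bias stays within a range that the suggested output quantization can represent without saturating.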