From 532ce2c84dd24cb0c5064a3d2e5c7b4094df0e01 Mon Sep 17 00:00:00 2001
From: Gunes Bayir <gunes.bayir@arm.com>
Date: Thu, 14 Sep 2023 09:13:49 +0100
Subject: Separate the output quantization calculation logic from matmul

This patch generalizes the suggested output quantization calculation to any operation that employs a dot product between two vectors, i.e.
      c = sum_k(a_k * b_k) + d

It also considers and suggests min/max boundaries for random S32 bias generation, depending on the accumulation result.

MatMulKernelFixture is modified to use this interface.

Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: Ibb528261bb0310015967e11bd7ccd9ed9cff8479
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10312
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
---
 tests/validation/Helpers.h | 48 ++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 44 insertions(+), 4 deletions(-)

(limited to 'tests/validation/Helpers.h')
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h
index c9d990d3a2..7d53c1de37 100644
--- a/tests/validation/Helpers.h
+++ b/tests/validation/Helpers.h
@@ -31,7 +31,8 @@
 #include "tests/Globals.h"
 #include "tests/SimpleTensor.h"
 
-#include <math.h>
+#include <cmath>
+#include <cstdint>
 #include <random>
 #include <type_traits>
 #include <utility>
@@ -52,6 +53,19 @@ struct is_floating_point<half> : public std::true_type
 {
 };
 
+/** Helper struct to store the hints for
+ *  - destination quantization info
+ *  - minimum bias value
+ *  - maximum bias value
+ * in quantized test construction.
+ */
+struct QuantizationHint
+{
+    QuantizationInfo q_info;   /**< Hint for the destination quantization info */
+    int32_t          bias_min; /**< Hint for the minimum bias value */
+    int32_t          bias_max; /**< Hint for the maximum bias value */
+};
+
 /** Helper function to get the testing range for each activation layer.
  *
  * @param[in] activation Activation function to test.
@@ -226,10 +240,36 @@ std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo
  */
 void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout = DataLayout::NHWC, bool only_right_pad = false);
 
-/** For MatMulLowp, given the Lhs/Rhs matrix quantization informations and the matrix multiplication dimensions,
- *  calculate a suitable output quantization for obtaining non-saturated outputs with high probability.
+/** For a matrix multiplication, given the Lhs/Rhs matrix quantization information and the matrix multiplication dimensions,
+ *  calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability.
+ *
+ * @param[in] lhs_q_info    Lhs matrix quantization info
+ * @param[in] rhs_q_info    Rhs matrix quantization info
+ * @param[in] m             Number of rows of Lhs matrix
+ * @param[in] n             Number of columns of Rhs matrix
+ * @param[in] k             Number of columns of Lhs matrix, which equals the number of rows of Rhs matrix
+ * @param[in] data_type     Data type, only QASYMM8, QASYMM8_SIGNED are supported
+ * @param[in] bias_fraction The fraction of bias amplitude compared to integer accumulation. 0 if there is no bias.
+ *
+ * @return QuantizationHint object containing the suggested output quantization info and min/max bias range
+ */
+QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info,
+                                                    const QuantizationInfo &rhs_q_info, int32_t m, int32_t n, int32_t k, DataType data_type,
+                                                    float bias_fraction);
+
+/** For a multiply-accumulate (mac), given the Lhs/Rhs vector quantization information and the dot product dimensions,
+ *  calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability.
+ *
+ * @param[in] lhs_q_info    Lhs vector quantization info
+ * @param[in] rhs_q_info    Rhs vector quantization info
+ * @param[in] k             Number of accumulations taking place in the sum, i.e. c = sum_k(a_k * b_k)
+ * @param[in] data_type     Data type, only QASYMM8, QASYMM8_SIGNED are supported
+ * @param[in] bias_fraction The fraction of bias amplitude compared to integer accumulation.
+ *
+ * @return QuantizationHint object containing the suggested output quantization info and min/max bias range
  */
-QuantizationInfo calculate_mat_mul_dst_q_info(const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, int m, int n, int k, DataType data_type);
+QuantizationHint suggest_mac_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info,
+                                                 const QuantizationInfo &rhs_q_info, int32_t k, DataType data_type, float bias_fraction);
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-- 
cgit v1.2.1