From 532ce2c84dd24cb0c5064a3d2e5c7b4094df0e01 Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Thu, 14 Sep 2023 09:13:49 +0100 Subject: Separate the output quantization calculation logic from matmul This patch generalizes the suggested output quantization calculation to any operation that employs a dot product between two vectors, i.e. c = sum_k(a_k * b_k) + d It also considers and suggests min/max boundaries for random S32 bias generation, depending on the accumulation result. MatMulKernelFixture is modified to use this interface. Signed-off-by: Gunes Bayir Change-Id: Ibb528261bb0310015967e11bd7ccd9ed9cff8479 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10312 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: SiCong Li Benchmark: Arm Jenkins --- tests/validation/Helpers.h | 48 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 44 insertions(+), 4 deletions(-) (limited to 'tests/validation/Helpers.h') diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index c9d990d3a2..7d53c1de37 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -31,7 +31,8 @@ #include "tests/Globals.h" #include "tests/SimpleTensor.h" -#include +#include +#include #include #include #include @@ -52,6 +53,19 @@ struct is_floating_point : public std::true_type { }; +/** Helper struct to store the hints for + * - destination quantization info + * - minimum bias value + * - maximum bias value + * in quantized test construction. + */ +struct QuantizationHint +{ + QuantizationInfo q_info; + int32_t bias_min; + int32_t bias_max; +}; + /** Helper function to get the testing range for each activation layer. * * @param[in] activation Activation function to test. 
@@ -226,10 +240,36 @@ std::pair get_symm_quantized_per_channel_bounds(const QuantizationInfo */ void add_padding_x(std::initializer_list tensors, const DataLayout &data_layout = DataLayout::NHWC, bool only_right_pad = false); -/** For MatMulLowp, given the Lhs/Rhs matrix quantization informations and the matrix multiplication dimensions, - * calculate a suitable output quantization for obtaining non-saturated outputs with high probability. +/** For a matrix multiplication, given the Lhs/Rhs matrix quantization information and the matrix multiplication dimensions, + * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability. + * + * @param[in] lhs_q_info Lhs matrix quantization info + * @param[in] rhs_q_info Rhs matrix quantization info + * @param[in] m Number of rows of Lhs matrix + * @param[in] n Number of columns of Rhs Matrix + * @param[in] k Number of rows/columns of Rhs/Lhs Matrix + * @param[in] data_type data type, only QASYMM8, QASYMM8_SIGNED are supported + * @param[in] bias_fraction the fraction of bias amplitude compared to integer accumulation. 0 if there is no bias. + * + * @return QuantizationHint object containing the suggested output quantization info and min/max bias range + */ +QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info, + const QuantizationInfo &rhs_q_info, int32_t m, int32_t n, int32_t k, DataType data_type, + float bias_fraction); + +/** For a multiply-accumulate (mac), given the Lhs/Rhs vector quantization information and the dot product dimensions, + * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability. + * + * @param[in] lhs_q_info Lhs matrix quantization info + * @param[in] rhs_q_info Rhs matrix quantization info + * @param[in] k number of accumulations taking place in the sum, i.e. 
c = sum_k(a_k * b_k) + * @param[in] data_type data type, only QASYMM8, QASYMM8_SIGNED are supported + * @param[in] bias_fraction the fraction of bias amplitude compared to integer accumulation. + * + * @return QuantizationHint object containing the suggested output quantization info and min/max bias range */ -QuantizationInfo calculate_mat_mul_dst_q_info(const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, int m, int n, int k, DataType data_type); +QuantizationHint suggest_mac_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info, + const QuantizationInfo &rhs_q_info, int32_t k, DataType data_type, float bias_fraction); } // namespace validation } // namespace test } // namespace arm_compute -- cgit v1.2.1