diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2023-09-14 09:13:49 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2023-09-18 13:53:19 +0000 |
commit | 532ce2c84dd24cb0c5064a3d2e5c7b4094df0e01 (patch) | |
tree | 7bae37ad706ff0c3d62e0702061277b16cb7f141 /tests/validation/Helpers.h | |
parent | a116cd3676796412cd4d9318a6cc1c1eef4c093c (diff) | |
download | ComputeLibrary-532ce2c84dd24cb0c5064a3d2e5c7b4094df0e01.tar.gz |
Separate the output quantization calculation logic from matmul
This patch generalizes the suggested output quantization calculation to any operation that employs a dot product between two vectors, i.e.
c = sum_k(a_k * b_k) + d
It also considers and suggests min/max boundaries for random S32 bias generation, depending on the accumulation result.
MatMulKernelFixture is modified to use this interface.
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Change-Id: Ibb528261bb0310015967e11bd7ccd9ed9cff8479
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10312
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests/validation/Helpers.h')
-rw-r--r-- | tests/validation/Helpers.h | 48 |
1 file changed, 44 insertions, 4 deletions
diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index c9d990d3a2..7d53c1de37 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -31,7 +31,8 @@ #include "tests/Globals.h" #include "tests/SimpleTensor.h" -#include <math.h> +#include <cmath> +#include <cstdint> #include <random> #include <type_traits> #include <utility> @@ -52,6 +53,19 @@ struct is_floating_point<half> : public std::true_type { }; +/** Helper struct to store the hints for + * - destination quantization info + * - minimum bias value + * - maximum bias value + * in quantized test construction. + */ +struct QuantizationHint +{ + QuantizationInfo q_info; + int32_t bias_min; + int32_t bias_max; +}; + /** Helper function to get the testing range for each activation layer. * * @param[in] activation Activation function to test. @@ -226,10 +240,36 @@ std::pair<int, int> get_symm_quantized_per_channel_bounds(const QuantizationInfo */ void add_padding_x(std::initializer_list<ITensor *> tensors, const DataLayout &data_layout = DataLayout::NHWC, bool only_right_pad = false); -/** For MatMulLowp, given the Lhs/Rhs matrix quantization informations and the matrix multiplication dimensions, - * calculate a suitable output quantization for obtaining non-saturated outputs with high probability. +/** For a matrix multiplication, given the Lhs/Rhs matrix quantization informations and the matrix multiplication dimensions, + * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability. 
+ * + * @param[in] lhs_q_info Lhs matrix quantization info + * @param[in] rhs_q_info Rhs matrix quantization info + * @param[in] m Number of rows of Lhs matrix + * @param[in] n Number of columns of Rhs Matrix + * @param[in] k Number of rows/columns of Rhs/Lhs Matrix + * @param[in] data_type data type, only QASYMM8, QASYMM8_SIGNED are supported + * @param[in] bias_fraction the fraction of bias amplitude compared to integer accumulation. 0 if there is no bias. + * + * @return QuantizationHint object containing the suggested output quantization info and min/max bias range + */ +QuantizationHint suggest_matmul_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info, + const QuantizationInfo &rhs_q_info, int32_t m, int32_t n, int32_t k, DataType data_type, + float bias_fraction); + +/** For a multiply-accumulate (mac), given the Lhs/Rhs vector quantization information and the dot product dimensions, + * calculate a suitable output quantization and suggested bias range for obtaining non-saturated outputs with high probability. + * + * @param[in] lhs_q_info Lhs matrix quantization info + * @param[in] rhs_q_info Rhs matrix quantization info + * @param[in] k number of accumulations taking place in the sum, i.e. c_k = sum_k(a_k * b_k) + * @param[in] data_type data type, only QASYMM8, QASYMM8_SIGNED are supported + * @param[in] bias_fraction the fraction of bias amplitude compared to integer accumulation. + * + * @return QuantizationHint object containing the suggested output quantization info and min/max bias range */ -QuantizationInfo calculate_mat_mul_dst_q_info(const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, int m, int n, int k, DataType data_type); +QuantizationHint suggest_mac_dst_q_info_and_bias(const QuantizationInfo &lhs_q_info, + const QuantizationInfo &rhs_q_info, int32_t k, DataType data_type, float bias_fraction); } // namespace validation } // namespace test } // namespace arm_compute |