From 9c7c2d2d23693877867bb3284c577b33cfbff471 Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Tue, 11 Apr 2023 17:16:27 +0100
Subject: Add quantized support for CPU MatMul

Resolves: COMPMID-5899
Signed-off-by: Viet-Hoa Do
Change-Id: I89d96e292c3492ba9b1900a3e5683f9dcd11dfc6
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9440
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 tests/validation/fixtures/MatMulFixture.h | 102 ++++++++++++++++++++++++------
 1 file changed, 84 insertions(+), 18 deletions(-)

(limited to 'tests/validation/fixtures')

diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h
index bb4a1cd7be..f8f038af3f 100644
--- a/tests/validation/fixtures/MatMulFixture.h
+++ b/tests/validation/fixtures/MatMulFixture.h
@@ -25,12 +25,17 @@
 #define TESTS_VALIDATION_FIXTURES_MATMULFIXTURE
 
 #include "arm_compute/core/Types.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
 #include "tests/framework/Fixture.h"
 #include "tests/validation/reference/ActivationLayer.h"
 #include "tests/validation/reference/GEMM.h"
+#include "tests/validation/reference/GEMMLowp.h"
 #include "tests/validation/reference/Permute.h"
 #include "tests/validation/reference/ReshapeLayer.h"
+#include <limits>
 #include <random>
+#include <type_traits>
 
 namespace arm_compute
 {
@@ -44,7 +49,7 @@ class MatMulGenericValidationFixture : public framework::Fixture
 public:
     template <typename...>
     void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool transpose_a, bool transpose_b, DataType data_type, ActivationLayerInfo act_info, int num_extra_runs,
-               Settings settings)
+               Settings settings, QuantizationInfo a_qinfo = QuantizationInfo(), QuantizationInfo b_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo())
     {
         // For brevity, the input shapes are assumed to be not-transposed for both a and b matrices.
         if(transpose_a)
@@ -56,8 +61,8 @@ public:
             permute(shape_b, PermutationVector(1U, 0U));
         }
 
-        _target    = compute_target(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, settings);
-        _reference = compute_reference(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info);
+        _target    = compute_target(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, settings, a_qinfo, b_qinfo, o_qinfo);
+        _reference = compute_reference(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, a_qinfo, b_qinfo, o_qinfo);
     }
 
 protected:
@@ -78,23 +83,29 @@ protected:
                 library->fill(tensor, distribution, i);
                 break;
             }
-            default:
+            case DataType::QASYMM8:
+            case DataType::QASYMM8_SIGNED:
             {
                 library->fill_tensor_uniform(tensor, i);
+                break;
+            }
+            default:
+            {
+                ARM_COMPUTE_ERROR("Unsupported data type.");
             }
         }
     }
 
     TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool transpose_a, bool transpose_b, DataType data_type,
-                              ActivationLayerInfo act_info, int num_extra_runs, const Settings &settings)
+                              ActivationLayerInfo act_info, int num_extra_runs, const Settings &settings, QuantizationInfo a_qinfo, QuantizationInfo b_qinfo, QuantizationInfo o_qinfo)
     {
         // 1. Create Classes and configure function
         // ----------------------------------------------------
         // Create tensors
         // Configure relevant classes and matmul function
-        TensorType a   = create_tensor<TensorType>(shape_a, data_type, 1);
-        TensorType b   = create_tensor<TensorType>(shape_b, data_type, 1);
-        TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1);
+        TensorType a   = create_tensor<TensorType>(shape_a, data_type, 1, a_qinfo);
+        TensorType b   = create_tensor<TensorType>(shape_b, data_type, 1, b_qinfo);
+        TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, o_qinfo);
 
         FunctionType matmul;
 
@@ -149,18 +160,61 @@ protected:
         return dst;
     }
 
+    template <typename U = T>
+    typename std::enable_if<!std::is_integral<U>::value, SimpleTensor<U>>::type
+    compute_reference_gemm(const SimpleTensor<U> &a, const SimpleTensor<U> &b, const SimpleTensor<U> &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo)
+    {
+        ARM_COMPUTE_UNUSED(act_info, o_qinfo);
+
+        return reference::gemm(a, b, c, alpha, beta);
+    }
+
+    template <typename U = T>
+    typename std::enable_if<std::is_integral<U>::value, SimpleTensor<U>>::type
+    compute_reference_gemm(const SimpleTensor<U> &a, const SimpleTensor<U> &b, const SimpleTensor<U> &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo)
+    {
+        ARM_COMPUTE_UNUSED(alpha, beta);
+
+        const auto aq = a.quantization_info().uniform();
+        const auto bq = b.quantization_info().uniform();
+        const auto oq = o_qinfo.uniform();
+
+        const auto multiplier = aq.scale * bq.scale / oq.scale;
+
+        int32_t output_multiplier = 0;
+        int32_t output_shift      = 0;
+        quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift);
+        std::vector<int32_t> output_multipliers{ output_multiplier };
+        std::vector<int32_t> output_shifts{ output_shift };
+
+        PixelValue output_min{};
+        PixelValue output_max{};
+        std::tie(output_min, output_max) = quantization::get_quantized_asymmetric_output_min_max(
+                                               o_qinfo, act_info, a.data_type());
+
+        const auto tmp = reference::gemmlowp_matrix_multiply_core<int32_t>(
+                             a, b, c.shape(), aq.offset, bq.offset);
+
+        auto output = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, U>(
+                          tmp, output_multipliers, output_shifts, oq.offset,
+                          output_min.get<int32_t>(), output_max.get<int32_t>());
+        output.quantization_info(o_qinfo);
+
+        return output;
+    }
+
     SimpleTensor<T> compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &output_shape, bool transpose_a, bool transpose_b, DataType data_type,
-                                      ActivationLayerInfo act_info)
+                                      ActivationLayerInfo act_info, QuantizationInfo a_qinfo, QuantizationInfo b_qinfo, QuantizationInfo o_qinfo)
     {
-        // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D
-        // This is necessary unless we choose to extend gemm reference for 5D+ tensors
-        TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimW);
-        TensorShape a_shape_collapsed      = a_shape.collapsed_from(Window::DimW);
-        TensorShape b_shape_collapsed      = b_shape.collapsed_from(Window::DimW);
+        // We collapse dimensions > 2 onto dimension 2, i.e. 4D+ tensors will look like 3D
+        // This is necessary unless we choose to extend gemm reference for 4D+ tensors
+        TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ);
+        TensorShape a_shape_collapsed      = a_shape.collapsed_from(Window::DimZ);
+        TensorShape b_shape_collapsed      = b_shape.collapsed_from(Window::DimZ);
 
         // Create reference
-        SimpleTensor<T> a{ a_shape_collapsed, data_type, 1 };
-        SimpleTensor<T> b{ b_shape_collapsed, data_type, 1 };
+        SimpleTensor<T> a{ a_shape_collapsed, data_type, 1, a_qinfo };
+        SimpleTensor<T> b{ b_shape_collapsed, data_type, 1, b_qinfo };
         SimpleTensor<T> c{ output_shape_collapsed, data_type, 1 };
 
         // Fill reference
@@ -199,8 +253,9 @@ protected:
         // Setting beta to 0 will effectively disable C for the
         // computation of the reference: alpha * A * B + 0 * C
         // Use transposed tensors if boolean enabled else use original tensors
-        SimpleTensor<T> result = reference::gemm((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f);
-        result                 = reference::activation_layer(result, act_info, QuantizationInfo());
+        auto result = compute_reference_gemm((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, act_info, o_qinfo);
+
+        result = reference::activation_layer(result, act_info, o_qinfo);
 
         // We reshape the gemm output back if the tensor is high dimensional
         if(output_shape_collapsed != output_shape)
@@ -249,6 +304,17 @@ public:
     }
 };
 
+template <typename TensorType, typename AccessorType, typename FunctionType, typename Settings, typename T>
+class QuantizedMatMulValidationFixture : public MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool transpose_a, bool transpose_b, DataType data_type, ActivationLayerInfo act_info, int num_extra_runs, QuantizationInfo a_qinfo, QuantizationInfo b_qinfo, QuantizationInfo o_qinfo)
+    {
+        MatMulGenericValidationFixture<TensorType, AccessorType, FunctionType, Settings, T>::setup(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings(), a_qinfo, b_qinfo, o_qinfo);
+    }
+};
+
 } // namespace validation
 } // namespace test
 } // namespace arm_compute
-- 
cgit v1.2.1
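
The patch chooses between the floating-point and quantized reference paths with SFINAE: exactly one compute_reference_gemm overload is viable for a given element type, because the two enable_if conditions (!std::is_integral and std::is_integral) are mutually exclusive. The standalone sketch below (hypothetical names, not library code) shows the same dispatch pattern in isolation:

#include <cstdint>
#include <cstdio>
#include <type_traits>

// Non-integral path: chosen when T is a floating-point element type.
template <typename T>
typename std::enable_if<!std::is_integral<T>::value, T>::type
reference_path(T value)
{
    std::puts("float reference path");
    return value;
}

// Integral path: chosen for quantized element types such as int8_t/uint8_t.
template <typename T>
typename std::enable_if<std::is_integral<T>::value, T>::type
reference_path(T value)
{
    std::puts("quantized reference path");
    return value;
}

int main()
{
    reference_path(1.0f);        // Prints "float reference path"
    reference_path(int8_t{ 1 }); // Prints "quantized reference path"
    return 0;
}

The overload form keeps each path a separate function; with C++17 the same selection could be written as a single function using if constexpr.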
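
On the quantized path, the reference folds the effective rescale factor aq.scale * bq.scale / oq.scale into an int32 fixed-point multiplier plus shift via quantization::calculate_quantized_multiplier, then requantizes the int32 accumulators with gemmlowp_quantize_down_scale_by_fixedpoint. The following self-contained sketch illustrates the underlying arithmetic, assuming a frexp-based decomposition as in gemmlowp; it is a simplified stand-in for the library routines, which also handle saturation and negative accumulators:

#include <cmath>
#include <cstdint>
#include <cstdio>

// Decompose a positive real scale so that scale ~= multiplier * 2^(-shift),
// where multiplier is a Q0.31 fixed-point value in [2^30, 2^31).
static void quantize_multiplier(float scale, int32_t *multiplier, int32_t *shift)
{
    int         exponent = 0;
    const float mantissa = std::frexp(scale, &exponent); // scale = mantissa * 2^exponent, mantissa in [0.5, 1)
    int64_t     q_fixed  = std::llround(static_cast<double>(mantissa) * (1LL << 31));
    if(q_fixed == (1LL << 31)) // Rounding may push the mantissa up to exactly 1.0
    {
        q_fixed /= 2;
        ++exponent;
    }
    *multiplier = static_cast<int32_t>(q_fixed);
    *shift      = -exponent; // Positive shift divides, negative shift multiplies
}

int main()
{
    // Example scales: inputs quantized with 0.5 and 0.25, output with 0.05.
    const float effective = 0.5f * 0.25f / 0.05f; // 2.5

    int32_t multiplier = 0;
    int32_t shift      = 0;
    quantize_multiplier(effective, &multiplier, &shift);

    // Requantize one int32 accumulator: apply the left shift first if shift is
    // negative, then a rounded high multiply, then a rounding right shift.
    int64_t acc = 1234;
    if(shift < 0)
    {
        acc <<= -shift;
    }
    int64_t result = (acc * multiplier + (1LL << 30)) >> 31;
    if(shift > 0)
    {
        result = (result + (1LL << (shift - 1))) >> shift;
    }

    std::printf("multiplier=%d shift=%d requantized=%lld\n", multiplier, shift, static_cast<long long>(result));
    return 0;
}

For these example scales the sketch prints multiplier=1342177280 and shift=-2, and requantizes 1234 to 3085, matching 1234 * 2.5 exactly.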