From 9d0c4deb760efc2ca07e5e0b8218995201ad8a1f Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Thu, 13 Apr 2023 18:22:58 +0100
Subject: Add quantized CL MatMul kernels for Lhs NT/T, Rhs NT

Implement OpenCL kernels for batched Matrix Multiplication for the
quantized data types QASYMM8 and QASYMM8_SIGNED.

Quantized MatMul is supported with the following MatMul attributes:
* adj_x = false, adj_y = false
* adj_x = true, adj_y = false

We consider native format kernels only. In other words, no reshaping
of the operand matrices is done.

Resolves: COMPMID-5921, COMPMID-5922

Change-Id: I99e0f68054a2bd635c60ec2641acc2e7ff398473
Signed-off-by: Omar Al Khatib
Signed-off-by: Gunes Bayir
Signed-off-by: Jakub Sujak
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9435
Reviewed-by: SiCong Li
Reviewed-by: Viet-Hoa Do
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 tests/validation/fixtures/MatMulKernelFixture.h | 130 ++++++++++++++++++------
 1 file changed, 101 insertions(+), 29 deletions(-)
(limited to 'tests/validation/fixtures')

diff --git a/tests/validation/fixtures/MatMulKernelFixture.h b/tests/validation/fixtures/MatMulKernelFixture.h
index 10e2a0659a..7d0b1a40a9 100644
--- a/tests/validation/fixtures/MatMulKernelFixture.h
+++ b/tests/validation/fixtures/MatMulKernelFixture.h
@@ -25,11 +25,15 @@
 #define ACL_TESTS_VALIDATION_FIXTURES_MATMULKERNELFIXTURE
 
 #include "arm_compute/core/KernelDescriptors.h"
-#include "src/gpu/cl/kernels/ClMatMulNativeKernel.h"
+#include "arm_compute/core/Utils.h"
+#include "arm_compute/core/utils/quantization/AsymmHelpers.h"
+
 #include "tests/CL/CLAccessor.h"
 #include "tests/CL/Helper.h"
 #include "tests/framework/Fixture.h"
+#include "tests/validation/Helpers.h"
 #include "tests/validation/reference/GEMM.h"
+#include "tests/validation/reference/GEMMLowp.h"
 #include "tests/validation/reference/Permute.h"
 #include "tests/validation/reference/ReshapeLayer.h"
 
@@ -43,14 +47,43 @@ namespace validation
 {
 using namespace arm_compute::opencl::kernels;
 
-template <typename T>
+template <typename T, typename KernelType>
 class MatMulKernelValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, const int M0, const int N0, const int K0, bool export_rhs_to_cl_image, DataType data_type)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type)
     {
         // For brevity, the input shapes are assumed to be not-transposed for both Lhs and Rhs matrices.
+        QuantizationInfo lhs_q_info;
+        QuantizationInfo rhs_q_info;
+        QuantizationInfo dst_q_info;
+
+        if(is_data_type_quantized(data_type))
+        {
+            const int32_t t_max = static_cast<int32_t>(std::numeric_limits<T>::max());
+            const int32_t t_min = static_cast<int32_t>(std::numeric_limits<T>::min());
+
+            std::mt19937                           generator(library->seed());
+            std::uniform_real_distribution<float>  distribution_float(-5.0f, 3.0f);
+            std::uniform_int_distribution<int32_t> distribution_t(t_min, t_max);
+
+            const float scale_lhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+            const float scale_rhs = pow(2, distribution_float(generator)); // [2^-5, 2^3]
+
+            const int32_t offset_lhs = distribution_t(generator);
+            const int32_t offset_rhs = distribution_t(generator);
+
+            lhs_q_info = QuantizationInfo(scale_lhs, offset_lhs);
+            rhs_q_info = QuantizationInfo(scale_rhs, offset_rhs);
+
+            const int m = shape_a.y();
+            const int n = shape_b.x();
+            const int k = shape_a.x();
+
+            dst_q_info = calculate_mat_mul_dst_q_info(lhs_q_info, rhs_q_info, m, n, k, data_type);
+        }
+
         if(pretranspose_a)
         {
             permute(shape_a, PermutationVector(1U, 0U));
@@ -65,8 +98,8 @@ public:
 
         if(!export_rhs_to_cl_image || _device_supports_export_to_cl_image)
         {
-            _target    = compute_target(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type);
-            _reference = compute_reference(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, data_type);
+            _target    = compute_target(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type, lhs_q_info, rhs_q_info, dst_q_info);
+            _reference = compute_reference(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, data_type, lhs_q_info, rhs_q_info, dst_q_info);
         }
     }
 
@@ -93,23 +126,29 @@ protected:
         }
     }
 
+    template <typename U, typename D>
+    void fill_constant(U &&tensor, D value)
+    {
+        library->fill_tensor_value(tensor, value);
+    }
+
     CLTensor compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, const int M0, const int N0, const int K0,
-                            bool export_rhs_to_cl_image, DataType data_type)
+                            bool export_rhs_to_cl_image, DataType data_type, const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, const QuantizationInfo &dst_q_info)
     {
-        // Create tensors
-        CLTensor a   = create_tensor<CLTensor>(shape_a, data_type, 1);
-        CLTensor b   = create_tensor<CLTensor>(shape_b, data_type, 1);
-        CLTensor dst = create_tensor<CLTensor>(output_shape, data_type, 1);
-
-        CLSynthetizeOperator<ClMatMulNativeKernel> matMul{};
-        MatMulKernelInfo                           matmul_info;
-        matmul_info.adj_lhs = pretranspose_a;
-        matmul_info.adj_rhs = pretranspose_b;
-        matmul_info.m0      = M0;
-        matmul_info.n0      = N0;
-        matmul_info.k0      = K0;
+        CLSynthetizeOperator<KernelType> matMul{};
+        MatMulKernelInfo                 matmul_info;
+        matmul_info.adj_lhs                = pretranspose_a;
+        matmul_info.adj_rhs                = pretranspose_b;
+        matmul_info.m0                     = M0;
+        matmul_info.n0                     = N0;
+        matmul_info.k0                     = K0;
         matmul_info.export_rhs_to_cl_image = export_rhs_to_cl_image;
 
+        // Create tensors
+        CLTensor a   = create_tensor<CLTensor>(shape_a, data_type, 1, lhs_q_info);
+        CLTensor b   = create_tensor<CLTensor>(shape_b, data_type, 1, rhs_q_info);
+        CLTensor dst = create_tensor<CLTensor>(output_shape, data_type, 1, dst_q_info);
+
         matMul.configure(a.info(), b.info(), dst.info(), matmul_info);
         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
         ARM_COMPUTE_ASSERT(b.info()->is_resizable());
@@ -138,18 +177,19 @@
         return dst;
     }
 
-    SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type)
+    SimpleTensor<T> compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool pretranspose_a, bool pretranspose_b, DataType data_type,
+                                      const QuantizationInfo &lhs_q_info, const QuantizationInfo &rhs_q_info, const QuantizationInfo &dst_q_info)
     {
         // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D
         // This is necessary unless we choose to extend gemm reference for 5D+ tensors
-        TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimW);
-        TensorShape shape_a_collapsed      = shape_a.collapsed_from(Window::DimW);
-        TensorShape shape_b_collapsed      = shape_b.collapsed_from(Window::DimW);
+        TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ);
+        TensorShape shape_a_collapsed      = shape_a.collapsed_from(Window::DimZ);
+        TensorShape shape_b_collapsed      = shape_b.collapsed_from(Window::DimZ);
 
         // Create reference
-        SimpleTensor<T> a{ shape_a_collapsed, data_type, 1 };
-        SimpleTensor<T> b{ shape_b_collapsed, data_type, 1 };
-        SimpleTensor<T> c{ output_shape_collapsed, data_type, 1 };
+        SimpleTensor<T> a{ shape_a_collapsed, data_type, 1, lhs_q_info };
+        SimpleTensor<T> b{ shape_b_collapsed, data_type, 1, rhs_q_info };
+        SimpleTensor<T> c{ output_shape_collapsed, data_type, 1, dst_q_info };
 
         // Fill reference
         fill(a, 0);
@@ -185,10 +225,8 @@
             b_transposed = reference::permute(b, PermutationVector(1U, 0U));
         }
 
-        // Setting beta to 0 will effectively disable C for the
-        // computation of the reference: alpha * A * B + 0 * C
         // Use transposed tensors if boolean enabled else use original tensors
-        SimpleTensor<T> result = reference::gemm((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c, 1.0f, 0.f);
+        SimpleTensor<T> result = gemm_reference((pretranspose_a) ? a_transposed : a, (pretranspose_b) ? b_transposed : b, c);
 
         // We reshape the gemm output back if the tensor is high dimensional
         if(output_shape_collapsed != output_shape)
@@ -199,9 +237,43 @@
         return result;
     }
 
+    template <typename U = T>
+    typename std::enable_if < std::is_same<U, float>::value || std::is_same<U, half>::value, SimpleTensor<U>>::type gemm_reference(SimpleTensor<U> &a, SimpleTensor<U> &b, SimpleTensor<U> &c)
+    {
+        // Setting beta to 0 will effectively disable C for the
+        // computation of the reference: alpha * A * B + 0 * C
+        return reference::gemm(a, b, c, 1.0f, 0.f);
+    }
+
+    template <typename U = T>
+    typename std::enable_if < std::is_same<U, int8_t>::value || std::is_same<U, uint8_t>::value, SimpleTensor<U>>::type gemm_reference(SimpleTensor<U> &a, SimpleTensor<U> &b, SimpleTensor<U> &c)
+    {
+        const UniformQuantizationInfo aq = a.quantization_info().uniform();
+        const UniformQuantizationInfo bq = b.quantization_info().uniform();
+        const UniformQuantizationInfo cq = c.quantization_info().uniform();
+
+        const SimpleTensor<int32_t> result = reference::gemmlowp_matrix_multiply_core<int32_t>(a, b, c.shape(), -aq.offset, -bq.offset);
+
+        std::vector<int32_t> gemmlowp_multipliers{ 1 };
+        std::vector<int32_t> gemmlowp_shifts{ 1 };
+        const int            gemmlowp_offset = cq.offset;
+        const float          scale           = aq.scale * bq.scale / cq.scale;
+
+        quantization::calculate_quantized_multiplier(scale, &gemmlowp_multipliers[0], &gemmlowp_shifts[0]);
+        constexpr int32_t gemmlowp_min_bound = std::numeric_limits<int32_t>::min();
+        constexpr int32_t gemmlowp_max_bound = std::numeric_limits<int32_t>::max();
+
+        SimpleTensor<int32_t> bias{ c.shape(), DataType::S32 };
+        fill_constant(bias, static_cast<int32_t>(0));
+
+        const SimpleTensor<U> final_result = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, U>(result, bias,
+                                                                                                               gemmlowp_multipliers, gemmlowp_shifts, gemmlowp_offset, gemmlowp_min_bound, gemmlowp_max_bound);
+        return final_result;
+    }
+
     CLTensor        _target{};
     SimpleTensor<T> _reference{};
-    bool            _device_supports_export_to_cl_image { true };
+    bool            _device_supports_export_to_cl_image{ true };
 };
 
 } // namespace validation
--
cgit v1.2.1
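
Note (appendix, not part of the patch): the quantized gemm_reference() path above folds the lhs, rhs and
dst quantization parameters into a single requantization step: the int32 GEMMLowp accumulators are scaled
by scale_lhs * scale_rhs / scale_dst using a fixed-point multiplier and shift obtained from
quantization::calculate_quantized_multiplier(), after which the destination offset is added and the result
is saturated to the 8-bit range. The self-contained sketch below only illustrates that arithmetic; the
helper names quantize_multiplier() and requantize() are illustrative, not ACL APIs, the combined scale is
assumed to be below 1, and a truncating shift stands in for the library's rounding behaviour.

// requantize_sketch.cpp - standalone illustration of fixed-point requantization
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <iostream>

// Decompose a real scale in (0, 1) into a Q0.31 multiplier and a right shift so that
// x * scale ~= (x * multiplier) >> (31 + shift).
void quantize_multiplier(float scale, int32_t &multiplier, int32_t &shift)
{
    shift = 0;
    while(scale < 0.5f)
    {
        scale *= 2.0f;
        ++shift;
    }
    multiplier = static_cast<int32_t>(std::lround(static_cast<double>(scale) * (1ll << 31)));
}

// Requantize one int32 accumulator (lhs/rhs offsets already removed) to QASYMM8_SIGNED.
int32_t requantize(int32_t acc, float scale_lhs, float scale_rhs, float scale_dst, int32_t offset_dst)
{
    int32_t multiplier = 0;
    int32_t shift      = 0;
    quantize_multiplier(scale_lhs * scale_rhs / scale_dst, multiplier, shift);
    const int64_t scaled = (static_cast<int64_t>(acc) * multiplier) >> (31 + shift);
    return std::clamp(static_cast<int32_t>(scaled) + offset_dst, -128, 127);
}

int main()
{
    // acc = 1000, combined scale 0.25 * 0.5 / 2.0 = 0.0625, dst offset 10 -> ~1000 * 0.0625 + 10 = 72
    std::cout << requantize(1000, 0.25f, 0.5f, 2.0f, 10) << std::endl;
    return 0;
}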