From 8e2dedea8550b1c18c3bbeead8c972f661dcfac8 Mon Sep 17 00:00:00 2001
From: Mohammed Suhail Munshi
Date: Tue, 27 Jun 2023 14:25:58 +0100
Subject: Add Bias to MatMul Kernels and add support for use in Fully
 Connected Layer

Resolves: [COMPMID-6316]

Signed-off-by: Mohammed Suhail Munshi
Change-Id: I08e6bac9e6b46b76978da0dc6a48ccfe3dde5086
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9833
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
---
 tests/validation/fixtures/MatMulKernelFixture.h | 103 +++++++++++++++++++++---
 1 file changed, 94 insertions(+), 9 deletions(-)

diff --git a/tests/validation/fixtures/MatMulKernelFixture.h b/tests/validation/fixtures/MatMulKernelFixture.h
index 59bcfe5b2d..88fdf8b291 100644
--- a/tests/validation/fixtures/MatMulKernelFixture.h
+++ b/tests/validation/fixtures/MatMulKernelFixture.h
@@ -36,7 +36,7 @@
 #include "tests/validation/reference/GEMMLowp.h"
 #include "tests/validation/reference/Permute.h"
 #include "tests/validation/reference/ReshapeLayer.h"
-
+#include <cmath>
 #include <random>
 
 namespace arm_compute
@@ -48,12 +48,16 @@ namespace validation
 using namespace arm_compute::opencl::kernels;
 
 template <typename T, typename KernelType, bool use_mmul = false>
-class MatMulKernelValidationFixture : public framework::Fixture
+class MatMulKernelGenericValidationFixture : public framework::Fixture
 {
 public:
     template <typename...>
-    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type)
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type,
+               bool enable_bias)
     {
+        // Flag to create a bias
+        _enable_bias = enable_bias;
+
         // For brevity, the input shapes are assumed to be not-transposed for both Lhs and Rhs matrices.
         QuantizationInfo lhs_q_info;
         QuantizationInfo rhs_q_info;
@@ -138,6 +142,16 @@ protected:
         }
     }
 
+    template <typename U>
+    void fill_bias_s32(U &&tensor, int i, const UniformQuantizationInfo &q_info)
+    {
+        // For quantized cases, fill the S32 bias according to the following to avoid saturation of test cases.
+        // The following code limits size of bias values to within expected range of output quantization.
+        const unsigned int bound = std::abs(q_info.scale * 256); // 256 is size of 8 bit datatype
+        std::uniform_int_distribution<int32_t> distribution(-(bound / 10), (bound / 10));
+        library->fill(tensor, distribution, i);
+    }
+
     template <typename U, typename D>
     void fill_constant(U &&tensor, D value)
     {
@@ -156,12 +170,15 @@
         matmul_info.k0                     = K0;
         matmul_info.export_rhs_to_cl_image = export_rhs_to_cl_image;
 
+        bool is_quantized = is_data_type_quantized(data_type);
+
         // Create tensors
-        CLTensor a   = create_tensor<CLTensor>(shape_a, data_type, 1, lhs_q_info);
-        CLTensor b   = create_tensor<CLTensor>(shape_b, data_type, 1, rhs_q_info);
-        CLTensor dst = create_tensor<CLTensor>(output_shape, data_type, 1, dst_q_info);
+        CLTensor a    = create_tensor<CLTensor>(shape_a, data_type, 1, lhs_q_info);
+        CLTensor b    = create_tensor<CLTensor>(shape_b, data_type, 1, rhs_q_info);
+        CLTensor bias = create_tensor<CLTensor>(output_shape[0], (is_quantized) ? DataType::S32 : data_type, 1, dst_q_info);
+        CLTensor dst  = create_tensor<CLTensor>(output_shape, data_type, 1, dst_q_info);
 
-        matMul.configure(a.info(), b.info(), dst.info(), matmul_info);
+        matMul.configure(a.info(), b.info(), (_enable_bias) ? bias.info() : nullptr, dst.info(), matmul_info);
         ARM_COMPUTE_ASSERT(a.info()->is_resizable());
         ARM_COMPUTE_ASSERT(b.info()->is_resizable());
         ARM_COMPUTE_ASSERT(dst.info()->is_resizable());
@@ -184,6 +201,22 @@
             { ACL_SRC_1, &b },
             { ACL_DST, &dst }
         });
+
+        if(_enable_bias)
+        {
+            // Allocate, fill and add bias to TensorPack obj
+            bias.allocator()->allocate();
+            if(is_quantized)
+            {
+                fill_bias_s32(CLAccessor(bias), 2, dst_q_info.uniform());
+            }
+            else
+            {
+                fill(CLAccessor(bias), 2);
+            }
+            tensors_pack.add_tensor(ACL_SRC_2, &bias);
+        }
+
         matMul.run(tensors_pack);
 
         return dst;
@@ -252,9 +285,21 @@
     template <typename U = T>
     typename std::enable_if < std::is_same<U, float>::value || std::is_same<U, half>::value, SimpleTensor<U>>::type gemm_reference(SimpleTensor<U> &a, SimpleTensor<U> &b, SimpleTensor<U> &c)
     {
+        // Fill bias, then copy first dimension into subsequent dimensions to mimic broadcast
+        // of bias tensor from shape [dst.dimension(0)] to [dst.tensor_shape()] in target kernel
+        if(_enable_bias)
+        {
+            fill(c, 2);
+            const int n          = c.shape().x();
+            const int other_dims = c.shape().collapsed_from(1)[1];
+            for(int i = 1; i < other_dims; ++i) // For all data, copy first n elements into remaining batches
+            {
+                memcpy(c.data() + i * n, c.data(), n * sizeof(T));
+            }
+        }
         // Setting beta to 0 will effectively disable C for the
         // computation of the reference: alpha * A * B + 0 * C
-        return reference::gemm(a, b, c, 1.0f, 0.f);
+        return reference::gemm(a, b, c, 1.0f, (_enable_bias) ? 1.0f : 0.f);
     }
 
     template <typename U = T>
@@ -276,19 +321,59 @@
         constexpr int32_t gemmlowp_max_bound = std::numeric_limits<T>::max();
 
         SimpleTensor<int32_t> bias{ c.shape(), DataType::S32 };
-        fill_constant(bias, static_cast<int32_t>(0));
+        if(_enable_bias)
+        {
+            // Identical to float implementation, fill and copy values of bias first dimension
+            fill_bias_s32(bias, 2, cq);
+            const int          n          = bias.shape().x();
+            const int          other_dims = bias.shape().collapsed_from(1)[1];
+            const unsigned int dt_size    = sizeof(int32_t);
+            for(int i = 1; i < other_dims; ++i)
+            {
+                memcpy(bias.data() + i * n, bias.data(), n * dt_size);
+            }
+        }
+        else
+        {
+            fill_constant(bias, static_cast<int32_t>(0)); // effectively disable bias
+        }
 
         const SimpleTensor<T> final_result = reference::gemmlowp_quantize_down_scale_by_fixedpoint<int32_t, T>(result, bias, gemmlowp_multipliers, gemmlowp_shifts,
                                                                                                                gemmlowp_offset, gemmlowp_min_bound, gemmlowp_max_bound);
+
         return final_result;
     }
 
     CLTensor        _target{};
     SimpleTensor<T> _reference{};
+    bool            _enable_bias{ false };
     bool            _device_supports_export_to_cl_image{ true };
     bool            _device_supports_mmul{ true };
 };
 
+template <typename T, typename KernelType, bool use_mmul = false>
+class MatMulKernelValidationFixture : public MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type)
+    {
+        MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>::setup(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type,
+                                                                             false /* enable bias */);
+    }
+};
+
+template <typename T, typename KernelType, bool use_mmul = false>
+class MatMulKernelWithBiasValidation : public MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>
+{
+public:
+    template <typename...>
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool pretranspose_a, bool pretranspose_b, int M0, int N0, int K0, bool export_rhs_to_cl_image, DataType data_type)
+    {
+        MatMulKernelGenericValidationFixture<T, KernelType, use_mmul>::setup(shape_a, shape_b, output_shape, pretranspose_a, pretranspose_b, M0, N0, K0, export_rhs_to_cl_image, data_type,
+                                                                             true /* enable bias */);
} +}; } // namespace validation } // namespace test } // namespace arm_compute -- cgit v1.2.1