From aed63ee175e0d64c934389e9d1b2edd0cb1a5cdd Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Mon, 26 Jul 2021 13:18:50 +0100 Subject: Add support for non-constant weights and biases in CpuFullyConnected Changing the approach for specifying that weights and biases tensors are non-constant by making it a member of TensorInfo rather than an option of the functions. Resolves: COMPMID-4222 Change-Id: I96e6f3868f51785c9700a3ef6a1fe7b05747862c Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6162 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas --- src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp | 6 +++++- src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp | 6 +++++- src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp | 6 +++++- src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp | 6 +++++- src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp | 8 ++++++-- src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp | 9 ++++++--- 6 files changed, 32 insertions(+), 9 deletions(-) (limited to 'src/core/NEON/kernels') diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp index 5cbdf20798..20c8230148 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp @@ -523,7 +523,7 @@ public: return size; } - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { if (std::is_same::value) { _col_bias = reinterpret_cast(in_buffer); @@ -534,6 +534,10 @@ public: compute_col_sums(*qp_ptr, _args._Nsize, _args._Ksize * _args._Ksections, B + (i * B_multi_stride), ldb, _col_bias + (i * _args._Nsize), _args._Ksize * _args._Ksections, i, 0); } } + } + + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + requantize_bias(in_buffer, B, ldb, B_multi_stride); // Put the transposed data after the column sums - in non-transposing cases get_col_sum_size() == 0 uintptr_t buffer_int = reinterpret_cast(in_buffer); diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp index c72dca2e96..efb5bd1bb4 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp @@ -269,12 +269,16 @@ public: return get_col_sum_size() + (roundup(_Nsize, strategy::out_width()) * roundup(_Ksize, strategy::k_unroll()) * _nmulti * sizeof(Toi)); } - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { col_bias = reinterpret_cast(in_buffer); for (unsigned int i=0; i<_nmulti; i++) { compute_col_sums(_qp, _Nsize, _Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize, i, 0); } + } + + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + requantize_bias(in_buffer, B, ldb, B_multi_stride); uintptr_t buffer_int = reinterpret_cast(in_buffer); Toi *buffer = reinterpret_cast(buffer_int + get_col_sum_size()); diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp index 7376b5ffe3..e84b58dd0f 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp @@ -219,12 +219,16 @@ public: return get_col_sum_size() + (roundup(_Nsize, strategy::out_width()) * roundup(_Ksize, strategy::k_unroll()) * _nmulti * sizeof(Toi)); } - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { col_bias = reinterpret_cast(in_buffer); for (unsigned int i=0; i<_nmulti; i++) { compute_col_sums(_qp, _Nsize, _Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize, i, 0); } + } + + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + requantize_bias(in_buffer, B, ldb, B_multi_stride); uintptr_t buffer_int = reinterpret_cast(in_buffer); Toi *buffer = reinterpret_cast(buffer_int + get_col_sum_size()); diff --git a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp index 5639cb4182..c75c320a6b 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp @@ -923,7 +923,7 @@ public: return (x_size * _Ktotal * _nmulti * sizeof(Toi)) + get_col_sum_size(); } - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { if (std::is_same::value) { col_bias = reinterpret_cast(in_buffer); @@ -934,6 +934,10 @@ public: compute_col_sums(*qp_ptr, _Nsize, _Ksize * _Ksections, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize * _Ksections, i, 0); } } + } + + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + requantize_bias(in_buffer, B, ldb, B_multi_stride); // Put the transposed data after the column sums - in non-transposing cases get_col_sum_size() == 0 uintptr_t buffer_int = reinterpret_cast(in_buffer); diff --git a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp index d4348beabf..f0b4e5db9e 100644 --- a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp @@ -201,11 +201,11 @@ public: return _buffer_per_multi * _args._nmulti * sizeof(To) + get_col_sum_size(); } - void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override { + void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { // Column sums go on the front of the pretransposed buffer in requantized cases. // We could optimize here in case we don't actually need to sum the columns, but this code is only run on setup. if (std::is_same::value) { - col_bias = reinterpret_cast(buffer); + col_bias = reinterpret_cast(in_buffer); Requantize32 *qp_ptr = reinterpret_cast(&_os); @@ -213,6 +213,10 @@ public: compute_col_sums(*qp_ptr, _args._Nsize, _args._Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _args._Nsize), _args._Ksize, i, 0); } } + } + + void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override { + requantize_bias(buffer, B, ldb, B_multi_stride); // The actual transposed buffer goes after the column sums (if any) uintptr_t buffer_int = reinterpret_cast(buffer); diff --git a/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp b/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp index 1e2a9acc1d..ce727032e6 100644 --- a/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp +++ b/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp @@ -179,13 +179,16 @@ public: return _subgemm->get_B_pretransposed_array_size() + col_sum_size(); } + void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + _col_sums = reinterpret_cast(in_buffer); + col_sums_pretransposed(B, ldb, B_multi_stride); + } + void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override { uintptr_t buffer_int = reinterpret_cast(buffer); _subgemm->pretranspose_B_array(reinterpret_cast(buffer_int + col_sum_size()), B, ldb, B_multi_stride); - _col_sums = reinterpret_cast(buffer); - - col_sums_pretransposed(B, ldb, B_multi_stride); + requantize_bias(buffer, B, ldb, B_multi_stride); } void set_pretransposed_B_data(void *buffer) override { -- cgit v1.2.1