diff options
Diffstat (limited to 'src/core/NEON/kernels')
6 files changed, 9 insertions, 32 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp index 20c8230148..5cbdf20798 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp @@ -523,7 +523,7 @@ public: return size; } - void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { if (std::is_same<OutputStage, Requantize32>::value) { _col_bias = reinterpret_cast<int32_t *>(in_buffer); @@ -534,10 +534,6 @@ public: compute_col_sums(*qp_ptr, _args._Nsize, _args._Ksize * _args._Ksections, B + (i * B_multi_stride), ldb, _col_bias + (i * _args._Nsize), _args._Ksize * _args._Ksections, i, 0); } } - } - - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { - requantize_bias(in_buffer, B, ldb, B_multi_stride); // Put the transposed data after the column sums - in non-transposing cases get_col_sum_size() == 0 uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer); diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp index efb5bd1bb4..c72dca2e96 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp @@ -269,16 +269,12 @@ public: return get_col_sum_size() + (roundup(_Nsize, strategy::out_width()) * roundup(_Ksize, strategy::k_unroll()) * _nmulti * sizeof(Toi)); } - void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { col_bias = reinterpret_cast<int32_t *>(in_buffer); for (unsigned int i=0; i<_nmulti; i++) { compute_col_sums(_qp, _Nsize, _Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize, i, 0); } - } - - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { - requantize_bias(in_buffer, B, ldb, B_multi_stride); uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer); Toi *buffer = reinterpret_cast<Toi *>(buffer_int + get_col_sum_size()); diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp index e84b58dd0f..7376b5ffe3 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp @@ -219,16 +219,12 @@ public: return get_col_sum_size() + (roundup(_Nsize, strategy::out_width()) * roundup(_Ksize, strategy::k_unroll()) * _nmulti * sizeof(Toi)); } - void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { col_bias = reinterpret_cast<int32_t *>(in_buffer); for (unsigned int i=0; i<_nmulti; i++) { compute_col_sums(_qp, _Nsize, _Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize, i, 0); } - } - - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { - requantize_bias(in_buffer, B, ldb, B_multi_stride); uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer); Toi *buffer = reinterpret_cast<Toi *>(buffer_int + get_col_sum_size()); diff --git a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp index c75c320a6b..5639cb4182 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp @@ -923,7 +923,7 @@ public: return (x_size * _Ktotal * _nmulti * sizeof(Toi)) + get_col_sum_size(); } - void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { if (std::is_same<OutputStage, Requantize32>::value) { col_bias = reinterpret_cast<int32_t *>(in_buffer); @@ -934,10 +934,6 @@ public: compute_col_sums(*qp_ptr, _Nsize, _Ksize * _Ksections, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize * _Ksections, i, 0); } } - } - - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { - requantize_bias(in_buffer, B, ldb, B_multi_stride); // Put the transposed data after the column sums - in non-transposing cases get_col_sum_size() == 0 uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer); diff --git a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp index f0b4e5db9e..d4348beabf 100644 --- a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp @@ -201,11 +201,11 @@ public: return _buffer_per_multi * _args._nmulti * sizeof(To) + get_col_sum_size(); } - void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override { // Column sums go on the front of the pretransposed buffer in requantized cases. // We could optimize here in case we don't actually need to sum the columns, but this code is only run on setup. if (std::is_same<OutputStage, Requantize32>::value) { - col_bias = reinterpret_cast<int32_t *>(in_buffer); + col_bias = reinterpret_cast<int32_t *>(buffer); Requantize32 *qp_ptr = reinterpret_cast<Requantize32 *>(&_os); @@ -213,10 +213,6 @@ public: compute_col_sums(*qp_ptr, _args._Nsize, _args._Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _args._Nsize), _args._Ksize, i, 0); } } - } - - void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override { - requantize_bias(buffer, B, ldb, B_multi_stride); // The actual transposed buffer goes after the column sums (if any) uintptr_t buffer_int = reinterpret_cast<uintptr_t>(buffer); diff --git a/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp b/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp index ce727032e6..1e2a9acc1d 100644 --- a/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp +++ b/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp @@ -179,16 +179,13 @@ public: return _subgemm->get_B_pretransposed_array_size() + col_sum_size(); } - void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { - _col_sums = reinterpret_cast<int32_t *>(in_buffer); - col_sums_pretransposed(B, ldb, B_multi_stride); - } - void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override { uintptr_t buffer_int = reinterpret_cast<uintptr_t>(buffer); _subgemm->pretranspose_B_array(reinterpret_cast<void *>(buffer_int + col_sum_size()), B, ldb, B_multi_stride); - requantize_bias(buffer, B, ldb, B_multi_stride); + _col_sums = reinterpret_cast<int32_t *>(buffer); + + col_sums_pretransposed(B, ldb, B_multi_stride); } void set_pretransposed_B_data(void *buffer) override { |