diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp | 31 |
1 files changed, 27 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp index 7a5fa87ee6..f12efe4282 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -110,7 +110,13 @@ class GemmHybridQuantized : public GemmCommon<To, Tr> { static unsigned int compute_n_block(const GemmArgs &args) { if (args._cfg && args._cfg->outer_block_size) { - return args._cfg->outer_block_size; + unsigned int n_block = args._cfg->outer_block_size; + + // Needs to be (at least a single) multiple of the kernel output width. + n_block /= strategy::out_width(); + n_block = std::max(n_block, 1u) * strategy::out_width(); + + return n_block; } const unsigned int k_block = compute_k_block(args); @@ -263,12 +269,18 @@ public: return get_col_sum_size() + (roundup(_Nsize, strategy::out_width()) * roundup(_Ksize, strategy::k_unroll()) * _nmulti * sizeof(Toi)); } - void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { + void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override { col_bias = reinterpret_cast<int32_t *>(in_buffer); for (unsigned int i=0; i<_nmulti; i++) { compute_col_sums(_qp, _Nsize, _Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize, i, 0); } + } + + void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride, bool transposed) override { + assert(!transposed); + + requantize_bias(in_buffer, B, ldb, B_multi_stride); uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer); Toi *buffer = reinterpret_cast<Toi *>(buffer_int + get_col_sum_size()); @@ -286,7 +298,7 @@ public: const unsigned int size = roundup(xmax-x0, strategy::out_width()) * k_size; strat.transforms.PrepareB( buffer, B + (multi * B_multi_stride), ldb, - x0, xmax, k0, kmax); + x0, xmax, k0, kmax, false); buffer += size; } @@ -304,6 +316,17 @@ public: _qp.bias = bias; _qp.bias_multi_stride = bias_multi_stride; } + + GemmConfig get_config() override { + GemmConfig c; + + c.method = GemmMethod::GEMM_HYBRID; + c.inner_block_size = _k_block; + c.outer_block_size = _n_block; + c.filter = get_type_name<strategy>(); + + return c; + } }; } // namespace arm_gemm |