diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp | 2 | ||||
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp | 2 | ||||
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp | 9 |
3 files changed, 11 insertions, 2 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
index 7c860a24a1..d1c4e49edb 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_qint8.cpp
@@ -71,7 +71,7 @@ static const GemmImplementation<int8_t, int8_t, Requantize32> gemm_qint8_methods
 #ifdef ARM_COMPUTE_ENABLE_SVE
 #ifdef ARM_COMPUTE_ENABLE_SME2
 {
-    GemmMethod::GEMM_HYBRID,
+    GemmMethod::GEMV_PRETRANSPOSED,
     "sme2_gemv_s8qa_dot_16VL",
     [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && quant_hybrid_asymmetric(qp) && args._Msize == 1 && !args._indirect_input && args._nbatches == 1; },
     nullptr,
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
index 3baf9857da..b85b1c4fcf 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
@@ -67,7 +67,7 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
 #ifdef ARM_COMPUTE_ENABLE_SME2
 // SME kernels
 {
-    GemmMethod::GEMM_HYBRID,
+    GemmMethod::GEMV_PRETRANSPOSED,
     "sme2_gemv_u8qa_dot_16VL",
     [](const GemmArgs &args, const Requantize32 &qp) { return args._ci->has_sme2() && quant_hybrid_asymmetric(qp) && args._Msize == 1 && !args._indirect_input && args._nbatches == 1; },
     nullptr,
diff --git a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
index f70fc98572..92c884ce18 100644
--- a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
@@ -215,6 +215,15 @@ public:
         }
     }
 
+    void set_quantized_bias(const int32_t *bias, size_t bias_multi_stride) override {
+        if (std::is_same<OutputStage, Requantize32>::value) {
+            Requantize32 *qp = reinterpret_cast<Requantize32 *>(&_os);
+
+            qp->bias = bias;
+            qp->bias_multi_stride = bias_multi_stride;
+        }
+    }
+
     void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride, bool transposed) override {
         assert(!transposed);