From 6c6619b5ea0688d0e1db4497b4e3bff31ed34677 Mon Sep 17 00:00:00 2001 From: Aleksandr Nikolaev Date: Tue, 7 Jul 2020 19:50:21 +0100 Subject: [ONCPUML-120]: Tweak of the launch heuristics for hybrid_u8u32_dot_16x4 kernel Hybrid kernel turns out to be faster for qasymm8 than quantized_wrapper with interleaved. Signed-off-by: Aleksandr Nikolaev Change-Id: I200646aee6cdcabfe125b746c7d87bfa7d06e0fc Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3585 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp index 7b08041005..0125f9c5db 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp @@ -72,7 +72,7 @@ static const GemmImplementation gemm_quint8_meth GemmMethod::GEMM_HYBRID_QUANTIZED, "hybrid_u8u32_dot_16x4", [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && args._Ksize>=16; }, - [](const GemmArgs &args, const Requantize32 &) { return args._Nsize<=256 && args._Ksize>128; }, + [](const GemmArgs &args, const Requantize32 &) { return ((args._Nsize<=256) && (args._Ksize>128)) || (args._maxthreads >= 8); }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized(args, qp); } }, /** QUANTIZE_WRAPPER_2D enables 2D parallelisation hint for IScheduler in NEGEMMAssemblyDispatch */ -- cgit v1.2.1