diff options
author | Aleksandr Nikolaev <aleksandr.nikolaev@arm.com> | 2020-07-07 19:50:21 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-07-24 20:40:03 +0000 |
commit | 6c6619b5ea0688d0e1db4497b4e3bff31ed34677 (patch) | |
tree | 025857fb526fe64cea3d98fd3640a45a6c709b6b | |
parent | 09cad7253cc75f67bf428ceef196cfbec7f8c357 (diff) | |
download | ComputeLibrary-6c6619b5ea0688d0e1db4497b4e3bff31ed34677.tar.gz |
[ONCPUML-120]: Tweak of the launch heuristics for hybrid_u8u32_dot_16x4 kernel
The hybrid kernel turns out to be faster for qasymm8 than quantized_wrapper with interleaved.
Signed-off-by: Aleksandr Nikolaev <aleksandr.nikolaev@arm.com>
Change-Id: I200646aee6cdcabfe125b746c7d87bfa7d06e0fc
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3585
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp | 2 |
1 files changed, 1 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp index 7b08041005..0125f9c5db 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp @@ -72,7 +72,7 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth GemmMethod::GEMM_HYBRID_QUANTIZED, "hybrid_u8u32_dot_16x4", [](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && args._Ksize>=16; }, - [](const GemmArgs &args, const Requantize32 &) { return args._Nsize<=256 && args._Ksize>128; }, + [](const GemmArgs &args, const Requantize32 &) { return ((args._Nsize<=256) && (args._Ksize>128)) || (args._maxthreads >= 8); }, [](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<hybrid_u8u32_dot_16x4, uint8_t, uint8_t>(args, qp); } }, /** QUANTIZE_WRAPPER_2D enables 2D parallelisation hint for IScheduler in NEGEMMAssemblyDispatch */ |