aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Aleksandr Nikolaev <aleksandr.nikolaev@arm.com> 2020-07-07 19:50:21 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2020-07-24 20:40:03 +0000
commit: 6c6619b5ea0688d0e1db4497b4e3bff31ed34677 (patch)
tree: 025857fb526fe64cea3d98fd3640a45a6c709b6b
parent: 09cad7253cc75f67bf428ceef196cfbec7f8c357 (diff)
download: ComputeLibrary-6c6619b5ea0688d0e1db4497b4e3bff31ed34677.tar.gz
[ONCPUML-120]: Tweak of the launch heuristics for hybrid_u8u32_dot_16x4 kernel
The hybrid kernel turns out to be faster for qasymm8 than quantized_wrapper with interleaved.

Signed-off-by: Aleksandr Nikolaev <aleksandr.nikolaev@arm.com>
Change-Id: I200646aee6cdcabfe125b746c7d87bfa7d06e0fc
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3585
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp | 2
1 files changed, 1 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
index 7b08041005..0125f9c5db 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp
@@ -72,7 +72,7 @@ static const GemmImplementation<uint8_t, uint8_t, Requantize32> gemm_quint8_meth
GemmMethod::GEMM_HYBRID_QUANTIZED,
"hybrid_u8u32_dot_16x4",
[](const GemmArgs &args, const Requantize32 &) { return args._ci->has_dotprod() && args._Ksize>=16; },
- [](const GemmArgs &args, const Requantize32 &) { return args._Nsize<=256 && args._Ksize>128; },
+ [](const GemmArgs &args, const Requantize32 &) { return ((args._Nsize<=256) && (args._Ksize>128)) || (args._maxthreads >= 8); },
[](const GemmArgs &args, const Requantize32 &qp) { return new GemmHybridQuantized<hybrid_u8u32_dot_16x4, uint8_t, uint8_t>(args, qp); }
},
/** QUANTIZE_WRAPPER_2D enables 2D parallelisation hint for IScheduler in NEGEMMAssemblyDispatch */