From ef28340ac76f46753ad901b5d00311791a7f0887 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Wed, 27 Jan 2021 16:22:05 +0000
Subject: Force early exit of threads when output_height=1 and channels=1 in assembly pooling

Resolves COMPMID-4156, COMPMID-4125

Change-Id: I408e51e9759448e6190490975ccab2c1bc07a813
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4927
Reviewed-by: TeresaARM
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp | 6 ++++++
 .../arm_conv/pooling/pooling_depthfirst_generic_quantized.hpp | 6 ++++++
 2 files changed, 12 insertions(+)

diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
index 3a15b28d92..fa06a0078b 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic.hpp
@@ -159,6 +159,12 @@ class PoolingDepthfirstGeneric : public PoolingCommon<...>
 [NOTE: the PoolingCommon template arguments and the leading context lines of this hunk were lost in extraction; the condition below is reconstructed from the surviving "= end_channel)" - verify against the upstream commit]
+    if(start_channel >= end_channel)
+    {
+        // Early exit in case of multiple threads parallelising on channels
+        return;
+    }
+
     // Cast input and output pointers into the right types
     const TInput *const inptr = static_cast<const TInput *>(_input) + start_channel;
     TOutput *const outptr = static_cast<TOutput *>(_output) + start_channel;
diff --git a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic_quantized.hpp b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic_quantized.hpp
index 9516042eed..1f2891f814 100644
--- a/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic_quantized.hpp
+++ b/src/core/NEON/kernels/arm_conv/pooling/pooling_depthfirst_generic_quantized.hpp
@@ -160,6 +160,12 @@ class PoolingDepthfirstGenericQuantized : public PoolingCommon<...>
 [NOTE: the PoolingCommon template arguments and the leading context lines of this hunk were lost in extraction; the condition below is reconstructed from the surviving "= end_channel)" - verify against the upstream commit]
+    if(start_channel >= end_channel)
+    {
+        // Early exit in case of multiple threads parallelising on channels
+        return;
+    }
+
     // Cast input and output pointers into the right types
     const TInput *const inptr = static_cast<const TInput *>(_input) + start_channel;
     TOutput *const outptr = static_cast<TOutput *>(_output) + start_channel;
--
cgit v1.2.1