From 8307ecf74d2cdbee284faef1cc108ad62742e883 Mon Sep 17 00:00:00 2001 From: Mohammed Suhail Munshi Date: Wed, 9 Nov 2022 15:38:54 +0000 Subject: Fix regression caused by mws in ActivationLayer - Regression is caused by the small default mws in ActivationLayer - Synchronization of threads takes longer than the workload on small sized tensors. - Size 1536 is chosen arbitrarily based on the size of tensors in benchmarked networks Resolves: [COMPMID-5655] Signed-off-by: Mohammed Suhail Munshi Change-Id: I02e865b578399e75484f471e67806dd4cf7502c0 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/468454 Comments-Addressed: bsgcomp Tested-by: bsgcomp Reviewed-by: Jakub Sujak Reviewed-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8615 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/cpu/kernels/CpuActivationKernel.cpp | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/cpu/kernels/CpuActivationKernel.cpp b/src/cpu/kernels/CpuActivationKernel.cpp index c7e1075bfd..04a9731f4a 100644 --- a/src/cpu/kernels/CpuActivationKernel.cpp +++ b/src/cpu/kernels/CpuActivationKernel.cpp @@ -233,7 +233,13 @@ size_t CpuActivationKernel::get_mws(const CPUInfo &platform, size_t thread_count ARM_COMPUTE_UNUSED(thread_count); ARM_COMPUTE_UNUSED(platform); - return ICPPKernel::default_mws; + if(_split_dimension == Window::DimX) + { + // Don't split the work load too small if the tensor has been reinterpreted as 1D. + // This number is loosely chosen as threading overhead in each platform varies wildly. + return 1536; + } + return default_mws; } void CpuActivationKernel::run_op(ITensorPack &tensors, const Window &window, const ThreadInfo &info) -- cgit v1.2.1