aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author: Jonathan Deakin <jonathan.deakin@arm.com> 2024-02-12 09:13:00 +0000
committer: felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> 2024-02-12 12:10:01 +0000
commit: 0c85334f6b6908083fa09721cb0d3e7aebd43109 (patch)
tree: f1c0eba5fd3725ab033500adc49c1b2e066f2fa8
parent: 0e73498c4426cddad6f5652288a0323553ea6720 (diff)
download: ComputeLibrary-0c85334f6b6908083fa09721cb0d3e7aebd43109.tar.gz
Fix parallel depthwise perf regression from 2db938c
Incorrect conditional meant that we were parallelizing over batches when we should have been parallelizing over rows.

Relates to: ONCPUML-1443 COMPMID-6875

Signed-off-by: Jonathan Deakin <jonathan.deakin@arm.com>
Change-Id: I61d43bb2a94e8a6887d4cc5d1ae2ebb03295dff7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11120
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp 2
1 files changed, 1 insertions, 1 deletions
diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index 38092adfee..7fe9011da1 100644
--- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -110,7 +110,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
// Split over rows (z) if there's more than 1, otherwise batches (w). This logic
// corresponds to the threading strategy in DepthFirstDriver::execute_internal
- auto split_dimension = _pImpl->asm_kernel->window().num_iterations(Window::DimZ) == 1 ? Window::DimZ : Window::DimW;
+ auto split_dimension = _pImpl->asm_kernel->window().num_iterations(Window::DimZ) != 1 ? Window::DimZ : Window::DimW;
NEScheduler::get().schedule_op(_pImpl->asm_kernel.get(), split_dimension, _pImpl->asm_kernel->window(), tensors);
}