From 0c85334f6b6908083fa09721cb0d3e7aebd43109 Mon Sep 17 00:00:00 2001
From: Jonathan Deakin
Date: Mon, 12 Feb 2024 09:13:00 +0000
Subject: Fix parallel depthwise perf regression from 2db938c

Incorrect conditional meant that we were parallelizing over batches when
we should have been parallelizing over rows.

Relates to: ONCPUML-1443
COMPMID-6875

Signed-off-by: Jonathan Deakin
Change-Id: I61d43bb2a94e8a6887d4cc5d1ae2ebb03295dff7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11120
Reviewed-by: Jakub Sujak
Comments-Addressed: Arm Jenkins
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
---
 src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
index 38092adfee..7fe9011da1 100644
--- a/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
+++ b/src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp
@@ -110,7 +110,7 @@ void CpuDepthwiseConv2dAssemblyDispatch::run(ITensorPack &tensors)
 
     // Split over rows (z) if there's more than 1, otherwise batches (w). This logic
     // corresponds to the threading strategy in DepthFirstDriver::execute_internal
-    auto split_dimension = _pImpl->asm_kernel->window().num_iterations(Window::DimZ) == 1 ? Window::DimZ : Window::DimW;
+    auto split_dimension = _pImpl->asm_kernel->window().num_iterations(Window::DimZ) != 1 ? Window::DimZ : Window::DimW;
     NEScheduler::get().schedule_op(_pImpl->asm_kernel.get(), split_dimension, _pImpl->asm_kernel->window(), tensors);
 }
 
-- 
cgit v1.2.1