From 1e91d71aaadd2154a708de1bc95f3c937b1e718a Mon Sep 17 00:00:00 2001
From: Milos Puzovic
Date: Thu, 28 Mar 2024 13:28:21 +0000
Subject: Parallelise im2col along dimensions with higher number of iterations

Signed-off-by: Milos Puzovic
Change-Id: I362f3f4a42e218424fca917bed22003ec9d5609c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11363
Benchmark: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Gunes Bayir
---
 src/cpu/operators/CpuGemmConv2d.cpp | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp
index 7460f2020c..55d950ff4a 100644
--- a/src/cpu/operators/CpuGemmConv2d.cpp
+++ b/src/cpu/operators/CpuGemmConv2d.cpp
@@ -809,9 +809,16 @@ void CpuGemmConv2d::run(ITensorPack &tensors)
     if (!_skip_im2col)
     {
         // Run input reshaping
-        unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
-        ITensorPack pack = {{TensorType::ACL_SRC, src}, {TensorType::ACL_DST, im2col_output.get()}};
-        NEScheduler::get().schedule_op(_im2col_kernel.get(), y_dim, _im2col_kernel->window(), pack);
+        unsigned int hint_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+        unsigned int x_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+        unsigned int hint_dim_iterations = _im2col_kernel->window().num_iterations(hint_dim);
+        unsigned int x_dim_iterations = _im2col_kernel->window().num_iterations(x_dim);
+        if (hint_dim_iterations < NEScheduler::get().num_threads() && x_dim_iterations > hint_dim_iterations)
+        {
+            hint_dim = x_dim;
+        }
+        ITensorPack pack = {{TensorType::ACL_SRC, src}, {TensorType::ACL_DST, im2col_output.get()}};
+        NEScheduler::get().schedule_op(_im2col_kernel.get(), hint_dim, _im2col_kernel->window(), pack);
         gemm_input_to_use = im2col_output.get();
     }
 
-- 
cgit v1.2.1