about summary refs log tree commit diff
diff options
context:
space:
mode:
author Milos Puzovic <milos.puzovic@arm.com> 2024-03-28 13:28:21 +0000
committer Milos Puzovic <milos.puzovic@arm.com> 2024-04-04 11:15:37 +0000
commit 1e91d71aaadd2154a708de1bc95f3c937b1e718a (patch)
tree 0c7da8e5d5c8dab609587b73602187496fa734d7
parent 77bbe2e08b0376edfd3f504950be7f4b5720eeb0 (diff)
download ComputeLibrary-1e91d71aaadd2154a708de1bc95f3c937b1e718a.tar.gz
Parallelise im2col along dimensions with higher number of iterations
Signed-off-by: Milos Puzovic <milos.puzovic@arm.com>
Change-Id: I362f3f4a42e218424fca917bed22003ec9d5609c
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11363
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
-rw-r--r-- src/cpu/operators/CpuGemmConv2d.cpp 13
1 files changed, 10 insertions, 3 deletions
diff --git a/src/cpu/operators/CpuGemmConv2d.cpp b/src/cpu/operators/CpuGemmConv2d.cpp
index 7460f2020c..55d950ff4a 100644
--- a/src/cpu/operators/CpuGemmConv2d.cpp
+++ b/src/cpu/operators/CpuGemmConv2d.cpp
@@ -809,9 +809,16 @@ void CpuGemmConv2d::run(ITensorPack &tensors)
if (!_skip_im2col)
{
// Run input reshaping
- unsigned int y_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
- ITensorPack pack = {{TensorType::ACL_SRC, src}, {TensorType::ACL_DST, im2col_output.get()}};
- NEScheduler::get().schedule_op(_im2col_kernel.get(), y_dim, _im2col_kernel->window(), pack);
+ unsigned int hint_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
+ unsigned int x_dim = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
+ unsigned int hint_dim_iterations = _im2col_kernel->window().num_iterations(hint_dim);
+ unsigned int x_dim_iterations = _im2col_kernel->window().num_iterations(x_dim);
+ if (hint_dim_iterations < NEScheduler::get().num_threads() && x_dim_iterations > hint_dim_iterations)
+ {
+ hint_dim = x_dim;
+ }
+ ITensorPack pack = {{TensorType::ACL_SRC, src}, {TensorType::ACL_DST, im2col_output.get()}};
+ NEScheduler::get().schedule_op(_im2col_kernel.get(), hint_dim, _im2col_kernel->window(), pack);
gemm_input_to_use = im2col_output.get();
}