From 40471d12a19088df4af6ad80e5c0437d724dd8fa Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Mon, 26 Apr 2021 08:39:28 +0100 Subject: Add optimization for global pooling in pooling_layer.cl - Simplify the implementation when the pooling size has the same spatial dimensions as the input tensor - Rework the heuristic for F32/F16 - Add test for validating the global pooling path - Fix compare_dimensions in validation. The validation fails because we have a different number of dimensions for NCHW and NHWC (e.g. 1,1,2,1(NCHW) -> 2,1,1,1(NHWC)) Change-Id: Iba680cb30bf2a5d0952265a4cc9794f368549ca5 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5510 Reviewed-by: Michele Di Giorgio Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- src/core/gpu/cl/kernels/ClPoolingKernel.cpp | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'src/core/gpu/cl/kernels/ClPoolingKernel.cpp') diff --git a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp index 78243402bf..a432877a1d 100644 --- a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp +++ b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp @@ -173,9 +173,11 @@ std::tuple validate_and_configure_window(ITenso } case DataLayout::NHWC: { + const size_t vec_size = dst->data_type() == DataType::F32 ? 2 : 4; + // Initialize border size border_size = BorderSize(); - num_elems_processed_per_iteration = adjust_vec_size(4, dst->dimension(0)); + num_elems_processed_per_iteration = adjust_vec_size(vec_size, dst->dimension(0)); win = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration)); break; } -- cgit v1.2.1