From 50929ef951880469b9d579323d2f9c9f5025327d Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 30 Apr 2021 10:16:47 +0100 Subject: Add optimization for global pooling in pooling_layer.cl - Simplify the implementation when the pooling size has the same spatial dimensions of the input tensor - Rework the heuristic for F32/F16 - Add test for validating the global pooling path - Fix compare_dimensions in validation. The validation fails because we have different number of dimensions for NCHW and NHWC (e.g. 1,1,2,1(NCHW) -> 2,1,1,1(NHWC) Resolves COMPMID-4426 Change-Id: Ia53ee659a9fbc3d011f286a8150d1be9d6d2cd05 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5533 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Reviewed-by: TeresaARM Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- src/core/CL/cl_kernels/pooling_layer.cl | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index 8944c9b1ac..ba61674c7b 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -723,6 +723,34 @@ __kernel void pooling_layer_MxN_nhwc( VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) res0 = INITIAL_VALUE; +#if POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0 + // Global pooling path + + int filter_size = POOL_SIZE_X * POOL_SIZE_Y; + +#pragma unroll 8 + for(int y = 0; y < POOL_SIZE_X * POOL_SIZE_Y; ++y) + { + VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) + data0; +#if defined(FP_MIXED_PRECISION) + // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE + data0 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); +#else // defined(FP_MIXED_PRECISION) + data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr)); +#endif // defined(FP_MIXED_PRECISION) + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data0 *= data0; +#endif // defined(POOL_L2) + + res0 = POOL_OP(res0, data0); + + in_base_ptr += input_stride_y; + } +#else // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0 + int idx_in_w = idx_out_w * STRIDE_X - PAD_X; int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y; @@ -757,7 +785,9 @@ __kernel void pooling_layer_MxN_nhwc( res0 = POOL_OP(res0, data0); } } - + +#endif // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0 + #if defined(POOL_AVG) || defined(POOL_L2) res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size; #endif // defined(POOL_AVG) || defined(POOL_L2) -- cgit v1.2.1