Add optimization for global pooling in pooling_layer.cl

- Simplify the implementation when the pooling size has the same spatial dimensions of the input tensor - Rework the heuristic for F32/F16 - Add test for validating the global pooling path - Fix compare_dimensions in validation. The validation fails because we have different number of dimensions for NCHW and NHWC (e.g. 1,1,2,1(NCHW) -> 2,1,1,1(NHWC) Resolves COMPMID-4426 Change-Id: Ia53ee659a9fbc3d011f286a8150d1be9d6d2cd05 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5533 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2021-04-30 10:16:47 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2021-04-30 13:40:40 +0000
commit: 50929ef951880469b9d579323d2f9c9f5025327d (patch)
tree: ac16f522961ee95ed5da8e5d54d53fe6f8420bcf /src/core
parent: e5df1d51177c0682622426c97de620c5ad05d341 (diff)
download: ComputeLibrary-50929ef951880469b9d579323d2f9c9f5025327d.tar.gz
1 files changed, 31 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 8944c9b1ac..ba61674c7b 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -723,6 +723,34 @@ __kernel void pooling_layer_MxN_nhwc(
     VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
     res0 = INITIAL_VALUE;
 
+#if POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0
+    // Global pooling path
+
+    int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
+
+#pragma unroll 8
+    for(int y = 0; y < POOL_SIZE_X * POOL_SIZE_Y; ++y)
+    {
+        VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
+        data0;
+#if defined(FP_MIXED_PRECISION)
+        // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
+        data0 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE));
+#else  // defined(FP_MIXED_PRECISION)
+        data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr));
+#endif // defined(FP_MIXED_PRECISION)
+
+#if defined(POOL_L2)
+        // Raise to power of 2 for L2 Pooling
+        data0 *= data0;
+#endif // defined(POOL_L2)
+
+        res0 = POOL_OP(res0, data0);
+
+        in_base_ptr += input_stride_y;
+    }
+#else // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0
+
     int idx_in_w = idx_out_w * STRIDE_X - PAD_X;
     int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y;
 
@@ -757,7 +785,9 @@ __kernel void pooling_layer_MxN_nhwc(
             res0 = POOL_OP(res0, data0);
         }
     }
-    
+
+#endif // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0
+
 #if defined(POOL_AVG) || defined(POOL_L2)
             res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size;
 #endif // defined(POOL_AVG) || defined(POOL_L2)
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2021-04-30 10:16:47 +0100
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2021-04-30 13:40:40 +0000
commit	50929ef951880469b9d579323d2f9c9f5025327d (patch)
tree	ac16f522961ee95ed5da8e5d54d53fe6f8420bcf /src/core
parent	e5df1d51177c0682622426c97de620c5ad05d341 (diff)
download	ComputeLibrary-50929ef951880469b9d579323d2f9c9f5025327d.tar.gz