aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2021-04-30 10:16:47 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-04-30 13:40:40 +0000
commit50929ef951880469b9d579323d2f9c9f5025327d (patch)
treeac16f522961ee95ed5da8e5d54d53fe6f8420bcf
parente5df1d51177c0682622426c97de620c5ad05d341 (diff)
downloadComputeLibrary-50929ef951880469b9d579323d2f9c9f5025327d.tar.gz
Add optimization for global pooling in pooling_layer.cl
- Simplify the implementation when the pooling size has the same spatial dimensions of the input tensor - Rework the heuristic for F32/F16 - Add test for validating the global pooling path - Fix compare_dimensions in validation. The validation fails because we have different number of dimensions for NCHW and NHWC (e.g. 1,1,2,1(NCHW) -> 2,1,1,1(NHWC) Resolves COMPMID-4426 Change-Id: Ia53ee659a9fbc3d011f286a8150d1be9d6d2cd05 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5533 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/pooling_layer.cl32
1 files changed, 31 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 8944c9b1ac..ba61674c7b 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -723,6 +723,34 @@ __kernel void pooling_layer_MxN_nhwc(
VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
res0 = INITIAL_VALUE;
+#if POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0
+ // Global pooling path
+
+ int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
+
+#pragma unroll 8
+ for(int y = 0; y < POOL_SIZE_X * POOL_SIZE_Y; ++y)
+ {
+ VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
+ data0;
+#if defined(FP_MIXED_PRECISION)
+ // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
+ data0 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE));
+#else // defined(FP_MIXED_PRECISION)
+ data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr));
+#endif // defined(FP_MIXED_PRECISION)
+
+#if defined(POOL_L2)
+ // Raise to power of 2 for L2 Pooling
+ data0 *= data0;
+#endif // defined(POOL_L2)
+
+ res0 = POOL_OP(res0, data0);
+
+ in_base_ptr += input_stride_y;
+ }
+#else // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0
+
int idx_in_w = idx_out_w * STRIDE_X - PAD_X;
int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y;
@@ -757,7 +785,9 @@ __kernel void pooling_layer_MxN_nhwc(
res0 = POOL_OP(res0, data0);
}
}
-
+
+#endif // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0
+
#if defined(POOL_AVG) || defined(POOL_L2)
res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size;
#endif // defined(POOL_AVG) || defined(POOL_L2)