diff options
author | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2021-04-30 10:16:47 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-04-30 13:40:40 +0000 |
commit | 50929ef951880469b9d579323d2f9c9f5025327d (patch) | |
tree | ac16f522961ee95ed5da8e5d54d53fe6f8420bcf /src/core | |
parent | e5df1d51177c0682622426c97de620c5ad05d341 (diff) | |
download | ComputeLibrary-50929ef951880469b9d579323d2f9c9f5025327d.tar.gz |
Add optimization for global pooling in pooling_layer.cl
- Simplify the implementation when the pooling size has the same spatial
dimensions of the input tensor
- Rework the heuristic for F32/F16
- Add test for validating the global pooling path
- Fix compare_dimensions in validation. The validation fails because
we have different
number of dimensions for NCHW and NHWC (e.g. 1,1,2,1(NCHW) ->
2,1,1,1(NHWC)
Resolves COMPMID-4426
Change-Id: Ia53ee659a9fbc3d011f286a8150d1be9d6d2cd05
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5533
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-by: TeresaARM <teresa.charlinreyes@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CL/cl_kernels/pooling_layer.cl | 32 |
1 files changed, 31 insertions, 1 deletions
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index 8944c9b1ac..ba61674c7b 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -723,6 +723,34 @@ __kernel void pooling_layer_MxN_nhwc( VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) res0 = INITIAL_VALUE; +#if POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0 + // Global pooling path + + int filter_size = POOL_SIZE_X * POOL_SIZE_Y; + +#pragma unroll 8 + for(int y = 0; y < POOL_SIZE_X * POOL_SIZE_Y; ++y) + { + VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) + data0; +#if defined(FP_MIXED_PRECISION) + // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE + data0 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); +#else // defined(FP_MIXED_PRECISION) + data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr)); +#endif // defined(FP_MIXED_PRECISION) + +#if defined(POOL_L2) + // Raise to power of 2 for L2 Pooling + data0 *= data0; +#endif // defined(POOL_L2) + + res0 = POOL_OP(res0, data0); + + in_base_ptr += input_stride_y; + } +#else // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0 + int idx_in_w = idx_out_w * STRIDE_X - PAD_X; int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y; @@ -757,7 +785,9 @@ __kernel void pooling_layer_MxN_nhwc( res0 = POOL_OP(res0, data0); } } - + +#endif // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT && PAD_X == 0 && PAD_Y == 0 + #if defined(POOL_AVG) || defined(POOL_L2) res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size; #endif // defined(POOL_AVG) || defined(POOL_L2) |