From bf17955e2bf36c635acbac7c3bb03fbbd7732671 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 5 Sep 2017 13:51:21 +0100 Subject: COMPMID-522 - Added support for GlobalPooling in CLPoolingLayer and CLFlattening for 3D tensor Change-Id: Ifc7db1e4d4af322a4dcbfeb3e132e5c326596872 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86618 Reviewed-by: Georgios Pinitas Tested-by: Kaizen --- src/core/CL/cl_kernels/pooling_layer.cl | 98 +++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) (limited to 'src/core/CL/cl_kernels/pooling_layer.cl') diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index 18ad4a69a8..0497bf4b91 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -415,3 +415,101 @@ __kernel void pooling_layer_7( // Store result *(__global DATA_TYPE *)output.ptr = res; } + +#if defined(POOL_SIZE) + +// Set the initial value for the pooling operation accordingly with the data type +#if defined(POOL_AVG) +#define INITIAL_VALUE 0 +#else // POOL_AVG +#ifdef FIXED_POINT_POSITION +#define MIN_VAL_EXPAND(type) type##_MIN +#define MIN_VAL(type) MIN_VAL_EXPAND(type) +#define INITIAL_VALUE MIN_VAL(DATA_TYPE) +#define INITIAL_VALUE 0 +#else // FIXED_POINT_POSITION +#if FP16 +#define INITIAL_VALUE -HALF_MAX +#else // FP16 +#define INITIAL_VALUE -FLT_MAX +#endif // FP16 +#endif // FIXED_POINT_POSITION + +#endif // POOL_AVG + +/** Performs a pooling function of pool size equal to N + * + * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32; + * @note -DFP16 must be passed at compile time if half float data type is used + * @note Pool size must be passed using -DPOOL_SIZE e.g. -DPOOL_SIZE=13; + * @note In case of average pooling the following information must be passed at compile time: + * -DPOOL_AVG must be provided otherwise max pooling will be performed. + * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions + * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 + * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr + * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image + */ +__kernel void pooling_layer_N( + TENSOR3D_DECLARATION(input), + TENSOR3D_DECLARATION(output)) +{ + // Get pixels pointer + Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); + Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); + + VEC_DATA_TYPE(DATA_TYPE, 8) + vdata = INITIAL_VALUE; + DATA_TYPE sdata = INITIAL_VALUE; + + // Load data + for(int y = 0; y < POOL_SIZE; y++) + { + int x = 0; + for(; x <= ((int)POOL_SIZE - 8); x += 8) + { + VEC_DATA_TYPE(DATA_TYPE, 8) + data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0)); + vdata = POOL_OP(vdata, data0); + } + + // Leftover + for(; x < (int)POOL_SIZE; ++x) + { + DATA_TYPE data0 = *((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0)); + sdata = POOL_OP(sdata, data0); + } + } + + // Reduce result + VEC_DATA_TYPE(DATA_TYPE, 4) + reduce4 = POOL_OP(vdata.s0123, vdata.s4567); + VEC_DATA_TYPE(DATA_TYPE, 2) + reduce2 = POOL_OP(reduce4.s01, reduce4.s23); + DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1); + res = POOL_OP(res, sdata); + + // Divide by pool region in case of average pooling +#ifdef POOL_AVG + res = DIV_OP(res, calculate_avg_scale(POOL_SIZE, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)); +#endif /* POOL_AVG */ + + // Store result + *(__global DATA_TYPE *)output.ptr = res; +} +#endif // defined(POOL_SIZE) \ No newline at end of file -- cgit v1.2.1