aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/pooling_layer.cl
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2017-09-05 13:51:21 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commitbf17955e2bf36c635acbac7c3bb03fbbd7732671 (patch)
tree9032d87a13942d4b7a15b2db5e7570d79823c66f /src/core/CL/cl_kernels/pooling_layer.cl
parent583137cc60580023abfd9d05abf933e7e117e29f (diff)
downloadComputeLibrary-bf17955e2bf36c635acbac7c3bb03fbbd7732671.tar.gz
COMPMID-522 - Added support for GlobalPooling in CLPoolingLayer and CLFlattening for 3D tensor
Change-Id: Ifc7db1e4d4af322a4dcbfeb3e132e5c326596872 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/86618 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/pooling_layer.cl')
-rw-r--r--src/core/CL/cl_kernels/pooling_layer.cl98
1 files changed, 98 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 18ad4a69a8..0497bf4b91 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -415,3 +415,101 @@ __kernel void pooling_layer_7(
// Store result
*(__global DATA_TYPE *)output.ptr = res;
}
+
+#if defined(POOL_SIZE)
+
+// Set the initial value for the pooling operation accordingly with the data type
+#if defined(POOL_AVG)
+#define INITIAL_VALUE 0
+#else // POOL_AVG
+#ifdef FIXED_POINT_POSITION
+#define MIN_VAL_EXPAND(type) type##_MIN
+#define MIN_VAL(type) MIN_VAL_EXPAND(type)
+#define INITIAL_VALUE MIN_VAL(DATA_TYPE)
+#define INITIAL_VALUE 0
+#else // FIXED_POINT_POSITION
+#if FP16
+#define INITIAL_VALUE -HALF_MAX
+#else // FP16
+#define INITIAL_VALUE -FLT_MAX
+#endif // FP16
+#endif // FIXED_POINT_POSITION
+
+#endif // POOL_AVG
+
+/** Performs a pooling function of pool size equal to N
+ *
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
+ * @note -DFP16 must be passed at compile time if half float data type is used
+ * @note Pool size must be passed using -DPOOL_SIZE e.g. -DPOOL_SIZE=13;
+ * @note In case of average pooling the following information must be passed at compile time:
+ * -DPOOL_AVG must be provided otherwise max pooling will be performed.
+ * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
+ * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
+ * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
+ *
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32
+ * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
+ * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
+ * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void pooling_layer_N(
+ TENSOR3D_DECLARATION(input),
+ TENSOR3D_DECLARATION(output))
+{
+ // Get pixels pointer
+ Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+ Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ vdata = INITIAL_VALUE;
+ DATA_TYPE sdata = INITIAL_VALUE;
+
+ // Load data
+ for(int y = 0; y < POOL_SIZE; y++)
+ {
+ int x = 0;
+ for(; x <= ((int)POOL_SIZE - 8); x += 8)
+ {
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
+ vdata = POOL_OP(vdata, data0);
+ }
+
+ // Leftover
+ for(; x < (int)POOL_SIZE; ++x)
+ {
+ DATA_TYPE data0 = *((__global DATA_TYPE *)tensor3D_offset(&input, x, y, 0));
+ sdata = POOL_OP(sdata, data0);
+ }
+ }
+
+ // Reduce result
+ VEC_DATA_TYPE(DATA_TYPE, 4)
+ reduce4 = POOL_OP(vdata.s0123, vdata.s4567);
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ reduce2 = POOL_OP(reduce4.s01, reduce4.s23);
+ DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);
+ res = POOL_OP(res, sdata);
+
+ // Divide by pool region in case of average pooling
+#ifdef POOL_AVG
+ res = DIV_OP(res, calculate_avg_scale(POOL_SIZE, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
+#endif /* POOL_AVG */
+
+ // Store result
+ *(__global DATA_TYPE *)output.ptr = res;
+}
+#endif // defined(POOL_SIZE) \ No newline at end of file