aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/pooling_layer.cl
diff options
context:
space:
mode:
authorAnton Lokhmotov <psyhtest@users.noreply.github.com>2017-11-08 09:34:19 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:35:24 +0000
commitaf6204c331eed7894ec4c5fd4e98ec22b6dac676 (patch)
tree7c952fbcf3ec11ff4c9d49061be12742e9cd979d /src/core/CL/cl_kernels/pooling_layer.cl
parent3a873a578e80481a55ce3b885078948ae79468eb (diff)
downloadComputeLibrary-af6204c331eed7894ec4c5fd4e98ec22b6dac676.tar.gz
COMPMID-661: Add avgpool-uint8 support. Optimize avgpool-fp32 for Bifrost. (#13)
Change-Id: I32ba6afbac6694ffa053dd16f03a1b3d14627a19 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94857 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/pooling_layer.cl')
-rw-r--r--src/core/CL/cl_kernels/pooling_layer.cl101
1 files changed, 2 insertions, 99 deletions
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 635c44a849..ee8ff27ab7 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -375,7 +375,7 @@ calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upp
* @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
*/
-__kernel void pooling_layer_3_optimized(
+__kernel void pooling_layer_optimized_3(
TENSOR3D_DECLARATION(input),
TENSOR3D_DECLARATION(output))
{
@@ -403,103 +403,6 @@ __kernel void pooling_layer_3_optimized(
}
#endif // defined(POOLING3x3) && !defined(FIXED_POINT_POSITION)
-/** Performs a pooling function of pool size equal to 7.
- *
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
- * @note In case of average pooling the following information must be passed at compile time:
- * -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed.
- * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
- * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
- * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
- *
- * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32
- * @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
- * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
- * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image
- * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr
- * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
- * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
- * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
- * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes)
- * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes)
- * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes)
- * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image
- */
-__kernel void pooling_layer_7(
- TENSOR3D_DECLARATION(input),
- TENSOR3D_DECLARATION(output))
-{
- // Get pixels pointer
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
-
- // Load data
- VEC_DATA_TYPE(DATA_TYPE, 8)
- data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- data1 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- data2 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- data3 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 3, 0));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- data4 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 4, 0));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- data5 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 5, 0));
- VEC_DATA_TYPE(DATA_TYPE, 8)
- data6 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 6, 0));
-
-#if defined(POOL_L2)
- // Raise to power of 2 for L2 Pooling
- data0 = POW2_OP(data0, 8);
- data1 = POW2_OP(data1, 8);
- data2 = POW2_OP(data2, 8);
- data3 = POW2_OP(data3, 8);
- data4 = POW2_OP(data4, 8);
- data5 = POW2_OP(data5, 8);
- data6 = POW2_OP(data6, 8);
-#endif /* defined(POOL_L2) */
-
- // Pool operation of all rows
- data0 = POOL_OP(data0, data1);
- data2 = POOL_OP(data2, data3);
- data4 = POOL_OP(data4, data5);
- data0 = POOL_OP(data0, data2);
- data4 = POOL_OP(data4, data6);
- data0 = POOL_OP(data0, data4);
-
- // Set last element
-#if defined(POOL_AVG) || defined(POOL_L2)
- data0.s7 = 0;
-#else /* defined(POOL_AVG) || defined(POOL_L2) */
- data0.s7 = data0.s6;
-#endif /* defined(POOL_AVG) || defined(POOL_L2) */
-
- // Reduce result
- VEC_DATA_TYPE(DATA_TYPE, 4)
- reduce4 = POOL_OP(data0.s0123, data0.s4567);
- VEC_DATA_TYPE(DATA_TYPE, 2)
- reduce2 = POOL_OP(reduce4.s01, reduce4.s23);
- DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1);
-
-#if defined(POOL_AVG) || defined(POOL_L2)
- // Divide by pool region in case of average pooling
- res = DIV_OP(res, calculate_avg_scale(7, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
-#endif /* defined(POOL_AVG) || defined(POOL_L2) */
-
-#if defined(POOL_L2)
- // Take square root of the result in L2 pooling
- res = SQRT_OP(res);
-#endif /* defined(POOL_L2) */
-
- // Store result
- *(__global DATA_TYPE *)output.ptr = res;
-}
-
#if defined(POOL_SIZE)
// Set the initial value for the pooling operation accordingly with the data type
@@ -608,4 +511,4 @@ __kernel void pooling_layer_N(
// Store result
*(__global DATA_TYPE *)output.ptr = res;
}
-#endif // defined(POOL_SIZE) \ No newline at end of file
+#endif // defined(POOL_SIZE)