diff options
Diffstat (limited to 'src/core/CL/cl_kernels/reduction_operation.cl')
-rw-r--r-- | src/core/CL/cl_kernels/reduction_operation.cl | 67 |
1 files changed, 40 insertions, 27 deletions
diff --git a/src/core/CL/cl_kernels/reduction_operation.cl b/src/core/CL/cl_kernels/reduction_operation.cl index 2651123cf5..749e3cdaa3 100644 --- a/src/core/CL/cl_kernels/reduction_operation.cl +++ b/src/core/CL/cl_kernels/reduction_operation.cl @@ -23,6 +23,19 @@ */ #include "helpers.h" +#if FLOAT_DATA_TYPE +#define ISGREATER(x, y) isgreater(x, y) +#define ISLESS(x, y) isless(x, y) +#else // !FLOAT_DATA_TYPE +#if defined(WIDTH) +#define ISGREATER(x, y) (x > y) ? 1 : 0 +#define ISLESS(x, y) (x < y) ? 1 : 0 +#else // !defined(WIDTH) +#define ISGREATER(x, y) select((int16)0, (int16)-1, x > y) +#define ISLESS(x, y) select((int16)0, (int16)-1, x < y) +#endif // defined(WIDTH) +#endif // FLOAT_DATA_TYPE + /** Calculate square sum of a vector * * @param[in] input Pointer to the first pixel. @@ -124,9 +137,9 @@ __kernel void reduction_operation_x( { #if defined(PROD) local_results[lid] *= local_results[lid + i]; -#else //!defined(PROD) +#else // !defined(PROD) local_results[lid] += local_results[lid + i]; -#endif //defined(PROD) +#endif // defined(PROD) } barrier(CLK_LOCAL_MEM_FENCE); } @@ -138,7 +151,7 @@ __kernel void reduction_operation_x( { local_results[0] /= WIDTH; } -#endif /* defined(MEAN) && defined(WIDTH) */ +#endif // defined(MEAN) && defined(WIDTH) ((__global DATA_TYPE *)offset(&partial_res, get_group_id(0), y))[0] = local_results[0]; } } @@ -153,7 +166,7 @@ __kernel void reduction_operation_x( * @note The product flag must be passed at compile time using -DPROD if we want to compute the product, otherwise sum will be used * @note In case of ARG_MIN and ARG_MAX the condition data type must be passed at compile time using -DCOND_DATA_TYPE e.g. -DCOND_DATA_TYPE=short * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 and QASYMM8 for operation MEAN + * @param[in] src_ptr Pointer to the source tensor. Supported data types: S32/F16/F32 and QASYMM8 for operation MEAN * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor @@ -179,11 +192,11 @@ __kernel void reduction_operation_non_parallel_x( { DATA_TYPE_PROMOTED in = *((__global DATA_TYPE *)vector_offset(&src, x)); #if defined(ARG_MAX) - indx = select(indx, x, isgreater(in, res)); - res = select(res, in, CONVERT(isgreater(in, res), COND_DATA_TYPE)); + indx = select(indx, x, ISGREATER(in, res)); + res = select(res, in, CONVERT(ISGREATER(in, res), COND_DATA_TYPE)); #elif defined(ARG_MIN) - indx = select(indx, x, isless(in, res)); - res = select(res, in, CONVERT(isless(in, res), COND_DATA_TYPE)); + indx = select(indx, x, ISLESS(in, res)); + res = select(res, in, CONVERT(ISLESS(in, res), COND_DATA_TYPE)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) res += in; #endif // defined(ARG_MAX) || defined(ARG_MIN) @@ -199,7 +212,7 @@ __kernel void reduction_operation_non_parallel_x( *((__global uchar *)output.ptr) = convert_uchar(res); #endif // defined(ARG_MAX) || defined(ARG_MIN) } -#endif /* defined(WIDTH) */ +#endif // defined(WIDTH) #if defined(HEIGHT) /** This kernel performs reduction on y-axis. @@ -207,7 +220,7 @@ __kernel void reduction_operation_non_parallel_x( * @note The input data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float * @note The height size must be passed at compile time using -DHEIGHT e.g. -DHEIGHT=128 * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -243,22 +256,22 @@ __kernel void reduction_operation_y( VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16) in = CONVERT(vload16(0, (__global DATA_TYPE *)offset(&src, 0, y)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)); #if defined(ARG_MAX) - uint16 cond_conv = CONVERT(isgreater(in, res), uint16); + uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16); indx = select(indx, y, cond_conv); - res = select(res, in, isgreater(in, res)); + res = select(res, in, ISGREATER(in, res)); #elif defined(ARG_MIN) - uint16 cond_conv = CONVERT(isless(in, res), uint16); + uint16 cond_conv = CONVERT(ISLESS(in, res), uint16); indx = select(indx, y, cond_conv); - res = select(res, in, isless(in, res)); + res = select(res, in, ISLESS(in, res)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) #if defined(SUM_SQUARE) in *= in; #endif // defined(SUM_SQUARE) #if defined(PROD) res *= in; -#else //!defined(PROD) +#else // !defined(PROD) res += in; -#endif //defined(PROD) +#endif // defined(PROD) #endif // defined(ARG_MAX) || defined(ARG_MIN) } @@ -272,7 +285,7 @@ __kernel void reduction_operation_y( vstore16(CONVERT(res, VEC_DATA_TYPE(DATA_TYPE, 16)), 0, (__global DATA_TYPE *)output.ptr); #endif // defined(ARG_MAX) || defined(ARG_MIN) } -#endif /* defined(HEIGHT) */ +#endif // defined(HEIGHT) #if defined(DEPTH) /** This kernel performs reduction on z-axis. @@ -280,7 +293,7 @@ __kernel void reduction_operation_y( * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float * @note The depth size must be passed at compile time using -DDEPTH e.g. -DDEPTH=128 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32 + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -330,13 +343,13 @@ __kernel void reduction_operation_z( #endif // defined(COMPLEX) #if defined(ARG_MAX) - uint16 cond_conv = CONVERT(isgreater(in, res), uint16); + uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16); indx = select(indx, z, cond_conv); - res = select(res, in, isgreater(in, res)); + res = select(res, in, ISGREATER(in, res)); #elif defined(ARG_MIN) - uint16 cond_conv = CONVERT(isless(in, res), uint16); + uint16 cond_conv = CONVERT(ISLESS(in, res), uint16); indx = select(indx, z, cond_conv); - res = select(res, in, isless(in, res)); + res = select(res, in, ISLESS(in, res)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) #if defined(SUM_SQUARE) in *= in; @@ -374,7 +387,7 @@ __kernel void reduction_operation_z( * @note The batch size must be passed at compile time using -DBATCH e.g. -DBATCH=128 * @note The depth size must be passed at compile time using -DBATCH e.g. -DDEPTH=128 * - * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/F16/F32 + * @param[in] input_ptr Pointer to the source tensor. Supported data types: QASYMM8/S32/F16/F32 * @param[in] input_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -419,13 +432,13 @@ __kernel void reduction_operation_w( in = CONVERT(vload16(0, (__global DATA_TYPE *)tensor4D_offset(&input, 0, 0, 0, w)), VEC_DATA_TYPE(DATA_TYPE_PROMOTED, 16)); #if defined(ARG_MAX) - uint16 cond_conv = CONVERT(isgreater(in, res), uint16); + uint16 cond_conv = CONVERT(ISGREATER(in, res), uint16); indx = select(indx, w, cond_conv); - res = select(res, in, isgreater(in, res)); + res = select(res, in, ISGREATER(in, res)); #elif defined(ARG_MIN) - uint16 cond_conv = CONVERT(isless(in, res), uint16); + uint16 cond_conv = CONVERT(ISLESS(in, res), uint16); indx = select(indx, w, cond_conv); - res = select(res, in, isless(in, res)); + res = select(res, in, ISLESS(in, res)); #else // !(defined(ARG_MAX) || defined(ARG_MIN)) #if defined(SUM_SQUARE) in *= in; |