From ac69aa137e360340fe9f148f019d93af6c3d8336 Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Mon, 3 Jul 2017 17:39:37 +0100 Subject: COMPMID-418 Add check and fix comments after preprocessor conditions Change-Id: I1353fd652ee180e3931e58b4ce13d651a48c7e2c Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79567 Tested-by: Kaizen Reviewed-by: Moritz Pflanzer --- src/core/CL/CLKernelLibrary.cpp | 6 +- src/core/CL/cl_kernels/activation_layer.cl | 36 +++++------ src/core/CL/cl_kernels/arithmetic_op.cl | 4 +- src/core/CL/cl_kernels/channel_combine.cl | 12 ++-- src/core/CL/cl_kernels/channel_extract.cl | 46 +++++++------- src/core/CL/cl_kernels/convolution3x3.cl | 4 +- src/core/CL/cl_kernels/convolution5x5.cl | 6 +- src/core/CL/cl_kernels/convolution7x7.cl | 6 +- src/core/CL/cl_kernels/convolution9x9.cl | 6 +- src/core/CL/cl_kernels/convolution_layer.cl | 24 +++---- src/core/CL/cl_kernels/convolution_rectangle.cl | 16 ++--- src/core/CL/cl_kernels/depth_convert.cl | 4 +- src/core/CL/cl_kernels/derivative.cl | 12 ++-- src/core/CL/cl_kernels/fast_corners.cl | 11 ++-- src/core/CL/cl_kernels/gemm.cl | 36 +++++------ src/core/CL/cl_kernels/hog.cl | 29 +++++---- src/core/CL/cl_kernels/magnitude_phase.cl | 16 ++--- src/core/CL/cl_kernels/mean_stddev.cl | 20 +++--- src/core/CL/cl_kernels/minmaxloc.cl | 26 ++++---- src/core/CL/cl_kernels/non_linear_filter3x3.cl | 42 ++++++------ src/core/CL/cl_kernels/non_linear_filter5x5.cl | 48 +++++++------- src/core/CL/cl_kernels/pixelwise_mul_float.cl | 10 +-- src/core/CL/cl_kernels/pixelwise_mul_int.cl | 4 +- src/core/CL/cl_kernels/pooling_layer.cl | 22 +++---- src/core/CL/cl_kernels/scharr_filter.cl | 26 ++++---- src/core/CL/cl_kernels/sobel_filter.cl | 74 +++++++++++----------- src/core/CL/cl_kernels/softmax_layer.cl | 20 +++--- src/core/CL/cl_kernels/transpose.cl | 6 +- src/core/NEON/kernels/NEAccumulateKernel.cpp | 2 +- src/core/NEON/kernels/NEBox3x3Kernel.cpp | 2 +- src/core/NEON/kernels/NECannyEdgeKernel.cpp | 2 +- .../NEON/kernels/NEGEMMMatrixAdditionKernel.cpp | 4 +- .../NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp | 18 +++--- src/core/NEON/kernels/NEHarrisCornersKernel.cpp | 2 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 4 +- .../NELocallyConnectedMatrixMultiplyKernel.cpp | 4 +- src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp | 2 +- .../kernels/NENonMaximaSuppression3x3Kernel.cpp | 2 +- src/runtime/NEON/functions/NEGEMM.cpp | 2 +- src/runtime/Scheduler.cpp | 24 +++---- 40 files changed, 320 insertions(+), 320 deletions(-) (limited to 'src') diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 4a92bac23c..dd3531e858 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -487,7 +487,7 @@ const std::map CLKernelLibrary::_program_source_map = "warp_perspective.cl", #include "./cl_kernels/warp_perspective.clembed" }, -#endif +#endif /* EMBEDDED_KERNELS */ }; CLKernelLibrary::CLKernelLibrary() @@ -560,7 +560,7 @@ const Program &CLKernelLibrary::load_program(const std::string &program_name) co } program = Program(_context, program_name, program_source_it->second); -#else +#else /* EMBEDDED_KERNELS */ // Check for binary std::string source_name = _kernel_path + program_name; std::string binary_name = source_name + "bin"; @@ -578,7 +578,7 @@ const Program &CLKernelLibrary::load_program(const std::string &program_name) co { ARM_COMPUTE_ERROR("Kernel file %s does not exist.", source_name.c_str()); } -#endif +#endif /* EMBEDDED_KERNELS */ // Insert program to program map const auto new_program = 
_programs_map.emplace(program_name, std::move(program)); diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl index 136191aa22..721c43c017 100644 --- a/src/core/CL/cl_kernels/activation_layer.cl +++ b/src/core/CL/cl_kernels/activation_layer.cl @@ -51,48 +51,48 @@ */ __kernel void activation_layer( TENSOR3D_DECLARATION(input) -#if !defined IN_PLACE +#ifndef IN_PLACE , TENSOR3D_DECLARATION(output) -#endif +#endif /* not IN_PLACE */ ) { // Get pixels pointer Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); -#if defined IN_PLACE +#ifdef IN_PLACE Tensor3D output = input; -#else +#else /* IN_PLACE */ Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); -#endif +#endif /* IN_PLACE */ // Load data VEC_DATA_TYPE(DATA_TYPE, 16) data = vload16(0, (__global DATA_TYPE *)input.ptr); // Perform activation -#if defined LOGISTIC +#ifdef LOGISTIC data = 1 / (1 + exp(-data)); -#elif defined TANH +#elif defined(TANH) data = (VEC_DATA_TYPE(DATA_TYPE, 16))A * tanh((VEC_DATA_TYPE(DATA_TYPE, 16))B * data); -#elif defined RELU +#elif defined(RELU) data = max(0, data); -#elif defined BRELU +#elif defined(BRELU) data = min((VEC_DATA_TYPE(DATA_TYPE, 16))A, max(0, data)); -#elif defined SRELU +#elif defined(SRELU) data = log(1 + exp(data)); -#elif defined ABS -#if defined TYPE_INT +#elif defined(ABS) +#ifdef TYPE_INT data = abs(data); -#else +#else /* TYPE_INT */ data = fabs(data); -#endif -#elif defined SQUARE +#endif /* TYPE_INT */ +#elif defined(SQUARE) data = data * data; -#elif defined SQRT +#elif defined(SQRT) data = sqrt(data); -#elif defined LINEAR +#elif defined(LINEAR) data = (VEC_DATA_TYPE(DATA_TYPE, 16))A * data + (VEC_DATA_TYPE(DATA_TYPE, 16))B; -#endif +#endif /* switch LOGISTIC, TANH, RELU, BRELU, SRELU, ABS, SQUARE, SQRT, LINEAR */ // Store result vstore16(data, 0, (__global DATA_TYPE *)output.ptr); diff --git a/src/core/CL/cl_kernels/arithmetic_op.cl b/src/core/CL/cl_kernels/arithmetic_op.cl index 434300efa8..5102d34552 100644 --- a/src/core/CL/cl_kernels/arithmetic_op.cl +++ b/src/core/CL/cl_kernels/arithmetic_op.cl @@ -26,10 +26,10 @@ #ifdef SATURATE #define ADD(x, y) add_sat((x), (y)) #define SUB(x, y) sub_sat((x), (y)) -#else +#else /* SATURATE */ #define ADD(x, y) (x) + (y) #define SUB(x, y) (x) - (y) -#endif +#endif /* SATURATE */ /** This function add two images. * diff --git a/src/core/CL/cl_kernels/channel_combine.cl b/src/core/CL/cl_kernels/channel_combine.cl index 93e80b925e..d309812221 100644 --- a/src/core/CL/cl_kernels/channel_combine.cl +++ b/src/core/CL/cl_kernels/channel_combine.cl @@ -337,11 +337,11 @@ __kernel void channel_combine_NV( uchar8 data1 = vload8(0, src_plane1.ptr); uchar8 data2 = vload8(0, src_plane2.ptr); -#if defined NV12 +#ifdef NV12 vstore16(shuffle2(data1, data2, (uchar16)(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15)), 0, dst_plane1.ptr); -#elif defined NV21 +#elif defined(NV21) vstore16(shuffle2(data2, data1, (uchar16)(0, 2, 4, 6, 8, 10, 12, 14, 1, 3, 5, 7, 9, 11, 13, 15)), 0, dst_plane1.ptr); -#endif +#endif /* NV12 or NV21 */ } /** This function combines three planes to a single YUV444 or IYUV image.
@@ -405,12 +405,12 @@ __kernel void copy_planes_3p( // Copy plane data vstore16(vload16(0, src_plane0.ptr), 0, dst_plane0.ptr); -#if defined YUV444 +#ifdef YUV444 vstore16(vload16(0, src_plane1.ptr), 0, dst_plane1.ptr); vstore16(vload16(0, src_plane2.ptr), 0, dst_plane2.ptr); -#elif defined IYUV +#elif defined(IYUV) vstore16(vload16(0, offset(&src_plane0, 0, height)), 0, (__global uchar *)offset(&dst_plane0, 0, height)); vstore8(vload8(0, src_plane1.ptr), 0, dst_plane1.ptr); vstore8(vload8(0, src_plane2.ptr), 0, dst_plane2.ptr); -#endif +#endif /* YUV444 or IYUV */ } diff --git a/src/core/CL/cl_kernels/channel_extract.cl b/src/core/CL/cl_kernels/channel_extract.cl index 14c6c8a92a..e95bda465b 100644 --- a/src/core/CL/cl_kernels/channel_extract.cl +++ b/src/core/CL/cl_kernels/channel_extract.cl @@ -51,16 +51,16 @@ __kernel void channel_extract_RGB888( uchar16 data = vload16(0, src.ptr); uchar8 data2 = vload8(0, src.ptr + 16); -#if defined CHANNEL_R +#ifdef CHANNEL_R vstore4(data.s0369, 0, dst.ptr); vstore4((uchar4)(data.sCF, data2.s25), 0, dst.ptr + 4); -#elif defined CHANNEL_G +#elif defined(CHANNEL_G) vstore4(data.s147A, 0, dst.ptr); vstore4((uchar4)(data.sD, data2.s036), 0, dst.ptr + 4); -#elif defined CHANNEL_B +#elif defined(CHANNEL_B) vstore4(data.s258B, 0, dst.ptr); vstore4((uchar4)(data.sE, data2.s147), 0, dst.ptr + 4); -#endif +#endif /* CHANNEL_R or CHANNEL_G or CHANNEL_B */ } /** This function extracts a given channel from an RGBA image. @@ -91,15 +91,15 @@ __kernel void channel_extract_RGBA8888( uchar16 data = vload16(0, src.ptr); uchar16 data2 = vload16(0, src.ptr + 16); -#if defined CHANNEL_R +#ifdef CHANNEL_R vstore8((uchar8)(data.s048C, data2.s048C), 0, dst.ptr); -#elif defined CHANNEL_G +#elif defined(CHANNEL_G) vstore8((uchar8)(data.s159D, data2.s159D), 0, dst.ptr); -#elif defined CHANNEL_B +#elif defined(CHANNEL_B) vstore8((uchar8)(data.s26AE, data2.s26AE), 0, dst.ptr); -#elif defined CHANNEL_A +#elif defined(CHANNEL_A) vstore8((uchar8)(data.s37BF, data2.s37BF), 0, dst.ptr); -#endif +#endif /* CHANNEL_R or CHANNEL_G or CHANNEL_B or CHANNEL_A */ } /** This function extracts a given channel from an YUYV image. @@ -129,13 +129,13 @@ __kernel void channel_extract_YUYV422( uchar16 data = vload16(0, src.ptr); -#if defined CHANNEL_Y +#ifdef CHANNEL_Y vstore8(data.s02468ACE, 0, dst.ptr); -#elif defined CHANNEL_U +#elif defined(CHANNEL_U) vstore4(data.s159D, 0, dst.ptr); -#elif defined CHANNEL_V +#elif defined(CHANNEL_V) vstore4(data.s37BF, 0, dst.ptr); -#endif +#endif /* CHANNEL_Y or CHANNEL_U or CHANNEL_V */ } /** This function extracts a given channel from an UYUV image. @@ -165,13 +165,13 @@ __kernel void channel_extract_UYVY422( uchar16 data = vload16(0, src.ptr); -#if defined CHANNEL_Y +#ifdef CHANNEL_Y vstore8(data.s13579BDF, 0, dst.ptr); -#elif defined CHANNEL_U +#elif defined(CHANNEL_U) vstore4(data.s048C, 0, dst.ptr); -#elif defined CHANNEL_V +#elif defined(CHANNEL_V) vstore4(data.s26AE, 0, dst.ptr); -#endif +#endif /* CHANNEL_Y or CHANNEL_U or CHANNEL_V */ } /** This function extracts a given channel from an NV12 image. @@ -202,11 +202,11 @@ __kernel void channel_extract_NV12( uchar16 data = vload16(0, src.ptr); -#if defined CHANNEL_U +#ifdef CHANNEL_U vstore8(data.s02468ACE, 0, dst.ptr); -#elif defined CHANNEL_V +#elif defined(CHANNEL_V) vstore8(data.s13579BDF, 0, dst.ptr); -#endif +#endif /* CHANNEL_U or CHANNEL_V */ } /** This function extracts a given channel from an NV21 image. 
@@ -237,11 +237,11 @@ __kernel void channel_extract_NV21( uchar16 data = vload16(0, src.ptr); -#if defined CHANNEL_U +#ifdef CHANNEL_U vstore8(data.s13579BDF, 0, dst.ptr); -#elif defined CHANNEL_V +#elif defined(CHANNEL_V) vstore8(data.s02468ACE, 0, dst.ptr); -#endif +#endif /* CHANNEL_U or CHANNEL_V */ } /** This function extracts a given plane from an multi-planar image. diff --git a/src/core/CL/cl_kernels/convolution3x3.cl b/src/core/CL/cl_kernels/convolution3x3.cl index 3733d0c733..8c75ecddb2 100644 --- a/src/core/CL/cl_kernels/convolution3x3.cl +++ b/src/core/CL/cl_kernels/convolution3x3.cl @@ -25,11 +25,11 @@ #ifndef DATA_TYPE #define DATA_TYPE short -#endif +#endif /* DATA_TYPE */ #ifndef DATA_TYPE_OUT #define DATA_TYPE_OUT uchar -#endif +#endif /* DATA_TYPE_OUT */ /** Compute a 1D horizontal convolution of size 3 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels). * diff --git a/src/core/CL/cl_kernels/convolution5x5.cl b/src/core/CL/cl_kernels/convolution5x5.cl index d1335c5558..605cd09b89 100644 --- a/src/core/CL/cl_kernels/convolution5x5.cl +++ b/src/core/CL/cl_kernels/convolution5x5.cl @@ -25,15 +25,15 @@ #ifndef DATA_TYPE #define DATA_TYPE short -#endif +#endif /* DATA_TYPE */ #ifndef COMPUTE_TYPE #define COMPUTE_TYPE int -#endif +#endif /* COMPUTE_TYPE */ #ifndef DATA_TYPE_OUT #define DATA_TYPE_OUT uchar -#endif +#endif /* DATA_TYPE_OUT */ /** Compute a 1D horizontal convolution of size 5 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels). * diff --git a/src/core/CL/cl_kernels/convolution7x7.cl b/src/core/CL/cl_kernels/convolution7x7.cl index 74a0055370..1abfb156d3 100644 --- a/src/core/CL/cl_kernels/convolution7x7.cl +++ b/src/core/CL/cl_kernels/convolution7x7.cl @@ -25,15 +25,15 @@ #ifndef DATA_TYPE #define DATA_TYPE short -#endif +#endif /* DATA_TYPE */ #ifndef COMPUTE_TYPE #define COMPUTE_TYPE int -#endif +#endif /* COMPUTE_TYPE */ #ifndef DATA_TYPE_OUT #define DATA_TYPE_OUT uchar -#endif +#endif /* DATA_TYPE_OUT */ /** Compute a 1D horizontal convolution of size 7 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels). * diff --git a/src/core/CL/cl_kernels/convolution9x9.cl b/src/core/CL/cl_kernels/convolution9x9.cl index d8b07cafac..f537326a31 100644 --- a/src/core/CL/cl_kernels/convolution9x9.cl +++ b/src/core/CL/cl_kernels/convolution9x9.cl @@ -25,15 +25,15 @@ #ifndef DATA_TYPE #define DATA_TYPE short -#endif +#endif /* DATA_TYPE */ #ifndef COMPUTE_TYPE #define COMPUTE_TYPE int -#endif +#endif /* COMPUTE_TYPE */ #ifndef DATA_TYPE_OUT #define DATA_TYPE_OUT uchar -#endif +#endif /* DATA_TYPE_OUT */ /** Compute a 1D horizontal convolution of size 9 for 8 bytes assuming the input is made of 1 channel of 1 byte (i.e 8 pixels). 
* diff --git a/src/core/CL/cl_kernels/convolution_layer.cl b/src/core/CL/cl_kernels/convolution_layer.cl index 837fdd70fe..a5cbe3d5c4 100644 --- a/src/core/CL/cl_kernels/convolution_layer.cl +++ b/src/core/CL/cl_kernels/convolution_layer.cl @@ -53,9 +53,9 @@ __kernel void reshape_to_columns( TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst), -#if defined HAS_BIAS +#ifdef HAS_BIAS VECTOR_DECLARATION(bias), -#endif +#endif /* HAS_BIAS */ uint width, uint height, uint depth, uint total_filters) { Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); @@ -64,9 +64,9 @@ __kernel void reshape_to_columns( __global uchar *tmp_src_ptr = src.ptr; __global uchar *tmp_dst_ptr = dst_ptr + dst_offset_first_element_in_bytes + get_global_id(0) * dst_stride_y + get_global_id(1) * width * dst_stride_y + get_global_id( 2) * width * height * dst_stride_y; -#if defined HAS_BIAS +#ifdef HAS_BIAS __global uchar *tmp_bias_ptr = bias_ptr + bias_offset_first_element_in_bytes; -#endif +#endif /* HAS_BIAS */ if(is_last_thread) { @@ -74,10 +74,10 @@ __kernel void reshape_to_columns( { *((__global DATA_TYPE *)tmp_dst_ptr) = *((__global DATA_TYPE *)tmp_src_ptr); -#if defined HAS_BIAS +#ifdef HAS_BIAS *((__global DATA_TYPE *)(tmp_dst_ptr + dst_stride_y)) = *((__global DATA_TYPE *)(tmp_bias_ptr)); tmp_bias_ptr += bias_stride_x; -#endif +#endif /* HAS_BIAS */ tmp_src_ptr += depth * src_stride_z; tmp_dst_ptr += dst_stride_x; } @@ -93,7 +93,7 @@ __kernel void reshape_to_columns( } } -#if(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT) +#if defined(CONVOLVED_WIDTH) && defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_X) && defined(PAD_Y) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(KERNEL_DEPTH) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) /** This kernel performs a reshaping of the input tensor to a tensor used to perform convolution using GEMM. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float @@ -148,11 +148,11 @@ __kernel void im2col_generic( } } -#if defined HAS_BIAS +#if defined(HAS_BIAS) *((__global DATA_TYPE *)output_ptr) = (DATA_TYPE)1; -#endif +#endif /* HAS_BIAS */ } -#endif //(defined CONVOLVED_WIDTH && defined STRIDE_X && defined STRIDE_Y && defined PAD_X && defined PAD_Y && defined KERNEL_WIDTH && defined KERNEL_HEIGHT && defined KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT) +#endif //(CONVOLVED_WIDTH && STRIDE_X && STRIDE_Y && PAD_X && PAD_Y && KERNEL_WIDTH && KERNEL_HEIGHT && KERNEL_DEPTH && SRC_WIDTH && SRC_HEIGHT) /** This kernel performs a reshaping of the output of the convolution layer. 
* @@ -220,12 +220,12 @@ __kernel void im2col_reduced( *((__global DATA_TYPE *)tmp_out_ptr) = *((__global DATA_TYPE *)src.ptr); -#if defined HAS_BIAS +#ifdef HAS_BIAS // If it is the last thread in the 3 dimensional workgroup if(get_global_id(0) == (get_global_size(0) - 1) && get_global_id(1) == (get_global_size(1) - 1) && get_global_id(2) == (get_global_size(2) - 1)) { tmp_out_ptr += dst_stride_x; *((__global DATA_TYPE *)tmp_out_ptr) = (DATA_TYPE)1; } -#endif +#endif /* HAS_BIAS */ } diff --git a/src/core/CL/cl_kernels/convolution_rectangle.cl b/src/core/CL/cl_kernels/convolution_rectangle.cl index 96b9cff3eb..f5a109fa91 100644 --- a/src/core/CL/cl_kernels/convolution_rectangle.cl +++ b/src/core/CL/cl_kernels/convolution_rectangle.cl @@ -31,15 +31,15 @@ #ifndef DATA_TYPE #define DATA_TYPE short -#endif +#endif /* DATA_TYPE */ #ifndef COMPUTE_TYPE #define COMPUTE_TYPE int -#endif +#endif /* COMPUTE_TYPE */ #ifndef DATA_TYPE_OUT #define DATA_TYPE_OUT uchar -#endif +#endif /* DATA_TYPE_OUT */ #ifndef DYNAMIC_MATRIX_CONVOLUTION @@ -89,24 +89,24 @@ __kernel void convolution_rectangle( #if MATRIX_WIDTH == 3 pixels += convolution1x3(offset(&src, -1, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 3], matrix_coeff[1 + i * 3], matrix_coeff[2 + i * 3]); -#endif +#endif /* MATRIX_WIDTH */ #if MATRIX_WIDTH == 5 pixels += convolution1x5(offset(&src, -2, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 5], matrix_coeff[1 + i * 5], matrix_coeff[2 + i * 5], matrix_coeff[3 + i * 5], matrix_coeff[4 + i * 5]); -#endif +#endif /* MATRIX_WIDTH */ #if MATRIX_WIDTH == 7 pixels += convolution1x7(offset(&src, -3, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 7], matrix_coeff[1 + i * 7], matrix_coeff[2 + i * 7], matrix_coeff[3 + i * 7], matrix_coeff[4 + i * 7], matrix_coeff[5 + i * 7], matrix_coeff[6 + i * 7]); -#endif +#endif /* MATRIX_WIDTH */ #if MATRIX_WIDTH == 9 pixels += convolution1x9(offset(&src, -4, -(MATRIX_HEIGHT / 2) + i), matrix_coeff[0 + i * 9], matrix_coeff[1 + i * 9], matrix_coeff[2 + i * 9], matrix_coeff[3 + i * 9], matrix_coeff[4 + i * 9], matrix_coeff[5 + i * 9], matrix_coeff[6 + i * 9], matrix_coeff[7 + i * 9], matrix_coeff[8 + i * 9]); -#endif +#endif /* MATRIX_WIDTH */ } pixels /= (VEC_DATA_TYPE(DATA_TYPE, 8))SCALE; @@ -115,4 +115,4 @@ __kernel void convolution_rectangle( vstore8(CONVERT_SAT(pixels, VEC_DATA_TYPE(DATA_TYPE_OUT, 8)), 0, ((__global DATA_TYPE_OUT *)dst.ptr)); } -#endif // DYNAMIC_MATRIX_CONVOLUTION +#endif /* not DYNAMIC_MATRIX_CONVOLUTION */ diff --git a/src/core/CL/cl_kernels/depth_convert.cl b/src/core/CL/cl_kernels/depth_convert.cl index c8eaa95352..3a1c7ca2c5 100644 --- a/src/core/CL/cl_kernels/depth_convert.cl +++ b/src/core/CL/cl_kernels/depth_convert.cl @@ -25,9 +25,9 @@ #ifdef SATURATE #define CONVERT_DOWN(x, type) CONVERT_SAT(x, type) -#else +#else /* SATURATE */ #define CONVERT_DOWN(x, type) CONVERT(x, type) -#endif +#endif /* SATURATE */ /** This function performs a down-scaling depth conversion. 
* diff --git a/src/core/CL/cl_kernels/derivative.cl b/src/core/CL/cl_kernels/derivative.cl index 0e810d2e7c..cd2091e237 100644 --- a/src/core/CL/cl_kernels/derivative.cl +++ b/src/core/CL/cl_kernels/derivative.cl @@ -52,29 +52,29 @@ __kernel void derivative( #ifdef GRAD_X , IMAGE_DECLARATION(dst_gx) -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y , IMAGE_DECLARATION(dst_gy) -#endif +#endif /* GRAD_Y */ ) { Image src = CONVERT_TO_IMAGE_STRUCT(src); #ifdef GRAD_X Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); -#endif +#endif /* GRAD_Y */ #ifdef GRAD_X short16 l_data = convert_short16(vload16(0, offset(&src, -1, 0))); short16 r_data = convert_short16(vload16(0, offset(&src, 1, 0))); vstore16(r_data - l_data, 0, ((__global short *)dst_gx.ptr)); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y short16 t_data = convert_short16(vload16(0, offset(&src, 0, -1))); short16 b_data = convert_short16(vload16(0, offset(&src, 0, 1))); vstore16(b_data - t_data, 0, ((__global short *)dst_gy.ptr)); -#endif +#endif /* GRAD_Y */ } diff --git a/src/core/CL/cl_kernels/fast_corners.cl b/src/core/CL/cl_kernels/fast_corners.cl index 470d14a7b0..3e1929c637 100644 --- a/src/core/CL/cl_kernels/fast_corners.cl +++ b/src/core/CL/cl_kernels/fast_corners.cl @@ -206,12 +206,11 @@ __kernel void fast_corners( return; } -#ifndef USE_MAXSUPPRESSION - *out.ptr = 1; -#else - +#ifdef USE_MAXSUPPRESSION *out.ptr = compute_strength(p, in.ptr, input_stride_y, threshold); -#endif +#else /* USE_MAXSUPPRESSION */ + *out.ptr = 1; +#endif /* USE_MAXSUPPRESSION */ } /** Copy result to Keypoint buffer and count number of corners @@ -240,7 +239,7 @@ __kernel void copy_to_keypoint( { return; } -#endif +#endif /* UPDATE_NUMBER */ Image in = CONVERT_TO_IMAGE_STRUCT(input); diff --git a/src/core/CL/cl_kernels/gemm.cl b/src/core/CL/cl_kernels/gemm.cl index 796b343bda..d25621db64 100644 --- a/src/core/CL/cl_kernels/gemm.cl +++ b/src/core/CL/cl_kernels/gemm.cl @@ -262,7 +262,7 @@ __kernel void gemm_interleave4x4_8bit(IMAGE_DECLARATION(src), * @param[in] biases_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] biases_offset_first_element_in_bytes The offset of the first element in the destination tensor */ -#if(defined DATA_TYPE) +#ifdef DATA_TYPE __kernel void gemm_accumulate_biases( IMAGE_DECLARATION(accum), VECTOR_DECLARATION(biases)) @@ -279,9 +279,9 @@ __kernel void gemm_accumulate_biases( // Store result in the accummulate buffer vstore16(accum_value, 0, (__global DATA_TYPE *)accum.ptr); } -#endif // defined DATA_TYPE +#endif /* DATA_TYPE */ -#if(defined WIDTH_MATRIX_B) +#ifdef WIDTH_MATRIX_B /** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_8bit and @ref gemm_transpose1x16 before running the matrix multiplication * @@ -385,9 +385,9 @@ __kernel void gemm_mm_u8(IMAGE_DECLARATION(src0), vstore16(convert_uchar16_sat(c20), 0, (__global uchar *)(offset(&dst, 0, 2))); vstore16(convert_uchar16_sat(c30), 0, (__global uchar *)(offset(&dst, 0, 3))); } -#endif +#endif /* WIDTH_MATRIX_B */ -#if(defined WIDTH_MATRIX_B && defined ALPHA) +#if defined(WIDTH_MATRIX_B) && defined(ALPHA) /** This OpenCL kernel is optimised for Midgard. 
It computes the matrix multiplication between matrix A (src0) and matrix B (src1) * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_32bit and @ref gemm_transpose1x4 before running the matrix multiplication * @@ -796,7 +796,7 @@ __kernel void gemm_mm_f16(IMAGE_DECLARATION(src0), vstore8(c30, 0, (__global half *)(offset(&dst, 0, 3))); } -#if(defined FIXED_POINT_POSITION) +#ifdef FIXED_POINT_POSITION /** This OpenCL kernel computes the matrix multiplication between matrix A (src0) and matrix B (src1) in 8 bit fixed point precision * Matrix A and matrix B must be reshaped respectively with @ref gemm_interleave4x4_8bit and @ref gemm_transpose1x16 before running the matrix multiplication * @@ -888,9 +888,9 @@ __kernel void gemm_mm_qs8(IMAGE_DECLARATION(src0), vstore16(c20_qs8, 0, (__global char *)(offset(&dst, 0, 2))); vstore16(c30_qs8, 0, (__global char *)(offset(&dst, 0, 3))); } -#endif // (defined FIXED_POINT_POSITION) +#endif /* FIXED_POINT_POSITION */ -#if(defined WIDTH_VECTOR_A) +#ifdef WIDTH_VECTOR_A /** This OpenCL kernel computes the vector by matrix multiplication between the vector A (src0) and matrix B (src1) * * @attention The width of vector A, the width of matrix B and the alpha's value need to be passed at compile time using -DWIDTH_VECTOR_A -DWIDTH_MATRIX_B and -DALPHA @@ -1027,7 +1027,7 @@ __kernel void gemm_vm_f16(IMAGE_DECLARATION(src0), vstore8(acc, 0, (__global half *)(offset(&dst, 0, 0))); } -#if(defined FIXED_POINT_POSITION) +#ifdef FIXED_POINT_POSITION /** This OpenCL kernel computes the vector by matrix multiplication between the vector A (src0) and matrix B (src1) in 8 bit fixed point * * @attention The width of vector A, the width of matrix B, the alpha's value and the fixed point position need to be passed at compile time using -DWIDTH_VECTOR_A -DWIDTH_MATRIX_B, -DALPHA and -DFIXED_POINT_POSITION @@ -1111,11 +1111,11 @@ __kernel void gemm_vm_qs8(IMAGE_DECLARATION(src0), /* Store 16 values */ vstore16(acc_qs8, 0, (__global char *)(offset(&dst, 0, 0))); } -#endif /* #if(defined FIXED_POINT_POSITION) */ -#endif /* (defined WIDTH_VECTOR_A) */ -#endif /* (defined WIDTH_MATRIX_B && defined ALPHA) */ +#endif /* FIXED_POINT_POSITION */ +#endif /* WIDTH_VECTOR_A */ +#endif /* WIDTH_MATRIX_B && ALPHA */ -#if(defined BETA) +#ifdef BETA /** This OpenCL kernel performs the in-place matrix addition between 2 matrices taking into account that the second matrix might be weighted by a scalar value beta: * * @attention The beta's value need to be passed at compile time using -DBETA @@ -1190,7 +1190,7 @@ __kernel void gemm_ma_f16(IMAGE_DECLARATION(src), vstore8(out, 0, (__global half *)dst.ptr); } -#if(defined FIXED_POINT_POSITION) +#ifdef FIXED_POINT_POSITION /** This OpenCL kernel performs the in-place matrix addition between 2 matrices in 8 bit fixed point taking into account that the second matrix might be weighted by a scalar value beta: * * @attention The beta's value and the fixed point position need to be passed at compile time using -DBETA and -DFIXED_POINT_POSITION @@ -1229,10 +1229,10 @@ __kernel void gemm_ma_qs8(IMAGE_DECLARATION(src), /* Store final result in axb matrix */ vstore16(out, 0, (__global char *)dst.ptr); } -#endif /* #if(defined FIXED_POINT_POSITION) */ -#endif /* (defined BETA) */ +#endif /* FIXED_POINT_POSITION */ +#endif /* BETA */ -#if(defined WIDTH_VECTOR_A) +#ifdef WIDTH_VECTOR_A /** This OpenCL kernel computes the vector by matrix multiplication between each row of A (src0) and matrix B (src1) used for locally connected 
layer * * @attention The width of A need to be passed at compile time using -DWIDTH_VECTOR_A @@ -1298,4 +1298,4 @@ __kernel void gemm_lc_vm_f32(IMAGE_DECLARATION(src0), vstore4(acc, 0, (__global float *)(offset(&dst, 0, 0))); } -#endif /* (defined WIDTH_VECTOR_A) */ +#endif /* WIDTH_VECTOR_A */ diff --git a/src/core/CL/cl_kernels/hog.cl b/src/core/CL/cl_kernels/hog.cl index 31dd57b767..5d3a607c44 100644 --- a/src/core/CL/cl_kernels/hog.cl +++ b/src/core/CL/cl_kernels/hog.cl @@ -24,7 +24,7 @@ #include "helpers.h" #include "types.h" -#if(defined CELL_WIDTH && defined CELL_HEIGHT && defined NUM_BINS && defined PHASE_SCALE) +#if defined(CELL_WIDTH) && defined(CELL_HEIGHT) && defined(NUM_BINS) && defined(PHASE_SCALE) /** This OpenCL kernel computes the HOG orientation binning * @@ -159,21 +159,21 @@ __kernel void hog_orientation_binning(IMAGE_DECLARATION(mag), ((__global float *)dst.ptr)[xc] = bins[xc]; } } -#endif // (defined CELL_WIDTH && defined CELL_HEIGHT && defined NUM_BINS && defined PHASE_SCALE) +#endif /* CELL_WIDTH and CELL_HEIGHT and NUM_BINS and PHASE_SCALE */ -#if(defined NUM_CELLS_PER_BLOCK_HEIGHT && defined NUM_BINS_PER_BLOCK_X && defined NUM_BINS_PER_BLOCK && HOG_NORM_TYPE && defined L2_HYST_THRESHOLD) +#if defined(NUM_CELLS_PER_BLOCK_HEIGHT) && defined(NUM_BINS_PER_BLOCK_X) && defined(NUM_BINS_PER_BLOCK) && defined(HOG_NORM_TYPE) && defined(L2_HYST_THRESHOLD) #ifndef L2_NORM #error The value of enum class HOGNormType::L2_NORM has not be passed to the OpenCL kernel -#endif +#endif /* not L2_NORM */ #ifndef L2HYS_NORM #error The value of enum class HOGNormType::L2HYS_NORM has not be passed to the OpenCL kernel -#endif +#endif /* not L2HYS_NORM */ #ifndef L1_NORM #error The value of enum class HOGNormType::L1_NORM has not be passed to the OpenCL kernel -#endif +#endif /* not L1_NORM */ /** This OpenCL kernel computes the HOG block normalization * @@ -231,13 +231,13 @@ __kernel void hog_block_normalization(IMAGE_DECLARATION(src), sum_f32 += val1 * val1; sum_f32 += val2 * val2; sum_f32 += val3 * val3; -#else +#else /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */ // Compute |val| for L1_NORM sum_f32 += fabs(val0); sum_f32 += fabs(val1); sum_f32 += fabs(val2); sum_f32 += fabs(val3); -#endif // (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) +#endif /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */ // Store linearly the input values un-normalized in the output image. These values will be reused for the normalization. 
// This approach will help us to be cache friendly in the next for loop where the normalization will be done because all the values @@ -255,9 +255,9 @@ __kernel void hog_block_normalization(IMAGE_DECLARATION(src), #if(HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) sum += val * val; -#else +#else /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */ sum += fabs(val); -#endif // (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) +#endif /* (HOG_NORM_TYPE == L2_NORM) || (HOG_NORM_TYPE == L2HYS_NORM) */ ((__global float *)dst.ptr)[xc + 0 + yc * NUM_BINS_PER_BLOCK_X] = val; } @@ -322,7 +322,7 @@ __kernel void hog_block_normalization(IMAGE_DECLARATION(src), // We use the same constants of OpenCV scale = 1.0f / (sqrt(sum) + 1e-3f); -#endif // (HOG_NORM_TYPE == L2HYS_NORM) +#endif /* (HOG_NORM_TYPE == L2HYS_NORM) */ int i = 0; for(; i <= (NUM_BINS_PER_BLOCK - 16); i += 16) @@ -349,9 +349,9 @@ __kernel void hog_block_normalization(IMAGE_DECLARATION(src), ((__global float *)dst.ptr)[i] *= scale; } } -#endif // (defined NUM_CELLS_PER_BLOCK_HEIGHT && defined NUM_BINS_PER_BLOCK_X && defined NUM_BINS_PER_BLOCK && HOG_NORM_TYPE && defined L2_HYST_THRESHOLD) +#endif /* NUM_CELLS_PER_BLOCK_HEIGHT and NUM_BINS_PER_BLOCK_X and NUM_BINS_PER_BLOCK and HOG_NORM_TYPE and L2_HYST_THRESHOLD */ -#if(defined NUM_BLOCKS_PER_DESCRIPTOR_Y && defined NUM_BINS_PER_DESCRIPTOR_X && defined THRESHOLD && defined MAX_NUM_DETECTION_WINDOWS && defined IDX_CLASS && defined BLOCK_STRIDE_WIDTH && defined BLOCK_STRIDE_HEIGHT && defined DETECTION_WINDOW_WIDTH && defined DETECTION_WINDOW_HEIGHT) +#if defined(NUM_BLOCKS_PER_DESCRIPTOR_Y) && defined(NUM_BINS_PER_DESCRIPTOR_X) && defined(THRESHOLD) && defined(MAX_NUM_DETECTION_WINDOWS) && defined(IDX_CLASS) && defined(BLOCK_STRIDE_WIDTH) && defined(BLOCK_STRIDE_HEIGHT) && defined(DETECTION_WINDOW_WIDTH) && defined(DETECTION_WINDOW_HEIGHT) /** This OpenCL kernel computes the HOG detector using linear SVM * @@ -452,4 +452,5 @@ __kernel void hog_detector(IMAGE_DECLARATION(src), } } } -#endif // defined BIAS && defined NUM_BLOCKS_PER_DESCRIPTOR_Y && defined NUM_BINS_PER_DESCRIPTOR_X && ... +#endif /* NUM_BLOCKS_PER_DESCRIPTOR_Y && NUM_BINS_PER_DESCRIPTOR_X && THRESHOLD && MAX_NUM_DETECTION_WINDOWS && IDX_CLASS && + * BLOCK_STRIDE_WIDTH && BLOCK_STRIDE_HEIGHT && DETECTION_WINDOW_WIDTH && DETECTION_WINDOW_HEIGHT */ diff --git a/src/core/CL/cl_kernels/magnitude_phase.cl b/src/core/CL/cl_kernels/magnitude_phase.cl index c4b0df8de9..e9845e0e20 100644 --- a/src/core/CL/cl_kernels/magnitude_phase.cl +++ b/src/core/CL/cl_kernels/magnitude_phase.cl @@ -81,17 +81,17 @@ inline uchar16 phase_signed(VEC_DATA_TYPE(DATA_TYPE, 16) a, VEC_DATA_TYPE(DATA_T #define MAGNITUDE_OP(x, y) magnitude_l1((x), (y)) #elif(2 == MAGNITUDE) #define MAGNITUDE_OP(x, y) magnitude_l2(convert_int16(x), convert_int16(y)) -#else +#else /* MAGNITUDE */ #define MAGNITUDE_OP(x, y) -#endif +#endif /* MAGNITUDE */ #if(1 == PHASE) #define PHASE_OP(x, y) phase_unsigned((x), (y)) #elif(2 == PHASE) #define PHASE_OP(x, y) phase_signed((x), (y)) -#else +#else /* PHASE */ #define PHASE_OP(x, y) -#endif +#endif /* PHASE */ /** Calculate the magnitude and phase of given the gradients of an image. 
* @@ -133,11 +133,11 @@ __kernel void magnitude_phase( #ifdef MAGNITUDE , IMAGE_DECLARATION(magnitude) -#endif +#endif /* MAGNITUDE */ #ifdef PHASE , IMAGE_DECLARATION(phase) -#endif +#endif /* PHASE */ ) { // Get pixels pointer @@ -154,9 +154,9 @@ __kernel void magnitude_phase( #ifdef MAGNITUDE Image magnitude = CONVERT_TO_IMAGE_STRUCT(magnitude); vstore16(MAGNITUDE_OP(in_a, in_b), 0, (__global DATA_TYPE *)magnitude.ptr); -#endif +#endif /* MAGNITUDE */ #ifdef PHASE Image phase = CONVERT_TO_IMAGE_STRUCT(phase); vstore16(PHASE_OP(in_a, in_b), 0, phase.ptr); -#endif +#endif /* PHASE */ } diff --git a/src/core/CL/cl_kernels/mean_stddev.cl b/src/core/CL/cl_kernels/mean_stddev.cl index 50b8312548..7c29d2fe96 100644 --- a/src/core/CL/cl_kernels/mean_stddev.cl +++ b/src/core/CL/cl_kernels/mean_stddev.cl @@ -44,19 +44,19 @@ __kernel void mean_stddev_accumulate( IMAGE_DECLARATION(src), uint height, __global ulong *global_sum -#if defined STDDEV +#ifdef STDDEV , __global ulong *global_sum_sq -#endif +#endif /* STDDEV */ ) { // Get pixels pointer Image src = CONVERT_TO_IMAGE_STRUCT(src); - uint8 tmp_sum = 0; -#if defined STDDEV - uint8 tmp_sum_sq = 0; -#endif + uint8 tmp_sum = 0; +#ifdef STDDEV + uint8 tmp_sum_sq = 0; +#endif /* STDDEV */ // Calculate partial sum for(int i = 0; i < height; i++) { @@ -64,20 +64,20 @@ __kernel void mean_stddev_accumulate( uint8 data = convert_uint8(vload8(0, offset(&src, 0, i))); tmp_sum += data; -#if defined STDDEV +#ifdef STDDEV tmp_sum_sq += data * data; -#endif +#endif /* STDDEV */ } // Perform reduction tmp_sum.s0123 += tmp_sum.s4567; tmp_sum.s01 += tmp_sum.s23; atom_add(global_sum, tmp_sum.s0 + tmp_sum.s1); -#if defined STDDEV +#ifdef STDDEV tmp_sum_sq.s0123 += tmp_sum_sq.s4567; tmp_sum_sq.s01 += tmp_sum_sq.s23; atom_add(global_sum_sq, tmp_sum_sq.s0 + tmp_sum_sq.s1); -#endif +#endif /* STDDEV */ } #pragma OPENCL EXTENSION cl_khr_int64_base_atomics : disable diff --git a/src/core/CL/cl_kernels/minmaxloc.cl b/src/core/CL/cl_kernels/minmaxloc.cl index 799b1e8c3b..e628e9bd5b 100644 --- a/src/core/CL/cl_kernels/minmaxloc.cl +++ b/src/core/CL/cl_kernels/minmaxloc.cl @@ -26,11 +26,11 @@ #ifndef DATA_TYPE_MIN #define DATA_TYPE_MIN 0x0 -#endif +#endif /* DATA_TYPE_MIN */ #ifndef DATA_TYPE_MAX #define DATA_TYPE_MAX 0xFF -#endif +#endif /* DATA_TYPE_MAX */ __constant VEC_DATA_TYPE(DATA_TYPE, 16) type_min = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MIN); __constant VEC_DATA_TYPE(DATA_TYPE, 16) type_max = (VEC_DATA_TYPE(DATA_TYPE, 16))(DATA_TYPE_MAX); @@ -82,7 +82,7 @@ __kernel void minmax( widx = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(DATA_TYPE, 16)); local_max = max(local_max, select(type_min, data, widx)); local_min = min(local_min, select(type_max, data, widx)); -#endif +#endif /* NON_MULTIPLE_OF_16 */ // Perform min/max reduction local_min.s01234567 = min(local_min.s01234567, local_min.s89ABCDEF); @@ -124,41 +124,41 @@ __kernel void minmaxloc( IMAGE_DECLARATION(src), __global int *min_max, __global uint *min_max_count -#if defined LOCATE_MIN +#ifdef LOCATE_MIN , __global Coordinates2D *min_loc, uint max_min_loc_count -#endif -#if defined LOCATE_MAX +#endif /* LOCATE_MIN */ +#ifdef LOCATE_MAX , __global Coordinates2D *max_loc, uint max_max_loc_count -#endif +#endif /* LOCATE_MAX */ ) { Image src = CONVERT_TO_IMAGE_STRUCT(src); DATA_TYPE value = *((__global DATA_TYPE *)src.ptr); -#if defined COUNT_MIN_MAX +#ifdef COUNT_MIN_MAX if(value == min_max[0]) { uint idx = atomic_inc(&min_max_count[0]); -#if defined LOCATE_MIN +#ifdef LOCATE_MIN if(idx < 
max_min_loc_count) { min_loc[idx].x = get_global_id(0); min_loc[idx].y = get_global_id(1); } -#endif +#endif /* LOCATE_MIN */ } if(value == min_max[1]) { uint idx = atomic_inc(&min_max_count[1]); -#if defined LOCATE_MAX +#ifdef LOCATE_MAX if(idx < max_max_loc_count) { max_loc[idx].x = get_global_id(0); max_loc[idx].y = get_global_id(1); } -#endif +#endif /* LOCATE_MAX */ } -#endif +#endif /* COUNT_MIN_MAX */ } diff --git a/src/core/CL/cl_kernels/non_linear_filter3x3.cl b/src/core/CL/cl_kernels/non_linear_filter3x3.cl index f860c96bb8..19118ea23b 100644 --- a/src/core/CL/cl_kernels/non_linear_filter3x3.cl +++ b/src/core/CL/cl_kernels/non_linear_filter3x3.cl @@ -54,13 +54,13 @@ __kernel void non_linear_filter_box3x3( uchar16 bottom = vload16(0, offset(&src, -1, 1)); // Apply respective filter -#if defined MIN - uchar16 tmp = min(top, min(middle, bottom)); - uchar8 out = row_reduce_min_3(tmp); -#elif defined MAX +#ifdef MIN + uchar16 tmp = min(top, min(middle, bottom)); + uchar8 out = row_reduce_min_3(tmp); +#elif defined(MAX) uchar16 tmp = max(top, max(middle, bottom)); uchar8 out = row_reduce_max_3(tmp); -#elif defined MEDIAN +#elif defined(MEDIAN) uchar8 p0 = top.s01234567; uchar8 p1 = top.s12345678; uchar8 p2 = top.s23456789; @@ -71,9 +71,9 @@ __kernel void non_linear_filter_box3x3( uchar8 p7 = bottom.s12345678; uchar8 p8 = bottom.s23456789; uchar8 out = sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8); -#else +#else /* MIN or MAX or MEDIAN */ #error "Unsupported filter function" -#endif +#endif /* MIN or MAX or MEDIAN */ // Store result vstore8(out, 0, dst.ptr); @@ -109,22 +109,22 @@ __kernel void non_linear_filter_cross3x3( uchar8 bottom = vload8(0, offset(&src, 0, 1)); // Apply respective filter -#if defined MIN - uchar8 tmp_middle = row_reduce_min_3(middle); - uchar8 out = min(tmp_middle, min(top, bottom)); -#elif defined MAX +#ifdef MIN + uchar8 tmp_middle = row_reduce_min_3(middle); + uchar8 out = min(tmp_middle, min(top, bottom)); +#elif defined(MAX) uchar8 tmp_middle = row_reduce_max_3(middle); uchar8 out = max(tmp_middle, max(top, bottom)); -#elif defined MEDIAN +#elif defined(MEDIAN) uchar8 p0 = top.s01234567; uchar8 p1 = middle.s01234567; uchar8 p2 = middle.s12345678; uchar8 p3 = middle.s23456789; uchar8 p4 = bottom.s01234567; uchar8 out = sort5(p0, p1, p2, p3, p4); -#else +#else /* MIN or MAX or MEDIAN */ #error "Unsupported filter function" -#endif +#endif /* MIN or MAX or MEDIAN */ // Store result vstore8(out, 0, dst.ptr); @@ -160,13 +160,13 @@ __kernel void non_linear_filter_disk3x3( uchar16 bottom = vload16(0, offset(&src, -1, 1)); // Apply respective filter -#if defined MIN - uchar16 tmp = min(top, min(middle, bottom)); - uchar8 out = row_reduce_min_3(tmp); -#elif defined MAX +#ifdef MIN + uchar16 tmp = min(top, min(middle, bottom)); + uchar8 out = row_reduce_min_3(tmp); +#elif defined(MAX) uchar16 tmp = max(top, max(middle, bottom)); uchar8 out = row_reduce_max_3(tmp); -#elif defined MEDIAN +#elif defined(MEDIAN) uchar8 p0 = top.s01234567; uchar8 p1 = top.s12345678; uchar8 p2 = top.s23456789; @@ -177,9 +177,9 @@ __kernel void non_linear_filter_disk3x3( uchar8 p7 = bottom.s12345678; uchar8 p8 = bottom.s23456789; uchar8 out = sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8); -#else +#else /* MIN or MAX or MEDIAN */ #error "Unsupported filter function" -#endif +#endif /* MIN or MAX or MEDIAN */ // Store result vstore8(out, 0, dst.ptr); diff --git a/src/core/CL/cl_kernels/non_linear_filter5x5.cl b/src/core/CL/cl_kernels/non_linear_filter5x5.cl index d9ae95fd2d..92f7a99851 100644 --- 
a/src/core/CL/cl_kernels/non_linear_filter5x5.cl +++ b/src/core/CL/cl_kernels/non_linear_filter5x5.cl @@ -351,17 +351,17 @@ __kernel void non_linear_filter_box5x5( uchar16 bottom2 = vload16(0, offset(&src, -2, 2)); // Apply respective filter -#if defined MIN - uchar16 tmp = min(middle, min(min(top2, top), min(bottom, bottom2))); - uchar8 out = row_reduce_min_5(tmp); -#elif defined MAX +#ifdef MIN + uchar16 tmp = min(middle, min(min(top2, top), min(bottom, bottom2))); + uchar8 out = row_reduce_min_5(tmp); +#elif defined(MAX) uchar16 tmp = max(middle, max(max(top2, top), max(bottom, bottom2))); uchar8 out = row_reduce_max_5(tmp); -#elif defined MEDIAN +#elif defined(MEDIAN) uchar8 out = median_box5x5(top2, top, middle, bottom, bottom2); -#else +#else /* MIN or MAX or MEDIAN */ #error "Unsupported filter function" -#endif +#endif /* MIN or MAX or MEDIAN */ // Store result vstore8(out, 0, dst.ptr); @@ -399,13 +399,13 @@ __kernel void non_linear_filter_cross5x5( uchar16 bottom2 = vload16(0, offset(&src, 0, 2)); // Apply respective filter -#if defined MIN - uchar8 tmp_middle = row_reduce_min_5(middle); - uchar8 out = min(tmp_middle, min(min(top2.s01234567, top.s01234567), min(bottom.s01234567, bottom2.s01234567))); -#elif defined MAX +#ifdef MIN + uchar8 tmp_middle = row_reduce_min_5(middle); + uchar8 out = min(tmp_middle, min(min(top2.s01234567, top.s01234567), min(bottom.s01234567, bottom2.s01234567))); +#elif defined(MAX) uchar8 tmp_middle = row_reduce_max_5(middle); uchar8 out = max(tmp_middle, max(max(top2.s01234567, top.s01234567), max(bottom.s01234567, bottom2.s01234567))); -#elif defined MEDIAN +#elif defined(MEDIAN) uchar8 p0 = top2.s01234567; uchar8 p1 = top.s01234567; uchar8 p2 = middle.s01234567; @@ -416,9 +416,9 @@ __kernel void non_linear_filter_cross5x5( uchar8 p7 = bottom.s01234567; uchar8 p8 = bottom2.s01234567; uchar8 out = sort9(p0, p1, p2, p3, p4, p5, p6, p7, p8); -#else +#else /* MIN or MAX or MEDIAN */ #error "Unsupported filter function" -#endif +#endif /* MIN or MAX or MEDIAN */ // Store result vstore8(out, 0, dst.ptr); @@ -456,23 +456,23 @@ __kernel void non_linear_filter_disk5x5( uchar16 bottom2 = vload16(0, offset(&src, -1, 2)); // Apply respective filter -#if defined MIN - uchar16 tmp_3 = min(top2, bottom2); - uchar16 tmp_5 = min(middle, min(top, bottom)); - uchar8 tmp_3_red = row_reduce_min_3(tmp_3); - uchar8 tmp_5_red = row_reduce_min_5(tmp_5); - uchar8 out = min(tmp_3_red, tmp_5_red); -#elif defined MAX +#ifdef MIN + uchar16 tmp_3 = min(top2, bottom2); + uchar16 tmp_5 = min(middle, min(top, bottom)); + uchar8 tmp_3_red = row_reduce_min_3(tmp_3); + uchar8 tmp_5_red = row_reduce_min_5(tmp_5); + uchar8 out = min(tmp_3_red, tmp_5_red); +#elif defined(MAX) uchar16 tmp_3 = max(top2, bottom2); uchar16 tmp_5 = max(middle, max(top, bottom)); uchar8 tmp_3_red = row_reduce_max_3(tmp_3); uchar8 tmp_5_red = row_reduce_max_5(tmp_5); uchar8 out = max(tmp_3_red, tmp_5_red); -#elif defined MEDIAN +#elif defined(MEDIAN) uchar8 out = median_disk5x5(top2, top, middle, bottom, bottom2); -#else +#else /* MIN or MAX or MEDIAN */ #error "Unsupported filter function" -#endif +#endif /* MIN or MAX or MEDIAN */ // Store result vstore8(out, 0, dst.ptr); diff --git a/src/core/CL/cl_kernels/pixelwise_mul_float.cl b/src/core/CL/cl_kernels/pixelwise_mul_float.cl index ae2031f422..89367dc0ce 100644 --- a/src/core/CL/cl_kernels/pixelwise_mul_float.cl +++ b/src/core/CL/cl_kernels/pixelwise_mul_float.cl @@ -25,9 +25,9 @@ #ifdef SATURATE #define CONVERT_OP_FLOAT_STR(x, type, round) 
(convert_##type##_sat##round(x)) -#else +#else /* SATURATE */ #define CONVERT_OP_FLOAT_STR(x, type, round) (convert_##type##round(x)) -#endif +#endif /* SATURATE */ #define CONVERT_OP_FLOAT(x, type, round) CONVERT_OP_FLOAT_STR(x, type, round) /** Performs a pixelwise multiplication with float scale of either integer or float inputs. @@ -76,13 +76,13 @@ __kernel void pixelwise_mul_float( in2_data = CONVERT(vload16(0, (__global DATA_TYPE_IN2 *)in2.ptr), VEC_DATA_TYPE(DATA_TYPE_RES, 16)); // Perform multiplication -#if defined DATA_TYPE_FLOAT +#ifdef DATA_TYPE_FLOAT VEC_DATA_TYPE(DATA_TYPE_OUT, 16) res = CONVERT(in1_data * in2_data * scale, VEC_DATA_TYPE(DATA_TYPE_OUT, 16)); -#else +#else /* DATA_TYPE_FLOAT */ VEC_DATA_TYPE(DATA_TYPE_OUT, 16) res = CONVERT_OP_FLOAT(CONVERT_OP_FLOAT((convert_float16(in1_data * in2_data) * scale), VEC_DATA_TYPE(DATA_TYPE_RES, 16), ROUND), VEC_DATA_TYPE(DATA_TYPE_OUT, 16), ROUND); -#endif +#endif /* DATA_TYPE_FLOAT */ // Store result vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr); diff --git a/src/core/CL/cl_kernels/pixelwise_mul_int.cl b/src/core/CL/cl_kernels/pixelwise_mul_int.cl index 05c437cd17..e6dfd3043d 100644 --- a/src/core/CL/cl_kernels/pixelwise_mul_int.cl +++ b/src/core/CL/cl_kernels/pixelwise_mul_int.cl @@ -25,9 +25,9 @@ #ifdef SATURATE #define CONVERT_OP_INT_STR(x, type) (convert_##type##_sat(x)) -#else +#else /* SATURATE */ #define CONVERT_OP_INT_STR(x, type) (convert_##type(x)) -#endif +#endif /* SATURATE */ #define CONVERT_OP_INT(x, type) CONVERT_OP_INT_STR(x, type) /** Performs a pixelwise multiplication with integer scale of integer inputs. diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index 6bdb174235..b7245203d4 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -23,11 +23,11 @@ */ #include "helpers.h" -#if defined POOL_AVG +#ifdef POOL_AVG #define POOL_OP(x, y) ((x) + (y)) -#else +#else /* POOL_AVG */ #define POOL_OP(x, y) (fmax((x), (y))) -#endif +#endif /* POOL_AVG */ float calculate_avg_scale(const int pool_size, const int upper_bound_w, const int upper_bound_h, const int pad_x, const int pad_y, const int stride_x, const int stride_y) @@ -70,7 +70,7 @@ __kernel void pooling_layer_2( #ifdef POOL_AVG , int2 max_dims, int2 strides, int2 paddings -#endif +#endif /* POOL_AVG */ ) { // Get pixels pointer @@ -90,7 +90,7 @@ __kernel void pooling_layer_2( // Divide by pool region in case of average pooling #ifdef POOL_AVG res *= calculate_avg_scale(2, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y); -#endif +#endif /* POOL_AVG */ // Store result *(__global DATA_TYPE *)output.ptr = res; @@ -127,7 +127,7 @@ __kernel void pooling_layer_3( #ifdef POOL_AVG , int2 max_dims, int2 strides, int2 paddings -#endif +#endif /* POOL_AVG */ ) { // Get pixels pointer @@ -150,7 +150,7 @@ __kernel void pooling_layer_3( // Divide by pool region in case of average pooling #ifdef POOL_AVG res *= calculate_avg_scale(3, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y); -#endif +#endif /* POOL_AVG */ // Store result *(__global DATA_TYPE *)output.ptr = res; @@ -187,7 +187,7 @@ __kernel void pooling_layer_7( #ifdef POOL_AVG , int2 max_dims, int2 strides, int2 paddings -#endif +#endif /* POOL_AVG */ ) { // Get pixels pointer @@ -221,9 +221,9 @@ __kernel void pooling_layer_7( // Set last element #ifdef POOL_AVG data0.s7 = 0; -#else +#else /* POOL_AVG */ data0.s7 = data0.s6; -#endif +#endif /* POOL_AVG */ // Reduce result 
VEC_DATA_TYPE(DATA_TYPE, 4) @@ -235,7 +235,7 @@ __kernel void pooling_layer_7( // Divide by pool region in case of average pooling #ifdef POOL_AVG res *= calculate_avg_scale(7, max_dims.x, max_dims.y, paddings.x, paddings.y, strides.x, strides.y); -#endif +#endif /* POOL_AVG */ // Store result *(__global DATA_TYPE *)output.ptr = res; diff --git a/src/core/CL/cl_kernels/scharr_filter.cl b/src/core/CL/cl_kernels/scharr_filter.cl index ef9878c1a3..d9b5d07837 100644 --- a/src/core/CL/cl_kernels/scharr_filter.cl +++ b/src/core/CL/cl_kernels/scharr_filter.cl @@ -52,28 +52,28 @@ __kernel void scharr3x3( #ifdef GRAD_X , IMAGE_DECLARATION(dst_gx) -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y , IMAGE_DECLARATION(dst_gy) -#endif +#endif /* GRAD_Y */ ) { Image src = CONVERT_TO_IMAGE_STRUCT(src); #ifdef GRAD_X Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); -#endif +#endif /* GRAD_Y */ // Output pixels #ifdef GRAD_X short8 gx = (short8)0; -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y short8 gy = (short8)0; -#endif +#endif /* GRAD_Y */ // Row0 uchar16 temp = vload16(0, offset(&src, -1, -1)); @@ -83,12 +83,12 @@ __kernel void scharr3x3( #ifdef GRAD_X gx += left * (short8)(-3); gx += right * (short8)(+3); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y gy += left * (short8)(-3); gy += middle * (short8)(-10); gy += right * (short8)(-3); -#endif +#endif /* GRAD_Y */ // Row1 temp = vload16(0, offset(&src, -1, 0)); @@ -97,7 +97,7 @@ __kernel void scharr3x3( #ifdef GRAD_X gx += left * (short8)(-10); gx += right * (short8)(+10); -#endif +#endif /* GRAD_X */ // Row2 temp = vload16(0, offset(&src, -1, 1)); @@ -107,18 +107,18 @@ __kernel void scharr3x3( #ifdef GRAD_X gx += left * (short8)(-3); gx += right * (short8)(+3); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y gy += left * (short8)(+3); gy += middle * (short8)(+10); gy += right * (short8)(+3); -#endif +#endif /* GRAD_Y */ // Store results #ifdef GRAD_X vstore8(gx, 0, ((__global short *)dst_gx.ptr)); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y vstore8(gy, 0, ((__global short *)dst_gy.ptr)); -#endif +#endif /* GRAD_Y */ } diff --git a/src/core/CL/cl_kernels/sobel_filter.cl b/src/core/CL/cl_kernels/sobel_filter.cl index 4eb0eef770..fc2b0eed92 100644 --- a/src/core/CL/cl_kernels/sobel_filter.cl +++ b/src/core/CL/cl_kernels/sobel_filter.cl @@ -56,28 +56,28 @@ __kernel void sobel3x3( #ifdef GRAD_X , IMAGE_DECLARATION(dst_gx) -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y , IMAGE_DECLARATION(dst_gy) -#endif +#endif /* GRAD_Y */ ) { Image src = CONVERT_TO_IMAGE_STRUCT(src); #ifdef GRAD_X Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); -#endif +#endif /* GRAD_Y */ // Output pixels #ifdef GRAD_X short8 gx = (short8)0; -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y short8 gy = (short8)0; -#endif +#endif /* GRAD_Y */ // Row0 uchar16 temp = vload16(0, offset(&src, -1, -1)); @@ -87,12 +87,12 @@ __kernel void sobel3x3( #ifdef GRAD_X gx += left * (short8)(-1); gx += right * (short8)(+1); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y gy += left * (short8)(-1); gy += middle * (short8)(-2); gy += right * (short8)(-1); -#endif +#endif /* GRAD_Y */ // Row1 temp = vload16(0, offset(&src, -1, 0)); @@ -101,7 +101,7 @@ __kernel void sobel3x3( #ifdef GRAD_X gx += left * (short8)(-2); gx += right * (short8)(+2); -#endif +#endif /* GRAD_X */ // Row2 temp = vload16(0, offset(&src, -1, 1)); @@ -111,20 +111,20 @@ __kernel void 
sobel3x3( #ifdef GRAD_X gx += left * (short8)(-1); gx += right * (short8)(+1); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y gy += left * (short8)(+1); gy += middle * (short8)(+2); gy += right * (short8)(+1); -#endif +#endif /* GRAD_Y */ // Store results #ifdef GRAD_X vstore8(gx, 0, ((__global short *)dst_gx.ptr)); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y vstore8(gy, 0, ((__global short *)dst_gy.ptr)); -#endif +#endif /* GRAD_Y */ } /**********************************************/ @@ -261,20 +261,20 @@ __kernel void sobel_separable1x5( #ifdef GRAD_X , IMAGE_DECLARATION(dst_gx) -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y , IMAGE_DECLARATION(dst_gy) -#endif +#endif /* GRAD_Y */ ) { Image src = CONVERT_TO_IMAGE_STRUCT(src); #ifdef GRAD_X Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); -#endif +#endif /* GRAD_Y */ // Output pixels short16 gx_gy = sobel1x5(&src, @@ -284,10 +284,10 @@ __kernel void sobel_separable1x5( // Store result in dst #ifdef GRAD_X vstore8(gx_gy.s01234567, 0, ((__global short *)dst_gx.ptr)); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y vstore8(gx_gy.s89ABCDEF, 0, ((__global short *)dst_gy.ptr)); -#endif +#endif /* GRAD_Y */ } /** Apply a 5x1 convolution matrix to two single channel S16 input temporary images @@ -326,32 +326,32 @@ __kernel void sobel_separable5x1( #ifdef GRAD_X IMAGE_DECLARATION(src_x), IMAGE_DECLARATION(dst_gx), -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y IMAGE_DECLARATION(src_y), IMAGE_DECLARATION(dst_gy), -#endif +#endif /* GRAD_Y */ int dummy) { #ifdef GRAD_X Image src_x = CONVERT_TO_IMAGE_STRUCT(src_x); Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y Image src_y = CONVERT_TO_IMAGE_STRUCT(src_y); Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); -#endif +#endif /* GRAD_Y */ #ifdef GRAD_X short8 gx = sobel5x1(&src_x, 1, 4, 6, 4, 1); vstore8(gx, 0, ((__global short *)dst_gx.ptr)); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y short8 gy = sobel5x1(&src_y, -1, -2, 0, 2, 1); vstore8(gy, 0, ((__global short *)dst_gy.ptr)); -#endif +#endif /* GRAD_Y */ } /**********************************************/ @@ -444,20 +444,20 @@ __kernel void sobel_separable1x7( #ifdef GRAD_X , IMAGE_DECLARATION(dst_gx) -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y , IMAGE_DECLARATION(dst_gy) -#endif +#endif /* GRAD_Y */ ) { Image src = CONVERT_TO_IMAGE_STRUCT(src); #ifdef GRAD_X Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy); -#endif +#endif /* GRAD_Y */ int8 gx = (int8)0; int8 gy = (int8)0; @@ -466,10 +466,10 @@ __kernel void sobel_separable1x7( // Store result in dst #ifdef GRAD_X vstore8(gx, 0, ((__global int *)dst_gx.ptr)); -#endif +#endif /* GRAD_X */ #ifdef GRAD_Y vstore8(gy, 0, ((__global int *)dst_gy.ptr)); -#endif +#endif /* GRAD_Y */ } /** Apply a 7x1 convolution matrix to two single channel S16 input temporary images and output two single channel S16 images and leave the borders undefined. 
@@ -507,33 +507,33 @@ __kernel void sobel_separable7x1(
 #ifdef GRAD_X
     IMAGE_DECLARATION(src_x),
     IMAGE_DECLARATION(dst_gx),
-#endif
+#endif /* GRAD_X */
 #ifdef GRAD_Y
     IMAGE_DECLARATION(src_y),
     IMAGE_DECLARATION(dst_gy),
-#endif
+#endif /* GRAD_Y */
     int dummy)
 {
 #ifdef GRAD_X
     Image src_x  = CONVERT_TO_IMAGE_STRUCT(src_x);
     Image dst_gx = CONVERT_TO_IMAGE_STRUCT(dst_gx);
-#endif
+#endif /* GRAD_X */
 #ifdef GRAD_Y
     Image src_y  = CONVERT_TO_IMAGE_STRUCT(src_y);
     Image dst_gy = CONVERT_TO_IMAGE_STRUCT(dst_gy);
-#endif
+#endif /* GRAD_Y */
 
     // Output pixels
 #ifdef GRAD_X
     int8 gx = 0;
     SOBEL7x1(&src_x, gx, Y);
     vstore8(gx, 0, (__global int *)dst_gx.ptr);
-#endif
+#endif /* GRAD_X */
 #ifdef GRAD_Y
     int8 gy = 0;
     SOBEL7x1(&src_y, gy, X);
     vstore8(gy, 0, (__global int *)dst_gy.ptr);
-#endif
+#endif /* GRAD_Y */
 }
 
 /**********************************************/
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index a29aea4fae..113fc762a6 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -23,7 +23,7 @@
  */
 #include "helpers.h"
 
-#if defined(FIXED_POINT_POSITION)
+#ifdef FIXED_POINT_POSITION
 #include "fixed_point.h"
 
 #define MAX_OP(x, y, type, size) MAX_OP_EXPAND(x, y, type, size)
@@ -37,7 +37,7 @@
 #define MINVAL           MIN_VAL(DATA_TYPE)
 #define SELECT_DATA_TYPE EXPAND(DATA_TYPE)
 
-#else
+#else /* FIXED_POINT_POSITION */
 
 #define MAX_OP(x, y, type, size) max((x), (y))
 #define ADD_OP(x, y, type, size) ((x) + (y))
@@ -45,15 +45,15 @@
 #define DIV_OP(x, y, type, size) ((x) / (y))
 #define EXP_OP(x, type, size) exp((x))
 
-#if defined USE_F16
+#ifdef USE_F16
 #define MINVAL           -HALF_MAX
 #define SELECT_DATA_TYPE short
-#else
+#else /* USE_F16 */
 #define MINVAL           -FLT_MAX
 #define SELECT_DATA_TYPE int
-#endif
+#endif /* USE_F16 */
 
-#endif
+#endif /* FIXED_POINT_POSITION */
 
 __constant VEC_DATA_TYPE(DATA_TYPE, 16) type_min = (VEC_DATA_TYPE(DATA_TYPE, 16))(MINVAL);
 __constant uint16 idx16 = (uint16)(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
@@ -99,14 +99,14 @@ __kernel void softmax_layer_max(
         max_val = MAX_OP(data, max_val, DATA_TYPE, 16);
     }
 
-#if defined NON_MULTIPLE_OF_16
+#ifdef NON_MULTIPLE_OF_16
     // Handle non multiple of 16
     VEC_DATA_TYPE(DATA_TYPE, 16)
     data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0));
     VEC_DATA_TYPE(SELECT_DATA_TYPE, 16)
     widx    = CONVERT(((uint16)(width4 << 4) + idx16) < width, VEC_DATA_TYPE(SELECT_DATA_TYPE, 16));
     max_val = MAX_OP(max_val, select(type_min, data, widx), DATA_TYPE, 16);
-#endif
+#endif /* NON_MULTIPLE_OF_16 */
 
     // Perform max reduction
     max_val.s01234567 = MAX_OP(max_val.s01234567, max_val.s89ABCDEF, DATA_TYPE, 8);
@@ -182,7 +182,7 @@ __kernel void softmax_layer_shift_exp_sum(
         sum1D = ADD_OP(sum1D, data, DATA_TYPE, 16);
     }
 
-#if defined NON_MULTIPLE_OF_16
+#ifdef NON_MULTIPLE_OF_16
     // Handle non multiple of 16
     VEC_DATA_TYPE(DATA_TYPE, 16)
     data = vload16(0, (__global DATA_TYPE *)offset(&src, width4 << 4, 0));
@@ -193,7 +193,7 @@ __kernel void softmax_layer_shift_exp_sum(
     data = select(0, data, widx);
     vstore16(data, 0, (__global DATA_TYPE *)offset(&dst, width4 << 4, 0));
     sum1D = ADD_OP(sum1D, data, DATA_TYPE, 16);
-#endif
+#endif /* NON_MULTIPLE_OF_16 */
 
     // Perform min/max reduction
     sum1D.s01234567 = ADD_OP(sum1D.s01234567, sum1D.s89ABCDEF, DATA_TYPE, 8);
diff --git a/src/core/CL/cl_kernels/transpose.cl b/src/core/CL/cl_kernels/transpose.cl
index c30158f280..daa0129e7d 100644
--- a/src/core/CL/cl_kernels/transpose.cl
+++ b/src/core/CL/cl_kernels/transpose.cl
@@ -98,7 +98,7 @@
 
 #ifndef DATA_TYPE_IN_BYTES
 #error DATA_TYPE_IN_BYTES not set for the transpose OpenCL kernel
-#endif
+#endif /* not DATA_TYPE_IN_BYTES */
 
 #if DATA_TYPE_IN_BYTES == 4
 #define DATA_TYPE uint
@@ -118,9 +118,9 @@
 #define VLOAD(x, y) vload16(x, y)
 #define VSTORE(x, y, z) vstore16(x, y, z)
 #define BLOCK_SIZE 16
-#else
+#else /* switch DATA_TYPE_IN_BYTES */
 #error DATA_TYPE_IN_BYTES not supported for transpose
-#endif
+#endif /* switch DATA_TYPE_IN_BYTES */
 
 /** This OpenCL kernel computes the matrix transposition of input matrix
  *
diff --git a/src/core/NEON/kernels/NEAccumulateKernel.cpp b/src/core/NEON/kernels/NEAccumulateKernel.cpp
index e5b933a781..6e54dd64a3 100644
--- a/src/core/NEON/kernels/NEAccumulateKernel.cpp
+++ b/src/core/NEON/kernels/NEAccumulateKernel.cpp
@@ -131,7 +131,7 @@ void NEAccumulateWeightedFP16Kernel::run(const Window &window)
     },
     input, accum);
 }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 
 namespace
 {
diff --git a/src/core/NEON/kernels/NEBox3x3Kernel.cpp b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
index d7e6d73cd7..551c903dd9 100644
--- a/src/core/NEON/kernels/NEBox3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NEBox3x3Kernel.cpp
@@ -103,7 +103,7 @@ void NEBox3x3FP16Kernel::run(const Window &window)
     },
     input, output);
 }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 
 BorderSize NEBox3x3Kernel::border_size() const
 {
diff --git a/src/core/NEON/kernels/NECannyEdgeKernel.cpp b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
index 85a2cd5855..26df6f6b8b 100644
--- a/src/core/NEON/kernels/NECannyEdgeKernel.cpp
+++ b/src/core/NEON/kernels/NECannyEdgeKernel.cpp
@@ -787,7 +787,7 @@ void NEGradientFP16Kernel::configure(const ITensor *gx, const ITensor *gy, ITens
 
     INEKernel::configure(win);
 }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 
 namespace
 {
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
index 7d659ab2e6..57d2807b8a 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.cpp
@@ -89,7 +89,7 @@ void matrix_addition_f16(const ITensor *input, ITensor *output, const Window &wi
     },
     in, out);
 }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 
 void matrix_addition_qs8(const ITensor *input, ITensor *output, const Window &window, float beta)
 {
@@ -142,7 +142,7 @@ void NEGEMMMatrixAdditionKernel::configure(const ITensor *input, ITensor *output
 #ifdef ARM_COMPUTE_ENABLE_FP16
             _func = &matrix_addition_f16;
             break;
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
         default:
            ARM_COMPUTE_ERROR("Data type not supported");
            break;
diff --git a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
index 101c5c8132..bff16ec329 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.cpp
@@ -244,7 +244,7 @@ void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
         asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(vec_a)));
         asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b)));
         asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + in_b_stride)));
-#endif
+#endif /* __arm__ */
 
         auto vec_a_end_addr = vec_a + num_elems_vec_a;
         for(; vec_a <= (vec_a_end_addr - 4);)
@@ -267,7 +267,7 @@ void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
             asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + 2 * in_b_stride)));
             asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + 3 * in_b_stride)));
             asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + 4 * in_b_stride)));
-#endif
+#endif /* __arm__ */
 
             acc0 = vmlaq_lane_f32(acc0, b00, a0l, 0);
             acc1 = vmlaq_lane_f32(acc1, b01, a0l, 0);
@@ -527,7 +527,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
         asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_a0)));
         asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b0)));
         asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b1)));
-#endif
+#endif /* __arm__ */
 
         auto mtx_b0_end_addr = mtx_b0 + num_elems_matrix_b_x;
         for(; mtx_b0 <= (mtx_b0_end_addr - 32);)
@@ -546,7 +546,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
             asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_a0)));
             asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b0)));
             asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b1)));
-#endif
+#endif /* __arm__ */
 
             // 4x4 block 0
             acc00 = vmlaq_f32(acc00, b00, a0);
@@ -637,7 +637,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
             asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_a0)));
             asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b0)));
             asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b1)));
-#endif
+#endif /* __arm__ */
 
             // 4x4 block 0
             acc00 = vmlaq_f32(acc00, b00, a0);
@@ -728,7 +728,7 @@ void matrix_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
         asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_a0)));
         asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b0)));
         asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b1)));
-#endif
+#endif /* __arm__ */
         // 4x4 block 0
         acc00 = vmlaq_f32(acc00, b00, a0);
         acc10 = vmlaq_f32(acc10, b00, a1);
@@ -915,9 +915,9 @@ void matrix_matrix_multiply_f16(const ITensor *input0, const ITensor *input1, IT
         vst1q_f16(mtx_out + 3 * out_stride, c.val[3]);
     },
     ina, inb, out);
-#else
+#else /* ARM_COMPUTE_ENABLE_FP16 */
     ARM_COMPUTE_ERROR("Not implemented");
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 }
 
 template <bool multiply_alpha>
@@ -1028,7 +1028,7 @@ void matrix_matrix_multiply_qs8(const ITensor *input0, const ITensor *input1, IT
             asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_a0)));
             asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b0)));
             asm volatile("PLD [%0, #128*2]" ::"r"(reinterpret_cast<const uint8_t *>(mtx_b1)));
-#endif
+#endif /* __arm__ */
 
             // Second accumulation
             acc00_qs16 = vqmlal_qs8(acc00_qs16, b02, a4, fixed_point_position);
diff --git a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
index 585676bb87..d44f4ce3b8 100644
--- a/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
+++ b/src/core/NEON/kernels/NEHarrisCornersKernel.cpp
@@ -360,7 +360,7 @@ void NEHarrisScoreFP16Kernel<block_size>::configure(const IImage *input1, const
 
     INEKernel::configure(win);
 }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 
 template class arm_compute::NEHarrisScoreKernel<3>;
 template class arm_compute::NEHarrisScoreKernel<5>;
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 99daa2e5e7..8c9d12c57c 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -305,7 +305,7 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
             case DataType::F16:
                 _func = &NEIm2ColKernel::run_reduced<float16_t>;
                 break;
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
             case DataType::QS8:
                 _func = &NEIm2ColKernel::run_reduced<qint8_t>;
                 break;
@@ -325,7 +325,7 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size
             case DataType::F16:
                 _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
                 break;
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
             case DataType::QS8:
                 _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
                 break;
diff --git a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
index ab84efbf23..895799c6ca 100644
--- a/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
+++ b/src/core/NEON/kernels/NELocallyConnectedMatrixMultiplyKernel.cpp
@@ -89,7 +89,7 @@ void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
         asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(vec_a)));
         asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b)));
         asm volatile("PLD [%0, #128*4]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + in_b_stride)));
-#endif
+#endif /* __arm__ */
 
         const float *vec_a_end_addr = vec_a + num_elems_vec_a;
         for(; vec_a <= (vec_a_end_addr - 4);)
@@ -113,7 +113,7 @@ void vector_matrix_multiply_f32(const ITensor *input0, const ITensor *input1, IT
             asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + 2 * in_b_stride)));
             asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + 3 * in_b_stride)));
             asm volatile("PLD [%0, #128*1]" ::"r"(reinterpret_cast<const uint8_t *>(matrix_b + 4 * in_b_stride)));
-#endif
+#endif /* __arm__ */
 
             acc0 = vmlaq_lane_f32(acc0, b00, a0l, 0);
             acc1 = vmlaq_lane_f32(acc1, b01, a0l, 0);
diff --git a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
index a874d219d7..599dad6c70 100644
--- a/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
+++ b/src/core/NEON/kernels/NEMagnitudePhaseKernel.cpp
@@ -428,7 +428,7 @@ template class arm_compute::NEMagnitudePhaseFP16Kernel;
 template class arm_compute::NEMagnitudePhaseFP16Kernel;
 template class arm_compute::NEMagnitudePhaseFP16Kernel;
 
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 
 namespace
 {
diff --git a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
index 1826c474f7..ece7b9a10f 100644
--- a/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
+++ b/src/core/NEON/kernels/NENonMaximaSuppression3x3Kernel.cpp
@@ -224,7 +224,7 @@ void NENonMaximaSuppression3x3FP16Kernel::configure(const ITensor *input, ITenso
 
     INEKernel::configure(win);
 }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
 
 namespace
 {
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 730735590d..73c5f548c9 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -90,7 +90,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
             shape_tmp_b.set(1, std::ceil(b->info()->dimension(0) / 8.0f));
             break;
         }
-#endif
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
         case DataType::QS8:
         {
             shape_tmp_b.set(0, b->info()->dimension(1) * 16);
diff --git a/src/runtime/Scheduler.cpp b/src/runtime/Scheduler.cpp
index a131928293..505c4a384c 100644
--- a/src/runtime/Scheduler.cpp
+++ b/src/runtime/Scheduler.cpp
@@ -26,13 +26,13 @@
 #include "arm_compute/core/Error.h"
 
 #if ARM_COMPUTE_CPP_SCHEDULER
 #include "arm_compute/runtime/CPP/CPPScheduler.h"
-#endif
+#endif /* ARM_COMPUTE_CPP_SCHEDULER */
 
 #include "arm_compute/runtime/SingleThreadScheduler.h"
 
 #if ARM_COMPUTE_OPENMP_SCHEDULER
 #include "arm_compute/runtime/OMP/OMPScheduler.h"
-#endif
+#endif /* ARM_COMPUTE_OPENMP_SCHEDULER */
 
 using namespace arm_compute;
@@ -42,9 +42,9 @@ Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::OMP;
 Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP;
 #elif ARM_COMPUTE_CPP_SCHEDULER && ARM_COMPUTE_OPENMP_SCHEDULER
 Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::CPP;
-#else
+#else /* ARM_COMPUTE_*_SCHEDULER */
 Scheduler::Type Scheduler::_scheduler_type = Scheduler::Type::ST;
-#endif
+#endif /* ARM_COMPUTE_*_SCHEDULER */
 
 void Scheduler::set(Type t)
 {
@@ -64,17 +64,17 @@ bool Scheduler::is_available(Type t)
         {
 #if ARM_COMPUTE_CPP_SCHEDULER
             return true;
-#else
+#else /* ARM_COMPUTE_CPP_SCHEDULER */
             return false;
-#endif
+#endif /* ARM_COMPUTE_CPP_SCHEDULER */
         }
         case Type::OMP:
         {
 #if ARM_COMPUTE_OPENMP_SCHEDULER
             return true;
-#else
+#else /* ARM_COMPUTE_OPENMP_SCHEDULER */
             return false;
-#endif
+#endif /* ARM_COMPUTE_OPENMP_SCHEDULER */
         }
         case Type::CUSTOM:
         {
@@ -105,18 +105,18 @@ IScheduler &Scheduler::get()
         {
 #if ARM_COMPUTE_CPP_SCHEDULER
             return CPPScheduler::get();
-#else
+#else /* ARM_COMPUTE_CPP_SCHEDULER */
             ARM_COMPUTE_ERROR("Recompile with cppthreads=1 to use C++11 scheduler.");
-#endif
+#endif /* ARM_COMPUTE_CPP_SCHEDULER */
             break;
        }
        case Type::OMP:
        {
 #if ARM_COMPUTE_OPENMP_SCHEDULER
             return OMPScheduler::get();
-#else
+#else /* ARM_COMPUTE_OPENMP_SCHEDULER */
             ARM_COMPUTE_ERROR("Recompile with openmp=1 to use openmp scheduler.");
-#endif
+#endif /* ARM_COMPUTE_OPENMP_SCHEDULER */
             break;
         }
         case Type::CUSTOM:
--
cgit v1.2.1