diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-09-04 18:44:23 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 13:03:09 +0100 |
commit | 6ff3b19ee6120edf015fad8caab2991faa3070af (patch) | |
tree | a7a6dcd16dfd56d79fa1b56a313caeebcc939b68 /src/core/CL/cl_kernels/canny.cl | |
download | ComputeLibrary-6ff3b19ee6120edf015fad8caab2991faa3070af.tar.gz |
COMPMID-344 Updated doxygen
Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
Diffstat (limited to 'src/core/CL/cl_kernels/canny.cl')
-rw-r--r-- | src/core/CL/cl_kernels/canny.cl | 429 |
1 files changed, 429 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/canny.cl b/src/core/CL/cl_kernels/canny.cl new file mode 100644 index 0000000000..ec6719213c --- /dev/null +++ b/src/core/CL/cl_kernels/canny.cl @@ -0,0 +1,429 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers.h" + +/** Calculate the magnitude and phase from horizontal and vertical result of sobel result. + * + * @note The calculation of gradient uses level 1 normalisation. + * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT: + * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short + * + * @param[in] src1_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32 + * @param[in] src1_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] src2_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32 + * @param[in] src2_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] grad_ptr Pointer to the gradient output. Supported data types: U16, U32 + * @param[in] grad_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] grad_step_x grad_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] grad_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] grad_step_y grad_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] grad_offset_first_element_in_bytes The offset of the first element of the output + * @param[out] angle_ptr Pointer to the angle output. Supported data types: U8 + * @param[in] angle_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] angle_step_x angle_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] angle_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] angle_step_y angle_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] angle_offset_first_element_in_bytes The offset of the first element of the output + */ +__kernel void combine_gradients_L1( + IMAGE_DECLARATION(src1), + IMAGE_DECLARATION(src2), + IMAGE_DECLARATION(grad), + IMAGE_DECLARATION(angle)) +{ + // Construct images + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image src2 = CONVERT_TO_IMAGE_STRUCT(src2); + Image grad = CONVERT_TO_IMAGE_STRUCT(grad); + Image angle = CONVERT_TO_IMAGE_STRUCT(angle); + + // Load sobel horizontal and vertical values + VEC_DATA_TYPE(DATA_TYPE_IN, 4) + h = vload4(0, (__global DATA_TYPE_IN *)src1.ptr); + VEC_DATA_TYPE(DATA_TYPE_IN, 4) + v = vload4(0, (__global DATA_TYPE_IN *)src2.ptr); + + /* Calculate the gradient, using level 1 normalisation method */ + VEC_DATA_TYPE(DATA_TYPE_OUT, 4) + m = CONVERT_SAT((abs(h) + abs(v)), VEC_DATA_TYPE(DATA_TYPE_OUT, 4)); + + /* Calculate the angle */ + float4 p = atan2pi(convert_float4(v), convert_float4(h)); + + /* Remap angle to range [0, 256) */ + p = select(p, p + 2, p < 0.0f) * 128.0f; + + /* Store results */ + vstore4(m, 0, (__global DATA_TYPE_OUT *)grad.ptr); + vstore4(convert_uchar4_sat_rte(p), 0, angle.ptr); +} + +/** Calculate the gradient and angle from horizontal and vertical result of sobel result. + * + * @note The calculation of gradient uses level 2 normalisation + * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT: + * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short + * + * @param[in] src1_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32 + * @param[in] src1_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src1_step_x src1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src1_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src1_step_y src1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src1_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] src2_ptr Pointer to the source image (Vertical result of Sobel). Supported data types: S16, S32 + * @param[in] src2_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src2_step_x src2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src2_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src2_step_y src2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src2_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] grad_ptr Pointer to the gradient output. Supported data types: U16, U32 + * @param[in] grad_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] grad_step_x grad_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] grad_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] grad_step_y grad_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] grad_offset_first_element_in_bytes The offset of the first element of the output + * @param[out] angle_ptr Pointer to the angle output. Supported data types: U8 + * @param[in] angle_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] angle_step_x angle_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] angle_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] angle_step_y angle_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] angle_offset_first_element_in_bytes The offset of the first element of the output + */ +__kernel void combine_gradients_L2( + IMAGE_DECLARATION(src1), + IMAGE_DECLARATION(src2), + IMAGE_DECLARATION(grad), + IMAGE_DECLARATION(angle)) +{ + // Construct images + Image src1 = CONVERT_TO_IMAGE_STRUCT(src1); + Image src2 = CONVERT_TO_IMAGE_STRUCT(src2); + Image grad = CONVERT_TO_IMAGE_STRUCT(grad); + Image angle = CONVERT_TO_IMAGE_STRUCT(angle); + + // Load sobel horizontal and vertical values + float4 h = convert_float4(vload4(0, (__global DATA_TYPE_IN *)src1.ptr)); + float4 v = convert_float4(vload4(0, (__global DATA_TYPE_IN *)src2.ptr)); + + /* Calculate the gradient, using level 2 normalisation method */ + float4 m = sqrt(h * h + v * v); + + /* Calculate the angle */ + float4 p = atan2pi(v, h); + + /* Remap angle to range [0, 256) */ + p = select(p, p + 2, p < 0.0f) * 128.0f; + + /* Store results */ + vstore4(CONVERT_SAT_ROUND(m, VEC_DATA_TYPE(DATA_TYPE_OUT, 4), rte), 0, (__global DATA_TYPE_OUT *)grad.ptr); + vstore4(convert_uchar4_sat_rte(p), 0, angle.ptr); +} + +/** Array that holds the relative coordinates offset for the neighbouring pixels. + */ +__constant short4 neighbours_coords[] = +{ + { -1, 0, 1, 0 }, // 0 + { -1, 1, 1, -1 }, // 45 + { 0, 1, 0, -1 }, // 90 + { 1, 1, -1, -1 }, // 135 + { 1, 0, -1, 0 }, // 180 + { 1, -1, -1, 1 }, // 225 + { 0, 1, 0, -1 }, // 270 + { -1, -1, 1, 1 }, // 315 + { -1, 0, 1, 0 }, // 360 +}; + +/** Perform non maximum suppression. + * + * @attention The input and output data types need to be passed at compile time using -DDATA_TYPE_IN and -DDATA_TYPE_OUT: + * e.g. -DDATA_TYPE_IN=uchar -DDATA_TYPE_OUT=short + * + * @param[in] grad_ptr Pointer to the gradient output. Supported data types: S16, S32 + * @param[in] grad_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] grad_step_x grad_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] grad_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] grad_step_y grad_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] grad_offset_first_element_in_bytes The offset of the first element of the output + * @param[in] angle_ptr Pointer to the angle output. Supported data types: U8 + * @param[in] angle_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] angle_step_x angle_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] angle_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] angle_step_y angle_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] angle_offset_first_element_in_bytes TThe offset of the first element of the output + * @param[out] non_max_ptr Pointer to the non maximum suppressed output. Supported data types: U16, U32 + * @param[in] non_max_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] non_max_step_x non_max_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] non_max_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] non_max_step_y non_max_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] non_max_offset_first_element_in_bytes The offset of the first element of the output + * @param[in] lower_thr The low threshold + */ +__kernel void suppress_non_maximum( + IMAGE_DECLARATION(grad), + IMAGE_DECLARATION(angle), + IMAGE_DECLARATION(non_max), + uint lower_thr) +{ + // Construct images + Image grad = CONVERT_TO_IMAGE_STRUCT(grad); + Image angle = CONVERT_TO_IMAGE_STRUCT(angle); + Image non_max = CONVERT_TO_IMAGE_STRUCT(non_max); + + // Get gradient and angle + DATA_TYPE_IN gradient = *((__global DATA_TYPE_IN *)grad.ptr); + uchar an = convert_ushort(*angle.ptr); + + if(gradient <= lower_thr) + { + return; + } + + // Divide the whole round into 8 directions + uchar ang = 127 - an; + DATA_TYPE_OUT q_an = (ang + 16) >> 5; + + // Find the two pixels in the perpendicular direction + short2 x_p = neighbours_coords[q_an].s02; + short2 y_p = neighbours_coords[q_an].s13; + DATA_TYPE_IN g1 = *((global DATA_TYPE_IN *)offset(&grad, x_p.x, y_p.x)); + DATA_TYPE_IN g2 = *((global DATA_TYPE_IN *)offset(&grad, x_p.y, y_p.y)); + + if((gradient > g1) && (gradient > g2)) + { + *((global DATA_TYPE_OUT *)non_max.ptr) = gradient; + } +} + +#define EDGE 255 +#define hysteresis_local_stack_L1 8 // The size of level 1 stack. This has to agree with the host side +#define hysteresis_local_stack_L2 16 // The size of level 2 stack, adjust this can impact the match rate with VX implementation + +/** Check whether pixel is valid +* +* Skip the pixel if the early_test fails. +* Otherwise, it tries to add the pixel coordinate to the stack, and proceed to popping the stack instead if the stack is full +* +* @param[in] early_test Boolean condition based on the minv check and visited buffer check +* @param[in] x_pos X-coordinate of pixel that is going to be recorded, has to be within the boundary +* @param[in] y_pos Y-coordinate of pixel that is going to be recorded, has to be within the boundary +* @param[in] x_cur X-coordinate of current central pixel +* @param[in] y_cur Y-coordinate of current central pixel +*/ +#define check_pixel(early_test, x_pos, y_pos, x_cur, y_cur) \ + { \ + if(!early_test) \ + { \ + /* Number of elements in the local stack 1, points to next available entry */ \ + c = *((__global char *)offset(&l1_stack_counter, x_cur, y_cur)); \ + \ + if(c > (hysteresis_local_stack_L1 - 1)) /* Stack level 1 is full */ \ + goto pop_stack; \ + \ + /* The pixel that has already been recorded is ignored */ \ + if(!atomic_or((__global uint *)offset(&recorded, x_pos, y_pos), 1)) \ + { \ + l1_ptr[c] = (short2)(x_pos, y_pos); \ + *((__global char *)offset(&l1_stack_counter, x_cur, y_cur)) += 1; \ + } \ + } \ + } + +/** Perform hysteresis. + * + * @attention The input data_type needs to be passed at compile time using -DDATA_TYPE_IN: e.g. -DDATA_TYPE_IN=short + * + * @param[in] src_ptr Pointer to the input image. Supported data types: U8 + * @param[in] src_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element of the output + * @param[out] out_ptr Pointer to the output image. Supported data types: U8 + * @param[in] out_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] out_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] out_offset_first_element_in_bytes The offset of the first element of the output + * @param[out] visited_ptr Pointer to the visited buffer, where pixels are marked as visited. Supported data types: U32 + * @param[in] visited_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] visited_step_x visited_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] visited_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] visited_step_y visited_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] visited_offset_first_element_in_bytes The offset of the first element of the output + * @param[out] recorded_ptr Pointer to the recorded buffer, where pixels are marked as recorded. Supported data types: U32 + * @param[in] recorded_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] recorded_step_x recorded_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] recorded_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] recorded_step_y recorded_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] recorded_offset_first_element_in_bytes The offset of the first element of the output + * @param[out] l1_stack_ptr Pointer to the l1 stack of a pixel. Supported data types: S32 + * @param[in] l1_stack_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] l1_stack_step_x l1_stack_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] l1_stack_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] l1_stack_step_y l1_stack_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] l1_stack_offset_first_element_in_bytes The offset of the first element of the output + * @param[out] l1_stack_counter_ptr Pointer to the l1 stack counters of an image. Supported data types: U8 + * @param[in] l1_stack_counter_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] l1_stack_counter_step_x l1_stack_counter_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] l1_stack_counter_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] l1_stack_counter_step_y l1_stack_counter_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] l1_stack_counter_offset_first_element_in_bytes The offset of the first element of the output + * @param[in] low_thr The lower threshold + * @param[in] up_thr The upper threshold + * @param[in] width The width of the image. + * @param[in] height The height of the image + */ +kernel void hysteresis( + IMAGE_DECLARATION(src), + IMAGE_DECLARATION(out), + IMAGE_DECLARATION(visited), + IMAGE_DECLARATION(recorded), + IMAGE_DECLARATION(l1_stack), + IMAGE_DECLARATION(l1_stack_counter), + uint low_thr, + uint up_thr, + int width, + int height) +{ + // Create images + Image src = CONVERT_TO_IMAGE_STRUCT_NO_STEP(src); + Image out = CONVERT_TO_IMAGE_STRUCT_NO_STEP(out); + Image visited = CONVERT_TO_IMAGE_STRUCT_NO_STEP(visited); + Image recorded = CONVERT_TO_IMAGE_STRUCT_NO_STEP(recorded); + Image l1_stack = CONVERT_TO_IMAGE_STRUCT_NO_STEP(l1_stack); + Image l1_stack_counter = CONVERT_TO_IMAGE_STRUCT_NO_STEP(l1_stack_counter); + + // Index + int x = get_global_id(0); + int y = get_global_id(1); + + // Load value + DATA_TYPE_IN val = *((__global DATA_TYPE_IN *)offset(&src, x, y)); + + // If less than upper threshold set to NO_EDGE and return + if(val <= up_thr) + { + *offset(&out, x, y) = 0; + return; + } + + // Init local stack 2 + short2 stack_L2[hysteresis_local_stack_L2] = { 0 }; + int L2_counter = 0; + + // Perform recursive hysteresis + while(true) + { + // Get L1 stack pointer + __global short2 *l1_ptr = (__global short2 *)(l1_stack.ptr + y * l1_stack.stride_y + x * hysteresis_local_stack_L1 * l1_stack.stride_x); + + // If the pixel has already been visited, proceed with the items in the stack instead + if(atomic_or((__global uint *)offset(&visited, x, y), 1) != 0) + { + goto pop_stack; + } + + // Set strong edge + *offset(&out, x, y) = EDGE; + + // If it is the top of stack l2, we don't need check the surrounding pixels + if(L2_counter > (hysteresis_local_stack_L2 - 1)) + { + goto pop_stack2; + } + + // Points to the start of the local stack; + char c; + + VEC_DATA_TYPE(DATA_TYPE_IN, 4) + x_tmp; + uint4 v_tmp; + + // Get direction pixel indices + int N = max(y - 1, 0), S = min(y + 1, height - 2), W = max(x - 1, 0), E = min(x + 1, width - 2); + + // Check 8 pixels around for week edges where low_thr < val <= up_thr + x_tmp = vload4(0, (__global DATA_TYPE_IN *)offset(&src, W, N)); + v_tmp = vload4(0, (__global uint *)offset(&visited, W, N)); + check_pixel(((x_tmp.s0 <= low_thr) || v_tmp.s0 || (x_tmp.s0 > up_thr)), W, N, x, y); // NW + check_pixel(((x_tmp.s1 <= low_thr) || v_tmp.s1 || (x_tmp.s1 > up_thr)), x, N, x, y); // N + check_pixel(((x_tmp.s2 <= low_thr) || v_tmp.s2 || (x_tmp.s2 > up_thr)), E, N, x, y); // NE + + x_tmp = vload4(0, (__global DATA_TYPE_IN *)offset(&src, W, y)); + v_tmp = vload4(0, (__global uint *)offset(&visited, W, y)); + check_pixel(((x_tmp.s0 <= low_thr) || v_tmp.s0 || (x_tmp.s0 > up_thr)), W, y, x, y); // W + check_pixel(((x_tmp.s2 <= low_thr) || v_tmp.s2 || (x_tmp.s2 > up_thr)), E, y, x, y); // E + + x_tmp = vload4(0, (__global DATA_TYPE_IN *)offset(&src, W, S)); + v_tmp = vload4(0, (__global uint *)offset(&visited, W, S)); + check_pixel(((x_tmp.s0 <= low_thr) || v_tmp.s0 || (x_tmp.s0 > up_thr)), W, S, x, y); // SW + check_pixel(((x_tmp.s1 <= low_thr) || v_tmp.s1 || (x_tmp.s1 > up_thr)), x, S, x, y); // S + check_pixel(((x_tmp.s2 <= low_thr) || v_tmp.s2 || (x_tmp.s2 > up_thr)), E, S, x, y); // SE + +#undef check_pixel + +pop_stack: + c = *((__global char *)offset(&l1_stack_counter, x, y)); + + if(c >= 1) + { + *((__global char *)offset(&l1_stack_counter, x, y)) -= 1; + int2 l_c = convert_int2(l1_ptr[c - 1]); + + // Push the current position into level 2 stack + stack_L2[L2_counter].x = x; + stack_L2[L2_counter].y = y; + + x = l_c.x; + y = l_c.y; + + L2_counter++; + + continue; + } + + if(L2_counter > 0) + { + goto pop_stack2; + } + else + { + return; + } + +pop_stack2: + L2_counter--; + x = stack_L2[L2_counter].x; + y = stack_L2[L2_counter].y; + }; +} |