diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2017-09-04 18:44:23 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-09-17 13:03:09 +0100 |
commit | 6ff3b19ee6120edf015fad8caab2991faa3070af (patch) | |
tree | a7a6dcd16dfd56d79fa1b56a313caeebcc939b68 /src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl | |
download | ComputeLibrary-6ff3b19ee6120edf015fad8caab2991faa3070af.tar.gz |
COMPMID-344 Updated doxygen
Change-Id: I32f7b84daa560e460b77216add529c8fa8b327ae
Diffstat (limited to 'src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl')
-rw-r--r-- | src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl | 522 |
1 files changed, 522 insertions, 0 deletions
diff --git a/src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl b/src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl new file mode 100644 index 0000000000..e1131d5573 --- /dev/null +++ b/src/core/CL/cl_kernels/optical_flow_pyramid_lk.cl @@ -0,0 +1,522 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers.h" +#include "types.h" + +/* + *The criteria for lost tracking is that the spatial gradient matrix has: + * - Determinant less than DETERMINANT_THR + * - or minimum eigenvalue is smaller then EIGENVALUE_THR + * + * The thresholds for the determinant and the minimum eigenvalue is + * defined by the OpenVX spec + * + * Note: Also lost tracking happens when the point tracked coordinate is outside + * the image coordinates + * + * https://www.khronos.org/registry/vx/specs/1.0/html/d0/d0c/group__group__vision__function__opticalflowpyrlk.html + */ + +/* Internal Lucas-Kanade Keypoint struct */ +typedef struct InternalKeypoint +{ + float x; /**< The x coordinate. */ + float y; /**< The y coordinate. */ + float tracking_status; /**< A zero indicates a lost point. Initialized to 1 by corner detectors. */ + float dummy; +} InternalKeypoint; + +/** Threshold for the determinant. Used for lost tracking criteria */ +#define DETERMINANT_THR 1.0e-07f + +/** Thresholds for minimum eigenvalue. Used for lost tracking criteria */ +#define EIGENVALUE_THR 1.0e-04f + +/** Constants used for Lucas-Kanade Algorithm */ +#define W_BITS (14) +#define FLT_SCALE (1.0f / (float)(1 << 20)) +#define D0 ((float)(1 << W_BITS)) +#define D1 (1.0f / (float)(1 << (W_BITS - 5))) + +/** Initializes the internal new points array when the level of pyramid is NOT equal to max. + * + * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid. + * @param[in,out] new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid. + * @param[in] scale Scale factor to apply for the new_point coordinates. + */ +__kernel void init_level( + __global float4 *old_points_internal, + __global float4 *new_points_internal, + const float scale) +{ + int idx = get_global_id(0); + + // Get old and new keypoints + float4 old_point = old_points_internal[idx]; + float4 new_point = new_points_internal[idx]; + + // Scale accordingly with the pyramid_scale + old_point.xy *= (float2)(2.0f); + new_point.xy *= (float2)(2.0f); + + old_points_internal[idx] = old_point; + new_points_internal[idx] = new_point; +} + +/** Initializes the internal new points array when the level of pyramid is equal to max. + * + * @param[in] old_points An array of key points that are defined at the old_images high resolution pyramid. + * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid. + * @param[out] new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid. + * @param[in] scale Scale factor to apply for the new_point coordinates. + */ +__kernel void init_level_max( + __global Keypoint *old_points, + __global InternalKeypoint *old_points_internal, + __global InternalKeypoint *new_points_internal, + const float scale) +{ + int idx = get_global_id(0); + + Keypoint old_point = old_points[idx]; + + // Get old keypoint to track + InternalKeypoint old_point_internal; + old_point_internal.x = old_point.x * scale; + old_point_internal.y = old_point.y * scale; + old_point_internal.tracking_status = 1.f; + + // Store internal keypoints + old_points_internal[idx] = old_point_internal; + new_points_internal[idx] = old_point_internal; +} + +/** Initializes the new_points array when the level of pyramid is equal to max and if use_initial_estimate = 1. + * + * @param[in] old_points An array of key points that are defined at the old_images high resolution pyramid. + * @param[in] new_points_estimates An array of estimate key points that are defined at the old_images high resolution pyramid. + * @param[in,out] old_points_internal An array of internal key points that are defined at the old_images high resolution pyramid. + * @param[out] new_points_internal An array of internal key points that are defined at the new_images high resolution pyramid. + * @param[in] scale Scale factor to apply for the new_point coordinates. + */ +__kernel void init_level_max_initial_estimate( + __global Keypoint *old_points, + __global Keypoint *new_points_estimates, + __global InternalKeypoint *old_points_internal, + __global InternalKeypoint *new_points_internal, + const float scale) +{ + int idx = get_global_id(0); + + Keypoint old_point = old_points[idx]; + Keypoint new_point_estimate = new_points_estimates[idx]; + InternalKeypoint old_point_internal; + InternalKeypoint new_point_internal; + + // Get old keypoint to track + old_point_internal.x = old_point.x * scale; + old_point_internal.y = old_point.y * scale; + old_point_internal.tracking_status = 1.f; + + // Get new keypoint to track + new_point_internal.x = new_point_estimate.x * scale; + new_point_internal.y = new_point_estimate.y * scale; + new_point_internal.tracking_status = new_point_estimate.tracking_status; + + // Store internal keypoints + old_points_internal[idx] = old_point_internal; + new_points_internal[idx] = new_point_internal; +} + +/** Truncates the coordinates stored in new_points array + * + * @param[in] new_points_internal An array of estimate key points that are defined at the new_images high resolution pyramid. + * @param[out] new_points An array of internal key points that are defined at the new_images high resolution pyramid. + */ +__kernel void finalize( + __global InternalKeypoint *new_points_internal, + __global Keypoint *new_points) +{ + int idx = get_global_id(0); + + // Load internal keypoint + InternalKeypoint new_point_internal = new_points_internal[idx]; + + // Calculate output point + Keypoint new_point; + new_point.x = round(new_point_internal.x); + new_point.y = round(new_point_internal.y); + new_point.tracking_status = new_point_internal.tracking_status; + + // Store new point + new_points[idx] = new_point; +} + +/** Computes A11, A12, A22, min_eig, ival, ixval and iyval at level 0th of the pyramid. These values will be used in step 1. + * + * @param[in] old_image_ptr Pointer to the input old image. Supported data types: U8 + * @param[in] old_image_stride_x Stride of the input old image in X dimension (in bytes) + * @param[in] old_image_step_x old_image_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] old_image_stride_y Stride of the input old image in Y dimension (in bytes) + * @param[in] old_image_step_y old_image_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] old_image_offset_first_element_in_bytes The offset of the first element in the input old image + * @param[in] old_scharr_gx_ptr Pointer to the input scharr x image. Supported data types: S16 + * @param[in] old_scharr_gx_stride_x Stride of the input scharr x image in X dimension (in bytes) + * @param[in] old_scharr_gx_step_x old_scharr_gx_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] old_scharr_gx_stride_y Stride of the input scharr x image in Y dimension (in bytes) + * @param[in] old_scharr_gx_step_y old_scharr_gx_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] old_scharr_gx_offset_first_element_in_bytes The offset of the first element in the input scharr x image + * @param[in] old_scharr_gy_ptr Pointer to the input scharr y image. Supported data types: S16 + * @param[in] old_scharr_gy_stride_x Stride of the input scharr y image in X dimension (in bytes) + * @param[in] old_scharr_gy_step_x old_scharr_gy_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] old_scharr_gy_stride_y Stride of the input scharr y image in Y dimension (in bytes) + * @param[in] old_scharr_gy_step_y old_scharr_gy_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] old_scharr_gy_offset_first_element_in_bytes The offset of the first element in the input scharr y image + * @param[in] old_points An array of key points. Those key points are defined at the old_images high resolution pyramid + * @param[in, out] new_points An output array of key points. Those key points are defined at the new_images high resolution pyramid + * @param[out] coeff It stores | A11 | A12 | A22 | min_eig | for each keypoint + * @param[out] iold_val It stores | ival | ixval | iyval | dummy | for each point in the window centered on old_keypoint + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] window_dimension_pow2 The squared size of the window on which to perform the algorithm + * @param[in] half_window The half size of the window on which to perform the algorithm + * @param[in] border_limits It stores the right border limit (width - window_dimension - 1, height - window_dimension - 1,) + * @param[in] eig_const 1.0f / (float)(2.0f * window_dimension * window_dimension) + * @param[in] level0 It is set to 1 if level 0 of the pyramid + */ +void __kernel lktracker_stage0( + IMAGE_DECLARATION(old_image), + IMAGE_DECLARATION(old_scharr_gx), + IMAGE_DECLARATION(old_scharr_gy), + __global float4 *old_points, + __global float4 *new_points, + __global float4 *coeff, + __global short4 *iold_val, + const int window_dimension, + const int window_dimension_pow2, + const int half_window, + const float3 border_limits, + const float eig_const, + const int level0) +{ + int idx = get_global_id(0); + + Image old_image = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_image); + Image old_scharr_gx = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_scharr_gx); + Image old_scharr_gy = CONVERT_TO_IMAGE_STRUCT_NO_STEP(old_scharr_gy); + + // Get old keypoint + float2 old_keypoint = old_points[idx].xy - (float2)half_window; + + // Get the floor value + float2 iold_keypoint = floor(old_keypoint); + + // Check if using the window dimension we can go out of boundary in the following for loops. If so, invalidate the tracked point + if(any(iold_keypoint < border_limits.zz) || any(iold_keypoint >= border_limits.xy)) + { + if(level0 == 1) + { + // Invalidate tracked point as we are at level 0 + new_points[idx].s2 = 0.0f; + } + + // Not valid coordinate. It sets min_eig to 0.0f + coeff[idx].s3 = 0.0f; + + return; + } + + // Compute weight for the bilinear interpolation + float2 ab = old_keypoint - iold_keypoint; + + // Weight used for Bilinear-Interpolation on Scharr images + // w_scharr.s0 = (1.0f - ab.x) * (1.0f - ab.y) + // w_scharr.s1 = ab.x * (1.0f - ab.y) + // w_scharr.s2 = (1.0f - ab.x) * ab.y + // w_scharr.s3 = ab.x * ab.y + + float4 w_scharr; + w_scharr.s3 = ab.x * ab.y; + w_scharr.s0 = w_scharr.s3 + 1.0f - ab.x - ab.y; + w_scharr.s12 = ab - (float2)w_scharr.s3; + + // Weight used for Bilinear-Interpolation on Old and New images + // w.s0 = round(w_scharr.s0 * D0) + // w.s1 = round(w_scharr.s1 * D0) + // w.s2 = round(w_scharr.s2 * D0) + // w.s3 = w.s3 = D0 - w.s0 - w.s1 - w.s2 + + float4 w; + w = round(w_scharr * (float4)D0); + w.s3 = D0 - w.s0 - w.s1 - w.s2; // Added for matching VX implementation + + // G.s0 = A11, G.s1 = A12, G.s2 = A22, G.s3 = min_eig + int4 iG = (int4)0; + + // Window offset + int window_offset = idx * window_dimension_pow2; + + // Compute Spatial Gradient Matrix G + for(ushort ky = 0; ky < window_dimension; ++ky) + { + int offset_y = iold_keypoint.y + ky; + for(ushort kx = 0; kx < window_dimension; ++kx) + { + int offset_x = iold_keypoint.x + kx; + float4 px; + + // Load values from old_image for computing the bilinear interpolation + px = convert_float4((uchar4)(vload2(0, offset(&old_image, offset_x, offset_y)), + vload2(0, offset(&old_image, offset_x, offset_y + 1)))); + + // old_i.s0 = ival, old_i.s1 = ixval, old_i.s2 = iyval, old_i.s3 = dummy + float4 old_i; + + // Compute bilinear interpolation (with D1 scale factor) for ival + old_i.s0 = dot(px, w) * D1; + + // Load values from old_scharr_gx for computing the bilinear interpolation + px = convert_float4((short4)(vload2(0, (__global short *)offset(&old_scharr_gx, offset_x, offset_y)), + vload2(0, (__global short *)offset(&old_scharr_gx, offset_x, offset_y + 1)))); + + // Compute bilinear interpolation for ixval + old_i.s1 = dot(px, w_scharr); + + // Load values from old_scharr_gy for computing the bilinear interpolation + px = convert_float4((short4)(vload2(0, (__global short *)offset(&old_scharr_gy, offset_x, offset_y)), + vload2(0, (__global short *)offset(&old_scharr_gy, offset_x, offset_y + 1)))); + + // Compute bilinear interpolation for iyval + old_i.s2 = dot(px, w_scharr); + + // Rounding (it could be omitted. Used just for matching the VX implementation) + int4 iold = convert_int4(round(old_i)); + + // Accumulate values in the Spatial Gradient Matrix + iG.s0 += (int)(iold.s1 * iold.s1); + iG.s1 += (int)(iold.s1 * iold.s2); + iG.s2 += (int)(iold.s2 * iold.s2); + + // Store ival, ixval and iyval + iold_val[window_offset + kx] = convert_short4(iold); + } + window_offset += window_dimension; + } + + // Scale iA11, iA12 and iA22 + float4 G = convert_float4(iG) * (float4)FLT_SCALE; + + // Compute minimum eigen value + G.s3 = (float)(G.s2 + G.s0 - sqrt(pown(G.s0 - G.s2, 2) + 4.0f * G.s1 * G.s1)) * eig_const; + + // Store A11. A11, A22 and min_eig + coeff[idx] = G; +} + +/** Computes the motion vector for a given keypoint + * + * @param[in] new_image_ptr Pointer to the input new image. Supported data types: U8 + * @param[in] new_image_stride_x Stride of the input new image in X dimension (in bytes) + * @param[in] new_image_step_x new_image_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] new_image_stride_y Stride of the input new image in Y dimension (in bytes) + * @param[in] new_image_step_y new_image_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] new_image_offset_first_element_in_bytes The offset of the first element in the input new image + * @param[in, out] new_points An output array of key points. Those key points are defined at the new_images high resolution pyramid + * @param[in] coeff The | A11 | A12 | A22 | min_eig | for each keypoint + * @param[in] iold_val The | ival | ixval | iyval | dummy | for each point in the window centered on old_keypoint + * @param[in] window_dimension The size of the window on which to perform the algorithm + * @param[in] window_dimension_pow2 The squared size of the window on which to perform the algorithm + * @param[in] half_window The half size of the window on which to perform the algorithm + * @param[in] num_iterations The maximum number of iterations + * @param[in] epsilon The value for terminating the algorithm. + * @param[in] border_limits It stores the right border limit (width - window_dimension - 1, height - window_dimension - 1,) + * @param[in] eig_const 1.0f / (float)(2.0f * window_dimension * window_dimension) + * @param[in] level0 It is set to 1 if level of pyramid = 0 + * @param[in] term_iteration It is set to 1 if termination = VX_TERM_CRITERIA_ITERATIONS + * @param[in] term_epsilon It is set to 1 if termination = VX_TERM_CRITERIA_EPSILON + */ +void __kernel lktracker_stage1( + IMAGE_DECLARATION(new_image), + __global float4 *new_points, + __global float4 *coeff, + __global short4 *iold_val, + const int window_dimension, + const int window_dimension_pow2, + const int half_window, + const int num_iterations, + const float epsilon, + const float3 border_limits, + const float eig_const, + const int level0, + const int term_iteration, + const int term_epsilon) +{ + int idx = get_global_id(0); + Image new_image = CONVERT_TO_IMAGE_STRUCT_NO_STEP(new_image); + + // G.s0 = A11, G.s1 = A12, G.s2 = A22, G.s3 = min_eig + float4 G = coeff[idx]; + + // Determinant + float D = G.s0 * G.s2 - G.s1 * G.s1; + + // Check if it is a good point to track + if(G.s3 < EIGENVALUE_THR || D < DETERMINANT_THR) + { + if(level0 == 1) + { + // Invalidate tracked point as we are at level 0 + new_points[idx].s2 = 0; + } + + return; + } + + // Compute inverse + //D = native_recip(D); + D = 1.0 / D; + + // Get new keypoint + float2 new_keypoint = new_points[idx].xy - (float)half_window; + + // Get new point + float2 out_new_point = new_points[idx].xy; + + // Keep delta obtained in the previous iteration + float2 prev_delta = (float2)0.0f; + + int j = 0; + while(j < num_iterations) + { + // Get the floor value + float2 inew_keypoint = floor(new_keypoint); + + // Check if using the window dimension we can go out of boundary in the following for loops. If so, invalidate the tracked point + if(any(inew_keypoint < border_limits.zz) || any(inew_keypoint >= border_limits.xy)) + { + if(level0 == 1) + { + // Invalidate tracked point as we are at level 0 + new_points[idx].s2 = 0.0f; + } + else + { + new_points[idx].xy = out_new_point; + } + + return; + } + + // Compute weight for the bilinear interpolation + float2 ab = new_keypoint - inew_keypoint; + + // Weight used for Bilinear-Interpolation on Old and New images + // w.s0 = round((1.0f - ab.x) * (1.0f - ab.y) * D0) + // w.s1 = round(ab.x * (1.0f - ab.y) * D0) + // w.s2 = round((1.0f - ab.x) * ab.y * D0) + // w.s3 = D0 - w.s0 - w.s1 - w.s2 + + float4 w; + w.s3 = ab.x * ab.y; + w.s0 = w.s3 + 1.0f - ab.x - ab.y; + w.s12 = ab - (float2)w.s3; + w = round(w * (float4)D0); + w.s3 = D0 - w.s0 - w.s1 - w.s2; + + // Mismatch vector + int2 ib = 0; + + // Old val offset + int old_val_offset = idx * window_dimension_pow2; + + for(int ky = 0; ky < window_dimension; ++ky) + { + for(int kx = 0; kx < window_dimension; ++kx) + { + // ival, ixval and iyval have been computed in the previous stage + int4 old_ival = convert_int4(iold_val[old_val_offset]); + + // Load values from old_image for computing the bilinear interpolation + float4 px = convert_float4((uchar4)(vload2(0, offset(&new_image, inew_keypoint.x + kx, inew_keypoint.y + ky)), + vload2(0, offset(&new_image, inew_keypoint.x + kx, inew_keypoint.y + ky + 1)))); + + // Compute bilinear interpolation on new image + int jval = (int)round(dot(px, w) * D1); + + // Compute luminance difference + int diff = (int)(jval - old_ival.s0); + + // Accumulate values in mismatch vector + ib += (diff * old_ival.s12); + + // Update old val offset + old_val_offset++; + } + } + + float2 b = convert_float2(ib) * (float2)FLT_SCALE; + + // Optical Flow + float2 delta; + + delta.x = (float)((G.s1 * b.y - G.s2 * b.x) * D); + delta.y = (float)((G.s1 * b.x - G.s0 * b.y) * D); + + // Update new point coordinate + new_keypoint += delta; + + out_new_point = new_keypoint + (float2)half_window; + + if(term_epsilon == 1) + { + float mag2 = dot(delta, delta); + + if(mag2 <= epsilon) + { + new_points[idx].xy = out_new_point; + + return; + } + } + + // Check convergence analyzing the previous delta + if(j > 0 && all(fabs(delta + prev_delta) < (float2)0.01f)) + { + out_new_point -= delta * (float2)0.5f; + + new_points[idx].xy = out_new_point; + + return; + } + + // Update previous delta + prev_delta = delta; + + if(term_iteration == 1) + { + j++; + } + } + + new_points[idx].xy = out_new_point; +} |