From 27e67f0b2047cfa2f011f9e242e3068d9e106b39 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Tue, 16 Feb 2021 11:34:39 +0000 Subject: Remove Compute Vision Neon support Resolves COMPMID-4150 Change-Id: I316e8ab97de796666c71eadfde894715fcf4a1aa Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5141 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/NEFastCornersKernel.cpp | 475 -------------------------- 1 file changed, 475 deletions(-) delete mode 100644 src/core/NEON/kernels/NEFastCornersKernel.cpp (limited to 'src/core/NEON/kernels/NEFastCornersKernel.cpp') diff --git a/src/core/NEON/kernels/NEFastCornersKernel.cpp b/src/core/NEON/kernels/NEFastCornersKernel.cpp deleted file mode 100644 index c9280d8dc0..0000000000 --- a/src/core/NEON/kernels/NEFastCornersKernel.cpp +++ /dev/null @@ -1,475 +0,0 @@ -/* - * Copyright (c) 2016-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "src/core/NEON/kernels/NEFastCornersKernel.h" - -#include "arm_compute/core/Coordinates.h" -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Validate.h" -#include "src/core/helpers/AutoConfiguration.h" -#include "src/core/helpers/WindowHelpers.h" - -#include -#include -#include -#include - -using namespace arm_compute; - -NEFastCornersKernel::NEFastCornersKernel() - : INEKernel(), _input(nullptr), _output(nullptr), _threshold(0), _non_max_suppression(false) -{ -} - -namespace -{ -constexpr size_t PERMUTATIONS = 16; -constexpr size_t PERM_SIZE = 16; - -inline uint8x8x2_t create_permutation_index(size_t k) -{ - ARM_COMPUTE_ERROR_ON(k >= PERMUTATIONS); - - static const std::array, PERM_SIZE> permutations_table{ { { 0, 1, 2, 3, 4, 5, 6, 7, 8, 255, 255, 255, 255, 255, 255, 255 }, - { 15, 0, 1, 2, 3, 4, 5, 6, 7, 255, 255, 255, 255, 255, 255, 255 }, - { 14, 15, 0, 1, 2, 3, 4, 5, 6, 255, 255, 255, 255, 255, 255, 255 }, - { 13, 14, 15, 0, 1, 2, 3, 4, 5, 255, 255, 255, 255, 255, 255, 255 }, - { 12, 13, 14, 15, 0, 1, 2, 3, 4, 255, 255, 255, 255, 255, 255, 255 }, - { 11, 12, 13, 14, 15, 0, 1, 2, 3, 255, 255, 255, 255, 255, 255, 255 }, - { 10, 11, 12, 13, 14, 15, 0, 1, 2, 255, 255, 255, 255, 255, 255, 255 }, - { 9, 10, 11, 12, 13, 14, 15, 0, 1, 255, 255, 255, 255, 255, 255, 255 }, - { 8, 9, 10, 11, 12, 13, 14, 15, 0, 255, 255, 255, 255, 255, 255, 255 }, - { 7, 8, 9, 10, 11, 12, 13, 14, 15, 255, 255, 255, 255, 255, 255, 255 }, - { 6, 7, 8, 9, 10, 11, 12, 13, 14, 255, 255, 255, 255, 255, 255, 255 }, - { 5, 6, 7, 8, 9, 10, 11, 12, 13, 255, 255, 255, 255, 255, 255, 255 }, - { 4, 5, 6, 7, 8, 9, 10, 11, 12, 255, 255, 255, 255, 255, 255, 255 }, - { 3, 4, 5, 6, 7, 8, 9, 10, 11, 255, 255, 255, 255, 255, 255, 255 }, - { 2, 3, 4, 5, 6, 7, 8, 9, 10, 255, 255, 255, 255, 255, 255, 255 }, - { 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 255, 255, 255, 255, 255, 255 } - - } }; - - const uint8x8x2_t index = - { - { - vld1_u8(permutations_table[k].data()), - vld1_u8(permutations_table[k].data() + 8) - } - }; - - return index; -} - -inline uint8x8x4_t create_circle_index_register() -{ - /* - This function creates the index registers to retrieve the 16 texels in the Bresenham circle of radius 3 with center in P. - - . . F 0 1 . . . - . E . . . 2 . . - D . . . . . 3 . - C . . P . . 4 . - B . . . . . 5 . - . A . . . 6 . . - . . 9 8 7 . . . - - Where . is an irrelevant texel value - - We want to retrieve all texels [0,F] - - The 4 registers in r will then be used to get these texels out of two tables in the function get_circle_texels() - - The first table holds the top 4 rows of texels - . . F 0 1 . . . - . E . . . 2 . . - D . . . . . 3 . - C . . P . . 4 . - - The second table the bottom 3 rows of texels - B . . . . . 5 . - . A . . . 6 . . - . . 9 8 7 . . . - - */ - static const std::array top_right = - { - /* The register r.val[0] will be used to retrieve these texels: - . . . 0 1 . . . - . . . . . 2 . . - . . . . . . 3 . - . . . . . . 4 . - */ - 3 /* top table, first row, elem 4, value 0 in the diagram above */, - 4 /* top table, first row, elem 5, value 1 in the diagram above */, - 13 /* top table, second row, elem 6, value 2 in the diagram above */, - 22 /* top table, third row, elem 7, value 3 in the diagram above*/, - 30 /* top table, fourth row, elem 7, value 4 in the diagram above*/, - 255, - 255, - 255 - }; - - static const std::array bottom_right = - { - /* The register r.val[1] will be used to retrieve these texels: - . . . . . . 5 . - . . . . . 6 . . - . . . . 7 . . . - */ - 255, - 255, - 255, - 255, - 255, - 6 /* low table, first row, elem 7, value 5 in the diagram above*/, - 13 /* low table, second row, elem 6, value 6 in the diagram above*/, - 20 /* low table, third row, elem 5, value 7 in the diagram above*/ - }; - - static const std::array top_left = - { - /* The register r.val[2] will be used to retrieve these texels: - . . F . . . . . - . E . . . . . . - D . . . . . . . - C . . . . . . . - */ - 255, - 255, - 255, - 255, - 24 /* top table, fourth row, elem 1, value C in the diagram above */, - 16 /* top table, third row, elem 1, value D in the diagram above*/, - 9 /* top table, second row, elem 2, value E in the diagram above*/, - 2 /* top table, first row, elem 3, value F in the diagram above*/ - }; - - static const std::array bottom_left = - { - /* The register r.val[3] will be used to retrieve these texels: - B . . . . . . . - . A . . . . . . - . . 9 8 . . . . - */ - 19 /* low table, third row, elem 4, value 8 in the diagram above */, - 18 /* low table, third row, elem 3, value 9 in the diagram above */, - 9 /* low table, second row, elem 2, value A in the diagram above */, - 0 /* low table, first row, elem 1, value B in the diagram above */, - 255, - 255, - 255, - 255 - }; - - const uint8x8x4_t reg = - { - { - vld1_u8(top_right.data()), - vld1_u8(bottom_right.data()), - vld1_u8(top_left.data()), - vld1_u8(bottom_left.data()) - } - }; - - return reg; -} - -inline uint8x16_t get_circle_texels(const uint8x8x4_t &index, const uint8x8x4_t &tbl_hi, const uint8x8x3_t &tbl_lo) -{ - /* - This function loads the 16 texels in the Bresenham circle of radius 3 into the register 'texels'. - The parameter 'index' is an array of indices which was previously setup in setup_circle_index_register(). - tbl_hi and tbl_lo are the two tables holding the texels in the window [(-3,-3),(+3,+3)] for a given texel P - */ - return vcombine_u8(vtbx3_u8(vtbl4_u8(tbl_hi, index.val[0]), tbl_lo, index.val[1]), - vtbx3_u8(vtbl4_u8(tbl_hi, index.val[2]), tbl_lo, index.val[3])); -} - -inline uint8x16_t get_permutation_texels(const uint8x8x2_t &permutation_index, const uint8x8x2_t &tbl_circle) -{ - /* - This function stores the 9 texels of a give permutation X in the neon register 'texels' - - 'tbl_circle' is a LUT with the texels 0 to F - - . . F 0 1 . . . - . E . . . 2 . . - D . . . . . 3 . - C . . P . . 4 . - B . . . . . 5 . - . A . . . 6 . . - . . 9 8 7 . . . - - 'permutation_index' is one of the permutations below: - - { 0, 1, 2, 3, 4, 5, 6, 7, 8}, - { F, 0, 1, 2, 3, 4, 5, 6, 7}, - { E, F, 0, 1, 2, 3, 4, 5, 6}, - { D, E, F, 0, 1, 2, 3, 4, 5}, - { C, D, E, F, 0, 1, 2, 3, 4}, - { B, C, D, E, F, 0, 1, 2, 3}, - { A, B, C, D, E, F, 0, 1, 2}, - { 9, A, B, C, D, E, F, 0, 1}, - { 8, 9, A, B, C, D, E, F, 0}, - { 7, 8, 9, A, B, C, D, E, F}, - { 6, 7, 8, 9, A, B, C, D, E}, - { 5, 6, 7, 8, 9, A, B, C, D}, - { 4, 5, 6, 7, 8, 9, A, B, C}, - { 3, 4, 5, 6, 7, 8, 9, A, B}, - { 2, 3, 4, 5, 6, 7, 8, 9, A}, - { 1, 2, 3, 4, 5, 6, 7, 8, 9}, - */ - static const uint8x8_t perm_right = vdup_n_u8(255); // init to 255 so that vtbx preserves the original values of the lanes - - return vcombine_u8(vtbl2_u8(tbl_circle, permutation_index.val[0]), - vtbx2_u8(perm_right, tbl_circle, permutation_index.val[1])); -} - -inline bool is_permutation_brighter(const uint8x16_t &permutation, const uint8x16_t &pg) -{ - const uint8x16_t res_gt = vcgtq_u8(permutation, pg); - - return vget_lane_u64(vreinterpret_u64_u8(vand_u8(vget_high_u8(res_gt), vget_low_u8(res_gt))), 0) == std::numeric_limits::max(); -} - -inline bool is_permutation_darker(const uint8x16_t &permutation, const uint8x16_t &pl) -{ - const uint8x16_t res_lt = vcltq_u8(permutation, pl); - const uint64x2_t u64res_lt = vreinterpretq_u64_u8(res_lt); - const uint64_t t3 = vgetq_lane_u64(u64res_lt, 0); - const uint64_t t4 = vgetq_lane_u64(u64res_lt, 1); - - return std::numeric_limits::max() == t3 && 255 == t4; -} - -inline bool is_permutation_corner(const uint8x16_t &permutation, const uint8x16_t &pg, const uint8x16_t &pl) -{ - return is_permutation_brighter(permutation, pg) || is_permutation_darker(permutation, pl); -} - -inline bool point_is_fast_corner(uint8_t p, uint8_t threshold, const uint8x8x2_t &tbl_circle_texels, std::array &perm_indices) -{ - /* - This function determines whether the point 'p' is a corner. - */ - uint8x16_t pg = vqaddq_u8(vdupq_n_u8(p), vdupq_n_u8(threshold)); - uint8x16_t pl = vqsubq_u8(vdupq_n_u8(p), vdupq_n_u8(threshold)); - - bool corner_detected = false; - - for(size_t j = 0; !corner_detected && j < PERMUTATIONS; ++j) - { - const uint8x16_t pe_texels = get_permutation_texels(perm_indices[j], tbl_circle_texels); - corner_detected = is_permutation_corner(pe_texels, pg, pl); - } - - return corner_detected; -} - -inline uint8x8x2_t create_circle_tbl(const std::array &buffer, size_t in_offset, const uint8x8x4_t &circle_index_r) -{ - /* - This function builds a LUT holding the 16 texels in the Brensenham circle radius 3. - circle_index_r is a vector of 4 registers to retrieve the texels from the two tables mentioned above. - */ - - //Load the texels in the window [(x-3,y-3),(x+3,y+3)]. - //The top 4 rows are loaded in tbl_hi and the low 3 rows in tbl_lo. - //These two tables are then used to retrieve the texels in the Bresenham circle of radius 3. - const uint8x8x4_t tbl_window_hi = - { - { - vld1_u8(buffer[0] + in_offset), - vld1_u8(buffer[1] + in_offset), - vld1_u8(buffer[2] + in_offset), - vld1_u8(buffer[3] + in_offset) - } - }; - - const uint8x8x3_t tbl_window_lo = - { - { - vld1_u8(buffer[4] + in_offset), - vld1_u8(buffer[5] + in_offset), - vld1_u8(buffer[6] + in_offset) - } - }; - - const uint8x16_t circle_texels = get_circle_texels(circle_index_r, tbl_window_hi, tbl_window_lo); - - const uint8x8x2_t tbl_circle_texels = - { - { - vget_low_u8(circle_texels), - vget_high_u8(circle_texels) - } - }; - - return tbl_circle_texels; -} - -inline uint8_t get_point_score(uint8_t p, uint8_t tolerance, const uint8x8x2_t &tbl_circle, std::array &perm_indices) -{ - uint8_t b = 255; - uint8_t a = tolerance; - - while(b - a > 1) - { - const uint16_t ab = a + b; - const uint8_t c = ab >> 1; - - if(point_is_fast_corner(p, c, tbl_circle, perm_indices)) - { - a = c; - } - else - { - b = c; - } - } - - return a; -} -} // namespace - -BorderSize NEFastCornersKernel::border_size() const -{ - return BorderSize(3); -} - -void NEFastCornersKernel::configure(const IImage *input, IImage *output, uint8_t threshold, bool non_max_suppression, bool border_undefined) -{ - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(input); - ARM_COMPUTE_ERROR_ON_TENSOR_NOT_2D(output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8); - ARM_COMPUTE_ERROR_ON_MSG(border_undefined == false, "Not implemented"); - - _input = input; - _output = output; - _threshold = threshold; - _non_max_suppression = non_max_suppression; - - constexpr unsigned int num_elems_processed_per_iteration = 1; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_elems_written_per_iteration = 1; - constexpr unsigned int num_rows_read_per_iteration = 7; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); - AccessWindowHorizontal output_access(output->info(), 0, num_elems_written_per_iteration); - AccessWindowRectangle input_access(input->info(), -border_size().left, -border_size().top, num_elems_read_per_iteration, num_rows_read_per_iteration); - - update_window_and_padding(win, input_access, output_access); - - output_access.set_valid_region(win, input->info()->valid_region(), border_undefined, border_size()); - - INEKernel::configure(win); -} - -void NEFastCornersKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window); - - std::array perm_index{ {} }; - /* - We use a LUT loaded with 7 rows of uint8_t from the input image [-3,-3]...[+3,+3] to retrieve the texels in the Brensenham circle radius 3 and put them in one neon register uint8x16_t. - The three lines below setup the neon index registers to get these texels out from the table - */ - const uint8x8x4_t circle_index_r = create_circle_index_register(); - /* - We put the 16 texels (circle) in a LUT to easily generate all the permutations. The for block below setups the indices for each permutation. - */ - for(size_t k = 0; k < PERMUTATIONS; ++k) - { - perm_index[k] = create_permutation_index(k); - } - - Iterator in(_input, window); - Iterator out(_output, window); - - const std::array in_row - { - _input->ptr_to_element(Coordinates(-3, -3)), - _input->ptr_to_element(Coordinates(-3, -2)), - _input->ptr_to_element(Coordinates(-3, -1)), - _input->ptr_to_element(Coordinates(-3, 0)), - _input->ptr_to_element(Coordinates(-3, 1)), - _input->ptr_to_element(Coordinates(-3, 2)), - _input->ptr_to_element(Coordinates(-3, 3)) - }; - - auto is_rejected = [](uint8_t p, uint8_t q, uint8_t a, uint8_t b) - { - const bool p_is_in_ab = (a <= p) && (p <= b); - const bool q_is_in_ab = (a <= q) && (q <= b); - return p_is_in_ab && q_is_in_ab; - }; - - execute_window_loop(window, [&](const Coordinates &) - { - const size_t in_offset = in.offset(); - const uint8_t p0 = *in.ptr(); - const uint8_t b = std::min(p0 + _threshold, 255); - const uint8_t a = std::max(p0 - _threshold, 0); - uint8_t score = 0; - /* - Fast check to discard points which cannot be corners and avoid the expensive computation of the potential 16 permutations - - pixels 1 and 9 are examined, if both I1 and I9 are within [Ip - t, Ip + t], then candidate p is not a corner. - */ - const uint8_t p1 = (in_offset + in_row[0])[3]; - const uint8_t p9 = (in_offset + in_row[6])[3]; - - if(!is_rejected(p1, p9, a, b)) - { - /* pixels 5 and 13 are further examined to check whether three of them are brighter than Ip + t or darker than Ip - t */ - const uint8_t p5 = (in_offset + in_row[3])[6]; - const uint8_t p13 = (in_offset + in_row[3])[0]; - - if(!is_rejected(p5, p13, a, b)) - { - /* at this stage we use the full test with the 16 permutations to classify the point as corner or not */ - const uint8x8x2_t tbl_circle_texel = create_circle_tbl(in_row, in_offset, circle_index_r); - - if(point_is_fast_corner(p0, _threshold, tbl_circle_texel, perm_index)) - { - if(_non_max_suppression) - { - score = get_point_score(p0, _threshold, tbl_circle_texel, perm_index); - } - else - { - score = 1; - } - } - } - } - - *out.ptr() = score; - }, - in, out); -} -- cgit v1.2.1