From fc2817dc0436ef2d5064df0a061aafd3d324d894 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Tue, 27 Jun 2017 17:26:37 +0100 Subject: COMPMID-424 NEON/CL Harris Corners validation tests. Change-Id: I82d2a73f515a8d45d16b9ddb702fea51ae05c82e Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79687 Tested-by: Kaizen Reviewed-by: Moritz Pflanzer --- src/core/CL/kernels/CLHarrisCornersKernel.cpp | 5 ++-- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 3 +-- src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp | 7 ++---- src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 3 +-- src/runtime/CL/functions/CLHarrisCorners.cpp | 29 ++++++++++++++++------ 5 files changed, 28 insertions(+), 19 deletions(-) (limited to 'src') diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 9fc34a7760..1f757fe34c 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" @@ -91,8 +92,8 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 4; constexpr unsigned int num_elems_written_per_iteration = 4; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; + const unsigned int num_elems_read_per_iteration = block_size == 7 ? 10 : 8; + const unsigned int num_rows_read_per_iteration = block_size; Window win = calculate_max_window(*_input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 1b70d7513f..1bf831b9d9 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -88,8 +88,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe constexpr unsigned int num_elems_processed_per_iteration_x = 8; const unsigned int num_elems_processed_per_iteration_y = input->info()->dimension(1); - _border_size = BorderSize(std::max(static_cast(num_elems_processed_per_iteration_x) - static_cast(input->info()->dimension(0)), - static_cast(input->info()->dimension(0) % num_elems_processed_per_iteration_x))); + _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration_x) - input->info()->dimension(0)); Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp index 884da2861b..84e12d4ead 100644 --- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp +++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp @@ -57,10 +57,7 @@ inline void check_corner(float x, float y, float strength, InternalKeypoint *out inline void corner_candidates(const float *__restrict input, InternalKeypoint *__restrict output, int32_t x, int32_t y, int32_t *num_corner_candidates, std::mutex *corner_candidates_mutex) { - check_corner(x + 0, y, *(input + 0), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 1, y, *(input + 1), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 2, y, *(input + 2), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 3, y, *(input + 3), output, num_corner_candidates, corner_candidates_mutex); + check_corner(x, y, *input, output, num_corner_candidates, corner_candidates_mutex); } } // namespace @@ -86,7 +83,7 @@ void CPPCornerCandidatesKernel::configure(const IImage *input, InternalKeypoint _output = output; _num_corner_candidates = num_corner_candidates; - const unsigned int num_elems_processed_per_iteration = 4; + const unsigned int num_elems_processed_per_iteration = 1; // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index 1eb7e45e36..579c46fa3d 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -110,8 +110,7 @@ void NEMeanStdDevKernel::configure(const IImage *input, float *mean, uint64_t *g constexpr unsigned int num_elems_processed_per_iteration = 16; - _border_size = BorderSize(std::max(static_cast(num_elems_processed_per_iteration) - static_cast(input->info()->dimension(0)), - static_cast(input->info()->dimension(0) % num_elems_processed_per_iteration))); + _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration) - input->info()->dimension(0)); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); diff --git a/src/runtime/CL/functions/CLHarrisCorners.cpp b/src/runtime/CL/functions/CLHarrisCorners.cpp index 87d573a7ad..8f9fcdc58f 100644 --- a/src/runtime/CL/functions/CLHarrisCorners.cpp +++ b/src/runtime/CL/functions/CLHarrisCorners.cpp @@ -42,8 +42,20 @@ using namespace arm_compute; -CLHarrisCorners::CLHarrisCorners() - : _sobel(), _harris_score(), _non_max_suppr(), _candidates(), _sort_euclidean(), _border_gx(), _border_gy(), _gx(), _gy(), _score(), _nonmax(), _corners_list(), _num_corner_candidates(0), +CLHarrisCorners::CLHarrisCorners() // NOLINT + : _sobel(nullptr), + _harris_score(), + _non_max_suppr(), + _candidates(), + _sort_euclidean(), + _border_gx(), + _border_gy(), + _gx(), + _gy(), + _score(), + _nonmax(), + _corners_list(nullptr), + _num_corner_candidates(0), _corners(nullptr) { } @@ -62,6 +74,7 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist const TensorShape shape = input->info()->tensor_shape(); const DataType dt = (gradient_size < 7) ? DataType::S16 : DataType::S32; TensorInfo tensor_info(shape, 1, dt); + _gx.allocator()->init(tensor_info); _gy.allocator()->init(tensor_info); @@ -99,10 +112,6 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist ARM_COMPUTE_ERROR("Gradient size not implemented"); } - // Configure border filling before harris score - _border_gx.configure(&_gx, BorderSize(block_size / 2), border_mode, constant_border_value); - _border_gy.configure(&_gy, BorderSize(block_size / 2), border_mode, constant_border_value); - // Normalization factor const float norm_factor = 1.0f / (255.0f * pow(4.0f, gradient_size / 2) * block_size); const float pow4_normalization_factor = pow(norm_factor, 4); @@ -110,8 +119,12 @@ void CLHarrisCorners::configure(ICLImage *input, float threshold, float min_dist // Set/init Harris Score kernel accordingly with block_size _harris_score.configure(&_gx, &_gy, &_score, block_size, pow4_normalization_factor, threshold, sensitivity, border_mode == BorderMode::UNDEFINED); + // Configure border filling using harris score kernel's block size + _border_gx.configure(&_gx, _harris_score.border_size(), border_mode, PixelValue(constant_border_value)); + _border_gy.configure(&_gy, _harris_score.border_size(), border_mode, PixelValue(constant_border_value)); + // Init non-maxima suppression function - _non_max_suppr.configure(&_score, &_nonmax, border_mode == BorderMode::UNDEFINED); + _non_max_suppr.configure(&_score, &_nonmax, border_mode); // Init corner candidates kernel _candidates.configure(&_nonmax, _corners_list.get(), &_num_corner_candidates); @@ -144,7 +157,7 @@ void CLHarrisCorners::run() CLScheduler::get().enqueue(_harris_score, false); // Run non-maxima suppression - CLScheduler::get().enqueue(_non_max_suppr); + _non_max_suppr.run(); // Run corner candidate kernel _nonmax.map(true); -- cgit v1.2.1