From fc2817dc0436ef2d5064df0a061aafd3d324d894 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Tue, 27 Jun 2017 17:26:37 +0100 Subject: COMPMID-424 NEON/CL Harris Corners validation tests. Change-Id: I82d2a73f515a8d45d16b9ddb702fea51ae05c82e Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79687 Tested-by: Kaizen Reviewed-by: Moritz Pflanzer --- src/core/CL/kernels/CLHarrisCornersKernel.cpp | 5 +++-- src/core/CL/kernels/CLMeanStdDevKernel.cpp | 3 +-- src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp | 7 ++----- src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 3 +-- 4 files changed, 7 insertions(+), 11 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 9fc34a7760..1f757fe34c 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" @@ -91,8 +92,8 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 4; constexpr unsigned int num_elems_written_per_iteration = 4; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; + const unsigned int num_elems_read_per_iteration = block_size == 7 ? 10 : 8; + const unsigned int num_rows_read_per_iteration = block_size; Window win = calculate_max_window(*_input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 1b70d7513f..1bf831b9d9 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -88,8 +88,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe constexpr unsigned int num_elems_processed_per_iteration_x = 8; const unsigned int num_elems_processed_per_iteration_y = input->info()->dimension(1); - _border_size = BorderSize(std::max(static_cast(num_elems_processed_per_iteration_x) - static_cast(input->info()->dimension(0)), - static_cast(input->info()->dimension(0) % num_elems_processed_per_iteration_x))); + _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration_x) - input->info()->dimension(0)); Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp index 884da2861b..84e12d4ead 100644 --- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp +++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp @@ -57,10 +57,7 @@ inline void check_corner(float x, float y, float strength, InternalKeypoint *out inline void corner_candidates(const float *__restrict input, InternalKeypoint *__restrict output, int32_t x, int32_t y, int32_t *num_corner_candidates, std::mutex *corner_candidates_mutex) { - check_corner(x + 0, y, *(input + 0), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 1, y, *(input + 1), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 2, y, *(input + 2), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 3, y, *(input + 3), output, num_corner_candidates, corner_candidates_mutex); + check_corner(x, y, *input, output, num_corner_candidates, corner_candidates_mutex); } } // namespace @@ -86,7 +83,7 @@ void CPPCornerCandidatesKernel::configure(const IImage *input, InternalKeypoint _output = output; _num_corner_candidates = num_corner_candidates; - const unsigned int num_elems_processed_per_iteration = 4; + const unsigned int num_elems_processed_per_iteration = 1; // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index 1eb7e45e36..579c46fa3d 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -110,8 +110,7 @@ void NEMeanStdDevKernel::configure(const IImage *input, float *mean, uint64_t *g constexpr unsigned int num_elems_processed_per_iteration = 16; - _border_size = BorderSize(std::max(static_cast(num_elems_processed_per_iteration) - static_cast(input->info()->dimension(0)), - static_cast(input->info()->dimension(0) % num_elems_processed_per_iteration))); + _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration) - input->info()->dimension(0)); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); -- cgit v1.2.1