diff options
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/CL/kernels/CLHarrisCornersKernel.cpp | 5 | ||||
-rw-r--r-- | src/core/CL/kernels/CLMeanStdDevKernel.cpp | 3 | ||||
-rw-r--r-- | src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp | 7 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEMeanStdDevKernel.cpp | 3 |
4 files changed, 7 insertions, 11 deletions
diff --git a/src/core/CL/kernels/CLHarrisCornersKernel.cpp b/src/core/CL/kernels/CLHarrisCornersKernel.cpp index 9fc34a7760..1f757fe34c 100644 --- a/src/core/CL/kernels/CLHarrisCornersKernel.cpp +++ b/src/core/CL/kernels/CLHarrisCornersKernel.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/kernels/CLHarrisCornersKernel.h" +#include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/CL/ICLTensor.h" @@ -91,8 +92,8 @@ void CLHarrisScoreKernel::configure(const ICLImage *input1, const ICLImage *inpu // Configure kernel window constexpr unsigned int num_elems_processed_per_iteration = 4; constexpr unsigned int num_elems_written_per_iteration = 4; - constexpr unsigned int num_elems_read_per_iteration = 8; - constexpr unsigned int num_rows_read_per_iteration = 3; + const unsigned int num_elems_read_per_iteration = block_size == 7 ? 10 : 8; + const unsigned int num_rows_read_per_iteration = block_size; Window win = calculate_max_window(*_input1->info(), Steps(num_elems_processed_per_iteration), border_undefined, border_size()); diff --git a/src/core/CL/kernels/CLMeanStdDevKernel.cpp b/src/core/CL/kernels/CLMeanStdDevKernel.cpp index 1b70d7513f..1bf831b9d9 100644 --- a/src/core/CL/kernels/CLMeanStdDevKernel.cpp +++ b/src/core/CL/kernels/CLMeanStdDevKernel.cpp @@ -88,8 +88,7 @@ void CLMeanStdDevKernel::configure(const ICLImage *input, float *mean, cl::Buffe constexpr unsigned int num_elems_processed_per_iteration_x = 8; const unsigned int num_elems_processed_per_iteration_y = input->info()->dimension(1); - _border_size = BorderSize(std::max(static_cast<int>(num_elems_processed_per_iteration_x) - static_cast<int>(input->info()->dimension(0)), - static_cast<int>(input->info()->dimension(0) % num_elems_processed_per_iteration_x))); + _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration_x) - input->info()->dimension(0)); Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y)); AccessWindowRectangle input_access(input->info(), 0, 0, num_elems_processed_per_iteration_x, num_elems_processed_per_iteration_y); diff --git a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp index 884da2861b..84e12d4ead 100644 --- a/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp +++ b/src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp @@ -57,10 +57,7 @@ inline void check_corner(float x, float y, float strength, InternalKeypoint *out inline void corner_candidates(const float *__restrict input, InternalKeypoint *__restrict output, int32_t x, int32_t y, int32_t *num_corner_candidates, std::mutex *corner_candidates_mutex) { - check_corner(x + 0, y, *(input + 0), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 1, y, *(input + 1), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 2, y, *(input + 2), output, num_corner_candidates, corner_candidates_mutex); - check_corner(x + 3, y, *(input + 3), output, num_corner_candidates, corner_candidates_mutex); + check_corner(x, y, *input, output, num_corner_candidates, corner_candidates_mutex); } } // namespace @@ -86,7 +83,7 @@ void CPPCornerCandidatesKernel::configure(const IImage *input, InternalKeypoint _output = output; _num_corner_candidates = num_corner_candidates; - const unsigned int num_elems_processed_per_iteration = 4; + const unsigned int num_elems_processed_per_iteration = 1; // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); diff --git a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp index 1eb7e45e36..579c46fa3d 100644 --- a/src/core/NEON/kernels/NEMeanStdDevKernel.cpp +++ b/src/core/NEON/kernels/NEMeanStdDevKernel.cpp @@ -110,8 +110,7 @@ void NEMeanStdDevKernel::configure(const IImage *input, float *mean, uint64_t *g constexpr unsigned int num_elems_processed_per_iteration = 16; - _border_size = BorderSize(std::max(static_cast<int>(num_elems_processed_per_iteration) - static_cast<int>(input->info()->dimension(0)), - static_cast<int>(input->info()->dimension(0) % num_elems_processed_per_iteration))); + _border_size = BorderSize(ceil_to_multiple(input->info()->dimension(0), num_elems_processed_per_iteration) - input->info()->dimension(0)); // Configure kernel window Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration)); |