aboutsummaryrefslogtreecommitdiff
path: root/src/core
diff options
context:
space:
mode:
author Diego Lopez Recas <Diego.LopezRecas@arm.com> 2017-12-11 12:36:55 +0000
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:43:42 +0000
commit 61ef5bf586606d6282526641cf2244121d07c6fd (patch)
tree 6c113ebeba8dd17e653fade213afeff3e2ed95d4 /src/core
parent 9c42de9e4c7061ea0431f2ef2ecf8984e0d9c89b (diff)
download ComputeLibrary-61ef5bf586606d6282526641cf2244121d07c6fd.tar.gz
IVGCVSW-847 Fix {NEON/CL}PoolingLayerKernel config
Also, add validation test that hits the discovered failure for CL. Change-Id: I5573e0a3f169b85d5fb7299e7c48d74be7165208 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112717 Tested-by: Jenkins <bsgcomp@arm.com> Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r-- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 51
-rw-r--r-- src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 23
2 files changed, 31 insertions, 43 deletions
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index ac368c77ef..860cc92266 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -123,39 +123,26 @@ std::tuple<Status, Window, CLPoolingConfig> validate_and_configure_window(ITenso
const int input_width = input->dimension(0);
const int input_height = input->dimension(1);
- unsigned int num_elems_processed_per_iteration = 1;
+ // Change the number of elements processed per iteration
+ // for pooling 3x3 with stride less equal than 3
+ const bool can_optimize = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_quantized(data_type);
+ const unsigned int num_elems_processed_per_iteration = can_optimize ? 4 : 1;
+ const int num_elems_read_per_iteration = (num_elems_processed_per_iteration - 1) * pool_stride_x + pool_size;
- if((pool_size == 3) && !is_data_type_quantized_asymmetric(data_type))
- {
- const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(data_type);
-
- int num_elems_read_per_iteration = pool_size;
- if(is_pool3x3_stride_le3)
- {
- // Change the number of elements processed and the number of elements read per iteration
- // for pooling 3x3 with stride less equal than 3
- num_elems_processed_per_iteration = 4;
- num_elems_read_per_iteration = pool_size * (pool_stride_x + 1);
- }
+ // Number of iterations in X dimension
+ const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
- const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
- const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
+ // Upper limit for the number of right/bottom border elements that are accessed
+ const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
+ const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
- border_size.right = std::max(upper_bound_w, pool_pad_x);
- border_size.bottom = std::max(upper_bound_h, pool_pad_y);
- }
- else
- {
- const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width;
- const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
-
- border_size.right = std::max(upper_bound_w, pool_pad_x);
- border_size.bottom = std::max(upper_bound_h, pool_pad_y);
- }
+ border_size.right = std::max(upper_bound_w, pool_pad_x);
+ border_size.bottom = std::max(upper_bound_h, pool_pad_y);
Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));
- AccessWindowRectangle input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right, input_height + border_size.bottom);
+ AccessWindowRectangle input_access(input, -pool_pad_x, -pool_pad_y, num_elems_read_per_iteration, pool_size,
+ pool_stride_x * num_elems_processed_per_iteration, pool_stride_y);
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
bool window_changed = update_window_and_padding(win, input_access, output_access);
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
@@ -305,8 +292,12 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
{
// Upsample input by pool size
Window in_slice(slice);
- in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration));
- in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y));
+ in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x,
+ (in_slice.x().end() - pool_pad_x) * pool_stride_x,
+ pool_stride_x * _num_elems_processed_per_iteration));
+ in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y,
+ (in_slice.y().end() - pool_pad_y) * pool_stride_y,
+ pool_stride_y));
// Set inputs
unsigned int idx = 0;
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index ac183d2f30..ff4802c5e0 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -317,7 +317,11 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
break;
}
- const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
+ // Number of iterations in X dimension
+ const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
+
+ // Upper limit for the number of right/bottom border elements that are accessed
+ const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
border_size = BorderSize(pool_pad_y, pool_pad_x);
@@ -363,32 +367,25 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- int pool_pad_x = 0;
- int pool_pad_y = 0;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- PoolingType pool_type = pool_info.pool_type();
- int pool_size = pool_info.pool_size();
+ const PoolingType pool_type = pool_info.pool_type();
const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
const bool exclude_padding = pool_info.exclude_padding();
const bool is_global_pooling = pool_info.is_global_pooling();
- std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
- std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
+ const int pool_stride_x = pad_stride_info.stride().first;
// Update pool size in case of global pooling
- pool_size = is_global_pooling ? input->info()->dimension(0) : pool_size;
+ const int pool_size = is_global_pooling ? input->info()->dimension(0) : pool_info.pool_size();
// Validate pool info before calling scaled_dimensions
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(input->info(), pool_info, pool_size));
// Check output dimensions
+ unsigned int pooled_w, pooled_h;
std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
input->info()->dimension(1),
pool_size,
pool_size,
- pool_info.pad_stride_info());
+ pad_stride_info);
// Output auto initialization if not yet initialized
auto_init(input->info(), output->info(), pooled_w, pooled_h);