From 61ef5bf586606d6282526641cf2244121d07c6fd Mon Sep 17 00:00:00 2001
From: Diego Lopez Recas
Date: Mon, 11 Dec 2017 12:36:55 +0000
Subject: IVGCVSW-847 Fix {NEON/CL}PoolingLayerKernel config

Also, add validation test that hits the discovered failure for CL.

Change-Id: I5573e0a3f169b85d5fb7299e7c48d74be7165208
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/112717
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
---
 src/core/CL/kernels/CLPoolingLayerKernel.cpp   | 51 +++++++++++---------------
 src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 23 +++++-------
 tests/framework/datasets/JoinDataset.h         | 12 +++---
 tests/validation/CL/PoolingLayer.cpp           | 17 +++++++--
 4 files changed, 52 insertions(+), 51 deletions(-)

diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index ac368c77ef..860cc92266 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -123,39 +123,26 @@ std::tuple validate_and_configure_window(ITenso
     const int input_width  = input->dimension(0);
     const int input_height = input->dimension(1);

-    unsigned int num_elems_processed_per_iteration = 1;
+    // Change the number of elements processed per iteration
+    // for pooling 3x3 with stride less equal than 3
+    const bool         can_optimize                      = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_quantized(data_type);
+    const unsigned int num_elems_processed_per_iteration = can_optimize ? 4 : 1;
+    const int          num_elems_read_per_iteration      = (num_elems_processed_per_iteration - 1) * pool_stride_x + pool_size;

-    if((pool_size == 3) && !is_data_type_quantized_asymmetric(data_type))
-    {
-        const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(data_type);
-
-        int num_elems_read_per_iteration = pool_size;
-        if(is_pool3x3_stride_le3)
-        {
-            // Change the number of elements processed and the number of elements read per iteration
-            // for pooling 3x3 with stride less equal than 3
-            num_elems_processed_per_iteration = 4;
-            num_elems_read_per_iteration      = pool_size * (pool_stride_x + 1);
-        }
+    // Number of iterations in X dimension
+    const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;

-        const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
-        const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
+    // Upper limit for the number of right/bottom border elements that are accessed
+    const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
+    const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;

-        border_size.right  = std::max(upper_bound_w, pool_pad_x);
-        border_size.bottom = std::max(upper_bound_h, pool_pad_y);
-    }
-    else
-    {
-        const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width;
-        const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;
-
-        border_size.right  = std::max(upper_bound_w, pool_pad_x);
-        border_size.bottom = std::max(upper_bound_h, pool_pad_y);
-    }
+    border_size.right  = std::max(upper_bound_w, pool_pad_x);
+    border_size.bottom = std::max(upper_bound_h, pool_pad_y);
     Window win = calculate_max_window(*output, Steps(num_elems_processed_per_iteration));

-    AccessWindowRectangle input_access(input, -pool_pad_x, -pool_pad_y, input_width + border_size.right, input_height + border_size.bottom);
+    AccessWindowRectangle input_access(input, -pool_pad_x, -pool_pad_y, num_elems_read_per_iteration, pool_size,
+                                       pool_stride_x * num_elems_processed_per_iteration, pool_stride_y);
     AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
     bool window_changed = update_window_and_padding(win, input_access, output_access);
     output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
@@ -305,8 +292,12 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue)
     {
         // Upsample input by pool size
         Window in_slice(slice);
-        in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x, in_slice.x().end() * pool_stride_x, pool_stride_x * _num_elems_processed_per_iteration));
-        in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y, in_slice.y().end() * pool_stride_y, pool_stride_y));
+        in_slice.set(Window::DimX, Window::Dimension(in_slice.x().start() - pool_pad_x,
+                                                     (in_slice.x().end() - pool_pad_x) * pool_stride_x,
+                                                     pool_stride_x * _num_elems_processed_per_iteration));
+        in_slice.set(Window::DimY, Window::Dimension(in_slice.y().start() - pool_pad_y,
+                                                     (in_slice.y().end() - pool_pad_y) * pool_stride_y,
+                                                     pool_stride_y));

         // Set inputs
         unsigned int idx = 0;
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index ac183d2f30..ff4802c5e0 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -317,7 +317,11 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen
             break;
     }

-    const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
+    // Number of iterations in X dimension
+    const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
+
+    // Upper limit for the number of right/bottom border elements that are accessed
+    const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width;
     const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height;

     border_size = BorderSize(pool_pad_y, pool_pad_x);
@@ -363,32 +367,25 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);

-    int           pool_pad_x    = 0;
-    int           pool_pad_y    = 0;
-    int           pool_stride_x = 0;
-    int           pool_stride_y = 0;
-    unsigned int  pooled_w      = 0;
-    unsigned int  pooled_h      = 0;
-    PoolingType   pool_type     = pool_info.pool_type();
-    int           pool_size     = pool_info.pool_size();
+    const PoolingType   pool_type         = pool_info.pool_type();
     const PadStrideInfo pad_stride_info   = pool_info.pad_stride_info();
     const bool          exclude_padding   = pool_info.exclude_padding();
     const bool          is_global_pooling = pool_info.is_global_pooling();
-    std::tie(pool_pad_x, pool_pad_y)       = pad_stride_info.pad();
-    std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
+    const int           pool_stride_x     = pad_stride_info.stride().first;

     // Update pool size in case of global pooling
-    pool_size = is_global_pooling ? input->info()->dimension(0) : pool_size;
+    const int pool_size = is_global_pooling ? input->info()->dimension(0) : pool_info.pool_size();

     // Validate pool info before calling scaled_dimensions
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments_pool_info(input->info(), pool_info, pool_size));

     // Check output dimensions
+    unsigned int pooled_w, pooled_h;
     std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), pool_size, pool_size,
-                                                     pool_info.pad_stride_info());
+                                                     pad_stride_info);

     // Output auto initialization if not yet initialized
     auto_init(input->info(), output->info(), pooled_w, pooled_h);
diff --git a/tests/framework/datasets/JoinDataset.h b/tests/framework/datasets/JoinDataset.h
index eded6e0259..d682c19d6b 100644
--- a/tests/framework/datasets/JoinDataset.h
+++ b/tests/framework/datasets/JoinDataset.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -47,8 +47,10 @@
 template <typename T, typename U>
 class JoinDataset : public Dataset
 {
 private:
-    using iter1_type = typename T::iterator;
-    using iter2_type = typename U::iterator;
+    using T_noref    = typename std::remove_reference<T>::type;
+    using U_noref    = typename std::remove_reference<U>::type;
+    using iter1_type = typename T_noref::iterator;
+    using iter2_type = typename U_noref::iterator;

 public:
     /** Construct dataset from the given datasets.
@@ -65,12 +67,12 @@ public:
     JoinDataset(JoinDataset &&) = default;

     /** Type of the dataset. */
-    using type = typename T::type;
+    using type = typename T_noref::type;

     /** Iterator for the dataset. */
     struct iterator
     {
-        iterator(const T *dataset1, const U *dataset2)
+        iterator(const T_noref *dataset1, const U_noref *dataset2)
             : _iter1{ dataset1->begin() }, _iter2{ dataset2->begin() }, _first_size{ dataset1->size() }
         {
         }
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index ee639376c5..4e5e5aa2e7 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,12 @@ namespace validation
 {
 namespace
 {
+/** Failing data set */
+const auto PoolingLayerDatasetSpecial = ((((framework::dataset::make("Shape", TensorShape{ 60U, 52U, 3U, 5U })
+                                            * framework::dataset::make("PoolType", PoolingType::AVG))
+                                           * framework::dataset::make("PoolingSize", 100))
+                                          * framework::dataset::make("PadStride", PadStrideInfo(5, 5, 50, 50)))
+                                         * framework::dataset::make("ExcludePadding", true));
 /** Input data set for floating-point data types */
 const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
                                            framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
@@ -74,7 +80,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Mismatching data type
                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Window shrink
                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4),     // Mismatching fixed point position
-                                                      TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),   // Window shrink
+                                                      TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11),
                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0),     // Invalid pad/size combination
                                                       TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0), // Invalid parameters
@@ -104,7 +110,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
                                                       PoolingLayerInfo(PoolingType::MAX),
                                                       PoolingLayerInfo(PoolingType::AVG),
                                                      })),
-               framework::dataset::make("Expected", { false, false, false, false, false, false, false, false, false, true })),
+               framework::dataset::make("Expected", { false, false, false, true, false, false, false, false, false, true })),
               input_info, output_info, pool_info, expected)
 {
     ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), pool_info)) == expected, framework::LogLevel::ERRORS);
 }
@@ -117,6 +123,11 @@ using CLPoolingLayerFixture = PoolingLayerValidationFixture
+FIXTURE_DATA_TEST_CASE(RunSpecial, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL, PoolingLayerDatasetSpecial * framework::dataset::make("DataType", DataType::F32))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), combine(PoolingLayerDatasetFP, framework::dataset::make("DataType", DataType::F32))))
 {
-- 
cgit v1.2.1
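
Note on the border computation above: both kernels now derive the right border from the number of vectorized steps that will actually run, ceil(pooled_w / num_elems_processed_per_iteration), instead of from the index of the last output element, and the per-step read width becomes (num_elems_processed_per_iteration - 1) * stride + pool_size. The sketch below restates that arithmetic outside the library; the Border struct, compute_border() and the example values are illustrative assumptions, and the 13x11 output size assumes the usual floor((in + 2*pad - pool)/stride) + 1 pooling formula.

    #include <algorithm>
    #include <cstdio>

    // Hypothetical, standalone restatement of the border arithmetic from the
    // validate_and_configure_window() hunks above; struct and function names
    // are illustrative and not part of the library.
    struct Border
    {
        int right;
        int bottom;
    };

    Border compute_border(int input_w, int input_h, int pooled_w, int pooled_h,
                          int pool_size, int stride_x, int stride_y,
                          int pad_x, int pad_y, int elems_per_iteration)
    {
        // Elements read by one vectorized step along X
        const int elems_read = (elems_per_iteration - 1) * stride_x + pool_size;

        // The kernel executes ceil(pooled_w / elems_per_iteration) steps along X,
        // so the rightmost access is governed by the last step, not by the index
        // of the last output element.
        const int num_iterations_x = (pooled_w + elems_per_iteration - 1) / elems_per_iteration;

        const int upper_bound_w = ((num_iterations_x - 1) * elems_per_iteration * stride_x - pad_x + elems_read) - input_w;
        const int upper_bound_h = ((pooled_h - 1) * stride_y - pad_y + pool_size) - input_h;

        return { std::max(upper_bound_w, pad_x), std::max(upper_bound_h, pad_y) };
    }

    int main()
    {
        // Shape from the new RunSpecial dataset: a 60x52 plane pooled with a
        // 100x100 window, stride 5 and padding 50; with floor rounding that
        // gives a 13x11 output.
        const Border b = compute_border(60, 52, 13, 11, 100, 5, 5, 50, 50, 1);
        std::printf("right border = %d, bottom border = %d\n", b.right, b.bottom);
        return 0;
    }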
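
Note on the run() change in CLPoolingLayerKernel.cpp: the end of the input slice is now computed as (end - pool_pad_x) * pool_stride_x, i.e. the padding offset is removed before scaling by the stride, while the start is only translated by the padding. The sketch below simply restates those start/end/step expressions on plain integers; Range and map_output_to_input() are hypothetical names, not the library's Window API.

    #include <cstdio>

    // Illustrative restatement of the corrected input-slice expressions in
    // CLPoolingLayerKernel::run(); not the library's Window API.
    struct Range
    {
        int start;
        int end;
        int step;
    };

    Range map_output_to_input(int out_start, int out_end, int pad, int stride, int elems_per_iteration)
    {
        return {
            out_start - pad,             // start: translate by the padding
            (out_end - pad) * stride,    // end: remove the pad offset before scaling by the stride
            stride * elems_per_iteration // step: one stride per element produced in an iteration
        };
    }

    int main()
    {
        // Example with the RunSpecial numbers: 13 output elements, pad 50, stride 5.
        const Range x = map_output_to_input(0, 13, 50, 5, 1);
        std::printf("start = %d, end = %d, step = %d\n", x.start, x.end, x.step);
        return 0;
    }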
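
Note on the JoinDataset.h change: when joined datasets are passed around as lvalues, T and U can be deduced as reference types, and typename T::iterator is then ill-formed because a nested type cannot be named through a reference type; the same applies to the const T * constructor parameters. Applying std::remove_reference first, as the patch does, makes those declarations well-formed again. A minimal sketch of the failure mode and the fix, independent of the test framework (Holder is a hypothetical stand-in, not JoinDataset):

    #include <type_traits>
    #include <vector>

    // Made-up stand-in for the JoinDataset situation: the template parameter
    // may be deduced as a reference type, in which case "typename T::iterator"
    // would be ill-formed. Stripping the reference first restores access to
    // the nested typedef.
    template <typename T>
    struct Holder
    {
        using T_noref = typename std::remove_reference<T>::type;
        using iter    = typename T_noref::iterator;
    };

    int main()
    {
        // Instantiated with a reference type, as can happen when lvalue
        // datasets are forwarded through a joining helper (illustrative only).
        static_assert(std::is_same<Holder<std::vector<int> &>::iter, std::vector<int>::iterator>::value,
                      "iterator type is recoverable after stripping the reference");
        return 0;
    }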