diff options
-rw-r--r-- | arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h | 9 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEPoolingLayer.h | 18 | ||||
-rwxr-xr-x | scripts/include_functions_kernels.py | 2 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEPoolingLayerKernel.cpp | 143 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEPoolingLayer.cpp | 27 | ||||
-rw-r--r-- | tests/validation/CL/GlobalPoolingLayer.cpp (renamed from tests/validation/CL/GlobalPooling.cpp) | 4 | ||||
-rw-r--r-- | tests/validation/NEON/GlobalPoolingLayer.cpp | 73 | ||||
-rw-r--r-- | tests/validation/NEON/PoolingLayer.cpp | 2 |
8 files changed, 262 insertions, 16 deletions
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h index 2a0ecf8ba7..9d7c75179a 100644 --- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h @@ -48,6 +48,8 @@ public: ~NEPoolingLayerKernel() = default; /** Set the input and output tensors. * + * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only + * * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. @@ -123,6 +125,13 @@ private: */ template <PoolingType pooling_type> void pooling7_f32(const Window &window_input, const Window &window); + /** Function to perform NxN pooling. + * + * @param[in] window_input Input region on which to execute the kernel. + * @param[in] window Output region on which to execute the kernel. + */ + template <PoolingType pooling_type> + void poolingN_f32(const Window &window_input, const Window &window); /** Common signature for all the specialised Pooling functions * * @param[in] window_input Input region on which to execute the kernel. diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h index 5c36e80f37..7b038aaa51 100644 --- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h +++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h @@ -24,8 +24,10 @@ #ifndef __ARM_COMPUTE_NEPOOLINGLAYER_H__ #define __ARM_COMPUTE_NEPOOLINGLAYER_H__ -#include "arm_compute/runtime/NEON/INESimpleFunction.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" #include "arm_compute/core/Types.h" namespace arm_compute @@ -37,16 +39,28 @@ class ITensor; * -# @ref NEFillBorderKernel (executed if padding size is different from zero) * -# @ref NEPoolingLayerKernel */ -class NEPoolingLayer : public INESimpleFunction +class NEPoolingLayer : public IFunction { public: + /** Constructor */ + NEPoolingLayer(); /** Set the input and output tensors. * + * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only + * * @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info); + + // Inherited methods overridden: + void run() override; + +private: + NEPoolingLayerKernel _pooling_layer_kernel; + NEFillBorderKernel _border_handler; + bool _is_global_pooling_layer; }; } #endif /* __ARM_COMPUTE_NEPOOLINGLAYER_H__ */ diff --git a/scripts/include_functions_kernels.py b/scripts/include_functions_kernels.py index e6e5f5e7d5..80d1fb28e9 100755 --- a/scripts/include_functions_kernels.py +++ b/scripts/include_functions_kernels.py @@ -1,4 +1,4 @@ -#!/usr/bin/env python3.5 +#!/usr/bin/env python #FIXME: Remove this file before the release import glob diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp index b97564e77b..8d4e46500f 100644 --- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp +++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp @@ -108,14 +108,13 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad(); std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); - static const std::set<int> supported_pool_sizes = { 2, 3, 7 }; + static const std::set<int> supported_pool_sizes = { 2, 3 }; ARM_COMPUTE_UNUSED(supported_pool_sizes); ARM_COMPUTE_ERROR_ON_NULLPTR(output); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_fixed_point(input->info()->data_type())); - ARM_COMPUTE_ERROR_ON(supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()); - ARM_COMPUTE_ERROR_ON(7 == pool_size && input->info()->data_type() != DataType::F32); + ARM_COMPUTE_ERROR_ON((supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()) && (input->info()->data_type() != DataType::F32)); ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size); ARM_COMPUTE_ERROR_ON(is_data_type_fixed_point(input->info()->data_type()) && pool_stride_x > 2); @@ -207,7 +206,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons num_elems_read_per_iteration = 8; // We use vload8 for pooling7 break; default: - ARM_COMPUTE_ERROR("Pooling size not supported"); + num_elems_read_per_iteration = 1; // We use vload4 for poolingN but with a leftover for loop break; } num_elems_processed_per_iteration = 1; @@ -380,7 +379,20 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons } break; default: - ARM_COMPUTE_ERROR("Unsupported pooling size"); + switch(pool_type) + { + case PoolingType::AVG: + _func = &NEPoolingLayerKernel::poolingN_f32<PoolingType::AVG>; + break; + case PoolingType::L2: + _func = &NEPoolingLayerKernel::poolingN_f32<PoolingType::L2>; + break; + case PoolingType::MAX: + _func = &NEPoolingLayerKernel::poolingN_f32<PoolingType::MAX>; + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling type!"); + } break; } @@ -1005,6 +1017,127 @@ void NEPoolingLayerKernel::pooling7_f32(const Window &window_input, const Window input, output); } +template <PoolingType pooling_type> +void NEPoolingLayerKernel::poolingN_f32(const Window &window_input, const Window &window) +{ + Iterator input(_input, window_input); + Iterator output(_output, window); + + const int pool_size = _pool_info.pool_size(); + int pool_pad_x = 0; + int pool_pad_y = 0; + int pool_stride_x = 0; + int pool_stride_y = 0; + std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad(); + std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride(); + const int upper_bound_w = _input->info()->dimension(0) + pool_pad_x; + const int upper_bound_h = _input->info()->dimension(1) + pool_pad_y; + + execute_window_loop(window, [&](const Coordinates & id) + { + float res = 0.0f; + + if(pooling_type != PoolingType::MAX) + { + // Calculate scale + const float scale = calculate_avg_scale(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y); + + // Perform pooling + float32x4_t vres = vdupq_n_f32(0.0f); + + for(int y = 0; y < pool_size; ++y) + { + int x = 0; + for(; x <= (pool_size - 4); x += 4) + { + const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + + (y - pool_pad_y) * _input->info()->strides_in_bytes().y())); + + // Get power of 2 in case of l2 pooling and accumulate + if(pooling_type == PoolingType::L2) + { + vres = vmlaq_f32(vres, data, data); + } + else + { + vres = vaddq_f32(vres, data); + } + } + + // Leftover for loop + for(; x < pool_size; ++x) + { + float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y())); + + // Get power of 2 in case of l2 pooling + if(pooling_type == PoolingType::L2) + { + data *= data; + } + + res += data; + } + } + +#if defined(__aarch64__) + // Reduction operation available on 64 bit architectures only + res += vaddvq_f32(vres); +#else // __aarch64__ + // Reduction + float32x2_t tmp = vpadd_f32(vget_high_f32(vres), vget_low_f32(vres)); + tmp = vpadd_f32(tmp, tmp); + + res += vget_lane_f32(tmp, 0); +#endif // __aarch64__ + // Divide by scale + res *= scale; + } + else + { + float32x4_t vres = vdupq_n_f32(std::numeric_limits<float>::min()); + res = std::numeric_limits<float>::min(); + + for(int y = 0; y < pool_size; ++y) + { + int x = 0; + for(; x <= (pool_size - 4); x += 4) + { + const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + + (y - pool_pad_y) * _input->info()->strides_in_bytes().y())); + vres = vmaxq_f32(vres, data); + } + + // Leftover for loop + for(; x < pool_size; ++x) + { + const float data = *(reinterpret_cast<const float *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y())); + res = std::max(res, data); + } + } + +#if defined(__aarch64__) + // Reduction operation available on 64 bit architectures only + res = std::max(vmaxvq_f32(vres), res); +#else // __aarch64__ + float32x2_t tmp = vpmax_f32(vget_high_f32(vres), vget_low_f32(vres)); + tmp = vpmax_f32(tmp, tmp); + + res = std::max(res, vget_lane_f32(tmp, 0)); +#endif // __aarch64__ + } + + // Calculate square-root in case of l2 pooling + if(pooling_type == PoolingType::L2) + { + res = std::sqrt(res); + } + + // Store result + *(reinterpret_cast<float *>(output.ptr())) = res; + }, + input, output); +} + void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info) { ARM_COMPUTE_UNUSED(info); diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp index de04f36961..f8a85b9897 100644 --- a/src/runtime/NEON/functions/NEPoolingLayer.cpp +++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp @@ -23,19 +23,36 @@ */ #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" -#include "arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h" +#include "arm_compute/core/ITensor.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + #include "support/ToolchainSupport.h" using namespace arm_compute; +NEPoolingLayer::NEPoolingLayer() + : _pooling_layer_kernel(), _border_handler(), _is_global_pooling_layer(false) +{ +} + void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info) { + // Check if we have Global Pooling Layer + _is_global_pooling_layer = (input->info()->dimension(0) == pool_info.pool_size()) && (input->info()->dimension(1) == pool_info.pool_size()); + // Configure pooling kernel - auto k = arm_compute::support::cpp14::make_unique<NEPoolingLayerKernel>(); - k->configure(input, output, pool_info); - _kernel = std::move(k); + _pooling_layer_kernel.configure(input, output, pool_info); // Configure border depending on operation required BorderMode border_mode = (pool_info.pool_type() == PoolingType::MAX) ? BorderMode::REPLICATE : BorderMode::CONSTANT; - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(static_cast<float>(0.f))); + _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, PixelValue(static_cast<float>(0.f))); } + +void NEPoolingLayer::run() +{ + // Fill border + NEScheduler::get().schedule(&_border_handler, Window::DimY); + + // Run pooling layer + NEScheduler::get().schedule(&_pooling_layer_kernel, _is_global_pooling_layer ? Window::DimZ : Window::DimY); +}
\ No newline at end of file diff --git a/tests/validation/CL/GlobalPooling.cpp b/tests/validation/CL/GlobalPoolingLayer.cpp index c5c9d00b27..31e3fe0eb7 100644 --- a/tests/validation/CL/GlobalPooling.cpp +++ b/tests/validation/CL/GlobalPoolingLayer.cpp @@ -47,8 +47,8 @@ namespace const auto GlobalPoolingLayerDataset = combine(datasets::GlobalPoolingShapes(), datasets::PoolingTypes()); /** Input data set for quantized data types */ -constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ +constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 types */ +constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for FP16 types */ } // namespace TEST_SUITE(CL) diff --git a/tests/validation/NEON/GlobalPoolingLayer.cpp b/tests/validation/NEON/GlobalPoolingLayer.cpp new file mode 100644 index 0000000000..37950b059f --- /dev/null +++ b/tests/validation/NEON/GlobalPoolingLayer.cpp @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/PoolingTypesDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/PoolingLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +/** Input data set for float data types */ +const auto GlobalPoolingLayerDataset = combine(datasets::GlobalPoolingShapes(), datasets::PoolingTypes()); + +/** Input data set for quantized data types */ +constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 types */ +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(GlobalPoolingLayer) + +template <typename T> +using NEGlobalPoolingLayerFixture = GlobalPoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>; + +TEST_SUITE(Float) +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunGlobalPooling, NEGlobalPoolingLayerFixture<float>, framework::DatasetMode::ALL, combine(GlobalPoolingLayerDataset, framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp index 5ebbc1bc96..ff68ecf481 100644 --- a/tests/validation/NEON/PoolingLayer.cpp +++ b/tests/validation/NEON/PoolingLayer.cpp @@ -44,7 +44,7 @@ namespace validation namespace { /** Input data set for float data types */ -const auto PoolingLayerDatasetFP = combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 7 })), +const auto PoolingLayerDatasetFP = combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 7, 9 })), framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })); /** Input data set for quantized data types */ |