From 5518671926c2c493e023a2e0d78b4aef4cb0dcec Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 8 Jan 2018 17:37:12 +0000
Subject: COMPMID-785: Add QASYMM8 support for pooling layer

Adds generic pooling case for QASYMM8

Change-Id: I37d38a92ca61651e915fbbbb6da88e180390b4ab
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115439
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
---
 arm_compute/core/NEON/kernels/NEFillBorderKernel.h |   4 +-
 .../core/NEON/kernels/NEPoolingLayerKernel.h       |  28 +-
 .../runtime/NEON/functions/NEPoolingLayer.h        |   6 +-
 src/core/NEON/kernels/NEFillBorderKernel.cpp       |   9 +-
 src/core/NEON/kernels/NEPoolingLayerKernel.cpp     | 622 ++++++++++++++++++---
 src/core/Utils.cpp                                 |   2 +
 src/runtime/CL/functions/CLPoolingLayer.cpp        |  12 +-
 src/runtime/NEON/functions/NEPoolingLayer.cpp      |  11 +-
 tests/benchmark/NEON/PoolingLayer.cpp              |   6 +-
 tests/validation/NEON/PoolingLayer.cpp             |  43 +-
 10 files changed, 637 insertions(+), 106 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
index 4e003243ba..164158bb79 100644
--- a/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
+++ b/arm_compute/core/NEON/kernels/NEFillBorderKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -53,7 +53,7 @@ public:
      *
      * @note This kernel fills the borders within the XY-planes.
      *
-     * @param[in,out] tensor                Tensor to process. Data types supported: U8/S8/QS8/QS16/S16/S32/F32.
+     * @param[in,out] tensor                Tensor to process. Data types supported: U8/S8/QS8/QASYMM8/QS16/S16/S32/F32.
      * @param[in]     border_size           Size of the border to fill in elements.
      * @param[in]     border_mode           Border mode to use for the convolution.
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 87d14e5f91..19c4656679 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -50,7 +50,7 @@ public:
      *
      * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
      *
-     * @param[in]  input     Source tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in]  input     Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
      * @param[out] output    Destination tensor. Data types supported: Same as @p input.
      * @param[in]  pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      */
@@ -59,7 +59,7 @@ public:
      *
      * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
      *
-     * @param[in] input     Source tensor. Data types supported: QS8/QS16/F16/F32.
+     * @param[in] input     Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
      * @param[in] output    Destination tensor. Data types supported: Same as @p input.
      * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      *
@@ -86,7 +86,6 @@ private:
      */
     template <PoolingType pooling_type, bool exclude_padding>
     void pooling2_f16(const Window &window_input, const Window &window);
-
     /** Function to perform 2x2 pooling for 8bit fixed point.
      *
      * @param[in] window_input Input region on which to execute the kernel.
@@ -94,6 +93,13 @@ private:
      */
     template <PoolingType pooling_type>
     void pooling2_q8(const Window &window_input, const Window &window);
+    /** Function to perform 2x2 pooling for 8-bit quantized asymmetric type.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type, bool exclude_padding>
+    void pooling2_qasymm8(const Window &window_input, const Window &window);
     /** Function to perform 2x2 pooling for 16bit fixed point.
      *
      * @param[in] window_input Input region on which to execute the kernel.
@@ -122,6 +128,13 @@
      */
     template <PoolingType pooling_type>
     void pooling3_q8(const Window &window_input, const Window &window);
+    /** Function to perform 3x3 pooling for 8-bit quantized asymmetric type.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type, bool exclude_padding>
+    void pooling3_qasymm8(const Window &window_input, const Window &window);
     /** Function to perform 3x3 pooling for 16bit fixed point.
      *
      * @param[in] window_input Input region on which to execute the kernel.
@@ -142,6 +155,13 @@
      * @param[in] window       Output region on which to execute the kernel.
      */
     template <PoolingType pooling_type, bool exclude_padding>
+    void poolingN_qasymm8(const Window &window_input, const Window &window);
+    /** Function to perform NxN pooling.
+     *
+     * @param[in] window_input Input region on which to execute the kernel.
+     * @param[in] window       Output region on which to execute the kernel.
+     */
+    template <PoolingType pooling_type, bool exclude_padding>
     void poolingN_f32(const Window &window_input, const Window &window);
     /** Common signature for all the specialised Pooling functions
      *
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index 0f8abb587d..3ac0844798 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -48,7 +48,7 @@ public:
      *
      * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
      *
-     * @param[in, out] input     Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32.
+     * @param[in, out] input     Source tensor. (Written to only when padding != 0) Data types supported: QS8/QASYMM8/QS16/F16/F32.
      * @param[out]     output    Destination tensor. Data types supported: Same as @p input.
      * @param[in]      pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      */
@@ -57,7 +57,7 @@ public:
      *
      * @note QS8, QS16 and F16 are supported for pool sizes 2 and 3 only
      *
-     * @param[in] input     Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32.
+     * @param[in] input     Source tensor. (Written to only when padding != 0) Data types supported: QS8/QASYMM8/QS16/F16/F32.
      * @param[in] output    Destination tensor. Data types supported: Same as @p input.
      * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      *
diff --git a/src/core/NEON/kernels/NEFillBorderKernel.cpp b/src/core/NEON/kernels/NEFillBorderKernel.cpp
index af04955608..747b8b1bfe 100644
--- a/src/core/NEON/kernels/NEFillBorderKernel.cpp
+++ b/src/core/NEON/kernels/NEFillBorderKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -105,7 +105,10 @@ NEFillBorderKernel::NEFillBorderKernel()
 
 void NEFillBorderKernel::configure(ITensor *tensor, BorderSize border_size, BorderMode border_mode, const PixelValue &constant_border_value)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QS8, DataType::QS16, DataType::U16, DataType::S16, DataType::F16, DataType::U32, DataType::S32, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(tensor, 1, DataType::U8, DataType::QS8, DataType::QASYMM8,
+                                                  DataType::QS16, DataType::U16, DataType::S16,
+                                                  DataType::U32, DataType::S32,
+                                                  DataType::F16, DataType::F32);
 
     _tensor      = tensor;
     _border_size = border_size;
@@ -140,6 +143,7 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
         {
             switch(_tensor->info()->data_type())
             {
+                case DataType::QASYMM8:
                 case DataType::U8:
                     fill_constant_value_single_channel<uint8_t>(window);
                     break;
@@ -184,6 +188,7 @@ void NEFillBorderKernel::run(const Window &window, const ThreadInfo &info)
         {
             switch(_tensor->info()->data_type())
             {
+                case DataType::QASYMM8:
                 case DataType::U8:
                     fill_replicate_single_channel<uint8_t>(window);
                     break;
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 47372c2d5d..ac183d2f30 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -28,6 +28,7 @@
 #include "arm_compute/core/FixedPoint.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
 #include "arm_compute/core/NEON/NEFixedPoint.h"
 #include "arm_compute/core/NEON/NEMath.h"
 #include "arm_compute/core/TensorInfo.h"
@@ -35,6 +36,8 @@
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/core/Window.h"
 
+#include "support/ToolchainSupport.h"
+
 #include <algorithm>
 #include <arm_neon.h>
 #include <cmath>
@@ -98,6 +101,56 @@ inline qint16_t calculate_avg_scale_q16(const Coordinates &id, int pool_size, in
     return sshr_qs16(scale_values_q16[val], (15 - fixed_point_position));
 }
 
+template <bool exclude_padding>
+inline void scale_vector_s16x8(uint16x8_t &v, const Coordinates &id, int id_offset, int step,
+                               const int pool_size, const int upper_bound_w, const int upper_bound_h,
+                               const int pad_x, const int pad_y, const int stride_x, const int stride_y)
+{
+    int       start_x = (id.x() + id_offset) * stride_x - pad_x;
+    int       start_y = id.y() * stride_y - pad_y;
+    const int end_y   = std::min(start_y + pool_size, upper_bound_h);
+    if(exclude_padding)
+    {
+        start_y = std::max(0, start_y);
+    }
+
+    std::array<uint16_t, 8> elems =
+    {
+        {
+            vgetq_lane_u16(v, 0),
+            vgetq_lane_u16(v, 1),
+            vgetq_lane_u16(v, 2),
+            vgetq_lane_u16(v, 3),
+            vgetq_lane_u16(v, 4),
+            vgetq_lane_u16(v, 5),
+            vgetq_lane_u16(v, 6),
+            vgetq_lane_u16(v, 7),
+        }
+    };
+
+    for(auto &el : elems)
+    {
+        int       c_start_x = start_x;
+        const int end_x     = std::min(c_start_x + pool_size, upper_bound_w);
+        if(exclude_padding)
+        {
+            c_start_x = std::max(0, c_start_x);
+        }
+        float scale = 1.f / ((end_y - start_y) * (end_x - c_start_x));
+        el *= scale;
+        start_x += step * stride_x;
+    }
+
+    v = vsetq_lane_u16(elems[0], v, 0);
+    v = vsetq_lane_u16(elems[1], v, 1);
+    v = vsetq_lane_u16(elems[2], v, 2);
+    v = vsetq_lane_u16(elems[3], v, 3);
+    v = vsetq_lane_u16(elems[4], v, 4);
+    v = vsetq_lane_u16(elems[5], v, 5);
+    v = vsetq_lane_u16(elems[6], v, 6);
+    v = vsetq_lane_u16(elems[7], v, 7);
+}
+
 Status
validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h, int pool_size) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); @@ -114,9 +167,9 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride(); static const std::set supported_pool_sizes = { 2, 3 }; - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_fixed_point(input->data_type())); - ARM_COMPUTE_RETURN_ERROR_ON((supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()) && (input->data_type() != DataType::F32)); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type())); + ARM_COMPUTE_RETURN_ERROR_ON((supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()) && ((input->data_type() != DataType::F32) && (input->data_type() != DataType::QASYMM8))); ARM_COMPUTE_RETURN_ERROR_ON(!is_global_pooling && (pool_pad_x >= pool_size || pool_pad_y >= pool_size)); ARM_COMPUTE_RETURN_ERROR_ON(is_global_pooling && (input->tensor_shape().x() != input->tensor_shape().y())); ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_fixed_point(input->data_type()) && pool_stride_x > 2); @@ -185,6 +238,26 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen } num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16; break; + case DataType::QASYMM8: + switch(pool_size) + { + case 2: + num_elems_read_per_iteration = 16; + num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15; + num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16; + break; + case 3: + num_elems_read_per_iteration = 16; + num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14; + num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16; + break; + default: + num_elems_read_per_iteration = 1; + num_elems_processed_per_iteration = 1; + num_elems_horizontal_window = 1; + break; + } + break; case DataType::QS16: num_elems_read_per_iteration = 8; switch(pool_size) @@ -328,12 +401,15 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons _output = output; _pool_info = pool_info; + // Get data type + const DataType data_type = input->info()->data_type(); + // Select appropriate function - switch(pool_size) + if(data_type == DataType::QS8) { - case 2: - if(input->info()->data_type() == DataType::QS8) - { + switch(pool_size) + { + case 2: switch(pool_type) { case PoolingType::AVG: @@ -345,9 +421,74 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } + break; + case 3: + switch(pool_type) + { + case PoolingType::AVG: + _func = &NEPoolingLayerKernel::pooling3_q8; + break; + case PoolingType::MAX: + _func = &NEPoolingLayerKernel::pooling3_q8; + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling type!"); + } + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling size!"); + } + } + else if(data_type == DataType::QASYMM8) + { + if(pool_size == 2 && pool_stride_x < 3) + { + switch(pool_type) + { + case PoolingType::AVG: + _func = (exclude_padding) ? 
&NEPoolingLayerKernel::pooling2_qasymm8 : &NEPoolingLayerKernel::pooling2_qasymm8; + break; + case PoolingType::MAX: + _func = &NEPoolingLayerKernel::pooling2_qasymm8; + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling type!"); + } + } + else if(pool_size == 3 && pool_stride_x < 3) + { + switch(pool_type) + { + case PoolingType::AVG: + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_qasymm8 : &NEPoolingLayerKernel::pooling3_qasymm8; + break; + case PoolingType::MAX: + _func = &NEPoolingLayerKernel::pooling3_qasymm8; + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - else if(input->info()->data_type() == DataType::QS16) + } + else + { + switch(pool_type) { + case PoolingType::AVG: + _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_qasymm8 : &NEPoolingLayerKernel::poolingN_qasymm8; + break; + case PoolingType::MAX: + _func = &NEPoolingLayerKernel::poolingN_qasymm8; + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling type!"); + } + } + } + else if(data_type == DataType::QS16) + { + switch(pool_size) + { + case 2: switch(pool_type) { case PoolingType::AVG: @@ -359,9 +500,29 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - } - else if(input->info()->data_type() == DataType::F16) - { + break; + case 3: + switch(pool_type) + { + case PoolingType::AVG: + _func = &NEPoolingLayerKernel::pooling3_q16; + break; + case PoolingType::MAX: + _func = &NEPoolingLayerKernel::pooling3_q16; + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling type!"); + } + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling size!"); + } + } + else if(data_type == DataType::F16) + { + switch(pool_size) + { + case 2: switch(pool_type) { case PoolingType::AVG: @@ -376,121 +537,96 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - } - else if(input->info()->data_type() == DataType::F32) - { + break; + case 3: switch(pool_type) { case PoolingType::AVG: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32 : &NEPoolingLayerKernel::pooling2_f32; + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16 : &NEPoolingLayerKernel::pooling3_f16; break; case PoolingType::L2: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32 : &NEPoolingLayerKernel::pooling2_f32; + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16 : &NEPoolingLayerKernel::pooling3_f16; break; case PoolingType::MAX: - _func = &NEPoolingLayerKernel::pooling2_f32; + _func = &NEPoolingLayerKernel::pooling3_f16; break; default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - } - break; - case 3: - if(input->info()->data_type() == DataType::QS8) - { + break; + default: + ARM_COMPUTE_ERROR("Unsupported pooling size!"); + } + } + else if(data_type == DataType::F32) + { + switch(pool_size) + { + case 2: switch(pool_type) { case PoolingType::AVG: - _func = &NEPoolingLayerKernel::pooling3_q8; + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling2_f32 : &NEPoolingLayerKernel::pooling2_f32; + break; + case PoolingType::L2: + _func = (exclude_padding) ? 
&NEPoolingLayerKernel::pooling2_f32 : &NEPoolingLayerKernel::pooling2_f32; break; case PoolingType::MAX: - _func = &NEPoolingLayerKernel::pooling3_q8; + _func = &NEPoolingLayerKernel::pooling2_f32; break; default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - } - else if(input->info()->data_type() == DataType::QS16) - { + break; + case 3: switch(pool_type) { case PoolingType::AVG: - _func = &NEPoolingLayerKernel::pooling3_q16; + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32 : &NEPoolingLayerKernel::pooling3_f32; + break; + case PoolingType::L2: + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32 : &NEPoolingLayerKernel::pooling3_f32; break; case PoolingType::MAX: - _func = &NEPoolingLayerKernel::pooling3_q16; + _func = &NEPoolingLayerKernel::pooling3_f32; break; default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - } - else if(input->info()->data_type() == DataType::F16) - { + break; + case 7: switch(pool_type) { case PoolingType::AVG: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16 : &NEPoolingLayerKernel::pooling3_f16; + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32 : &NEPoolingLayerKernel::pooling7_f32; break; case PoolingType::L2: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f16 : &NEPoolingLayerKernel::pooling3_f16; + _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32 : &NEPoolingLayerKernel::pooling7_f32; break; case PoolingType::MAX: - _func = &NEPoolingLayerKernel::pooling3_f16; + _func = &NEPoolingLayerKernel::pooling7_f32; break; default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - } - else if(input->info()->data_type() == DataType::F32) - { + break; + default: switch(pool_type) { case PoolingType::AVG: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32 : &NEPoolingLayerKernel::pooling3_f32; + _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_f32 : &NEPoolingLayerKernel::poolingN_f32; break; case PoolingType::L2: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling3_f32 : &NEPoolingLayerKernel::pooling3_f32; + _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_f32 : &NEPoolingLayerKernel::poolingN_f32; break; case PoolingType::MAX: - _func = &NEPoolingLayerKernel::pooling3_f32; + _func = &NEPoolingLayerKernel::poolingN_f32; break; default: ARM_COMPUTE_ERROR("Unsupported pooling type!"); } - } - break; - case 7: - switch(pool_type) - { - case PoolingType::AVG: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32 : &NEPoolingLayerKernel::pooling7_f32; - break; - case PoolingType::L2: - _func = (exclude_padding) ? &NEPoolingLayerKernel::pooling7_f32 : &NEPoolingLayerKernel::pooling7_f32; - break; - case PoolingType::MAX: - _func = &NEPoolingLayerKernel::pooling7_f32; - break; - default: - ARM_COMPUTE_ERROR("Unsupported pooling type!"); - } - break; - default: - switch(pool_type) - { - case PoolingType::AVG: - _func = (exclude_padding) ? &NEPoolingLayerKernel::poolingN_f32 : &NEPoolingLayerKernel::poolingN_f32; - break; - case PoolingType::L2: - _func = (exclude_padding) ? 
&NEPoolingLayerKernel::poolingN_f32 : &NEPoolingLayerKernel::poolingN_f32; - break; - case PoolingType::MAX: - _func = &NEPoolingLayerKernel::poolingN_f32; - break; - default: - ARM_COMPUTE_ERROR("Unsupported pooling type!"); - } - break; + break; + } } // Configure kernel window @@ -563,6 +699,119 @@ void NEPoolingLayerKernel::pooling2_q8(const Window &window_input, const Window input, output); } +template +void NEPoolingLayerKernel::pooling2_qasymm8(const Window &window_input, const Window &window) +{ + Iterator input(_input, window_input); + Iterator output(_output, window); + + constexpr int pool_size = 2; + int pool_pad_x = 0; + int pool_pad_y = 0; + int pool_stride_x = 0; + int pool_stride_y = 0; + std::tie(pool_pad_x, pool_pad_y) = _pool_info.pad_stride_info().pad(); + std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride(); + const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x); + const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y); + + const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_x), -static_cast(pool_pad_y))); + const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast(pool_pad_x), -static_cast(pool_pad_y) + 1)); + + const int scale_step_x = (pool_stride_x == 1) ? 2 : 1; + + execute_window_loop(window, [&](const Coordinates & id) + { + const auto top_data = vld1q_u8(reinterpret_cast(input_top_ptr + input.offset())); + const auto bottom_data = vld1q_u8(reinterpret_cast(input_bottom_ptr + input.offset())); + uint8x8_t lower_res = {}; + uint8x8_t upper_res = {}; + + if(pooling_type != PoolingType::MAX) + { + const uint16x8x2_t top_data_u16 = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } }; + const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } }; + + // Add rows + const uint16x8x2_t vrsum = + { + { + vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]), + vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]), + } + }; + + // Pair-wise add row data + const uint16x4x2_t vpsum = + { + { + vpadd_u16(vget_low_u16(vrsum.val[0]), vget_high_u16(vrsum.val[0])), + vpadd_u16(vget_low_u16(vrsum.val[1]), vget_high_u16(vrsum.val[1])), + } + }; + + uint16x8_t res_lower = vcombine_u16(vpsum.val[0], vpsum.val[1]); + + // Scale lower result + scale_vector_s16x8(res_lower, id, 0, scale_step_x, + pool_size, upper_bound_w, upper_bound_h, + pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y); + lower_res = vmovn_u16(res_lower); + + // Compute upper result for stride_x == 1 + if(pool_stride_x == 1) + { + // Shifted row sum + const uint16x8x2_t vrsum_shifted = + { + { + vextq_u16(vrsum.val[0], vrsum.val[1], 1), + vextq_u16(vrsum.val[1], vrsum.val[1], 1) + } + }; + + // Pair-wise add shifted row + const uint16x4x2_t vpsum_shifted = + { + { + vpadd_u16(vget_low_u16(vrsum_shifted.val[0]), vget_high_u16(vrsum_shifted.val[0])), + vpadd_u16(vget_low_u16(vrsum_shifted.val[1]), vget_high_u16(vrsum_shifted.val[1])), + } + }; + uint16x8_t res_upper = vcombine_u16(vpsum_shifted.val[0], vpsum_shifted.val[1]); + + // Scale lower result + scale_vector_s16x8(res_upper, id, 1, 2, + pool_size, upper_bound_w, upper_bound_h, + pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y); + upper_res = vmovn_u16(res_upper); + } + } + else + { + const uint8x16_t max_data = vmaxq_u8(top_data, bottom_data); + lower_res = vpmax_u8(vget_low_u8(max_data), 
vget_high_u8(max_data));
+            if(pool_stride_x == 1)
+            {
+                const uint8x16_t max_data_shifted = vextq_u8(max_data, max_data, 1);
+                upper_res                         = vpmax_u8(vget_low_u8(max_data_shifted), vget_high_u8(max_data_shifted));
+            }
+        }
+
+        // Store result
+        if(pool_stride_x == 1)
+        {
+            const uint8x8x2_t res = { { lower_res, upper_res } };
+            vst2_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
+        }
+        else
+        {
+            vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), lower_res);
+        }
+    },
+    input, output);
+}
+
 template <PoolingType pooling_type>
 void NEPoolingLayerKernel::pooling2_q16(const Window &window_input, const Window &window)
 {
@@ -892,6 +1141,125 @@ void NEPoolingLayerKernel::pooling3_q8(const Window &window_input, const Window
     input, output);
 }
 
+template <PoolingType pooling_type, bool exclude_padding>
+void NEPoolingLayerKernel::pooling3_qasymm8(const Window &window_input, const Window &window)
+{
+    Iterator input(_input, window_input);
+    Iterator output(_output, window);
+
+    constexpr int pool_size     = 3;
+    int           pool_pad_x    = 0;
+    int           pool_pad_y    = 0;
+    int           pool_stride_x = 0;
+    int           pool_stride_y = 0;
+    std::tie(pool_pad_x, pool_pad_y)       = _pool_info.pad_stride_info().pad();
+    std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
+    const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
+    const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
+
+    const uint8_t *const input_top_ptr    = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y)));
+    const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 1));
+    const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_x), -static_cast<int>(pool_pad_y) + 2));
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const auto top_data    = vld1q_u8(reinterpret_cast<const uint8_t *>(input_top_ptr + input.offset()));
+        const auto middle_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_middle_ptr + input.offset()));
+        const auto bottom_data = vld1q_u8(reinterpret_cast<const uint8_t *>(input_bottom_ptr + input.offset()));
+
+        if(pooling_type == PoolingType::AVG)
+        {
+            // Convert data to u16
+            const uint16x8x2_t top_data_u16    = { { vmovl_u8(vget_low_u8(top_data)), vmovl_u8(vget_high_u8(top_data)) } };
+            const uint16x8x2_t middle_data_u16 = { { vmovl_u8(vget_low_u8(middle_data)), vmovl_u8(vget_high_u8(middle_data)) } };
+            const uint16x8x2_t bottom_data_u16 = { { vmovl_u8(vget_low_u8(bottom_data)), vmovl_u8(vget_high_u8(bottom_data)) } };
+
+            // Calculate row sums
+            const uint16x8x2_t vrsum =
+            {
+                {
+                    vaddq_u16(vaddq_u16(top_data_u16.val[0], bottom_data_u16.val[0]), middle_data_u16.val[0]),
+                    vaddq_u16(vaddq_u16(top_data_u16.val[1], bottom_data_u16.val[1]), middle_data_u16.val[1]),
+                }
+            };
+            const uint16x8x2_t vrsum_shifted_1 =
+            {
+                {
+                    vextq_u16(vrsum.val[0], vrsum.val[1], 1),
+                    vextq_u16(vrsum.val[1], vrsum.val[1], 1)
+                }
+            };
+            const uint16x8x2_t vrsum_shifted_2 =
+            {
+                {
+                    vextq_u16(vrsum.val[0], vrsum.val[1], 2),
+                    vextq_u16(vrsum.val[1], vrsum.val[1], 2)
+                }
+            };
+            // Calculate final sum
+            uint16x8x2_t final_sum =
+            {
+                {
+                    vaddq_u16(vaddq_u16(vrsum.val[0], vrsum_shifted_1.val[0]), vrsum_shifted_2.val[0]),
+                    vaddq_u16(vaddq_u16(vrsum.val[1], vrsum_shifted_1.val[1]), vrsum_shifted_2.val[1]),
+                }
+            };
+            if(pool_stride_x == 2)
+            {
+                uint16x8_t res =
+                {
+                    vgetq_lane_u16(final_sum.val[0], 0),
+                    vgetq_lane_u16(final_sum.val[0], 2),
+                    vgetq_lane_u16(final_sum.val[0], 4),
+                    vgetq_lane_u16(final_sum.val[0], 6),
+                    vgetq_lane_u16(final_sum.val[1], 0),
+                    vgetq_lane_u16(final_sum.val[1], 2),
+                    vgetq_lane_u16(final_sum.val[1], 4),
+                    vgetq_lane_u16(final_sum.val[1], 6),
+                };
+
+                scale_vector_s16x8<exclude_padding>(res, id, 0, 1,
+                                                    pool_size, upper_bound_w, upper_bound_h,
+                                                    pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
+                vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), vmovn_u16(res));
+            }
+            else
+            {
+                // Scale lower result
+                scale_vector_s16x8<exclude_padding>(final_sum.val[0], id, 0, 1,
+                                                    pool_size, upper_bound_w, upper_bound_h,
+                                                    pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
+                // Scale upper result
+                scale_vector_s16x8<exclude_padding>(final_sum.val[1], id, 8, 1,
+                                                    pool_size, upper_bound_w, upper_bound_h,
+                                                    pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
+                const uint8x16_t res = vcombine_u8(vmovn_u16(final_sum.val[0]), vmovn_u16(final_sum.val[1]));
+                vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
+            }
+        }
+        else
+        {
+            const uint8x16_t max_data        = vmaxq_u8(vmaxq_u8(top_data, bottom_data), middle_data);
+            const uint8x16_t max_data_shift1 = vextq_u8(max_data, max_data, 1);
+            const uint8x16_t max_data_shift2 = vextq_u8(max_data, max_data, 2);
+            const uint8x16_t final_max       = vmaxq_u8(vmaxq_u8(max_data, max_data_shift1), max_data_shift2);
+
+            if(pool_stride_x == 2)
+            {
+                const uint8x8x2_t      table      = { { vget_low_u8(final_max), vget_high_u8(final_max) } };
+                static const uint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
+                const uint8x8_t        res        = vtbl2_u8(table, lookup_val);
+                vst1_u8(reinterpret_cast<uint8_t *>(output.ptr()), res);
+            }
+            else
+            {
+                vst1q_u8(reinterpret_cast<uint8_t *>(output.ptr()), final_max);
+            }
+        }
+    },
+    input, output);
+}
+
 template <PoolingType pooling_type>
 void NEPoolingLayerKernel::pooling3_q16(const Window &window_input, const Window &window)
 {
@@ -1232,6 +1600,98 @@ void NEPoolingLayerKernel::poolingN_f32(const Window &window_input, const Window
     input, output);
 }
 
+template <PoolingType pooling_type, bool exclude_padding>
+void NEPoolingLayerKernel::poolingN_qasymm8(const Window &window_input, const Window &window)
+{
+    Iterator input(_input, window_input);
+    Iterator output(_output, window);
+
+    const int pool_size     = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size();
+    int       pool_pad_x    = 0;
+    int       pool_pad_y    = 0;
+    int       pool_stride_x = 0;
+    int       pool_stride_y = 0;
+    std::tie(pool_pad_x, pool_pad_y)       = _pool_info.pad_stride_info().pad();
+    std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
+    const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x);
+    const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        uint8_t res = 0;
+
+        if(pooling_type != PoolingType::MAX)
+        {
+            uint32x4_t vres = vdupq_n_u32(0);
+            uint32_t   sres = 0;
+
+            // Calculate scale
+            const float scale = calculate_avg_scale<exclude_padding>(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_x, pool_pad_y, pool_stride_x, pool_stride_y);
+
+            // Perform pooling
+            for(int y = 0; y < pool_size; ++y)
+            {
+                int x = 0;
+                for(; x <= (pool_size - 8); x += 8)
+                {
+                    const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
+
+                    const uint16x8_t data_u16 = vmovl_u8(data);
+                    vres                      = vaddq_u32(vres, vaddl_u16(vget_high_u16(data_u16), vget_low_u16(data_u16)));
+                }
+
+                // Leftover for loop
+                for(; x < pool_size; ++x)
+                {
+                    uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
+                    sres += data;
+                }
+            }
+
+            // Reduction
+            const auto tmp = vpadd_u32(vget_high_u32(vres), vget_low_u32(vres));
+            sres += vget_lane_u32(tmp, 0) + vget_lane_u32(tmp, 1);
+
+            // Divide by scale
+            res = static_cast<uint8_t>(support::cpp11::round(sres * scale));
+        }
+        else
+        {
+            uint8x8_t vres = vdup_n_u8(0);
+            res            = 0;
+
+            for(int y = 0; y < pool_size; ++y)
+            {
+                int x = 0;
+                for(; x <= (pool_size - 8); x += 8)
+                {
+                    const uint8x8_t data = vld1_u8(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
+                    vres                 = vmax_u8(vres, data);
+                }
+
+                // Leftover for loop
+                for(; x < pool_size; ++x)
+                {
+                    const uint8_t data = *(reinterpret_cast<const uint8_t *>(input.ptr() + (x - pool_pad_x) * _input->info()->strides_in_bytes().x() + (y - pool_pad_y) * _input->info()->strides_in_bytes().y()));
+                    res                = std::max(res, data);
+                }
+            }
+
+            // Reduce max
+            vres = vpmax_u8(vres, vres);
+            vres = vpmax_u8(vres, vres);
+            vres = vpmax_u8(vres, vres);
+
+            // Get max value
+            res = std::max(res, vget_lane_u8(vres, 0));
+        }
+
+        // Store result
+        *(reinterpret_cast<uint8_t *>(output.ptr())) = res;
+    },
+    input, output);
+}
+
 Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
@@ -1269,6 +1729,7 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
 
     const unsigned int pool_stride_x = _pool_info.pad_stride_info().stride().first;
     const unsigned int pool_stride_y = _pool_info.pad_stride_info().stride().second;
+    const unsigned int pool_size     = _pool_info.pool_size();
 
     // Set step for input in x and y direction for the input
     Window window_input(window);
@@ -1282,6 +1743,15 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
             window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
             break;
         }
+        case DataType::QASYMM8:
+        {
+            window_x_inc = pool_stride_x;
+            if((pool_size == 2 || pool_size == 3) && pool_stride_x < 3)
+            {
+                window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;
+            }
+            break;
+        }
         case DataType::F32:
         {
             window_x_inc = pool_stride_x;
diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp
index a8249c4840..83a843de58 100644
--- a/src/core/Utils.cpp
+++ b/src/core/Utils.cpp
@@ -320,6 +320,7 @@ void arm_compute::print_consecutive_elements(std::ostream &s, DataType dt, const
 {
     switch(dt)
     {
+        case DataType::QASYMM8:
         case DataType::U8:
             print_consecutive_elements_impl<uint8_t>(s, ptr, n, stream_width, element_delim);
             break;
@@ -355,6 +356,7 @@ int arm_compute::max_consecutive_elements_display_width(std::ostream &s, DataTyp
 {
     switch(dt)
     {
+        case DataType::QASYMM8:
         case DataType::U8:
             return max_consecutive_elements_display_width_impl<uint8_t>(s, ptr, n);
         case DataType::QS8:
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index 2341633362..201bf87b47 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -40,16 +40,14 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
     k->configure(input, output, pool_info);
     _kernel = std::move(k);
 
-    // Configure border depending on operation required
+    // Configure border depending on operation required (quantize border in case of asymmetric data_type)
     BorderMode border_mode = (PoolingType::MAX == pool_info.pool_type()) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
-    // Quantize border in case data type is quantized asymmetric data type
-    uint32_t border_value = 0;
+    PixelValue zero_value(0.f);
     if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
     {
-        border_value = static_cast<uint32_t>(input->info()->quantization_info().quantize(0.f, RoundingPolicy::TO_NEAREST_UP));
+        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
     }
-
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(border_value));
+    _border_handler.configure(input, _kernel->border_size(), border_mode, zero_value);
 }
 
 Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index 530c7fca4a..8a32507a73 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -43,9 +43,14 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
     // Configure pooling kernel
     _pooling_layer_kernel.configure(input, output, pool_info);
 
-    // Configure border depending on operation required
+    // Configure border depending on operation required (quantize border in case of asymmetric data_type)
     BorderMode border_mode = (pool_info.pool_type() == PoolingType::MAX) ? BorderMode::REPLICATE : BorderMode::CONSTANT;
-    _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, PixelValue(static_cast<float>(0.f)));
+    PixelValue zero_value(0.f);
+    if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding())
+    {
+        zero_value = PixelValue(static_cast<uint8_t>(input->info()->quantization_info().offset));
+    }
+    _border_handler.configure(input, _pooling_layer_kernel.border_size(), border_mode, zero_value);
 }
 
 Status NEPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
diff --git a/tests/benchmark/NEON/PoolingLayer.cpp b/tests/benchmark/NEON/PoolingLayer.cpp
index 4815959ae6..46f9dfe361 100644
--- a/tests/benchmark/NEON/PoolingLayer.cpp
+++ b/tests/benchmark/NEON/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -46,9 +46,9 @@ namespace test
 namespace
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32, DataType::QS8, DataType::QASYMM8 });
 #else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8 });
+const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::QS8, DataType::QASYMM8 });
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 } // namespace
 
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index 79a732a59a..86fd802d13 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -53,12 +53,18 @@ const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::m
                                            framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
                                            framework::dataset::make("ExcludePadding", { false }));
 
+/** Input data set for asymmetric data type */
+const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3, 9 })),
+                                                framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })),
+                                                framework::dataset::make("ExcludePadding", { true, false }));
+
 constexpr AbsoluteTolerance<float> tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-constexpr AbsoluteTolerance<float> tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-constexpr AbsoluteTolerance<qint8_t> tolerance_qs8(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
-constexpr AbsoluteTolerance<qint16_t> tolerance_qs16(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.01f);    /**< Tolerance value for comparing reference's output against implementation's output for float types */
+#endif                                                      /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+constexpr AbsoluteTolerance<qint8_t> tolerance_qs8(0);      /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+constexpr AbsoluteTolerance<qint16_t> tolerance_qs16(0);    /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1);  /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */
 } // namespace
 
 TEST_SUITE(NEON)
@@ -148,7 +154,7 @@ TEST_SUITE_END()
 template <typename T>
 using NEPoolingLayerFixedPointFixture = PoolingLayerValidationFixedPointFixture<Tensor, Accessor, NEPoolingLayer, T>;
 
-TEST_SUITE(Quantized)
+TEST_SUITE(FixedPoint)
 TEST_SUITE(QS8)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerFixedPointFixture<int8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS,
                                                                                                                framework::dataset::make("DataType", DataType::QS8))),
@@ -184,6 +190,31 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerFixedPointFixture<int16_t>, frame
 TEST_SUITE_END()
 TEST_SUITE_END()
 
+TEST_SUITE(Quantized)
+
+template <typename T>
+using NEPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture<Tensor, Accessor, NEPoolingLayer, T>;
+
+TEST_SUITE(QASYMM8)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                               framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                               framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127),
+                                                                                                                                                              QuantizationInfo(7.f / 255, 123)
+                                                                                                                                                            })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8,
+                                                                                                                   framework::dataset::make("DataType", DataType::QASYMM8))),
+                                                                                                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END()
+TEST_SUITE_END()
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
-- 
cgit v1.2.1
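
Note on the QASYMM8 average-pooling arithmetic in the patch above: the kernels sum the raw unsigned 8-bit values in 16/32-bit accumulators and rescale each output by the size of its pooling region before narrowing back to 8 bits; scale_vector_s16x8() and calculate_avg_scale() compute that per-output divisor, shrinking it to the in-bounds area when ExcludePadding is set. The sketch below is a minimal scalar model of the same arithmetic, not part of the patch: the helper name avg_pool_qasymm8 is hypothetical, the NEON vectorization is dropped, and the constant border is modelled as 0, whereas the patch fills it with the quantization offset (the quantized value of 0.f) via the PixelValue changes in CLPoolingLayer/NEPoolingLayer.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>
    #include <vector>

    // Scalar model of the QASYMM8 AVG path: sum the u8 samples of one pooling
    // window, divide by the window area and round to nearest, as the NEON
    // kernels do with u16/u32 accumulators.
    static uint8_t avg_pool_qasymm8(const std::vector<uint8_t> &in, int w, int h,
                                    int out_x, int out_y, int pool_size, int stride,
                                    int pad, bool exclude_padding)
    {
        int start_x = out_x * stride - pad;
        int start_y = out_y * stride - pad;
        // Mirrors upper_bound_w/h in the kernels: padding extends the valid
        // range only when it is *included* in the average.
        const int end_x = std::min(start_x + pool_size, w + (exclude_padding ? 0 : pad));
        const int end_y = std::min(start_y + pool_size, h + (exclude_padding ? 0 : pad));
        if(exclude_padding)
        {
            start_x = std::max(0, start_x);
            start_y = std::max(0, start_y);
        }

        uint32_t sum = 0;
        for(int y = start_y; y < end_y; ++y)
        {
            for(int x = start_x; x < end_x; ++x)
            {
                // Out-of-bounds samples are the constant border, modelled as 0
                // here; the patch fills them with the quantized zero-point.
                if(x >= 0 && x < w && y >= 0 && y < h)
                {
                    sum += in[y * w + x];
                }
            }
        }

        // The divisor is the (possibly clamped) pooling region, so excluded
        // padding never dilutes the average.
        const float scale = 1.f / ((end_y - start_y) * (end_x - start_x));
        return static_cast<uint8_t>(std::lround(sum * scale));
    }

Because averaging raw quantized codes is linear, the result stays in the input's quantization space, so the AVG path never needs to dequantize; the 1-LSB tolerance_qasymm8 in the tests leaves room for a one-unit rounding difference against the reference.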