aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/NEPoolingLayerKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp563
1 files changed, 2 insertions, 561 deletions
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index 7877cf5cc0..e586b72d30 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -25,7 +25,6 @@
#include "arm_compute/core/AccessWindowStatic.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/FixedPoint.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
@@ -79,32 +78,6 @@ inline float calculate_avg_scale(const Coordinates &id, const int pool_size_x, c
return 1.f / ((end_y - start_y) * (end_x - start_x));
}
-inline qint8_t calculate_avg_scale_q8(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
- int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
-{
- static const std::array<qint8_t, 10> scale_values_q8 =
- { { 0x0, 0x0, 0x40, 0x2A, 0x20, 0x19, 0x15, 0x12, 0x10, 0xE } };
- const int start_x = id.x() * stride_x - pad_x;
- const int start_y = id.y() * stride_y - pad_y;
- const int end_x = std::min(start_x + pool_size, upper_bound_w);
- const int end_y = std::min(start_y + pool_size, upper_bound_h);
- const int val = ((end_y - start_y) * (end_x - start_x));
- return sshr_qs8(scale_values_q8[val], (7 - fixed_point_position));
-}
-
-inline qint16_t calculate_avg_scale_q16(const Coordinates &id, int pool_size, int upper_bound_w, int upper_bound_h,
- int pad_x, int pad_y, int stride_x, int stride_y, int fixed_point_position)
-{
- static std::array<qint16_t, 10> scale_values_q16 =
- { { 0x0, 0x0, 0x4000, 0x2AAB, 0x2000, 0x199A, 0x1555, 0x1249, 0x1000, 0xE38 } };
- const int start_x = id.x() * stride_x - pad_x;
- const int start_y = id.y() * stride_y - pad_y;
- const int end_x = std::min(start_x + pool_size, upper_bound_w);
- const int end_y = std::min(start_y + pool_size, upper_bound_h);
- const int val = ((end_y - start_y) * (end_x - start_x));
- return sshr_qs16(scale_values_q16[val], (15 - fixed_point_position));
-}
-
template <bool exclude_padding>
inline void scale_vector_s16x8(uint16x8_t &v, const Coordinates &id, int id_offset, int step,
const int pool_size, const int upper_bound_w, const int upper_bound_h,
@@ -163,22 +136,18 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
int pool_stride_y = 0;
PoolingType pool_type = pool_info.pool_type();
const PadStrideInfo pad_stride_info = pool_info.pad_stride_info();
- const bool exclude_padding = pool_info.exclude_padding();
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
static const std::set<int> supported_pool_sizes = { 2, 3 };
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON((supported_pool_sizes.find(pool_size_x) == supported_pool_sizes.end()) && ((input->data_type() != DataType::F32) && (input->data_type() != DataType::QASYMM8))
&& (pool_type != PoolingType::MAX));
- ARM_COMPUTE_RETURN_ERROR_ON(is_data_type_fixed_point(input->data_type()) && pool_stride_x > 2);
- ARM_COMPUTE_RETURN_ERROR_ON(exclude_padding && is_data_type_fixed_point(input->data_type()));
if(output->total_size() != 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
|| (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
@@ -236,22 +205,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
{
switch(input->data_type())
{
- case DataType::QS8:
- num_elems_read_per_iteration = 16;
- switch(pool_size_x)
- {
- case 2:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 8 : 15;
- break;
- case 3:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 8 : 16;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 7 : 14;
- break;
- default:
- break;
- }
- break;
case DataType::QASYMM8:
if(is_nhwc)
{
@@ -274,22 +227,6 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
break;
}
break;
- case DataType::QS16:
- num_elems_read_per_iteration = 8;
- switch(pool_size_x)
- {
- case 2:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 4 : 7;
- break;
- case 3:
- num_elems_horizontal_window = (pool_stride_x == 2) ? 4 : 8;
- num_elems_processed_per_iteration = (pool_stride_x == 2) ? 3 : 6;
- break;
- default:
- break;
- }
- break;
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
case DataType::F16:
if(is_nhwc)
@@ -462,64 +399,7 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
const DataType data_type = input->info()->data_type();
const bool is_nchw = data_layout == DataLayout::NCHW;
- // Select appropriate function
- if(data_type == DataType::QS8)
- {
- if(_is_square)
- {
- switch(pool_size_x)
- {
- case 2:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling2_q8_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling2_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- case 3:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling3_q8_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling3_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- default:
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- }
- }
- else
- {
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q8_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- }
- }
- else if(data_type == DataType::QASYMM8)
+ if(data_type == DataType::QASYMM8)
{
if(pool_size_x == 2 && pool_stride_x < 3 && _is_square)
{
@@ -606,62 +486,6 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
}
}
}
- else if(data_type == DataType::QS16)
- {
- if(_is_square)
- {
- switch(pool_size_x)
- {
- case 2:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling2_q16_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling2_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- case 3:
- switch(pool_type)
- {
- case PoolingType::AVG:
- _func = &NEPoolingLayerKernel::pooling3_q16_nchw<PoolingType::AVG>;
- break;
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::pooling3_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- default:
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- break;
- }
- }
- else
- {
- switch(pool_type)
- {
- case PoolingType::MAX:
- _func = &NEPoolingLayerKernel::poolingMxN_q16_nchw<PoolingType::MAX>;
- break;
- default:
- ARM_COMPUTE_ERROR("Unsupported pooling type!");
- }
- }
- }
else if(data_type == DataType::F16)
{
if(_is_square)
@@ -1022,71 +846,6 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
INEKernel::configure(win_config.second);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling2_q8_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 2;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
- const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
- qint8x8_t lower_res = {};
- qint8x8_t upper_res = {};
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
- const qint8x8_t scale_vec = vdup_n_qs8(scale);
-
- // Perform pooling
- const qint8x16_t sum_data = vqaddq_qs8(top_data, bottom_data);
- lower_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data), vget_high_s8(sum_data)), scale_vec, fixed_point_position);
- if(pool_stride_x == 1)
- {
- const qint8x16_t sum_data_shifted = vextq_s8(sum_data, sum_data, 1);
- upper_res = vqmul_qs8(vpadd_s8(vget_low_s8(sum_data_shifted), vget_high_s8(sum_data_shifted)), scale_vec, fixed_point_position);
- }
- }
- else
- {
- const qint8x16_t max_data = vmaxq_s8(top_data, bottom_data);
- lower_res = vpmax_s8(vget_low_s8(max_data), vget_high_s8(max_data));
- if(pool_stride_x == 1)
- {
- const qint8x16_t max_data_shifted = vextq_s8(max_data, max_data, 1);
- upper_res = vpmax_s8(vget_low_s8(max_data_shifted), vget_high_s8(max_data_shifted));
- }
- }
- if(pool_stride_x == 1)
- {
- const qint8x8x2_t res = { { lower_res, upper_res } };
- vst2_s8(reinterpret_cast<qint8_t *>(output.ptr()), res);
- }
- else
- {
- vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), lower_res);
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, const Window &window)
{
@@ -1201,71 +960,6 @@ void NEPoolingLayerKernel::pooling2_qasymm8_nchw(const Window &window_input, con
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling2_q16_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 2;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
- const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
- qint16x4_t lower_res = {};
- qint16x4_t upper_res = {};
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
- const qint16x4_t scale_vec = vdup_n_qs16(scale);
-
- // Perform pooling
- const qint16x8_t sum_data = vqaddq_qs16(top_data, bottom_data);
- lower_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data), vget_high_s16(sum_data)), scale_vec, fixed_point_position);
- if(pool_stride_x == 1)
- {
- const qint16x8_t sum_data_shifted = vextq_s16(sum_data, sum_data, 1);
- upper_res = vqmul_qs16(vpadd_s16(vget_low_s16(sum_data_shifted), vget_high_s16(sum_data_shifted)), scale_vec, fixed_point_position);
- }
- }
- else
- {
- const qint16x8_t max_data = vmaxq_s16(top_data, bottom_data);
- lower_res = vpmax_s16(vget_low_s16(max_data), vget_high_s16(max_data));
- if(pool_stride_x == 1)
- {
- const qint16x8_t max_data_shifted = vextq_s16(max_data, max_data, 1);
- upper_res = vpmax_s16(vget_low_s16(max_data_shifted), vget_high_s16(max_data_shifted));
- }
- }
- if(pool_stride_x == 1)
- {
- const qint16x4x2_t res = { { lower_res, upper_res } };
- vst2_s16(reinterpret_cast<qint16_t *>(output.ptr()), res);
- }
- else
- {
- vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), lower_res);
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling3_f16_nchw(const Window &window_input, const Window &window)
{
@@ -1461,82 +1155,6 @@ void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const W
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling3_q8_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 3;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const uint8_t *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
- const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_top_ptr + input.offset()));
- const auto middle_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_middle_ptr + input.offset()));
- const auto bottom_data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input_bottom_ptr + input.offset()));
- qint8x8_t res = {};
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint8_t scale = calculate_avg_scale_q8(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
-
- // Perform pooling for stride 2
- const qint8x16_t sum_data = vqaddq_qs8(vqaddq_qs8(top_data, bottom_data), middle_data);
- const qint8x16_t sum_data2 = vextq_s8(sum_data, sum_data, 1);
- const qint8x16_t sum_data3 = vextq_s8(sum_data, sum_data, 2);
- const qint8x16_t final_sum = vqaddq_qs8(vqaddq_qs8(sum_data, sum_data2), sum_data3);
- if(pool_stride_x == 2)
- {
- const qint8x8x2_t table = { { vget_low_s8(final_sum), vget_high_s8(final_sum) } };
- static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
- const qint8x8_t scale_vec = vdup_n_qs8(scale);
- res = vtbl2_s8(table, lookup_val);
- res = vqmul_qs8(res, scale_vec, fixed_point_position);
- vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
- }
- else
- {
- const qint8x16_t scale_vec = vdupq_n_qs8(scale);
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), vqmulq_qs8(final_sum, scale_vec, fixed_point_position));
- }
- }
- else
- {
- const qint8x16_t max_data = vmaxq_s8(vmaxq_s8(top_data, bottom_data), middle_data);
- const qint8x16_t max_data2 = vextq_s8(max_data, max_data, 1);
- const qint8x16_t max_data3 = vextq_s8(max_data, max_data, 2);
- const qint8x16_t final_max = vmaxq_s8(vmaxq_s8(max_data, max_data2), max_data3);
-
- if(pool_stride_x == 2)
- {
- const qint8x8x2_t table = { { vget_low_s8(final_max), vget_high_s8(final_max) } };
- static const qint8x8_t lookup_val = { 0, 2, 4, 6, 8, 10, 12, 14 };
- res = vtbl2_s8(table, lookup_val);
- vst1_qs8(reinterpret_cast<qint8_t *>(output.ptr()), res);
- }
- else
- {
- vst1q_qs8(reinterpret_cast<qint8_t *>(output.ptr()), final_max);
- }
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, const Window &window)
{
@@ -1657,77 +1275,6 @@ void NEPoolingLayerKernel::pooling3_qasymm8_nchw(const Window &window_input, con
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::pooling3_q16_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int fixed_point_position = _input->info()->fixed_point_position();
- constexpr int pool_size = 3;
- const int pool_pad_right = _pool_info.pad_stride_info().pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info().pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
- const int upper_bound_w = _input->info()->dimension(0) + pool_pad_right;
- const int upper_bound_h = _input->info()->dimension(1) + pool_pad_bottom;
-
- const unsigned char *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
- const unsigned char *const input_middle_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
- const unsigned char *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 2));
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- const auto top_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_top_ptr + input.offset()));
- const auto middle_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_middle_ptr + input.offset()));
- const auto bottom_data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input_bottom_ptr + input.offset()));
-
- if(pooling_type == PoolingType::AVG)
- {
- // Calculate scale
- const qint16_t scale = calculate_avg_scale_q16(id, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y, fixed_point_position);
-
- // Perform pooling for stride 2
- const qint16x8_t sum_data = vqaddq_qs16(vqaddq_qs16(top_data, bottom_data), middle_data);
- const qint16x8_t sum_data2 = vextq_s16(sum_data, sum_data, 1);
- const qint16x8_t sum_data3 = vextq_s16(sum_data, sum_data, 2);
- const qint16x8_t final_sum = vqaddq_qs16(vqaddq_qs16(sum_data, sum_data2), sum_data3);
- if(pool_stride_x == 2)
- {
- const qint16x4_t tmp = { vgetq_lane_s16(final_sum, 0), vgetq_lane_s16(final_sum, 2), vgetq_lane_s16(final_sum, 4), vgetq_lane_s16(final_sum, 6) };
- const qint16x4_t scale_vec = vdup_n_qs16(scale);
- vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmul_qs16(tmp, scale_vec, fixed_point_position));
- }
- else
- {
- const qint16x8_t scale_vec = vdupq_n_qs16(scale);
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), vqmulq_qs16(final_sum, scale_vec, fixed_point_position));
- }
- }
- else
- {
- const qint16x8_t max_data = vmaxq_s16(vmaxq_s16(top_data, bottom_data), middle_data);
- const qint16x8_t max_data2 = vextq_s16(max_data, max_data, 1);
- const qint16x8_t max_data3 = vextq_s16(max_data, max_data, 2);
- const qint16x8_t final_max = vmaxq_s16(vmaxq_s16(max_data, max_data2), max_data3);
-
- if(pool_stride_x == 2)
- {
- const qint16x4_t tmp = { vgetq_lane_s16(final_max, 0), vgetq_lane_s16(final_max, 2), vgetq_lane_s16(final_max, 4), vgetq_lane_s16(final_max, 6) };
- vst1_qs16(reinterpret_cast<qint16_t *>(output.ptr()), tmp);
- }
- else
- {
- vst1q_qs16(reinterpret_cast<qint16_t *>(output.ptr()), final_max);
- }
- }
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window)
{
@@ -1879,110 +1426,6 @@ void NEPoolingLayerKernel::pooling7_f32_nchw(const Window &window_input, const W
input, output);
}
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::poolingMxN_q8_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
- const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- qint8x16_t vres = {};
- qint8_t res = {};
-
- //PoolingType::MAX
- for(int y = 0; y < pool_size_y; ++y)
- {
- int x = 0;
- for(; x <= (pool_size_x - 16); x += 16)
- {
- const qint8x16_t data = vld1q_qs8(reinterpret_cast<const qint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
- (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- vres = vmaxq_s8(vres, data);
- }
-
- // Leftover for loop
- for(; x < pool_size_x; ++x)
- {
- qint8_t data = *(reinterpret_cast<const qint8_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- res = std::max(res, data);
- }
- }
- //Reduce
- const qint8x8_t half_vres = vpmax_s8(vget_low_s8(vres), vget_high_s8(vres));
- res = std::max(res, vget_lane_s8(half_vres, 0));
- res = std::max(res, vget_lane_s8(half_vres, 1));
- res = std::max(res, vget_lane_s8(half_vres, 2));
- res = std::max(res, vget_lane_s8(half_vres, 3));
- res = std::max(res, vget_lane_s8(half_vres, 4));
- res = std::max(res, vget_lane_s8(half_vres, 5));
- res = std::max(res, vget_lane_s8(half_vres, 6));
- res = std::max(res, vget_lane_s8(half_vres, 7));
-
- // Store result
- *(reinterpret_cast<qint8_t *>(output.ptr())) = res;
- },
- input, output);
-}
-
-template <PoolingType pooling_type>
-void NEPoolingLayerKernel::poolingMxN_q16_nchw(const Window &window_input, const Window &window)
-{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- const int pool_size_x = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().x() : _pool_info.pool_size().width;
- const int pool_size_y = _pool_info.is_global_pooling() ? _input->info()->tensor_shape().y() : _pool_info.pool_size().height;
- const int pool_pad_top = _pool_info.pad_stride_info().pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info().pad_left();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info().stride();
-
- execute_window_loop(window, [&](const Coordinates & id)
- {
- qint16x8_t vres = {};
- qint16_t res = {};
-
- //PoolingType::MAX
- for(int y = 0; y < pool_size_y; ++y)
- {
- int x = 0;
- for(; x <= (pool_size_x - 8); x += 8)
- {
- const qint16x8_t data = vld1q_qs16(reinterpret_cast<const qint16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() +
- (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- vres = vmaxq_s16(vres, data);
- }
-
- // Leftover for loop
- for(; x < pool_size_x; ++x)
- {
- qint16_t data = *(reinterpret_cast<const qint16_t *>(input.ptr() + (x - pool_pad_left) * _input->info()->strides_in_bytes().x() + (y - pool_pad_top) * _input->info()->strides_in_bytes().y()));
- res = std::max(res, data);
- }
- }
- //Reduce
- const qint16x4_t half_vres = vpmax_s16(vget_low_s16(vres), vget_high_s16(vres));
- res = std::max(res, vget_lane_s16(half_vres, 0));
- res = std::max(res, vget_lane_s16(half_vres, 1));
- res = std::max(res, vget_lane_s16(half_vres, 2));
- res = std::max(res, vget_lane_s16(half_vres, 3));
-
- // Store result
- *(reinterpret_cast<qint16_t *>(output.ptr())) = res;
- },
- input, output);
-}
-
template <PoolingType pooling_type, bool exclude_padding>
void NEPoolingLayerKernel::poolingMxN_f16_nchw(const Window &window_input, const Window &window)
{
@@ -2688,8 +2131,6 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
unsigned int window_x_inc = 0;
switch(_input->info()->data_type())
{
- case DataType::QS8:
- case DataType::QS16:
case DataType::F16:
{
window_x_inc = (pool_stride_x == 2) ? _num_elems_processed_per_iteration * 2 : _num_elems_processed_per_iteration;