author     morgolock <pablo.tello@arm.com>  2020-03-24 09:26:48 +0000
committer  Pablo Marquez <pablo.tello@arm.com>  2020-04-01 12:45:47 +0000
commit     cc1f6c94f1fc3b5d5ccbd5aa43e2a08487664f50 (patch)
tree       edf8c87c5ac37b291a9b615b9eeb65df08f79095 /src/core
parent     9428a182911802cf6e6df6eb751a7c7eb43602f9 (diff)
download   ComputeLibrary-cc1f6c94f1fc3b5d5ccbd5aa43e2a08487664f50.tar.gz
MLCE-166: Add support for extracting indices in NEPoolingLayer 2x2 NCHW
* Added initial support for pooling indices
* Only supported for NCHW Poolsize 2

Change-Id: I92ce767e64fcc01aae89411064b4cb2be272a1e9
Signed-off-by: morgolock <pablo.tello@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2927
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core')
-rw-r--r--  src/core/CL/kernels/CLPoolingLayerKernel.cpp             23
-rw-r--r--  src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp   55
-rw-r--r--  src/core/NEON/kernels/NEPoolingLayerKernel.cpp          230
3 files changed, 195 insertions, 113 deletions
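
For context, a minimal usage sketch (not part of the patch) of the new optional indices output. Only the trailing indices parameter on the kernels' configure()/validate() is confirmed by this diff; the Tensor setup, the PoolingLayerInfo constructor shape, and the assumption that the runtime-level NEPoolingLayer forwards the same parameter are taken from the public headers around this release.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void maxpool_with_indices_sketch()
    {
        Tensor src{}, dst{}, indices{};
        // F32 NCHW input; the patch supports indices only for MAX pooling,
        // pool size 2x2, NCHW layout, on the NEON backend.
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F32));

        // Assumed constructor: (pool_type, pool_size, data_layout, pad_stride_info).
        const PoolingLayerInfo pool_info(PoolingType::MAX, 2, DataLayout::NCHW, PadStrideInfo(2, 2, 0, 0));

        NEPoolingLayer pool;
        // dst and indices are auto-initialized during configure(); indices are
        // returned as DataType::U32 offsets to the selected input elements.
        pool.configure(&src, &dst, pool_info, &indices);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        indices.allocator()->allocate();
        pool.run();
    }
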
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 767d6d6ca0..dbbca4771b 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -56,10 +56,11 @@ void auto_init(const ITensorInfo *input, ITensorInfo *output, PoolingLayerInfo p
auto_init_if_empty(*output, input->clone()->set_tensor_shape(out_shape));
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices, "Indices not supported in the CL backend.");
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type == PoolingType::L2),
"Unsupported combination of parameters!");
@@ -166,7 +167,7 @@ std::tuple<Status, Window, CLPoolingConfig> validate_and_configure_window(ITenso
} // namespace
CLPoolingLayerKernel::CLPoolingLayerKernel()
- : _input(nullptr), _output(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _border_size(0), _num_elems_processed_per_iteration(1)
+ : _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _border_size(0), _num_elems_processed_per_iteration(1)
{
}
@@ -175,16 +176,16 @@ BorderSize CLPoolingLayerKernel::border_size() const
return _border_size;
}
-void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info)
+void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Set instance variables
- _input = input;
- _output = output;
- _pool_info = pool_info;
- _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout;
-
+ _input = input;
+ _output = output;
+ _pool_info = pool_info;
+ _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout;
+ _indices = indices;
int pool_stride_x = 0;
int pool_stride_y = 0;
const PoolingType pool_type = pool_info.pool_type;
@@ -215,7 +216,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
// Check output dimensions
auto_init(input->info(), output->info(), pool_info);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr));
const DataType data_type = input->info()->data_type();
@@ -331,9 +332,9 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
_config_id += lower_string(string_from_data_layout(input->info()->data_layout()));
}
-Status CLPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status CLPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info)));
return Status{};
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
index 14cedfe3d2..36499eb4fd 100644
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
@@ -55,9 +55,10 @@ void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int poole
auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices, "Indices not supported in GLES backend");
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type == PoolingType::L2),
"Unsupported combination of parameters!");
@@ -77,8 +78,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
+ unsigned int pooled_w = 0;
+ unsigned int pooled_h = 0;
std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
input->dimension(1),
pool_size,
@@ -93,14 +94,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info)
{
- int pool_pad_x = 0;
- int pool_pad_y = 0;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- int pool_size = pool_info.pool_size.width;
- const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
+ int pool_pad_x = 0;
+ int pool_pad_y = 0;
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ unsigned int pooled_w = 0;
+ unsigned int pooled_h = 0;
+ int pool_size = pool_info.pool_size.width;
+ const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
@@ -215,7 +216,7 @@ std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITenso
} // namespace
GCPoolingLayerKernel::GCPoolingLayerKernel()
- : _input(nullptr), _output(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1)
+ : _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1)
{
}
@@ -224,18 +225,18 @@ BorderSize GCPoolingLayerKernel::border_size() const
return _border_size;
}
-void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info)
+void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices)
{
- int pool_pad_x = 0;
- int pool_pad_y = 0;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- const PoolingType pool_type = pool_info.pool_type;
- int pool_size = pool_info.pool_size.width;
- const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
- const bool exclude_padding = pool_info.exclude_padding;
+ int pool_pad_x = 0;
+ int pool_pad_y = 0;
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ unsigned int pooled_w = 0;
+ unsigned int pooled_h = 0;
+ const PoolingType pool_type = pool_info.pool_type;
+ int pool_size = pool_info.pool_size.width;
+ const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
+ const bool exclude_padding = pool_info.exclude_padding;
std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
@@ -253,13 +254,13 @@ void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output,
auto_init(input->info(), output->info(), pooled_w, pooled_h);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr));
// Set instance variables
_input = input;
_output = output;
_pool_info = pool_info;
-
+ _indices = indices;
// Set build options
std::set<std::string> build_opts;
build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
@@ -321,9 +322,9 @@ void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output,
_border_size = pooling_config.second;
}
-Status GCPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status GCPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info)));
return Status{};
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index d6a3fadd33..fdbba815b4 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -123,7 +123,8 @@ inline void scale_vector_q16x8(bool exclude_padding, TVec &v, const Coordinates
v = wrapper::vsetlane(elems[7], v, 7);
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info,
+ unsigned int &pooled_w, unsigned int pooled_h, const ITensorInfo *indices, Size2D pool_size)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -134,6 +135,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ if(indices)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method");
+ }
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(input->data_type()) && !pool_info.exclude_padding && (pool_info.pool_type == PoolingType::AVG) && pool_info.pad_stride_info.has_padding()
@@ -146,6 +152,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
|| (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
+
+ if(indices)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() == DataLayout::NHWC, "Pool indices only supported in NCHW");
+ ARM_COMPUTE_RETURN_ERROR_ON((indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
+ || (indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
+ }
}
return Status{};
@@ -159,13 +173,18 @@ Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsign
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration,
- BorderSize &border_size,
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *indices, const PoolingLayerInfo &pool_info,
+ unsigned int &num_elems_processed_per_iteration,
+ BorderSize &border_size,
unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
{
// Output auto-initialization if not yet initialized
auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_pool_shape(*input, pool_info)));
-
+ if(indices)
+ {
+ // Indices auto-initialization if not yet initialized
+ auto_init_if_empty(*indices, (input->clone()->set_tensor_shape(compute_pool_shape(*input, pool_info))).set_data_type(DataType::U32) /* we store the offset to the element */);
+ }
const auto data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->data_layout() : pool_info.data_layout;
unsigned int num_elems_read_per_iteration = 0;
unsigned int num_elems_horizontal_window = 0;
@@ -286,25 +305,28 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
{
// Number of iterations in X dimension
const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
-
// Upper limit for the number of right/bottom border elements that are accessed
const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - input_width;
const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - input_height;
-
- border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
- border_size.right = std::max(upper_bound_w, pool_pad_right);
- border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
-
+ border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
+ border_size.right = std::max(upper_bound_w, pool_pad_right);
+ border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
TensorShape output_shape{ input->tensor_shape() };
output_shape.set(0, pooled_w);
output_shape.set(1, pooled_h);
TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
-
win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
- AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
-
+ AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
AccessWindowHorizontal output_access(output, 0, num_elems_horizontal_window);
- window_changed = update_window_and_padding(win, input_access, output_access);
+ if(indices)
+ {
+ AccessWindowHorizontal indices_access(indices, 0, num_elems_horizontal_window);
+ window_changed = update_window_and_padding(win, input_access, output_access, indices_access);
+ }
+ else
+ {
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ }
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
}
else
@@ -313,12 +335,18 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
output_shape.set(1, pooled_w);
output_shape.set(2, pooled_h);
TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
-
win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
-
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = update_window_and_padding(win, input_access, output_access);
+ if(indices)
+ {
+ AccessWindowHorizontal indices_access(indices, 0, num_elems_processed_per_iteration);
+ window_changed = update_window_and_padding(win, input_access, output_access, indices_access);
+ }
+ else
+ {
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ }
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
}
@@ -438,7 +466,7 @@ inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo
} // namespace
NEPoolingLayerKernel::NEPoolingLayerKernel()
- : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
{
}
@@ -447,10 +475,9 @@ BorderSize NEPoolingLayerKernel::border_size() const
return _border_size;
}
-void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
const bool is_global_pooling = pool_info.is_global_pooling;
const int pool_stride_x = pad_stride_info.stride().first;
@@ -478,11 +505,12 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
pad_stride_info);
// Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h, (indices) ? indices->info() : nullptr, pool_size));
// Set instance variables
_input = input;
_output = output;
+ _indices = indices;
_pool_info = pool_info;
_data_layout = input->info()->data_layout();
_is_square = (pool_size.x() == pool_size.y());
@@ -690,7 +718,8 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
}
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size.x(), pool_size.y());
+ auto win_config = validate_and_configure_window(input->info(), output->info(), (indices) ? indices->info() : nullptr,
+ pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size.x(), pool_size.y());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
INEKernel::configure(win_config.second);
}
@@ -1435,7 +1464,6 @@ void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const
res = std::max(res, data);
}
}
-
#if defined(__aarch64__)
// Reduction operation available on 64 bit architectures only
res = std::max(vmaxvq_f32(vres), res);
@@ -1459,66 +1487,117 @@ void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const
input, output);
}
-void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
+void NEPoolingLayerKernel::pooling2_f32_nchw_maxpool_indices(const Window &window_input, const Window &window)
{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- constexpr int pool_size = 2;
- const int pool_pad_right = _pool_info.pad_stride_info.pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info.pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info.pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info.pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
+ Iterator input(_input, window_input);
+ Iterator output(_output, window);
+ Iterator indices(_indices, window);
+ int final_index = 0;
+ const int pool_pad_top = _pool_info.pad_stride_info.pad_top();
+ const int pool_pad_left = _pool_info.pad_stride_info.pad_left();
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info.stride();
- const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
- const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
-
const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
- execute_window_loop(window, [&](const Coordinates & id)
+ const Strides &input_strides = _input->info()->strides_in_bytes();
+ const auto in_stridew = input_strides[1];
+
+ execute_window_loop(window, [&](const Coordinates &)
{
- float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
- float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
- float32x2_t res = {};
- float final_res = 0;
+ const auto input_offset_top = input_top_ptr + input.offset();
+ const auto input_offset_bottom = input_bottom_ptr + input.offset();
+ const auto in_top_ptr = reinterpret_cast<const float *>(input_offset_top);
+ const auto in_bottom_ptr = reinterpret_cast<const float *>(input_offset_bottom);
+ float32x2_t top_data = vld1_f32(in_top_ptr);
+ float32x2_t bottom_data = vld1_f32(in_bottom_ptr);
+ float32x2_t res = {};
+ float final_res = 0;
+ const float32x2_t max_data = vmax_f32(top_data, bottom_data);
+ res = vpmax_f32(max_data, max_data);
+ final_res = vget_lane_f32(res, 0);
+ // Store result
+ *(reinterpret_cast<float *>(output.ptr())) = final_res;
+ const uint32_t offset_top = (uint32_t)(input.offset() / sizeof(float));
+ const uint32_t offset_bottom = (uint32_t)offset_top + (in_stridew / sizeof(float));
+ const uint32x2_t voffset_top = { offset_top, offset_top + 1u };
+ const uint32x2_t voffset_bottom = { offset_bottom, offset_bottom + 1u };
+ const uint32x2_t tmp_indices = vbsl_u32(vcgt_f32(top_data, bottom_data), voffset_top, voffset_bottom);
+ final_index = vget_lane_u32(vbsl_u32(vcgt_f32(max_data, vrev64_f32(max_data)), tmp_indices, vrev64_u32(tmp_indices)), 0);
+ *(reinterpret_cast<int *>(indices.ptr())) = final_index;
+ },
+ input, output, indices);
+}
- // Get power of 2 in case of l2 pooling
- if(pooling_type == PoolingType::L2)
+void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type,
+ bool exclude_padding)
+{
+ if(pooling_type == PoolingType::MAX && _indices)
+ {
+ pooling2_f32_nchw_maxpool_indices(window_input, window);
+ }
+ else
+ {
+ Iterator input(_input, window_input);
+ Iterator output(_output, window);
+ constexpr int pool_size = 2;
+ const int pool_pad_right = _pool_info.pad_stride_info.pad_right();
+ const int pool_pad_top = _pool_info.pad_stride_info.pad_top();
+ const int pool_pad_left = _pool_info.pad_stride_info.pad_left();
+ const int pool_pad_bottom = _pool_info.pad_stride_info.pad_bottom();
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info.stride();
+ const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
+ const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
+
+ const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
+ const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
+
+ execute_window_loop(window, [&](const Coordinates & id)
{
- top_data = vmul_f32(top_data, top_data);
- bottom_data = vmul_f32(bottom_data, bottom_data);
- }
+ const auto in_top_ptr = reinterpret_cast<const float *>(input_top_ptr + input.offset());
+ const auto in_bottom_ptr = reinterpret_cast<const float *>(input_bottom_ptr + input.offset());
+ float32x2_t top_data = vld1_f32(in_top_ptr);
+ float32x2_t bottom_data = vld1_f32(in_bottom_ptr);
+ float32x2_t res = {};
+ float final_res = 0;
+ // Get power of 2 in case of l2 pooling
+ if(pooling_type == PoolingType::L2)
+ {
+ top_data = vmul_f32(top_data, top_data);
+ bottom_data = vmul_f32(bottom_data, bottom_data);
+ }
- if(pooling_type != PoolingType::MAX)
- {
- // Calculate scale
- float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
- const float32x2_t scale_v = vdup_n_f32(scale);
+ if(pooling_type != PoolingType::MAX)
+ {
+ // Calculate scale
+ float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
+ const float32x2_t scale_v = vdup_n_f32(scale);
- // Perform pooling
- const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
- res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
- }
- else
- {
- const float32x2_t max_data = vmax_f32(top_data, bottom_data);
- res = vpmax_f32(max_data, max_data);
- }
- final_res = vget_lane_f32(res, 0);
+ // Perform pooling
+ const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
+ res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
+ }
+ else
+ {
+ const float32x2_t max_data = vmax_f32(top_data, bottom_data);
+ res = vpmax_f32(max_data, max_data);
+ }
+ final_res = vget_lane_f32(res, 0);
- // Calculate square-root in case of l2 pooling
- if(pooling_type == PoolingType::L2)
- {
- final_res = sqrt(final_res);
- }
+ // Calculate square-root in case of l2 pooling
+ if(pooling_type == PoolingType::L2)
+ {
+ final_res = sqrt(final_res);
+ }
- // Store result
- *(reinterpret_cast<float *>(output.ptr())) = final_res;
- },
- input, output);
+ // Store result
+ *(reinterpret_cast<float *>(output.ptr())) = final_res;
+ },
+ input, output);
+ }
}
void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
@@ -2001,7 +2080,7 @@ void NEPoolingLayerKernel::poolingMxN_q8_nhwc(const Window &window_input, const
input, output);
}
-Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
@@ -2032,8 +2111,9 @@ Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInf
pool_size_y,
pool_info.pad_stride_info);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h, indices, Size2D(pool_size_x, pool_size_y)));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(),
+ (indices) ? indices->clone().get() : nullptr, pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
pool_size_x, pool_size_y)
.first);
@@ -2094,4 +2174,4 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
// Run function
(this->*_func)(window_input, window, _pool_info.pool_type, exclude_padding);
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
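
A self-contained illustration (not part of the patch) of the index-selection idiom used in pooling2_f32_nchw_maxpool_indices above: a vcgt_f32/vbsl_u32 select picks the winning row's offset for each column, then a second select against the lane-swapped maximum (vrev64) picks the winning column, giving a branchless argmax over the 2x2 window. Offsets here are toy in-window offsets rather than the kernel's real tensor byte/element offsets; a NEON-capable compiler is assumed.

    #include <arm_neon.h>
    #include <cstdint>
    #include <cstdio>

    int main()
    {
        const float top[2]    = { 1.0f, 5.0f }; // window row 0
        const float bottom[2] = { 3.0f, 2.0f }; // window row 1

        const float32x2_t top_data    = vld1_f32(top);
        const float32x2_t bottom_data = vld1_f32(bottom);

        // Per-column max of the two rows, then pairwise max across columns.
        const float32x2_t max_data = vmax_f32(top_data, bottom_data);
        const float       max_val  = vget_lane_f32(vpmax_f32(max_data, max_data), 0);

        // Candidate offsets per column: top row wins -> {0, 1}, bottom row -> {2, 3}.
        const uint32x2_t voffset_top    = { 0u, 1u };
        const uint32x2_t voffset_bottom = { 2u, 3u };
        const uint32x2_t tmp_indices    = vbsl_u32(vcgt_f32(top_data, bottom_data), voffset_top, voffset_bottom);

        // Compare each lane against its neighbour and keep the larger lane's offset.
        const uint32_t final_index = vget_lane_u32(
            vbsl_u32(vcgt_f32(max_data, vrev64_f32(max_data)), tmp_indices, vrev64_u32(tmp_indices)), 0);

        std::printf("max=%f index=%u\n", max_val, final_index); // expects max=5.0 index=1
        return 0;
    }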