aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormorgolock <pablo.tello@arm.com>2020-03-24 09:26:48 +0000
committerPablo Marquez <pablo.tello@arm.com>2020-04-01 12:45:47 +0000
commitcc1f6c94f1fc3b5d5ccbd5aa43e2a08487664f50 (patch)
treeedf8c87c5ac37b291a9b615b9eeb65df08f79095
parent9428a182911802cf6e6df6eb751a7c7eb43602f9 (diff)
downloadComputeLibrary-cc1f6c94f1fc3b5d5ccbd5aa43e2a08487664f50.tar.gz
MLCE-166: Add support for extracting indices in NEPoolingLayer 2x2 NCHW
* Added initial support for pooling indices * Only supported for NCHW Poolsize 2 Change-Id: I92ce767e64fcc01aae89411064b4cb2be272a1e9 Signed-off-by: morgolock <pablo.tello@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2927 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-by: Sang-Hoon Park <sang-hoon.park@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/CL/kernels/CLPoolingLayerKernel.h7
-rw-r--r--arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h9
-rw-r--r--arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h13
-rw-r--r--arm_compute/runtime/CL/functions/CLPoolingLayer.h6
-rw-r--r--arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h8
-rw-r--r--arm_compute/runtime/NEON/functions/NEPoolingLayer.h6
-rw-r--r--src/core/CL/kernels/CLPoolingLayerKernel.cpp23
-rw-r--r--src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp55
-rw-r--r--src/core/NEON/kernels/NEPoolingLayerKernel.cpp230
-rw-r--r--src/runtime/CL/functions/CLPoolingLayer.cpp9
-rw-r--r--src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp13
-rw-r--r--src/runtime/NEON/functions/NEPoolingLayer.cpp8
-rw-r--r--tests/validation/NEON/PoolingLayer.cpp19
-rw-r--r--tests/validation/fixtures/PoolingLayerFixture.h42
-rw-r--r--tests/validation/reference/PoolingLayer.cpp42
-rw-r--r--tests/validation/reference/PoolingLayer.h4
16 files changed, 321 insertions, 173 deletions
diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
index 4b3ee24333..fdd10f3f66 100644
--- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h
@@ -55,17 +55,19 @@ public:
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
- void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
+ void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
// Inherited methods overridden:
void run(const Window &window, cl::CommandQueue &queue) override;
@@ -74,6 +76,7 @@ public:
public:
const ICLTensor *_input;
ICLTensor *_output;
+ ICLTensor *_indices;
PoolingLayerInfo _pool_info;
DataLayout _data_layout;
BorderSize _border_size;
diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
index 85c051c326..7a2fb84f34 100644
--- a/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
+++ b/arm_compute/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,18 +54,20 @@ public:
* @param[in] input Source tensor. Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
- void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info);
+ void configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref GCPoolingLayerKernel
*
* @param[in] input Source tensor info. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
// Inherited methods overridden:
void run(const Window &window) override;
@@ -74,6 +76,7 @@ public:
private:
const IGCTensor *_input;
IGCTensor *_output;
+ IGCTensor *_indices;
PoolingLayerInfo _pool_info;
BorderSize _border_size;
unsigned int _num_elems_processed_per_iteration;
diff --git a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
index 654dfad701..6519ac72fe 100644
--- a/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPoolingLayerKernel.h
@@ -57,8 +57,9 @@ public:
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
- void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info);
+ void configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayerKernel
*
* @note F16 are supported for pool sizes 2 and 3 only
@@ -66,10 +67,11 @@ public:
* @param[in] input Source tensor. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
@@ -84,6 +86,12 @@ private:
* @param[in] exclude_padding Flag to specify exclusion of padding from the operation.
*/
void pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding = false);
+ /** Function to perform 2x2 pooling and compute the pooling indices. The indices can be used for max unpool.
+ *
+ * @param[in] window_input Input region on which to execute the kernel.
+ * @param[in] window Output region on which to execute the kernel.
+ */
+ void pooling2_f32_nchw_maxpool_indices(const Window &window_input, const Window &window);
/** Function to perform MxN pooling for 32-bit floating point values.
*
* @param[in] window_input Input region on which to execute the kernel.
@@ -197,6 +205,7 @@ private:
PoolingFunction _func;
const ITensor *_input;
ITensor *_output;
+ ITensor *_indices;
PoolingLayerInfo _pool_info;
DataLayout _data_layout;
unsigned int _num_elems_processed_per_iteration;
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index c78b558ac8..05b35dcee8 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -46,17 +46,19 @@ public:
* @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
- void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info);
+ void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer
*
* @param[in] input Source tensor info. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
};
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLPOOLINGLAYER_H */
diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h
index 13b7ad363f..b29f808f99 100644
--- a/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h
+++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCPoolingLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -51,17 +51,19 @@ public:
* @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
- void configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info);
+ void configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref GCPoolingLayer
*
* @param[in] input Source tensor info. Data types supported: F16/F32.
* @param[in] output Destination tensor info. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
void run() override final;
diff --git a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
index eb840b52f2..e43741c95b 100644
--- a/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEPoolingLayer.h
@@ -51,8 +51,9 @@ public:
* @param[in, out] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[out] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[out] indices (optional) The indices of the maximal values. Data type supported: U32.
*/
- void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info);
+ void configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices = nullptr);
/** Static function to check if given info will lead to a valid configuration of @ref NEPoolingLayer
*
* @note F16 is supported for pool sizes 2 and 3 only
@@ -60,10 +61,11 @@ public:
* @param[in] input Source tensor. (Written to only when padding != 0) Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
* @param[in] output Destination tensor. Data types supported: Same as @p input.
* @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
+ * @param[in] indices (optional) The indices of the maximal values. Data type supported: U32.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices = nullptr);
// Inherited methods overridden:
void run() override;
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 767d6d6ca0..dbbca4771b 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -56,10 +56,11 @@ void auto_init(const ITensorInfo *input, ITensorInfo *output, PoolingLayerInfo p
auto_init_if_empty(*output, input->clone()->set_tensor_shape(out_shape));
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices, "Indices not supported in the CL backend.");
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type == PoolingType::L2),
"Unsupported combination of parameters!");
@@ -166,7 +167,7 @@ std::tuple<Status, Window, CLPoolingConfig> validate_and_configure_window(ITenso
} // namespace
CLPoolingLayerKernel::CLPoolingLayerKernel()
- : _input(nullptr), _output(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _border_size(0), _num_elems_processed_per_iteration(1)
+ : _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _border_size(0), _num_elems_processed_per_iteration(1)
{
}
@@ -175,16 +176,16 @@ BorderSize CLPoolingLayerKernel::border_size() const
return _border_size;
}
-void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info)
+void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
// Set instance variables
- _input = input;
- _output = output;
- _pool_info = pool_info;
- _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout;
-
+ _input = input;
+ _output = output;
+ _pool_info = pool_info;
+ _data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout;
+ _indices = indices;
int pool_stride_x = 0;
int pool_stride_y = 0;
const PoolingType pool_type = pool_info.pool_type;
@@ -215,7 +216,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
// Check output dimensions
auto_init(input->info(), output->info(), pool_info);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr));
const DataType data_type = input->info()->data_type();
@@ -331,9 +332,9 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
_config_id += lower_string(string_from_data_layout(input->info()->data_layout()));
}
-Status CLPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status CLPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info)));
return Status{};
diff --git a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
index 14cedfe3d2..36499eb4fd 100644
--- a/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
+++ b/src/core/GLES_COMPUTE/kernels/GCPoolingLayerKernel.cpp
@@ -55,9 +55,10 @@ void auto_init(const ITensorInfo *input, ITensorInfo *output, unsigned int poole
auto_init_if_empty(*output, input->clone()->set_tensor_shape(output_shape));
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(indices, "Indices not supported in GLES backend");
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type == PoolingType::L2),
"Unsupported combination of parameters!");
@@ -77,8 +78,8 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
+ unsigned int pooled_w = 0;
+ unsigned int pooled_h = 0;
std::tie(pooled_w, pooled_h) = scaled_dimensions(input->dimension(0),
input->dimension(1),
pool_size,
@@ -93,14 +94,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info)
{
- int pool_pad_x = 0;
- int pool_pad_y = 0;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- int pool_size = pool_info.pool_size.width;
- const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
+ int pool_pad_x = 0;
+ int pool_pad_y = 0;
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ unsigned int pooled_w = 0;
+ unsigned int pooled_h = 0;
+ int pool_size = pool_info.pool_size.width;
+ const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
@@ -215,7 +216,7 @@ std::tuple<Status, Window, GCPoolingConfig> validate_and_configure_window(ITenso
} // namespace
GCPoolingLayerKernel::GCPoolingLayerKernel()
- : _input(nullptr), _output(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1)
+ : _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _border_size(0), _num_elems_processed_per_iteration(1)
{
}
@@ -224,18 +225,18 @@ BorderSize GCPoolingLayerKernel::border_size() const
return _border_size;
}
-void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info)
+void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices)
{
- int pool_pad_x = 0;
- int pool_pad_y = 0;
- int pool_stride_x = 0;
- int pool_stride_y = 0;
- unsigned int pooled_w = 0;
- unsigned int pooled_h = 0;
- const PoolingType pool_type = pool_info.pool_type;
- int pool_size = pool_info.pool_size.width;
- const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
- const bool exclude_padding = pool_info.exclude_padding;
+ int pool_pad_x = 0;
+ int pool_pad_y = 0;
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ unsigned int pooled_w = 0;
+ unsigned int pooled_h = 0;
+ const PoolingType pool_type = pool_info.pool_type;
+ int pool_size = pool_info.pool_size.width;
+ const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
+ const bool exclude_padding = pool_info.exclude_padding;
std::tie(pool_pad_x, pool_pad_y) = pad_stride_info.pad();
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
@@ -253,13 +254,13 @@ void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output,
auto_init(input->info(), output->info(), pooled_w, pooled_h);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, (indices) ? indices->info() : nullptr));
// Set instance variables
_input = input;
_output = output;
_pool_info = pool_info;
-
+ _indices = indices;
// Set build options
std::set<std::string> build_opts;
build_opts.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(1));
@@ -321,9 +322,9 @@ void GCPoolingLayerKernel::configure(const IGCTensor *input, IGCTensor *output,
_border_size = pooling_config.second;
}
-Status GCPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status GCPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, indices));
ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info)));
return Status{};
diff --git a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
index d6a3fadd33..fdbba815b4 100644
--- a/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEPoolingLayerKernel.cpp
@@ -123,7 +123,8 @@ inline void scale_vector_q16x8(bool exclude_padding, TVec &v, const Coordinates
v = wrapper::vsetlane(elems[7], v, 7);
}
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &pooled_w, unsigned int pooled_h)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info,
+ unsigned int &pooled_w, unsigned int pooled_h, const ITensorInfo *indices, Size2D pool_size)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
@@ -134,6 +135,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
std::tie(pool_stride_x, pool_stride_y) = pad_stride_info.stride();
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ if(indices)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(indices, 1, DataType::U32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_type != PoolingType::MAX, "Pooling indices only supported for MAX pooling method");
+ }
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::F16, DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(pool_type == PoolingType::L2 && is_data_type_quantized(input->data_type()));
ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(input->data_type()) && !pool_info.exclude_padding && (pool_info.pool_type == PoolingType::AVG) && pool_info.pad_stride_info.has_padding()
@@ -146,6 +152,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
|| (output->dimension(get_data_layout_dimension_index(input->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
+
+ if(indices)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_layout() == DataLayout::NHWC, "Pool indices only supported in NCHW");
+ ARM_COMPUTE_RETURN_ERROR_ON((indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::WIDTH)) != pooled_w)
+ || (indices->dimension(get_data_layout_dimension_index(indices->data_layout(), DataLayoutDimension::HEIGHT)) != pooled_h));
+ }
}
return Status{};
@@ -159,13 +173,18 @@ Status validate_arguments_pool_info(const unsigned int pool_size_x, const unsign
return Status{};
}
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, const PoolingLayerInfo &pool_info, unsigned int &num_elems_processed_per_iteration,
- BorderSize &border_size,
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output, ITensorInfo *indices, const PoolingLayerInfo &pool_info,
+ unsigned int &num_elems_processed_per_iteration,
+ BorderSize &border_size,
unsigned int pooled_w, unsigned int pooled_h, int pool_size_x, int pool_size_y)
{
// Output auto initialization if not yet initialized
auto_init_if_empty(*output, input->clone()->set_tensor_shape(compute_pool_shape(*input, pool_info)));
-
+ if(indices)
+ {
+ // Indices auto initialization if not yet initialized
+ auto_init_if_empty(*indices, (input->clone()->set_tensor_shape(compute_pool_shape(*input, pool_info))).set_data_type(DataType::U32) /* we store the offset to the element */);
+ }
const auto data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->data_layout() : pool_info.data_layout;
unsigned int num_elems_read_per_iteration = 0;
unsigned int num_elems_horizontal_window = 0;
@@ -286,25 +305,28 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
{
// Number of iterations in X dimension
const int num_iterations_x = (pooled_w + num_elems_processed_per_iteration - 1) / num_elems_processed_per_iteration;
-
// Upper limit for the number of right/bottom border elements that are accessed
const int upper_bound_w = ((num_iterations_x - 1) * num_elems_processed_per_iteration * pool_stride_x - pool_pad_left + num_elems_read_per_iteration) - input_width;
const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_top + pool_size_y) - input_height;
-
- border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
- border_size.right = std::max(upper_bound_w, pool_pad_right);
- border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
-
+ border_size = BorderSize(pool_pad_top, pool_pad_right, pool_pad_bottom, pool_pad_left);
+ border_size.right = std::max(upper_bound_w, pool_pad_right);
+ border_size.bottom = std::max(upper_bound_h, pool_pad_bottom);
TensorShape output_shape{ input->tensor_shape() };
output_shape.set(0, pooled_w);
output_shape.set(1, pooled_h);
TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
-
win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
- AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
-
+ AccessWindowStatic input_access(input, -pool_pad_left, -pool_pad_top, input_width + border_size.right, input_height + border_size.bottom);
AccessWindowHorizontal output_access(output, 0, num_elems_horizontal_window);
- window_changed = update_window_and_padding(win, input_access, output_access);
+ if(indices)
+ {
+ AccessWindowHorizontal indices_access(indices, 0, num_elems_horizontal_window);
+ window_changed = update_window_and_padding(win, input_access, output_access, indices_access);
+ }
+ else
+ {
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ }
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
}
else
@@ -313,12 +335,18 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
output_shape.set(1, pooled_w);
output_shape.set(2, pooled_h);
TensorInfo output_info(input->clone()->set_tensor_shape(output_shape));
-
win = calculate_max_window(output_info, Steps(num_elems_processed_per_iteration));
AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
-
AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
- window_changed = update_window_and_padding(win, input_access, output_access);
+ if(indices)
+ {
+ AccessWindowHorizontal indices_access(indices, 0, num_elems_processed_per_iteration);
+ window_changed = update_window_and_padding(win, input_access, output_access, indices_access);
+ }
+ else
+ {
+ window_changed = update_window_and_padding(win, input_access, output_access);
+ }
output_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
}
@@ -438,7 +466,7 @@ inline int8x8_t vrequantize_pooling(int8x8_t &vec, const UniformQuantizationInfo
} // namespace
NEPoolingLayerKernel::NEPoolingLayerKernel()
- : _func(nullptr), _input(nullptr), _output(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
+ : _func(nullptr), _input(nullptr), _output(nullptr), _indices(nullptr), _pool_info(), _data_layout(DataLayout::UNKNOWN), _num_elems_processed_per_iteration(0), _border_size(0), _is_square(false)
{
}
@@ -447,10 +475,9 @@ BorderSize NEPoolingLayerKernel::border_size() const
return _border_size;
}
-void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-
const PadStrideInfo pad_stride_info = pool_info.pad_stride_info;
const bool is_global_pooling = pool_info.is_global_pooling;
const int pool_stride_x = pad_stride_info.stride().first;
@@ -478,11 +505,12 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
pad_stride_info);
// Perform validation step
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h));
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), pool_info, pooled_w, pooled_h, (indices) ? indices->info() : nullptr, pool_size));
// Set instance variables
_input = input;
_output = output;
+ _indices = indices;
_pool_info = pool_info;
_data_layout = input->info()->data_layout();
_is_square = (pool_size.x() == pool_size.y());
@@ -690,7 +718,8 @@ void NEPoolingLayerKernel::configure(const ITensor *input, ITensor *output, cons
}
// Configure kernel window
- auto win_config = validate_and_configure_window(input->info(), output->info(), pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size.x(), pool_size.y());
+ auto win_config = validate_and_configure_window(input->info(), output->info(), (indices) ? indices->info() : nullptr,
+ pool_info, _num_elems_processed_per_iteration, _border_size, pooled_w, pooled_h, pool_size.x(), pool_size.y());
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
INEKernel::configure(win_config.second);
}
@@ -1435,7 +1464,6 @@ void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const
res = std::max(res, data);
}
}
-
#if defined(__aarch64__)
// Reduction operation available on 64 bit architectures only
res = std::max(vmaxvq_f32(vres), res);
@@ -1459,66 +1487,117 @@ void NEPoolingLayerKernel::poolingMxN_f32_nchw(const Window &window_input, const
input, output);
}
-void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
+void NEPoolingLayerKernel::pooling2_f32_nchw_maxpool_indices(const Window &window_input, const Window &window)
{
- Iterator input(_input, window_input);
- Iterator output(_output, window);
-
- constexpr int pool_size = 2;
- const int pool_pad_right = _pool_info.pad_stride_info.pad_right();
- const int pool_pad_top = _pool_info.pad_stride_info.pad_top();
- const int pool_pad_left = _pool_info.pad_stride_info.pad_left();
- const int pool_pad_bottom = _pool_info.pad_stride_info.pad_bottom();
- int pool_stride_x = 0;
- int pool_stride_y = 0;
+ Iterator input(_input, window_input);
+ Iterator output(_output, window);
+ Iterator indices(_indices, window);
+ int final_index = 0;
+ const int pool_pad_top = _pool_info.pad_stride_info.pad_top();
+ const int pool_pad_left = _pool_info.pad_stride_info.pad_left();
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info.stride();
- const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
- const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
-
const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
- execute_window_loop(window, [&](const Coordinates & id)
+ const Strides &input_strides = _input->info()->strides_in_bytes();
+ const auto in_stridew = input_strides[1];
+
+ execute_window_loop(window, [&](const Coordinates &)
{
- float32x2_t top_data = vld1_f32(reinterpret_cast<const float *>(input_top_ptr + input.offset()));
- float32x2_t bottom_data = vld1_f32(reinterpret_cast<const float *>(input_bottom_ptr + input.offset()));
- float32x2_t res = {};
- float final_res = 0;
+ const auto input_offset_top = input_top_ptr + input.offset();
+ const auto input_offset_bottom = input_bottom_ptr + input.offset();
+ const auto in_top_ptr = reinterpret_cast<const float *>(input_offset_top);
+ const auto in_bottom_ptr = reinterpret_cast<const float *>(input_offset_bottom);
+ float32x2_t top_data = vld1_f32(in_top_ptr);
+ float32x2_t bottom_data = vld1_f32(in_bottom_ptr);
+ float32x2_t res = {};
+ float final_res = 0;
+ const float32x2_t max_data = vmax_f32(top_data, bottom_data);
+ res = vpmax_f32(max_data, max_data);
+ final_res = vget_lane_f32(res, 0);
+ // Store result
+ *(reinterpret_cast<float *>(output.ptr())) = final_res;
+ const uint32_t offset_top = (uint32_t)(input.offset() / sizeof(float));
+ const uint32_t offset_bottom = (uint32_t)offset_top + (in_stridew / sizeof(float));
+ const uint32x2_t voffset_top = { offset_top, offset_top + 1u };
+ const uint32x2_t voffset_bottom = { offset_bottom, offset_bottom + 1u };
+ const uint32x2_t tmp_indices = vbsl_u32(vcgt_f32(top_data, bottom_data), voffset_top, voffset_bottom);
+ final_index = vget_lane_u32(vbsl_u32(vcgt_f32(max_data, vrev64_f32(max_data)), tmp_indices, vrev64_u32(tmp_indices)), 0);
+ *(reinterpret_cast<int *>(indices.ptr())) = final_index;
+ },
+ input, output, indices);
+}
- // Get power of 2 in case of l2 pooling
- if(pooling_type == PoolingType::L2)
+void NEPoolingLayerKernel::pooling2_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type,
+ bool exclude_padding)
+{
+ if(pooling_type == PoolingType::MAX && _indices)
+ {
+ pooling2_f32_nchw_maxpool_indices(window_input, window);
+ }
+ else
+ {
+ Iterator input(_input, window_input);
+ Iterator output(_output, window);
+ constexpr int pool_size = 2;
+ const int pool_pad_right = _pool_info.pad_stride_info.pad_right();
+ const int pool_pad_top = _pool_info.pad_stride_info.pad_top();
+ const int pool_pad_left = _pool_info.pad_stride_info.pad_left();
+ const int pool_pad_bottom = _pool_info.pad_stride_info.pad_bottom();
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ std::tie(pool_stride_x, pool_stride_y) = _pool_info.pad_stride_info.stride();
+ const int upper_bound_w = _input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_right);
+ const int upper_bound_h = _input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_bottom);
+
+ const uint8_t *const input_top_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
+ const uint8_t *const input_bottom_ptr = _input->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
+
+ execute_window_loop(window, [&](const Coordinates & id)
{
- top_data = vmul_f32(top_data, top_data);
- bottom_data = vmul_f32(bottom_data, bottom_data);
- }
+ const auto in_top_ptr = reinterpret_cast<const float *>(input_top_ptr + input.offset());
+ const auto in_bottom_ptr = reinterpret_cast<const float *>(input_bottom_ptr + input.offset());
+ float32x2_t top_data = vld1_f32(in_top_ptr);
+ float32x2_t bottom_data = vld1_f32(in_bottom_ptr);
+ float32x2_t res = {};
+ float final_res = 0;
+ // Get power of 2 in case of l2 pooling
+ if(pooling_type == PoolingType::L2)
+ {
+ top_data = vmul_f32(top_data, top_data);
+ bottom_data = vmul_f32(bottom_data, bottom_data);
+ }
- if(pooling_type != PoolingType::MAX)
- {
- // Calculate scale
- float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
- const float32x2_t scale_v = vdup_n_f32(scale);
+ if(pooling_type != PoolingType::MAX)
+ {
+ // Calculate scale
+ float scale = calculate_avg_scale(exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
+ const float32x2_t scale_v = vdup_n_f32(scale);
- // Perform pooling
- const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
- res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
- }
- else
- {
- const float32x2_t max_data = vmax_f32(top_data, bottom_data);
- res = vpmax_f32(max_data, max_data);
- }
- final_res = vget_lane_f32(res, 0);
+ // Perform pooling
+ const float32x2_t sum_data = vadd_f32(top_data, bottom_data);
+ res = vmul_f32(vpadd_f32(sum_data, sum_data), scale_v);
+ }
+ else
+ {
+ const float32x2_t max_data = vmax_f32(top_data, bottom_data);
+ res = vpmax_f32(max_data, max_data);
+ }
+ final_res = vget_lane_f32(res, 0);
- // Calculate square-root in case of l2 pooling
- if(pooling_type == PoolingType::L2)
- {
- final_res = sqrt(final_res);
- }
+ // Calculate square-root in case of l2 pooling
+ if(pooling_type == PoolingType::L2)
+ {
+ final_res = sqrt(final_res);
+ }
- // Store result
- *(reinterpret_cast<float *>(output.ptr())) = final_res;
- },
- input, output);
+ // Store result
+ *(reinterpret_cast<float *>(output.ptr())) = final_res;
+ },
+ input, output);
+ }
}
void NEPoolingLayerKernel::pooling3_f32_nchw(const Window &window_input, const Window &window, PoolingType pooling_type, bool exclude_padding)
@@ -2001,7 +2080,7 @@ void NEPoolingLayerKernel::poolingMxN_q8_nhwc(const Window &window_input, const
input, output);
}
-Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input);
@@ -2032,8 +2111,9 @@ Status NEPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInf
pool_size_y,
pool_info.pad_stride_info);
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h));
- ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(), pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, pool_info, pooled_w, pooled_h, indices, Size2D(pool_size_x, pool_size_y)));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get(),
+ (indices) ? indices->clone().get() : nullptr, pool_info, num_elems_processed_per_iteration, border_size, pooled_w, pooled_h,
pool_size_x, pool_size_y)
.first);
@@ -2094,4 +2174,4 @@ void NEPoolingLayerKernel::run(const Window &window, const ThreadInfo &info)
// Run function
(this->*_func)(window_input, window, _pool_info.pool_type, exclude_padding);
}
-} // namespace arm_compute \ No newline at end of file
+} // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp
index ebdae0b8ad..9c4fa4a2ba 100644
--- a/src/runtime/CL/functions/CLPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLPoolingLayer.cpp
@@ -30,14 +30,13 @@
namespace arm_compute
{
-void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info)
+void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info, ICLTensor *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(input);
-
// Configure pooling kernel
auto k = arm_compute::support::cpp14::make_unique<CLPoolingLayerKernel>();
k->set_target(CLScheduler::get().target());
- k->configure(input, output, pool_info);
+ k->configure(input, output, pool_info, indices);
_kernel = std::move(k);
const DataType data_type = input->info()->data_type();
@@ -81,8 +80,8 @@ void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const Poolin
CLScheduler::get().tune_kernel_static(*_kernel);
}
-Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- return CLPoolingLayerKernel::validate(input, output, pool_info);
+ return CLPoolingLayerKernel::validate(input, output, pool_info, indices);
}
} // namespace arm_compute
diff --git a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
index 328c5e9762..accf60e204 100644
--- a/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
+++ b/src/runtime/GLES_COMPUTE/functions/GCPoolingLayer.cpp
@@ -29,18 +29,18 @@
#include "support/MemorySupport.h"
-using namespace arm_compute;
-
+namespace arm_compute
+{
GCPoolingLayer::GCPoolingLayer()
: _kernel(nullptr), _border_handler(), _shift_handler()
{
}
-void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info)
+void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const PoolingLayerInfo &pool_info, IGCTensor *indices)
{
// Configure pooling kernel
auto k = arm_compute::support::cpp14::make_unique<GCPoolingLayerKernel>();
- k->configure(input, output, pool_info);
+ k->configure(input, output, pool_info, indices);
_kernel = std::move(k);
// Configure border depending on operation required
@@ -50,9 +50,9 @@ void GCPoolingLayer::configure(IGCTensor *input, IGCTensor *output, const Poolin
_shift_handler.configure(input);
}
-Status GCPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status GCPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- return GCPoolingLayerKernel::validate(input, output, pool_info);
+ return GCPoolingLayerKernel::validate(input, output, pool_info, indices);
}
void GCPoolingLayer::run()
@@ -63,3 +63,4 @@ void GCPoolingLayer::run()
GCScheduler::get().memory_barrier();
GCScheduler::get().dispatch(*_kernel);
}
+} // namespace arm_compute
diff --git a/src/runtime/NEON/functions/NEPoolingLayer.cpp b/src/runtime/NEON/functions/NEPoolingLayer.cpp
index 0a3219375e..12921cf40e 100644
--- a/src/runtime/NEON/functions/NEPoolingLayer.cpp
+++ b/src/runtime/NEON/functions/NEPoolingLayer.cpp
@@ -33,7 +33,7 @@ NEPoolingLayer::NEPoolingLayer()
{
}
-void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info)
+void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLayerInfo &pool_info, ITensor *indices)
{
// Check if we have Global Pooling Layer
_is_global_pooling_layer = (input->info()->dimension(0) == pool_info.pool_size.width) && (input->info()->dimension(1) == pool_info.pool_size.height);
@@ -42,7 +42,7 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
_data_layout = pool_info.data_layout == DataLayout::UNKNOWN ? input->info()->data_layout() : pool_info.data_layout;
// Configure pooling kernel
- _pooling_layer_kernel.configure(input, output, pool_info);
+ _pooling_layer_kernel.configure(input, output, pool_info, indices);
switch(_data_layout)
{
@@ -65,9 +65,9 @@ void NEPoolingLayer::configure(ITensor *input, ITensor *output, const PoolingLay
}
}
-Status NEPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info)
+Status NEPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info, const ITensorInfo *indices)
{
- return NEPoolingLayerKernel::validate(input, output, pool_info);
+ return NEPoolingLayerKernel::validate(input, output, pool_info, indices);
}
void NEPoolingLayer::run()
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index 1012320b0d..a5876dcd0a 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -112,13 +112,32 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(
// *INDENT-ON*
template <typename T>
+using NEPoolingLayerIndicesFixture = PoolingLayerIndicesValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;
+
+template <typename T>
using NEPoolingLayerFixture = PoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;
template <typename T>
using NESpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<Tensor, Accessor, NEPoolingLayer, T>;
+const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(combine(framework::dataset::make("PoolType", { PoolingType::MAX }), framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
+ framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0) })),
+ framework::dataset::make("ExcludePadding", { true, false }));
+
TEST_SUITE(Float)
TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
+ framework::dataset::make("DataType",
+ DataType::F32))),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW })
+
+ ))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+ validate(Accessor(_target_indices), _ref_indices);
+}
+
FIXTURE_DATA_TEST_CASE(RunSpecial, NESpecialPoolingLayerFixture<float>, framework::DatasetMode::ALL, datasets::PoolingLayerDatasetSpecial() * framework::dataset::make("DataType", DataType::F32))
{
// Validate output
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index ec186564b7..7f2d7ac225 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -34,7 +34,6 @@
#include "tests/framework/Asserts.h"
#include "tests/framework/Fixture.h"
#include "tests/validation/reference/PoolingLayer.h"
-
#include <random>
namespace arm_compute
@@ -48,7 +47,7 @@ class PoolingLayerValidationGenericFixture : public framework::Fixture
{
public:
template <typename...>
- void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout)
+ void setup(TensorShape shape, PoolingLayerInfo pool_info, DataType data_type, DataLayout data_layout, bool indices = false)
{
std::mt19937 gen(library->seed());
std::uniform_int_distribution<> offset_dis(0, 20);
@@ -59,8 +58,8 @@ public:
const QuantizationInfo output_qinfo(scale, scale_out);
_pool_info = pool_info;
- _target = compute_target(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo);
- _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo);
+ _target = compute_target(shape, pool_info, data_type, data_layout, input_qinfo, output_qinfo, indices);
+ _reference = compute_reference(shape, pool_info, data_type, input_qinfo, output_qinfo, indices);
}
protected:
@@ -79,7 +78,9 @@ protected:
}
TensorType compute_target(TensorShape shape, PoolingLayerInfo info,
- DataType data_type, DataLayout data_layout, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
+ DataType data_type, DataLayout data_layout,
+ QuantizationInfo input_qinfo, QuantizationInfo output_qinfo,
+ bool indices)
{
// Change shape in case of NHWC.
if(data_layout == DataLayout::NHWC)
@@ -91,20 +92,24 @@ protected:
TensorType src = create_tensor<TensorType>(shape, data_type, 1, input_qinfo, data_layout);
const TensorShape dst_shape = misc::shape_calculator::compute_pool_shape(*(src.info()), info);
TensorType dst = create_tensor<TensorType>(dst_shape, data_type, 1, output_qinfo, data_layout);
+ _target_indices = create_tensor<TensorType>(dst_shape, DataType::U32, 1);
// Create and configure function
FunctionType pool_layer;
- pool_layer.configure(&src, &dst, info);
+ pool_layer.configure(&src, &dst, info, (indices) ? &_target_indices : nullptr);
ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(_target_indices.info()->is_resizable(), framework::LogLevel::ERRORS);
// Allocate tensors
src.allocator()->allocate();
dst.allocator()->allocate();
+ _target_indices.allocator()->allocate();
ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS);
ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
+ ARM_COMPUTE_EXPECT(!_target_indices.info()->is_resizable(), framework::LogLevel::ERRORS);
// Fill tensors
fill(AccessorType(src));
@@ -115,20 +120,33 @@ protected:
return dst;
}
- SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, QuantizationInfo input_qinfo, QuantizationInfo output_qinfo)
+ SimpleTensor<T> compute_reference(const TensorShape &shape, PoolingLayerInfo info, DataType data_type,
+ QuantizationInfo input_qinfo, QuantizationInfo output_qinfo, bool indices)
{
// Create reference
SimpleTensor<T> src{ shape, data_type, 1, input_qinfo };
-
// Fill reference
fill(src);
- return reference::pooling_layer<T>(src, info, output_qinfo);
+ return reference::pooling_layer<T>(src, info, output_qinfo, indices ? &_ref_indices : nullptr);
}
- TensorType _target{};
- SimpleTensor<T> _reference{};
- PoolingLayerInfo _pool_info{};
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ PoolingLayerInfo _pool_info{};
+ TensorType _target_indices{};
+ SimpleTensor<uint32_t> _ref_indices{};
+};
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class PoolingLayerIndicesValidationFixture : public PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout)
+ {
+ PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding),
+ data_type, data_layout, true);
+ }
};
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index ed2eb2c7ec..1a1aebd1b4 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -38,13 +38,15 @@ namespace reference
using namespace arm_compute::misc::shape_calculator;
template <typename T, typename ACC_T, typename std::enable_if<is_floating_point<T>::value, int>::type>
-SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info)
+SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices)
{
ARM_COMPUTE_ERROR_ON(info.is_global_pooling && (src.shape().x() != src.shape().y()));
-
// Create reference
SimpleTensor<T> dst{ compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), src.data_type(), 1 };
-
+ if(indices)
+ {
+ *indices = SimpleTensor<uint32_t> { compute_pool_shape(TensorInfo(src.shape(), 1, src.data_type()), info), DataType::U32, 1 };
+ }
const int pool_size_x = info.is_global_pooling ? src.shape().x() : info.pool_size.width;
const int pool_size_y = info.is_global_pooling ? src.shape().y() : info.pool_size.height;
PoolingType type = info.pool_type;
@@ -79,6 +81,7 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
hstart = std::max(hstart, 0);
auto max_val = std::numeric_limits<ACC_T>::lowest();
+ int max_index{ 0 };
for(int y = hstart; y < hend; ++y)
{
for(int x = wstart; x < wend; ++x)
@@ -86,12 +89,17 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
const auto val = static_cast<ACC_T>(src[r * h_src * w_src + y * w_src + x]);
if(val > max_val)
{
- max_val = val;
+ max_val = val;
+ max_index = coord2index(src.shape(), Coordinates(x, y, r));
}
}
}
dst[r * h_dst * w_dst + h * w_dst + w] = static_cast<T>(max_val);
+ if(indices)
+ {
+ (*indices)[r * h_dst * w_dst + h * w_dst + w] = max_index;
+ }
}
}
}
@@ -151,48 +159,48 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
return dst;
}
-template SimpleTensor<float> pooling_layer_internal<float>(const SimpleTensor<float> &src, const PoolingLayerInfo &info);
-template SimpleTensor<half> pooling_layer_internal<half>(const SimpleTensor<half> &src, const PoolingLayerInfo &info);
-template SimpleTensor<half> pooling_layer_internal<half, float>(const SimpleTensor<half> &src, const PoolingLayerInfo &info);
+template SimpleTensor<float> pooling_layer_internal<float>(const SimpleTensor<float> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices);
+template SimpleTensor<half> pooling_layer_internal<half>(const SimpleTensor<half> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices);
+template SimpleTensor<half> pooling_layer_internal<half, float>(const SimpleTensor<half> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices);
template <typename T>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
{
ARM_COMPUTE_UNUSED(output_qinfo);
- return pooling_layer_internal<T, T>(src, info);
+ return pooling_layer_internal<T, T>(src, info, indices);
}
template <>
-SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
+SimpleTensor<uint8_t> pooling_layer<uint8_t>(const SimpleTensor<uint8_t> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
{
SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
- SimpleTensor<float> dst_tmp = pooling_layer_internal<float>(src_tmp, info);
+ SimpleTensor<float> dst_tmp = pooling_layer_internal<float>(src_tmp, info, indices);
SimpleTensor<uint8_t> dst = convert_to_asymmetric<uint8_t>(dst_tmp, output_qinfo);
return dst;
}
template <>
-SimpleTensor<int8_t> pooling_layer<int8_t>(const SimpleTensor<int8_t> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
+SimpleTensor<int8_t> pooling_layer<int8_t>(const SimpleTensor<int8_t> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
{
SimpleTensor<float> src_tmp = convert_from_asymmetric(src);
- SimpleTensor<float> dst_tmp = pooling_layer_internal<float>(src_tmp, info);
+ SimpleTensor<float> dst_tmp = pooling_layer_internal<float>(src_tmp, info, indices);
SimpleTensor<int8_t> dst = convert_to_asymmetric<int8_t>(dst_tmp, output_qinfo);
return dst;
}
template <>
-SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo)
+SimpleTensor<half> pooling_layer(const SimpleTensor<half> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices)
{
ARM_COMPUTE_UNUSED(output_qinfo);
if(src.data_type() == DataType::F16 && info.fp_mixed_precision)
{
- return pooling_layer_internal<half, float>(src, info);
+ return pooling_layer_internal<half, float>(src, info, indices);
}
- return pooling_layer_internal<half>(src, info);
+ return pooling_layer_internal<half>(src, info, indices);
}
-template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
+template SimpleTensor<float> pooling_layer(const SimpleTensor<float> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices);
} // namespace reference
} // namespace validation
} // namespace test
diff --git a/tests/validation/reference/PoolingLayer.h b/tests/validation/reference/PoolingLayer.h
index 92d97d548e..3ca7f28d5a 100644
--- a/tests/validation/reference/PoolingLayer.h
+++ b/tests/validation/reference/PoolingLayer.h
@@ -36,9 +36,9 @@ namespace validation
namespace reference
{
template <typename T, typename ACC_T = T, typename std::enable_if<is_floating_point<T>::value, int>::type = 0>
-SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info);
+SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const PoolingLayerInfo &info, SimpleTensor<uint32_t> *indices);
template <typename T>
-SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo);
+SimpleTensor<T> pooling_layer(const SimpleTensor<T> &src, const PoolingLayerInfo &info, const QuantizationInfo &output_qinfo, SimpleTensor<uint32_t> *indices);
} // namespace reference
} // namespace validation
} // namespace test