author     Adnan AlSinan <adnan.alsinan@arm.com>    2023-02-22 12:15:14 +0000
committer  Adnan AlSinan <adnan.alsinan@arm.com>    2023-03-01 11:48:22 +0000
commit     bbf2e7477be984702e1a51f2a23910ee8349b867 (patch)
tree       d5ce7a16e7f6e977bba2f88c8a913d071b4e04ed
parent     227db8db83cd85d8704a8edbd4e8c88af0dd1f84 (diff)
download   ComputeLibrary-bbf2e7477be984702e1a51f2a23910ee8349b867.tar.gz
Add support for kernel indices in Maxpool
- Add a max pooling implementation that returns kernel indices.
- Add a parameter in pooling info object to pick kernel indices impl.
- Add validation tests.

Resolves: [ONCPUML-1187]

Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com>
Change-Id: I485ef1604f676ee14d5f7f62d33699e49c38e4d3
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9192
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  arm_compute/core/Types.h                          21
-rw-r--r--  src/cpu/kernels/CpuPool2dKernel.cpp               11
-rw-r--r--  src/cpu/kernels/pool2d/neon/fp32.cpp             143
-rw-r--r--  src/cpu/operators/CpuPool2d.cpp                    7
-rw-r--r--  src/cpu/operators/CpuPool2d.h                      3
-rw-r--r--  tests/validation/CL/PoolingLayer.cpp              10
-rw-r--r--  tests/validation/NEON/PoolingLayer.cpp            36
-rw-r--r--  tests/validation/fixtures/PoolingLayerFixture.h    7
-rw-r--r--  tests/validation/reference/PoolingLayer.cpp       16
9 files changed, 203 insertions, 51 deletions
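
Usage sketch (not part of this change): the new flag is consumed through PoolingLayerInfo, and kernel-index mode is only accepted for MAX pooling on NHWC input (see the validation change in CpuPool2dKernel.cpp below). The sketch assumes the existing NEPoolingLayer::configure overload that takes an optional indices tensor; the tensor shape is a hypothetical example.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical NHWC FP32 source: 16 channels, 32x32 spatial, batch 1 (shape given as C, W, H, N).
        Tensor     src, dst, indices;
        TensorInfo src_info(TensorShape(16U, 32U, 32U, 1U), 1, DataType::F32);
        src_info.set_data_layout(DataLayout::NHWC);
        src.allocator()->init(src_info);

        // MAX pooling, 3x3 window; the last argument enables kernel-local indices.
        const PoolingLayerInfo info(PoolingType::MAX,
                                    Size2D(3, 3),
                                    DataLayout::NHWC,
                                    PadStrideInfo(1, 1, 0, 0),
                                    false, /* exclude_padding */
                                    false, /* fp_mixed_precision */
                                    true,  /* use_inf_as_limit */
                                    true); /* use_kernel_indices */

        NEPoolingLayer pool;
        pool.configure(&src, &dst, info, &indices); // dst and indices shapes are inferred from src and info

        src.allocator()->allocate();
        dst.allocator()->allocate();
        indices.allocator()->allocate();

        // ... fill src, then:
        pool.run(); // each indices element holds kh * pool_width + kw of the window maximum
        return 0;
    }
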
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index e8eed67c58..946b8a6cb6 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1210,7 +1210,8 @@ struct PoolingLayerInfo
exclude_padding(false),
is_global_pooling(false),
fp_mixed_precision(false),
- use_inf_as_limit(true)
+ use_inf_as_limit(true),
+ use_kernel_indices(false)
{
}
/** Constructor
@@ -1224,6 +1225,7 @@ struct PoolingLayerInfo
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
* @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type.
+ * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
unsigned int pool_size,
@@ -1231,7 +1233,8 @@ struct PoolingLayerInfo
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
bool fp_mixed_precision = false,
- bool use_inf_as_limit = true)
+ bool use_inf_as_limit = true,
+ bool use_kernel_indices = false)
: pool_type(pool_type),
pool_size(Size2D(pool_size, pool_size)),
data_layout(data_layout),
@@ -1239,7 +1242,8 @@ struct PoolingLayerInfo
exclude_padding(exclude_padding),
is_global_pooling(false),
fp_mixed_precision(fp_mixed_precision),
- use_inf_as_limit(use_inf_as_limit)
+ use_inf_as_limit(use_inf_as_limit),
+ use_kernel_indices(use_kernel_indices)
{
}
@@ -1254,6 +1258,7 @@ struct PoolingLayerInfo
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
* @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type.
+ * @param[in] use_kernel_indices (Optional) Use kernel indices instead of using source indices while computing indices tensor.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
Size2D pool_size,
@@ -1261,7 +1266,8 @@ struct PoolingLayerInfo
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
bool fp_mixed_precision = false,
- bool use_inf_as_limit = true)
+ bool use_inf_as_limit = true,
+ bool use_kernel_indices = false)
: pool_type(pool_type),
pool_size(pool_size),
data_layout(data_layout),
@@ -1269,7 +1275,8 @@ struct PoolingLayerInfo
exclude_padding(exclude_padding),
is_global_pooling(false),
fp_mixed_precision(fp_mixed_precision),
- use_inf_as_limit(use_inf_as_limit)
+ use_inf_as_limit(use_inf_as_limit),
+ use_kernel_indices(use_kernel_indices)
{
}
@@ -1288,7 +1295,8 @@ struct PoolingLayerInfo
exclude_padding(false),
is_global_pooling(true),
fp_mixed_precision(false),
- use_inf_as_limit(true)
+ use_inf_as_limit(true),
+ use_kernel_indices(false)
{
}
@@ -1300,6 +1308,7 @@ struct PoolingLayerInfo
bool is_global_pooling;
bool fp_mixed_precision;
bool use_inf_as_limit;
+ bool use_kernel_indices;
};
/** Pooling Layer Information struct*/
diff --git a/src/cpu/kernels/CpuPool2dKernel.cpp b/src/cpu/kernels/CpuPool2dKernel.cpp
index 8f04812b0c..d72a41cbbe 100644
--- a/src/cpu/kernels/CpuPool2dKernel.cpp
+++ b/src/cpu/kernels/CpuPool2dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2022 Arm Limited.
+ * Copyright (c) 2017-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,17 +28,11 @@
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/Window.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/AccessWindowStatic.h"
#include "src/core/CPP/Validate.h"
-#include "src/core/NEON/NEAsymm.h"
-#include "src/core/NEON/NEFixedPoint.h"
-#include "src/core/NEON/NEMath.h"
#include "src/core/common/Registrars.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/cpu/kernels/pool2d/neon/list.h"
-#include "support/ToolchainSupport.h"
-
#include "src/core/NEON/wrapper/wrapper.h"
#include <arm_neon.h>
@@ -191,7 +185,8 @@ Status validate_arguments(const ITensorInfo *src, const ITensorInfo *dst, const
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(dst, &out_info);
if(indices)
{
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((pool_size != Size2D(2, 2)), "Pooling indices only supported for pool size 2x2");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(((pool_size != Size2D(2, 2)) && !pool_info.use_kernel_indices), "Pooling indices returning source tensor coordinates is only supported for pool size 2x2");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(pool_info.use_kernel_indices && (src->data_layout() != DataLayout::NHWC), "Pooling kernel indices only supported for NHWC");
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(indices, &out_info);
}
}
diff --git a/src/cpu/kernels/pool2d/neon/fp32.cpp b/src/cpu/kernels/pool2d/neon/fp32.cpp
index 018f62b8a8..8e93df3347 100644
--- a/src/cpu/kernels/pool2d/neon/fp32.cpp
+++ b/src/cpu/kernels/pool2d/neon/fp32.cpp
@@ -24,7 +24,6 @@
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/Types.h"
-#include "arm_compute/core/utils/misc/Traits.h"
#include "src/core/NEON/wrapper/intrinsics/intrinsics.h"
#include "src/core/helpers/WindowHelpers.h"
#include "src/cpu/kernels/pool2d/neon/list.h"
@@ -98,10 +97,10 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds
vst1q_f32(reinterpret_cast<float *>(out.ptr()) + x_off, vres);
const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC);
- const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float) + x_off;
- const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_horizontal;
- const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_horizontal * src->info()->tensor_shape()[1];
- const uint32_t offset_x3 = (uint32_t)offset_x2 + in_stride_y / sizeof(float) - pad_horizontal;
+ const uint32_t offset_x0 = offset_base / sizeof(float) + x_off;
+ const uint32_t offset_x1 = offset_x0 + in_stride_y / sizeof(float) - pad_horizontal;
+ const uint32_t offset_x2 = offset_x0 + in_stride_z / sizeof(float) - pad_horizontal * src->info()->tensor_shape()[1];
+ const uint32_t offset_x3 = offset_x2 + in_stride_y / sizeof(float) - pad_horizontal;
const uint32x4_t voffset_x0 = { offset_x0, offset_x0 + 1, offset_x0 + 2, offset_x0 + 3 };
const uint32x4_t voffset_x1 = { offset_x1, offset_x1 + 1, offset_x1 + 2, offset_x1 + 3 };
const uint32x4_t voffset_x2 = { offset_x2, offset_x2 + 1, offset_x2 + 2, offset_x2 + 3 };
@@ -127,10 +126,10 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds
*(reinterpret_cast<float *>(out.ptr()) + x_off) = res;
const uint32_t offset_base = offset_no_padding<float>(in.offset(), id, *src->info(), pool_stride_x, pool_stride_y, DataLayout::NHWC);
- const uint32_t offset_x0 = (uint32_t)offset_base / sizeof(float) + x_off;
- const uint32_t offset_x1 = (uint32_t)offset_x0 + in_stride_y / sizeof(float) - pad_horizontal;
- const uint32_t offset_x2 = (uint32_t)offset_x0 + in_stride_z / sizeof(float) - pad_horizontal * src->info()->tensor_shape()[1];
- const uint32_t offset_x3 = (uint32_t)offset_x2 + in_stride_y / sizeof(float) - pad_horizontal;
+ const uint32_t offset_x0 = offset_base / sizeof(float) + x_off;
+ const uint32_t offset_x1 = offset_x0 + in_stride_y / sizeof(float) - pad_horizontal;
+ const uint32_t offset_x2 = offset_x0 + in_stride_z / sizeof(float) - pad_horizontal * src->info()->tensor_shape()[1];
+ const uint32_t offset_x3 = offset_x2 + in_stride_y / sizeof(float) - pad_horizontal;
const uint32_t tmp_idx0 = (x0 >= x1) ? offset_x0 : offset_x1;
const uint32_t tmp_idx1 = (x2 >= x3) ? offset_x2 : offset_x3;
const uint32_t tmp_idx2 = (std::max(x0, x1) >= std::max(x2, x3)) ? tmp_idx0 : tmp_idx1;
@@ -141,11 +140,135 @@ void pooling2_f32_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *ds
},
in, out, indices);
}
+} // namespace
+
+void poolingMxN_fp32_neon_nhwc_kernel_indices(const ITensor *src, ITensor *dst0, ITensor *dst1, const PoolingLayerInfo &pool_info, const Window &window)
+{
+ const int window_start_x = window.x().start();
+ const int window_end_x = window.x().end();
+ constexpr int window_step_x = 4;
+
+ Window window_out = window;
+ window_out.set(Window::DimX, Window::Dimension(0, 1, 1));
+
+ Iterator out(dst0, window_out);
+ Iterator indices(dst1, window_out);
+
+ const int pool_size_x = pool_info.is_global_pooling ? src->info()->tensor_shape().y() : pool_info.pool_size.width;
+ const int pool_size_y = pool_info.is_global_pooling ? src->info()->tensor_shape().z() : pool_info.pool_size.height;
+
+ const int pool_pad_top = pool_info.pad_stride_info.pad_top();
+ const int pool_pad_left = pool_info.pad_stride_info.pad_left();
+
+ int pool_stride_x = 0;
+ int pool_stride_y = 0;
+ std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
+
+ const float min_value = get_initial_min<float>(pool_info.use_inf_as_limit);
+
+ float32x4_t vres;
+ uint32x4_t vidx;
+
+ constexpr int idx_width = 1;
+ constexpr int idx_height = 2;
+ constexpr int idx_batch = 3;
+
+ const int y_stride = static_cast<int>(src->info()->strides_in_bytes().y());
+ const int z_stride = static_cast<int>(src->info()->strides_in_bytes().z());
+ const int n_stride = static_cast<int>(src->info()->strides_in_bytes()[idx_batch]);
+
+ const int input_dim_w = src->info()->dimension(idx_width);
+ const int input_dim_h = src->info()->dimension(idx_height);
+
+ const uint8_t *in_ptr_start = src->buffer() + src->info()->offset_first_element_in_bytes();
+
+ execute_window_loop(window_out, [&](const Coordinates & id)
+ {
+ const int idx_width = static_cast<int>(id.y()) * pool_stride_x - pool_pad_left;
+ const int idx_height = static_cast<int>(id.z()) * pool_stride_y - pool_pad_top;
+
+ const int pool_start_x = std::max(0, -idx_width);
+ const int pool_start_y = std::max(0, -idx_height);
+
+ const int pool_end_x = std::min(pool_size_x, input_dim_w - idx_width);
+ const int pool_end_y = std::min(pool_size_y, input_dim_h - idx_height);
+
+ const uint8_t *in_ptr_n = in_ptr_start + id[idx_batch] * n_stride;
+
+ const int in_ptr_y_offset = (z_stride * idx_height) + (pool_start_y * z_stride);
+ const int in_ptr_x_offset = (y_stride * idx_width) + (pool_start_x * y_stride);
+
+ int x_off = window_start_x;
+
+ for(; x_off <= (window_end_x - window_step_x); x_off += window_step_x)
+ {
+ vres = vdupq_n_f32(min_value);
+ vidx = vdupq_n_u32(0U);
+ const uint8_t *in_ptr_y = in_ptr_n + in_ptr_y_offset + in_ptr_x_offset;
+ uint32_t curr_kernel_index = pool_size_x * pool_start_y;
+ for(int y = pool_start_y; y < pool_end_y; ++y)
+ {
+ const uint8_t *in_ptr_x = in_ptr_y + (x_off * sizeof(float));
+ curr_kernel_index += pool_start_x;
+ for(int x = pool_start_x; x < pool_end_x; ++x)
+ {
+ const float32x4_t data = vld1q_f32(reinterpret_cast<const float *>(in_ptr_x));
+ const uint32x4_t vidx_curr = vdupq_n_u32(curr_kernel_index);
+ const uint32x4_t idxMask = vcgtq_f32(data, vres);
+ vidx = vbslq_u32(idxMask, vidx_curr, vidx);
+ vres = vmaxq_f32(vres, data);
+ in_ptr_x += y_stride;
+ curr_kernel_index++;
+ }
+ curr_kernel_index += (pool_size_x - pool_end_x);
+ in_ptr_y += z_stride;
+ }
+ // Store result
+ vst1q_f32(reinterpret_cast<float *>(out.ptr()) + x_off, vres);
+ vst1q_u32(reinterpret_cast<uint32_t *>(indices.ptr()) + x_off, vidx);
+ }
+
+ // Left-overs loop
+ for(; x_off < window_end_x; ++x_off)
+ {
+ float res = min_value;
+ uint32_t idx = 0U;
+ const uint8_t *in_ptr_y = in_ptr_n + in_ptr_y_offset + in_ptr_x_offset;
+ uint32_t curr_kernel_index = pool_size_x * pool_start_y;
+ for(int y = pool_start_y; y < pool_end_y; ++y)
+ {
+ const uint8_t *in_ptr_x = in_ptr_y + (x_off * sizeof(float));
+ curr_kernel_index += pool_start_x;
+ for(int x = pool_start_x; x < pool_end_x; ++x)
+ {
+ const float data = *(reinterpret_cast<const float *>(in_ptr_x));
+ if(data > res)
+ {
+ idx = pool_size_x * y + x;
+ res = data;
+ }
+ in_ptr_x += y_stride;
+ curr_kernel_index++;
+ }
+ curr_kernel_index += (pool_size_x - pool_end_x);
+ in_ptr_y += z_stride;
+ }
+
+ // Store result
+ *(reinterpret_cast<float *>(out.ptr()) + x_off) = res;
+ *(reinterpret_cast<uint32_t *>(indices.ptr()) + x_off) = idx;
+ }
+ },
+ out, indices);
}
void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, PoolingLayerInfo &pool_info, const Window &window_src, const Window &window)
{
- if(pool_info.pool_size == Size2D(2, 2) && pool_info.pool_type == PoolingType::MAX && dst1)
+ if((pool_info.pool_type == PoolingType::MAX) && pool_info.use_kernel_indices && (dst1 != nullptr))
+ {
+ poolingMxN_fp32_neon_nhwc_kernel_indices(src, dst0, dst1, pool_info, window);
+ }
+ else if(pool_info.pool_size == Size2D(2, 2) && pool_info.pool_type == PoolingType::MAX && !pool_info.pad_stride_info.has_padding() && (dst1 != nullptr))
{
pooling2_f32_maxpool_indices(src, dst0, dst1, pool_info, window_src, window);
}
diff --git a/src/cpu/operators/CpuPool2d.cpp b/src/cpu/operators/CpuPool2d.cpp
index eabbd5e0cc..722cd36ee5 100644
--- a/src/cpu/operators/CpuPool2d.cpp
+++ b/src/cpu/operators/CpuPool2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,7 +27,6 @@
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/NEScheduler.h"
#include "src/common/utils/Log.h"
-#include "src/core/NEON/kernels/NEFillBorderKernel.h"
#include "src/cpu/kernels/CpuPool2dKernel.h"
#include "src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.h"
@@ -41,6 +40,7 @@ CpuPool2d::CpuPool2d()
: _pooling_layer_kernel(),
_asm_glue(),
_is_global_pooling_layer(false),
+ _use_kernel_indices(false),
_data_layout(DataLayout::NCHW),
_aux_mem(1)
{
@@ -62,6 +62,7 @@ void CpuPool2d::configure(ITensorInfo *src, ITensorInfo *dst, const PoolingLayer
const unsigned int idx_width = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::WIDTH);
const unsigned int idx_height = get_data_layout_dimension_index(_data_layout, DataLayoutDimension::HEIGHT);
_is_global_pooling_layer = (src->dimension(idx_width) == pool_info.pool_size.width) && (src->dimension(idx_height) == pool_info.pool_size.height);
+ _use_kernel_indices = pool_info.use_kernel_indices;
if(run_optimised)
{
@@ -117,7 +118,7 @@ void CpuPool2d::run(ITensorPack &tensors)
NEScheduler::get().schedule_op(_pooling_layer_kernel.get(), _is_global_pooling_layer ? Window::DimZ : Window::DimY, _pooling_layer_kernel->window(), tensors);
break;
case DataLayout::NHWC:
- NEScheduler::get().schedule_op(_pooling_layer_kernel.get(), Window::DimX, _pooling_layer_kernel->window(), tensors);
+ NEScheduler::get().schedule_op(_pooling_layer_kernel.get(), (_use_kernel_indices ? Window::DimY : Window::DimX), _pooling_layer_kernel->window(), tensors);
break;
default:
ARM_COMPUTE_ERROR("Data layout not supported");
diff --git a/src/cpu/operators/CpuPool2d.h b/src/cpu/operators/CpuPool2d.h
index 02c2609a6a..5c571db88a 100644
--- a/src/cpu/operators/CpuPool2d.h
+++ b/src/cpu/operators/CpuPool2d.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021 Arm Limited.
+ * Copyright (c) 2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -76,6 +76,7 @@ private:
std::unique_ptr<INEKernel> _asm_glue;
bool _is_global_pooling_layer;
+ bool _use_kernel_indices;
DataLayout _data_layout;
experimental::MemoryRequirements _aux_mem{};
};
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index f17021671c..9fe28c7acf 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -181,11 +181,11 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::Datase
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(PoolingLayerDatasetFPIndicesSmall,
framework::dataset::make("DataType",
DataType::F32))),
- pool_data_layout_dataset))
+ pool_data_layout_dataset),framework::dataset::make("UseKernelIndices", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -250,11 +250,11 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLMixedPrecesionPoolingLayerFixture<half>, fram
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16);
}
-FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(),
+FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
combine(PoolingLayerDatasetFPIndicesSmall,
framework::dataset::make("DataType",
DataType::F16))),
- pool_data_layout_dataset))
+ pool_data_layout_dataset), framework::dataset::make("UseKernelIndices", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
diff --git a/tests/validation/NEON/PoolingLayer.cpp b/tests/validation/NEON/PoolingLayer.cpp
index 457610f2bd..3acd453ea2 100644
--- a/tests/validation/NEON/PoolingLayer.cpp
+++ b/tests/validation/NEON/PoolingLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,7 +24,6 @@
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEPoolingLayer.h"
#include "arm_compute/runtime/Tensor.h"
-#include "arm_compute/runtime/TensorAllocator.h"
#include "tests/NEON/Accessor.h"
#include "tests/PaddingCalculator.h"
#include "tests/datasets/PoolingLayerDataset.h"
@@ -150,13 +149,26 @@ using NESpecialPoolingLayerFixture = SpecialPoolingLayerValidationFixture<Tensor
const auto PoolingLayerIndicesDatasetFPSmall = combine(combine(combine(framework::dataset::make("PoolType", { PoolingType::MAX }), framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0) })),
framework::dataset::make("ExcludePadding", { true, false }));
-
+const auto PoolingLayerKernelIndicesDatasetFPSmall = combine(combine(combine(framework::dataset::make("PoolType", { PoolingType::MAX }), framework::dataset::make("PoolingSize", { Size2D(2, 2), Size2D(3, 3), Size2D(7, 7) })),
+ framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 1, 1, 1) })),
+ framework::dataset::make("ExcludePadding", { false }));
TEST_SUITE(Float)
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
- framework::dataset::make("DataType",
- DataType::F32))),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerIndicesDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F32))),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("UseKernelIndices", { false })))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_f32);
+ validate(Accessor(_target_indices), _ref_indices);
+}
+FIXTURE_DATA_TEST_CASE(RunKernelIndices, NEPoolingLayerIndicesFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerKernelIndicesDatasetFPSmall,
+ framework::dataset::make("DataType", DataType::F32))),
+ framework::dataset::make("DataLayout", { DataLayout::NHWC })),
+ framework::dataset::make("UseKernelIndices", { true })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f32);
@@ -208,10 +220,12 @@ TEST_SUITE_END() // FP32
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallNoneUnitShapes(), combine(PoolingLayerIndicesDatasetFPSmall,
- framework::dataset::make("DataType",
- DataType::F16))),
- framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })))
+FIXTURE_DATA_TEST_CASE(RunIndices, NEPoolingLayerIndicesFixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallNoneUnitShapes(),
+ combine(PoolingLayerIndicesDatasetFPSmall,
+ framework::dataset::make("DataType",
+ DataType::F16))),
+ framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })),
+ framework::dataset::make("UseKernelIndices", { false })))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_f16);
diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h
index 6e9edfbb5d..f34aaa8bfa 100644
--- a/tests/validation/fixtures/PoolingLayerFixture.h
+++ b/tests/validation/fixtures/PoolingLayerFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -162,9 +162,10 @@ class PoolingLayerIndicesValidationFixture : public PoolingLayerValidationGeneri
{
public:
template <typename...>
- void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout)
+ void setup(TensorShape shape, PoolingType pool_type, Size2D pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, DataLayout data_layout, bool use_kernel_indices)
{
- PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding),
+ PoolingLayerValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, PoolingLayerInfo(pool_type, pool_size, data_layout, pad_stride_info, exclude_padding, false,
+ true, use_kernel_indices),
data_type, data_layout, true);
}
};
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index 6a358ced0c..bf7bd0c1df 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -83,20 +83,28 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
{
int wstart = w * pool_stride_x - pad_left;
int hstart = h * pool_stride_y - pad_top;
+
+ // Used to calculate kernel indices
+ int kh_start = std::max(0, -hstart);
+ int kw_start = std::max(0, -wstart);
+ int max_ker_index{ 0 };
+
int wend = std::min(wstart + pool_size_x, w_src);
int hend = std::min(hstart + pool_size_y, h_src);
wstart = std::max(wstart, 0);
hstart = std::max(hstart, 0);
auto max_val = info.use_inf_as_limit ? -std::numeric_limits<ACC_T>::infinity() : std::numeric_limits<ACC_T>::lowest();
int max_index{ 0 };
- for(int y = hstart; y < hend; ++y)
+
+ for(int y = hstart, kh = kh_start; y < hend; ++y, ++kh)
{
- for(int x = wstart; x < wend; ++x)
+ for(int x = wstart, kw = kw_start; x < wend; ++x, ++kw)
{
const auto val = static_cast<ACC_T>(src[b * z_src * h_src * w_src + r * h_src * w_src + y * w_src + x]);
if(val > max_val)
{
- max_val = val;
+ max_val = val;
+ max_ker_index = pool_size_x * (kh) + (kw);
if(data_layout == DataLayout::NCHW)
{
max_index = coord2index(src.shape(), Coordinates(x, y, r, 0));
@@ -112,7 +120,7 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
dst[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = static_cast<T>(max_val);
if(indices)
{
- (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = max_index;
+ (*indices)[b * z_dst * h_dst * w_dst + r * h_dst * w_dst + h * w_dst + w] = (info.use_kernel_indices) ? max_ker_index : max_index;
}
}
}
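
Note on the index convention (illustrative, derived from the reference implementation above): with use_kernel_indices = true the stored value is kh * pool_size_x + kw, counted over the full pool window. For a 3x3 MAX pool whose maximum sits at kernel row 2, column 1, the indices tensor receives 2 * 3 + 1 = 7, regardless of where the window sits in the source; with use_kernel_indices = false the same element is instead reported as its linear offset within the source tensor.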