aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arm_compute/core/Types.h21
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h7
-rw-r--r--src/cpu/kernels/pool2d/neon/fp16.cpp14
-rw-r--r--src/cpu/kernels/pool2d/neon/fp32.cpp12
-rw-r--r--src/cpu/kernels/pool2d/neon/list.h6
-rw-r--r--src/cpu/kernels/pool2d/neon/nchw/all.cpp42
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp11
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp13
-rw-r--r--src/gpu/cl/kernels/ClPool2dKernel.cpp5
-rw-r--r--tests/validation/reference/PoolingLayer.cpp2
10 files changed, 86 insertions, 47 deletions
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 71ec926483..e8eed67c58 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1209,7 +1209,8 @@ struct PoolingLayerInfo
pad_stride_info(PadStrideInfo()),
exclude_padding(false),
is_global_pooling(false),
- fp_mixed_precision(false)
+ fp_mixed_precision(false),
+ use_inf_as_limit(true)
{
}
/** Constructor
@@ -1222,20 +1223,23 @@ struct PoolingLayerInfo
* True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] use_inf_as_limit (Optional) Use -infinity as the initial value for max pooling instead of the lowest finite value of the data type. Defaults to true.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
unsigned int pool_size,
DataLayout data_layout,
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
- bool fp_mixed_precision = false)
+ bool fp_mixed_precision = false,
+ bool use_inf_as_limit = true)
: pool_type(pool_type),
pool_size(Size2D(pool_size, pool_size)),
data_layout(data_layout),
pad_stride_info(pad_stride_info),
exclude_padding(exclude_padding),
is_global_pooling(false),
- fp_mixed_precision(fp_mixed_precision)
+ fp_mixed_precision(fp_mixed_precision),
+ use_inf_as_limit(use_inf_as_limit)
{
}
@@ -1249,20 +1253,23 @@ struct PoolingLayerInfo
* True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area).
* Defaults to false;
* @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy.
+ * @param[in] use_inf_as_limit (Optional) Use -infinity as the initial value for max pooling instead of the lowest finite value of the data type. Defaults to true.
*/
explicit PoolingLayerInfo(PoolingType pool_type,
Size2D pool_size,
DataLayout data_layout,
PadStrideInfo pad_stride_info = PadStrideInfo(),
bool exclude_padding = false,
- bool fp_mixed_precision = false)
+ bool fp_mixed_precision = false,
+ bool use_inf_as_limit = true)
: pool_type(pool_type),
pool_size(pool_size),
data_layout(data_layout),
pad_stride_info(pad_stride_info),
exclude_padding(exclude_padding),
is_global_pooling(false),
- fp_mixed_precision(fp_mixed_precision)
+ fp_mixed_precision(fp_mixed_precision),
+ use_inf_as_limit(use_inf_as_limit)
{
}
@@ -1280,7 +1287,8 @@ struct PoolingLayerInfo
pad_stride_info(PadStrideInfo(1, 1, 0, 0)),
exclude_padding(false),
is_global_pooling(true),
- fp_mixed_precision(false)
+ fp_mixed_precision(false),
+ use_inf_as_limit(true)
{
}
@@ -1291,6 +1299,7 @@ struct PoolingLayerInfo
bool exclude_padding;
bool is_global_pooling;
bool fp_mixed_precision;
+ bool use_inf_as_limit;
};
/** Pooling Layer Information struct*/
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h
index 16d88af570..6e1bcdbbfd 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h
@@ -48,8 +48,15 @@ public:
/* Set mixed_precision */
GpuPool2dSettings &mixed_precision(bool mixed_precision);
+ /* Get the flag selecting -infinity, rather than the lowest finite value, as the initial value for max pooling */
+ bool use_inf_as_limit() const;
+
+ /* Set the flag selecting -infinity, rather than the lowest finite value, as the initial value for max pooling */
+ GpuPool2dSettings use_inf_as_limit(bool use_inf_as_limit);
+
private:
bool _mixed_precision{ false };
+ bool _use_inf_as_limit{ true };
};
/** Operator interface. */
diff --git a/src/cpu/kernels/pool2d/neon/fp16.cpp b/src/cpu/kernels/pool2d/neon/fp16.cpp
index 13e21b1e70..4e15d3ad3f 100644
--- a/src/cpu/kernels/pool2d/neon/fp16.cpp
+++ b/src/cpu/kernels/pool2d/neon/fp16.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -176,10 +176,10 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
int pool_stride_x = 0;
int pool_stride_y = 0;
std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
- const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
- const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
-
- float16x8_t vres;
+ const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
+ const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
+ const float16_t min_value = get_initial_min<half_float::half>(pool_info.use_inf_as_limit);
+ float16x8_t vres;
execute_window_loop(window_out, [&](const Coordinates & id)
{
@@ -228,7 +228,7 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
}
else
{
- vres = vdupq_n_f16(-std::numeric_limits<float>::infinity());
+ vres = vdupq_n_f16(min_value);
for(int y = pool_start_y; y < pool_end_y; ++y)
{
@@ -287,7 +287,7 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
}
else
{
- res = -std::numeric_limits<float>::infinity();
+ res = min_value;
for(int y = pool_start_y; y < pool_end_y; ++y)
{
for(int x = pool_start_x; x < pool_end_x; ++x)
diff --git a/src/cpu/kernels/pool2d/neon/fp32.cpp b/src/cpu/kernels/pool2d/neon/fp32.cpp
index 1ed199be8d..018f62b8a8 100644
--- a/src/cpu/kernels/pool2d/neon/fp32.cpp
+++ b/src/cpu/kernels/pool2d/neon/fp32.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -170,9 +170,9 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
int pool_stride_x = 0;
int pool_stride_y = 0;
std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride();
- const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
- const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
-
+ const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right);
+ const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
+ const float min_value = get_initial_min<float>(pool_info.use_inf_as_limit);
float32x4_t vres;
execute_window_loop(window_out, [&](const Coordinates & id)
@@ -223,7 +223,7 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
}
else
{
- vres = vdupq_n_f32(-std::numeric_limits<float>::infinity());
+ vres = vdupq_n_f32(min_value);
for(int y = pool_start_y; y < pool_end_y; ++y)
{
for(int x = pool_start_x; x < pool_end_x; ++x)
@@ -285,7 +285,7 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1,
}
else
{
- res = -std::numeric_limits<float>::infinity();
+ res = min_value;
for(int y = pool_start_y; y < pool_end_y; ++y)
{
for(int x = pool_start_x; x < pool_end_x; ++x)
diff --git a/src/cpu/kernels/pool2d/neon/list.h b/src/cpu/kernels/pool2d/neon/list.h
index b79323213e..eb141d6fcd 100644
--- a/src/cpu/kernels/pool2d/neon/list.h
+++ b/src/cpu/kernels/pool2d/neon/list.h
@@ -59,6 +59,12 @@ DECLARE_POOLING_KERNEL(poolingMxN_fp32_neon_nchw);
#undef DECLARE_POOLING_KERNEL
template <typename T>
+T get_initial_min(bool use_inf_as_limit) // Returns the value used to seed a running maximum: -infinity when use_inf_as_limit is true, otherwise std::numeric_limits<T>::lowest().
+{
+ return use_inf_as_limit ? -std::numeric_limits<T>::infinity() : std::numeric_limits<T>::lowest(); // NOTE(review): infinity() is only meaningful when std::numeric_limits<T>::has_infinity is true; the callers in this patch instantiate with float, half_float::half and a template parameter T - confirm T is never an integer type.
+}
+
+template <typename T>
inline uint32_t offset_no_padding(uint32_t padded_offset, const Coordinates &id, const ITensorInfo &info, int pool_stride_x, int pool_stride_y, DataLayout data_layout)
{
const int pad_left = info.padding().left;
diff --git a/src/cpu/kernels/pool2d/neon/nchw/all.cpp b/src/cpu/kernels/pool2d/neon/nchw/all.cpp
index 77f63c6f77..c342b96426 100644
--- a/src/cpu/kernels/pool2d/neon/nchw/all.cpp
+++ b/src/cpu/kernels/pool2d/neon/nchw/all.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2021-2022 Arm Limited.
+ * Copyright (c) 2021-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -93,7 +93,7 @@ void pooling3_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
const int src_h = src->info()->dimension(1);
const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
- const float16_t fp16_min = -std::numeric_limits<half_float::half>::infinity();
+ const float16_t fp16_min = get_initial_min<half_float::half>(pool_info.use_inf_as_limit);
const float16_t fill_value = (pool_info.pool_type == PoolingType::MAX) ? fp16_min : 0.f;
const unsigned char *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
const unsigned char *const src_middle_ptr = src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
@@ -125,7 +125,7 @@ void pooling3_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
{
// Calculate scale
const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ pool_stride_y);
const float16x4_t scale_v = vdup_n_f16(scale);
// Perform pooling
const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data);
@@ -203,7 +203,7 @@ void pooling2_nchw_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *d
const int pad_left = src->info()->padding().left;
const int pad_right = src->info()->padding().right;
const int in_stride_y = static_cast<int>(src->info()->strides_in_bytes().y());
- constexpr T float_min = -std::numeric_limits<float>::infinity();
+ const T float_min = get_initial_min<T>(pool_info.use_inf_as_limit);
const T fill_value = (pool_info.pool_type == PoolingType::MAX) ? float_min : 0.f;
execute_window_loop(window, [&](const Coordinates & id)
@@ -259,7 +259,7 @@ void pooling2_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
const int src_h = src->info()->dimension(1);
const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
- const float16_t fp16_min = -std::numeric_limits<half_float::half>::infinity();
+ const float16_t fp16_min = get_initial_min<half_float::half>(pool_info.use_inf_as_limit);
const float16_t fill_value = (pool_info.pool_type == PoolingType::MAX) ? fp16_min : 0.0f;
const unsigned char *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
@@ -289,7 +289,7 @@ void pooling2_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
if(pool_info.pool_type != PoolingType::MAX)
{
const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ pool_stride_y);
const float16x4_t scale_v = vdup_n_f16(scale);
const float16x4_t sum_data = vadd_f16(top_data, bottom_data);
@@ -333,7 +333,7 @@ void poolingMxN_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1,
const int src_h = src->info()->dimension(1);
const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
- const float16_t fp16_min = -std::numeric_limits<half_float::half>::infinity();
+ const float16_t fp16_min = get_initial_min<half_float::half>(pool_info.use_inf_as_limit);
const float16_t fill_value = (pool_info.pool_type == PoolingType::MAX) ? fp16_min : 0.0f;
execute_window_loop(window, [&](const Coordinates & id)
@@ -344,7 +344,7 @@ void poolingMxN_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1,
{
// Calculate scale
const float16_t scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ pool_stride_y);
// Perform pooling
for(int y = 0; y < pool_size_y; ++y)
@@ -421,7 +421,8 @@ void poolingMxN_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1,
const int src_h = src->info()->dimension(1);
const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
- const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits<float>::infinity() : 0.0f;
+ const float min_value = get_initial_min<float>(pool_info.use_inf_as_limit);
+ const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f;
execute_window_loop(window, [&](const Coordinates & id)
{
@@ -431,7 +432,7 @@ void poolingMxN_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1,
{
// Calculate scale
const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h,
- pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
+ pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y);
// Perform pooling
for(int y = 0; y < pool_size_y; ++y)
@@ -459,7 +460,7 @@ void poolingMxN_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1,
}
else // if max pooling
{
- res = -std::numeric_limits<float>::infinity();
+ res = min_value;
for(int y = 0; y < pool_size_y; ++y)
{
@@ -510,7 +511,8 @@ void pooling2_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
const int src_h = src->info()->dimension(1);
const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
- const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits<float>::infinity() : 0.0f;
+ const float min_value = get_initial_min<float>(pool_info.use_inf_as_limit);
+ const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f;
const uint8_t *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
const uint8_t *const src_bottom_ptr = src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
@@ -539,7 +541,7 @@ void pooling2_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
{
// Calculate scale
float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ pool_stride_y);
const float32x2_t scale_v = vdup_n_f32(scale);
// Perform pooling
@@ -584,7 +586,8 @@ void pooling3_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
const int src_h = src->info()->dimension(1);
const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
- const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits<float>::infinity() : 0.0f;
+ const float min_value = get_initial_min<float>(pool_info.use_inf_as_limit);
+ const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f;
const uint8_t *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top)));
const uint8_t *const src_middle_ptr = src->ptr_to_element(Coordinates(-static_cast<int>(pool_pad_left), -static_cast<int>(pool_pad_top) + 1));
@@ -619,7 +622,7 @@ void pooling3_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
{
// Calculate scale
float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ pool_stride_y);
const float32x2_t scale_v = vdup_n_f32(scale);
// Perform pooling
@@ -630,7 +633,7 @@ void pooling3_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
else
{
const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data);
- res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::infinity(), max_data, 3)), vget_low_f32(max_data));
+ res = vpmax_f32(vget_high_f32(vsetq_lane_f32(min_value, max_data, 3)), vget_low_f32(max_data));
res = vpmax_f32(res, res);
}
final_res = vget_lane_f32(res, 0);
@@ -665,7 +668,8 @@ void pooling7_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
const int src_h = src->info()->dimension(1);
const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right);
const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom);
- const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits<float>::infinity() : 0.0f;
+ const float min_value = get_initial_min<float>(pool_info.use_inf_as_limit);
+ const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f;
std::array<const uint8_t *, pool_size> src_ptrs{ {} };
for(int i = 0; i < pool_size; ++i)
@@ -688,7 +692,7 @@ void pooling7_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
{
// Calculate scale
float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x,
- pool_stride_y);
+ pool_stride_y);
const float32x2_t scale_v = vdup_n_f32(scale);
// Get power of 2 in case of l2 pooling
@@ -728,7 +732,7 @@ void pooling7_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P
float32x4x2_t temp = read_8_boundary_aware(src_h, src_w, pool_pad_left, pool_pad_top, x_val, y_val, in_ptr, fill_value);
data = vmax2q_f32(data, temp);
}
- res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits<float>::infinity(), data.val[1], 3)), vget_low_f32(data.val[1]));
+ res = vpmax_f32(vget_high_f32(vsetq_lane_f32(min_value, data.val[1], 3)), vget_low_f32(data.val[1]));
res = vpmax_f32(res, vpmax_f32(vget_high_f32(data.val[0]), vget_low_f32(data.val[0])));
res = vpmax_f32(res, res);
}
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
index a07ad00155..c602f45164 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
@@ -60,6 +60,17 @@ bool GpuPool2dSettings::mixed_precision() const
return _mixed_precision;
}
+GpuPool2dSettings GpuPool2dSettings::use_inf_as_limit(bool use_inf_as_limit) // Setter: stores the flag in _use_inf_as_limit and returns the settings object by value.
+{
+ _use_inf_as_limit = use_inf_as_limit;
+ return *this; // NOTE(review): the return type is by value, so callers receive a copy; the sibling setter mixed_precision(bool) is declared returning GpuPool2dSettings & - confirm whether a reference was intended here too.
+}
+
+bool GpuPool2dSettings::use_inf_as_limit() const // Getter: returns the stored _use_inf_as_limit flag.
+{
+ return _use_inf_as_limit;
+}
+
Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch,
const ITensorInfo *src,
const ITensorInfo *dst,
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
index 5df4438afe..bbff8ba98f 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
@@ -390,11 +390,12 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["meta_kernel_id"] = id();
// Retrieve relevant data
- const auto padding = _attributes.pad();
- const auto stride = _attributes.stride();
- const auto pool_size = _attributes.pool_size();
- const auto data_type = _src->data_type();
- const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
+ const auto padding = _attributes.pad();
+ const auto stride = _attributes.stride();
+ const auto pool_size = _attributes.pool_size();
+ const auto data_type = _src->data_type();
+ const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
+ const std::string max_initial_value = _settings.use_inf_as_limit() ? "(-INFINITY)" : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
// pool specific
lut["STRIDE_X"] = stride.x();
@@ -409,7 +410,7 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
lut["SRC_WIDTH"] = _src->dimension(width_idx);
lut["SRC_HEIGHT"] = _src->dimension(height_idx);
- lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? float_to_string_with_full_precision(std::numeric_limits<float>::lowest()) : std::string("0");
+ lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0");
// Tensor specific data
lut["DST_HEIGHT"] = _dst->dimension(height_idx);
diff --git a/src/gpu/cl/kernels/ClPool2dKernel.cpp b/src/gpu/cl/kernels/ClPool2dKernel.cpp
index 2c98c5940f..83bc6bb442 100644
--- a/src/gpu/cl/kernels/ClPool2dKernel.cpp
+++ b/src/gpu/cl/kernels/ClPool2dKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2021 Arm Limited.
+ * Copyright (c) 2017-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -182,7 +182,8 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI
}
else
{
- build_opts.add_option("-DINITIAL_VALUE=" + float_to_string_with_full_precision(std::numeric_limits<float>::lowest()));
+ std::string initial_value = pool_info.use_inf_as_limit ? "(-INFINITY)" : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
+ build_opts.add_option("-DINITIAL_VALUE=" + initial_value);
}
}
else
diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp
index 378d91d829..6a358ced0c 100644
--- a/tests/validation/reference/PoolingLayer.cpp
+++ b/tests/validation/reference/PoolingLayer.cpp
@@ -87,7 +87,7 @@ SimpleTensor<T> pooling_layer_internal(const SimpleTensor<T> &src, const Pooling
int hend = std::min(hstart + pool_size_y, h_src);
wstart = std::max(wstart, 0);
hstart = std::max(hstart, 0);
- auto max_val = -std::numeric_limits<ACC_T>::infinity();
+ auto max_val = info.use_inf_as_limit ? -std::numeric_limits<ACC_T>::infinity() : std::numeric_limits<ACC_T>::lowest();
int max_index{ 0 };
for(int y = hstart; y < hend; ++y)
{