From 227db8db83cd85d8704a8edbd4e8c88af0dd1f84 Mon Sep 17 00:00:00 2001 From: Adnan AlSinan Date: Tue, 14 Feb 2023 14:24:09 +0000 Subject: Add an option to use lowest for max-pooling - Add a parameter in PoolingLayerInfo class to pick which value to use as min for max-pooling. Resolves: [ONCPUML-1166] Signed-off-by: Adnan AlSinan Change-Id: I34e1cccc15176bbf31523c61e99f3188ddca23e1 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8989 Comments-Addressed: Arm Jenkins Reviewed-by: SiCong Li Reviewed-by: Pablo Marquez Tello Tested-by: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/core/Types.h | 21 +++++++---- .../sketch/gpu/operators/GpuPool2d.h | 7 ++++ src/cpu/kernels/pool2d/neon/fp16.cpp | 14 ++++---- src/cpu/kernels/pool2d/neon/fp32.cpp | 12 +++---- src/cpu/kernels/pool2d/neon/list.h | 6 ++++ src/cpu/kernels/pool2d/neon/nchw/all.cpp | 42 ++++++++++++---------- .../sketch/gpu/operators/GpuPool2d.cpp | 11 ++++++ .../gpu/template_writer/cl/ClTemplatePool2d.cpp | 13 +++---- src/gpu/cl/kernels/ClPool2dKernel.cpp | 5 +-- tests/validation/reference/PoolingLayer.cpp | 2 +- 10 files changed, 86 insertions(+), 47 deletions(-) diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 71ec926483..e8eed67c58 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1209,7 +1209,8 @@ struct PoolingLayerInfo pad_stride_info(PadStrideInfo()), exclude_padding(false), is_global_pooling(false), - fp_mixed_precision(false) + fp_mixed_precision(false), + use_inf_as_limit(true) { } /** Constructor @@ -1222,20 +1223,23 @@ struct PoolingLayerInfo * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). * Defaults to false; * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type. */ explicit PoolingLayerInfo(PoolingType pool_type, unsigned int pool_size, DataLayout data_layout, PadStrideInfo pad_stride_info = PadStrideInfo(), bool exclude_padding = false, - bool fp_mixed_precision = false) + bool fp_mixed_precision = false, + bool use_inf_as_limit = true) : pool_type(pool_type), pool_size(Size2D(pool_size, pool_size)), data_layout(data_layout), pad_stride_info(pad_stride_info), exclude_padding(exclude_padding), is_global_pooling(false), - fp_mixed_precision(fp_mixed_precision) + fp_mixed_precision(fp_mixed_precision), + use_inf_as_limit(use_inf_as_limit) { } @@ -1249,20 +1253,23 @@ struct PoolingLayerInfo * True will exclude padding while false will not (Used in AVG/L2 pooling to determine the pooling area). * Defaults to false; * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy. + * @param[in] use_inf_as_limit (Optional) Use inf to represent the limits of datatypes range, instead of using "lowest" property of the data type. */ explicit PoolingLayerInfo(PoolingType pool_type, Size2D pool_size, DataLayout data_layout, PadStrideInfo pad_stride_info = PadStrideInfo(), bool exclude_padding = false, - bool fp_mixed_precision = false) + bool fp_mixed_precision = false, + bool use_inf_as_limit = true) : pool_type(pool_type), pool_size(pool_size), data_layout(data_layout), pad_stride_info(pad_stride_info), exclude_padding(exclude_padding), is_global_pooling(false), - fp_mixed_precision(fp_mixed_precision) + fp_mixed_precision(fp_mixed_precision), + use_inf_as_limit(use_inf_as_limit) { } @@ -1280,7 +1287,8 @@ struct PoolingLayerInfo pad_stride_info(PadStrideInfo(1, 1, 0, 0)), exclude_padding(false), is_global_pooling(true), - fp_mixed_precision(false) + fp_mixed_precision(false), + use_inf_as_limit(true) { } @@ -1291,6 +1299,7 @@ struct PoolingLayerInfo bool exclude_padding; bool is_global_pooling; bool fp_mixed_precision; + bool use_inf_as_limit; }; /** Pooling Layer Information struct*/ diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h index 16d88af570..6e1bcdbbfd 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h @@ -48,8 +48,15 @@ public: /* Set mixed_precision */ GpuPool2dSettings &mixed_precision(bool mixed_precision); + /* Get using -infinity as limit flag */ + bool use_inf_as_limit() const; + + /* Set using -infinity as limit flag */ + GpuPool2dSettings use_inf_as_limit(bool use_inf_as_limit); + private: bool _mixed_precision{ false }; + bool _use_inf_as_limit{ true }; }; /** Operator interface. */ diff --git a/src/cpu/kernels/pool2d/neon/fp16.cpp b/src/cpu/kernels/pool2d/neon/fp16.cpp index 13e21b1e70..4e15d3ad3f 100644 --- a/src/cpu/kernels/pool2d/neon/fp16.cpp +++ b/src/cpu/kernels/pool2d/neon/fp16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -176,10 +176,10 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, int pool_stride_x = 0; int pool_stride_y = 0; std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride(); - const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right); - const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - - float16x8_t vres; + const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right); + const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom); + const float16_t min_value = get_initial_min(pool_info.use_inf_as_limit); + float16x8_t vres; execute_window_loop(window_out, [&](const Coordinates & id) { @@ -228,7 +228,7 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, } else { - vres = vdupq_n_f16(-std::numeric_limits::infinity()); + vres = vdupq_n_f16(min_value); for(int y = pool_start_y; y < pool_end_y; ++y) { @@ -287,7 +287,7 @@ void poolingMxN_fp16_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, } else { - res = -std::numeric_limits::infinity(); + res = min_value; for(int y = pool_start_y; y < pool_end_y; ++y) { for(int x = pool_start_x; x < pool_end_x; ++x) diff --git a/src/cpu/kernels/pool2d/neon/fp32.cpp b/src/cpu/kernels/pool2d/neon/fp32.cpp index 1ed199be8d..018f62b8a8 100644 --- a/src/cpu/kernels/pool2d/neon/fp32.cpp +++ b/src/cpu/kernels/pool2d/neon/fp32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -170,9 +170,9 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, int pool_stride_x = 0; int pool_stride_y = 0; std::tie(pool_stride_x, pool_stride_y) = pool_info.pad_stride_info.stride(); - const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right); - const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - + const int upper_bound_w = src->info()->dimension(1) + (pool_info.exclude_padding ? 0 : pool_pad_right); + const int upper_bound_h = src->info()->dimension(2) + (pool_info.exclude_padding ? 0 : pool_pad_bottom); + const float min_value = get_initial_min(pool_info.use_inf_as_limit); float32x4_t vres; execute_window_loop(window_out, [&](const Coordinates & id) @@ -223,7 +223,7 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, } else { - vres = vdupq_n_f32(-std::numeric_limits::infinity()); + vres = vdupq_n_f32(min_value); for(int y = pool_start_y; y < pool_end_y; ++y) { for(int x = pool_start_x; x < pool_end_x; ++x) @@ -285,7 +285,7 @@ void poolingMxN_fp32_neon_nhwc(const ITensor *src, ITensor *dst0, ITensor *dst1, } else { - res = -std::numeric_limits::infinity(); + res = min_value; for(int y = pool_start_y; y < pool_end_y; ++y) { for(int x = pool_start_x; x < pool_end_x; ++x) diff --git a/src/cpu/kernels/pool2d/neon/list.h b/src/cpu/kernels/pool2d/neon/list.h index b79323213e..eb141d6fcd 100644 --- a/src/cpu/kernels/pool2d/neon/list.h +++ b/src/cpu/kernels/pool2d/neon/list.h @@ -58,6 +58,12 @@ DECLARE_POOLING_KERNEL(poolingMxN_fp32_neon_nchw); #undef DECLARE_POOLING_KERNEL +template +T get_initial_min(bool use_inf_as_limit) +{ + return use_inf_as_limit ? -std::numeric_limits::infinity() : std::numeric_limits::lowest(); +} + template inline uint32_t offset_no_padding(uint32_t padded_offset, const Coordinates &id, const ITensorInfo &info, int pool_stride_x, int pool_stride_y, DataLayout data_layout) { diff --git a/src/cpu/kernels/pool2d/neon/nchw/all.cpp b/src/cpu/kernels/pool2d/neon/nchw/all.cpp index 77f63c6f77..c342b96426 100644 --- a/src/cpu/kernels/pool2d/neon/nchw/all.cpp +++ b/src/cpu/kernels/pool2d/neon/nchw/all.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -93,7 +93,7 @@ void pooling3_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P const int src_h = src->info()->dimension(1); const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - const float16_t fp16_min = -std::numeric_limits::infinity(); + const float16_t fp16_min = get_initial_min(pool_info.use_inf_as_limit); const float16_t fill_value = (pool_info.pool_type == PoolingType::MAX) ? fp16_min : 0.f; const unsigned char *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top))); const unsigned char *const src_middle_ptr = src->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top) + 1)); @@ -125,7 +125,7 @@ void pooling3_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P { // Calculate scale const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, - pool_stride_y); + pool_stride_y); const float16x4_t scale_v = vdup_n_f16(scale); // Perform pooling const float16x4_t sum_data = vadd_f16(vadd_f16(top_data, bottom_data), middle_data); @@ -203,7 +203,7 @@ void pooling2_nchw_maxpool_indices(const ITensor *src, ITensor *dst0, ITensor *d const int pad_left = src->info()->padding().left; const int pad_right = src->info()->padding().right; const int in_stride_y = static_cast(src->info()->strides_in_bytes().y()); - constexpr T float_min = -std::numeric_limits::infinity(); + const T float_min = get_initial_min(pool_info.use_inf_as_limit); const T fill_value = (pool_info.pool_type == PoolingType::MAX) ? float_min : 0.f; execute_window_loop(window, [&](const Coordinates & id) @@ -259,7 +259,7 @@ void pooling2_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P const int src_h = src->info()->dimension(1); const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - const float16_t fp16_min = -std::numeric_limits::infinity(); + const float16_t fp16_min = get_initial_min(pool_info.use_inf_as_limit); const float16_t fill_value = (pool_info.pool_type == PoolingType::MAX) ? fp16_min : 0.0f; const unsigned char *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top))); @@ -289,7 +289,7 @@ void pooling2_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P if(pool_info.pool_type != PoolingType::MAX) { const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, - pool_stride_y); + pool_stride_y); const float16x4_t scale_v = vdup_n_f16(scale); const float16x4_t sum_data = vadd_f16(top_data, bottom_data); @@ -333,7 +333,7 @@ void poolingMxN_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, const int src_h = src->info()->dimension(1); const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - const float16_t fp16_min = -std::numeric_limits::infinity(); + const float16_t fp16_min = get_initial_min(pool_info.use_inf_as_limit); const float16_t fill_value = (pool_info.pool_type == PoolingType::MAX) ? fp16_min : 0.0f; execute_window_loop(window, [&](const Coordinates & id) @@ -344,7 +344,7 @@ void poolingMxN_fp16_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, { // Calculate scale const float16_t scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, - pool_stride_y); + pool_stride_y); // Perform pooling for(int y = 0; y < pool_size_y; ++y) @@ -421,7 +421,8 @@ void poolingMxN_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, const int src_h = src->info()->dimension(1); const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits::infinity() : 0.0f; + const float min_value = get_initial_min(pool_info.use_inf_as_limit); + const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f; execute_window_loop(window, [&](const Coordinates & id) { @@ -431,7 +432,7 @@ void poolingMxN_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, { // Calculate scale const float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size_x, pool_size_y, upper_bound_w, upper_bound_h, - pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y); + pool_pad_left, pool_pad_top, pool_stride_x, pool_stride_y); // Perform pooling for(int y = 0; y < pool_size_y; ++y) @@ -459,7 +460,7 @@ void poolingMxN_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, } else // if max pooling { - res = -std::numeric_limits::infinity(); + res = min_value; for(int y = 0; y < pool_size_y; ++y) { @@ -510,7 +511,8 @@ void pooling2_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P const int src_h = src->info()->dimension(1); const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits::infinity() : 0.0f; + const float min_value = get_initial_min(pool_info.use_inf_as_limit); + const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f; const uint8_t *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top))); const uint8_t *const src_bottom_ptr = src->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top) + 1)); @@ -539,7 +541,7 @@ void pooling2_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P { // Calculate scale float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, - pool_stride_y); + pool_stride_y); const float32x2_t scale_v = vdup_n_f32(scale); // Perform pooling @@ -584,7 +586,8 @@ void pooling3_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P const int src_h = src->info()->dimension(1); const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits::infinity() : 0.0f; + const float min_value = get_initial_min(pool_info.use_inf_as_limit); + const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f; const uint8_t *const src_top_ptr = src->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top))); const uint8_t *const src_middle_ptr = src->ptr_to_element(Coordinates(-static_cast(pool_pad_left), -static_cast(pool_pad_top) + 1)); @@ -619,7 +622,7 @@ void pooling3_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P { // Calculate scale float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, - pool_stride_y); + pool_stride_y); const float32x2_t scale_v = vdup_n_f32(scale); // Perform pooling @@ -630,7 +633,7 @@ void pooling3_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P else { const float32x4_t max_data = vmaxq_f32(vmaxq_f32(top_data, bottom_data), middle_data); - res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits::infinity(), max_data, 3)), vget_low_f32(max_data)); + res = vpmax_f32(vget_high_f32(vsetq_lane_f32(min_value, max_data, 3)), vget_low_f32(max_data)); res = vpmax_f32(res, res); } final_res = vget_lane_f32(res, 0); @@ -665,7 +668,8 @@ void pooling7_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P const int src_h = src->info()->dimension(1); const int upper_bound_w = src_w + (pool_info.exclude_padding ? 0 : pool_pad_right); const int upper_bound_h = src_h + (pool_info.exclude_padding ? 0 : pool_pad_bottom); - const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? -std::numeric_limits::infinity() : 0.0f; + const float min_value = get_initial_min(pool_info.use_inf_as_limit); + const float fill_value = (pool_info.pool_type == PoolingType::MAX) ? min_value : 0.0f; std::array src_ptrs{ {} }; for(int i = 0; i < pool_size; ++i) @@ -688,7 +692,7 @@ void pooling7_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P { // Calculate scale float scale = calculate_avg_scale_pool2d(pool_info.exclude_padding, DataLayout::NCHW, id, pool_size, pool_size, upper_bound_w, upper_bound_h, pool_pad_left, pool_pad_top, pool_stride_x, - pool_stride_y); + pool_stride_y); const float32x2_t scale_v = vdup_n_f32(scale); // Get power of 2 in case of l2 pooling @@ -728,7 +732,7 @@ void pooling7_fp32_neon_nchw(const ITensor *src, ITensor *dst0, ITensor *dst1, P float32x4x2_t temp = read_8_boundary_aware(src_h, src_w, pool_pad_left, pool_pad_top, x_val, y_val, in_ptr, fill_value); data = vmax2q_f32(data, temp); } - res = vpmax_f32(vget_high_f32(vsetq_lane_f32(-std::numeric_limits::infinity(), data.val[1], 3)), vget_low_f32(data.val[1])); + res = vpmax_f32(vget_high_f32(vsetq_lane_f32(min_value, data.val[1], 3)), vget_low_f32(data.val[1])); res = vpmax_f32(res, vpmax_f32(vget_high_f32(data.val[0]), vget_low_f32(data.val[0]))); res = vpmax_f32(res, res); } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp index a07ad00155..c602f45164 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp @@ -60,6 +60,17 @@ bool GpuPool2dSettings::mixed_precision() const return _mixed_precision; } +GpuPool2dSettings GpuPool2dSettings::use_inf_as_limit(bool use_inf_as_limit) +{ + _use_inf_as_limit = use_inf_as_limit; + return *this; +} + +bool GpuPool2dSettings::use_inf_as_limit() const +{ + return _use_inf_as_limit; +} + Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *dst, diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp index 5df4438afe..bbff8ba98f 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp @@ -390,11 +390,12 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const lut["meta_kernel_id"] = id(); // Retrieve relevant data - const auto padding = _attributes.pad(); - const auto stride = _attributes.stride(); - const auto pool_size = _attributes.pool_size(); - const auto data_type = _src->data_type(); - const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX; + const auto padding = _attributes.pad(); + const auto stride = _attributes.stride(); + const auto pool_size = _attributes.pool_size(); + const auto data_type = _src->data_type(); + const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX; + const std::string max_initial_value = _settings.use_inf_as_limit() ? "(-INFINITY)" : float_to_string_with_full_precision(std::numeric_limits::lowest()); // pool specific lut["STRIDE_X"] = stride.x(); @@ -409,7 +410,7 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); lut["SRC_WIDTH"] = _src->dimension(width_idx); lut["SRC_HEIGHT"] = _src->dimension(height_idx); - lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? float_to_string_with_full_precision(std::numeric_limits::lowest()) : std::string("0"); + lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0"); // Tensor specific data lut["DST_HEIGHT"] = _dst->dimension(height_idx); diff --git a/src/gpu/cl/kernels/ClPool2dKernel.cpp b/src/gpu/cl/kernels/ClPool2dKernel.cpp index 2c98c5940f..83bc6bb442 100644 --- a/src/gpu/cl/kernels/ClPool2dKernel.cpp +++ b/src/gpu/cl/kernels/ClPool2dKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -182,7 +182,8 @@ void ClPool2dKernel::configure(const ClCompileContext &compile_context, ITensorI } else { - build_opts.add_option("-DINITIAL_VALUE=" + float_to_string_with_full_precision(std::numeric_limits::lowest())); + std::string initial_value = pool_info.use_inf_as_limit ? "(-INFINITY)" : float_to_string_with_full_precision(std::numeric_limits::lowest()); + build_opts.add_option("-DINITIAL_VALUE=" + initial_value); } } else diff --git a/tests/validation/reference/PoolingLayer.cpp b/tests/validation/reference/PoolingLayer.cpp index 378d91d829..6a358ced0c 100644 --- a/tests/validation/reference/PoolingLayer.cpp +++ b/tests/validation/reference/PoolingLayer.cpp @@ -87,7 +87,7 @@ SimpleTensor pooling_layer_internal(const SimpleTensor &src, const Pooling int hend = std::min(hstart + pool_size_y, h_src); wstart = std::max(wstart, 0); hstart = std::max(hstart, 0); - auto max_val = -std::numeric_limits::infinity(); + auto max_val = info.use_inf_as_limit ? -std::numeric_limits::infinity() : std::numeric_limits::lowest(); int max_index{ 0 }; for(int y = hstart; y < hend; ++y) { -- cgit v1.2.1