From 40471d12a19088df4af6ad80e5c0437d724dd8fa Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Mon, 26 Apr 2021 08:39:28 +0100
Subject: Add optimization for global pooling in pooling_layer.cl

- Simplify the implementation when the pooling size has the same spatial
  dimensions as the input tensor
- Rework the heuristic for F32/F16
- Add tests for validating the global pooling path
- Fix compare_dimensions in validation. The validation failed because
  NCHW and NHWC shapes can have a different number of dimensions
  (e.g. 1,1,2,1 (NCHW) -> 2,1,1,1 (NHWC))

Change-Id: Iba680cb30bf2a5d0952265a4cc9794f368549ca5
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5510
Reviewed-by: Michele Di Giorgio
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 src/core/CL/cl_kernels/pooling_layer.cl     | 157 ++++++++++++++++------------
 src/core/gpu/cl/kernels/ClPoolingKernel.cpp |   4 +-
 tests/validation/CL/PoolingLayer.cpp        | 121 +++++++++++++++++----
 tests/validation/Validation.h               |  18 ++--
 4 files changed, 197 insertions(+), 103 deletions(-)
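Note (illustrative commentary in the notes area git ignores, not part of the patch): when POOL_SIZE_X/POOL_SIZE_Y match SRC_WIDTH/SRC_HEIGHT, every pooling window starts at (0, 0) and covers the whole plane, so the per-window clamping (pool_x_s/pool_x_e/pool_y_s/pool_y_e) drops out and each (batch, channel) pair reduces W*H values in one strided walk. That is what the new global-pooling branch in pooling_layer_MxN_nhwc exploits. A minimal host-side C++ sketch of the same computation for the average case; global_avg_pool_nhwc is a hypothetical name, not a Compute Library API:

    #include <cstddef>
    #include <iostream>
    #include <vector>

    // Global average pooling over an NHWC float tensor: output is N x 1 x 1 x C.
    // Because the window equals the full W x H plane, there is no boundary
    // clamping and the divisor is always w * h, so EXCLUDE_PADDING is moot.
    std::vector<float> global_avg_pool_nhwc(const std::vector<float> &src,
                                            size_t n, size_t h, size_t w, size_t c)
    {
        std::vector<float> dst(n * c, 0.0f);
        for(size_t b = 0; b < n; ++b)
        {
            for(size_t y = 0; y < h; ++y)
            {
                for(size_t x = 0; x < w; ++x)
                {
                    for(size_t ch = 0; ch < c; ++ch)
                    {
                        // Accumulate, mirroring res0 = POOL_OP(res0, data0) in the kernel
                        dst[b * c + ch] += src[((b * h + y) * w + x) * c + ch];
                    }
                }
            }
        }
        const float filter_size = static_cast<float>(h * w); // POOL_SIZE_X * POOL_SIZE_Y
        for(float &v : dst)
        {
            v /= filter_size;
        }
        return dst;
    }

    int main()
    {
        // One 2x2 single-channel image: values 1..4 average to 2.5
        std::vector<float> src = { 1.0f, 2.0f, 3.0f, 4.0f };
        std::cout << global_avg_pool_nhwc(src, 1, 2, 2, 1)[0] << "\n"; // prints 2.5
    }

The kernel below additionally vectorises over VEC_SIZE channels per work-item and advances through the plane via input_stride_y, but the reduction is the same.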
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 00250a08a5..b30145b11e 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -23,6 +23,7 @@
  */
 #include "helpers.h"
 #include "repeat.h"
+#include "tile_helpers.h"
 
 #if defined(POOL_AVG) || defined(POOL_L2)
 #define POOL_OP(x, y) ((x) + (y))
@@ -506,7 +507,7 @@ inline void offset_no_padding_nchw(const Tensor3D *input, uint *offset_top, uint
 #if defined(TENSOR_CHANNEL) && defined(TENSOR_WIDTH) && defined(TENSOR_HEIGHT)
     *offset_top = (uint)((offset_base / sizeof(DATA_TYPE)) % (TENSOR_CHANNEL * TENSOR_WIDTH * TENSOR_HEIGHT));
 #else /* defined(TENSOR_CHANNEL) && defined(TENSOR_WIDTH) && defined(TENSOR_HEIGHT) */
-    *offset_top = (uint)(offset_base / sizeof(DATA_TYPE));
+    *offset_top    = (uint)(offset_base / sizeof(DATA_TYPE));
 #endif /* defined(TENSOR_CHANNEL) && defined(TENSOR_WIDTH) && defined(TENSOR_HEIGHT) */
 
     *offset_bottom = *offset_top + input->stride_y / sizeof(DATA_TYPE) - pad_horiz;
@@ -703,56 +704,78 @@ __kernel void pooling_layer_MxN_nhwc(
 {
     // Note: If C is not multiple of VEC_SIZE, we shift back of VEC_SIZE_LEFTOVER elements to compute the leftover elements for get_global_id(0) == 0
     // Note: If C is less than VEC_SIZE, VEC_SIZE should be SHRINKED to the closest smaller VEC_SIZE. This operation is performed on the host side
-    int offset_c  = max((int)(get_global_id(0) * VEC_SIZE - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE), 0) * sizeof(DATA_TYPE);
-    int idx_out_w = get_global_id(1);
+    int idx_out_c = GET_SPATIAL_IDX(0, VEC_SIZE, VEC_SIZE_LEFTOVER);
+    int idx_out_w = GET_SPATIAL_IDX(1, 1, 0);
 #if DST_BATCH_SIZE != 1
     // If batch size != 1, the batch size dimension is collapsed over the height dimension
-    int idx_out_h = get_global_id(2) % DST_HEIGHT;
-    int idx_out_n = get_global_id(2) / DST_HEIGHT;
-#else //DST_BATCH_SIZE != 1
-    int idx_out_h = get_global_id(2);
+    int idx_out_h = GET_SPATIAL_IDX(2, 1, 0) % DST_HEIGHT;
+    int idx_out_n = GET_SPATIAL_IDX(2, 1, 0) / DST_HEIGHT;
+#else // DST_BATCH_SIZE != 1
+    int idx_out_h = GET_SPATIAL_IDX(2, 1, 0);
     int idx_out_n = 0;
 #endif // DST_BATCH_SIZE != 1
 
-    int idx_in_w = idx_out_w * STRIDE_X - PAD_X;
-    int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y;
+    __global unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes + idx_out_c * sizeof(DATA_TYPE) + idx_out_n * input_stride_w;
 
-    int pool_x_s = max((int)0, -idx_in_w);
-    int pool_x_e = min((int)POOL_SIZE_X, (int)SRC_WIDTH - idx_in_w);
-    int pool_y_s = max((int)0, -idx_in_h);
-    int pool_y_e = min((int)POOL_SIZE_Y, (int)SRC_HEIGHT - idx_in_h);
+    __global unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes + idx_out_c * sizeof(DATA_TYPE) + idx_out_w * output_stride_y + idx_out_h * output_stride_z + idx_out_n *
+                                           output_stride_w;
+
+    VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
+    res0 = INITIAL_VALUE;
+
+#if POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT
+    // Global pooling path
+
+    int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
+
+#pragma unroll 8
+    for(int y = 0; y < POOL_SIZE_X * POOL_SIZE_Y; ++y)
+    {
+        VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
+        data0;
+#if defined(FP_MIXED_PRECISION)
+        // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
+        data0 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr)), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE));
+#else // defined(FP_MIXED_PRECISION)
+        data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr));
+#endif // defined(FP_MIXED_PRECISION)
+
+#if defined(POOL_L2)
+        // Raise to power of 2 for L2 Pooling
+        data0 *= data0;
+#endif // defined(POOL_L2)
+
+        res0 = POOL_OP(res0, data0);
+
+        in_base_ptr += input_stride_y;
+    }
+#else // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT
 
-    __global unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes +
-                                          offset_c +
-                                          idx_out_n * input_stride_w;
+    int idx_in_w = idx_out_w * STRIDE_X - PAD_X;
+    int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y;
 
-    __global unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes +
-                                           offset_c +
-                                           idx_out_w * output_stride_y +
-                                           idx_out_h * output_stride_z +
-                                           idx_out_n * output_stride_w;
+    int pool_x_s = max((int)0, -idx_in_w);
+    int pool_x_e = min((int)POOL_SIZE_X, (int)SRC_WIDTH - idx_in_w);
+    int pool_y_s = max((int)0, -idx_in_h);
+    int pool_y_e = min((int)POOL_SIZE_Y, (int)SRC_HEIGHT - idx_in_h);
 
-#if ((defined(POOL_AVG) || defined(POOL_L2)))
 #if defined(EXCLUDE_PADDING)
-    int filter_size = 0;
-#else  // defined(EXCLUDE_PADDING)
+    int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
+#else // defined(EXCLUDE_PADDING)
     int filter_size = POOL_SIZE_X * POOL_SIZE_Y;
 #endif // defined(EXCLUDE_PADDING)
-#endif // ((defined(POOL_AVG) || defined(POOL_L2)))
-
-    VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)
-    res0 = INITIAL_VALUE;
 
     for(int y = pool_y_s; y <
pool_y_e; ++y) { for(int x = pool_x_s; x < pool_x_e; ++x) { - VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) data0; + VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE) + data0; #if defined(FP_MIXED_PRECISION) // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE data0 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + (x + idx_in_w) * input_stride_y + (y + idx_in_h) * input_stride_z)), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); -#else // defined(FP_MIXED_PRECISION) - data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + (x + idx_in_w) * input_stride_y + (y + idx_in_h) * input_stride_z)); +#else // defined(FP_MIXED_PRECISION) + data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + (x + idx_in_w) * input_stride_y + (y + idx_in_h) * input_stride_z)); #endif // defined(FP_MIXED_PRECISION) #if defined(POOL_L2) @@ -760,15 +784,13 @@ __kernel void pooling_layer_MxN_nhwc( data0 *= data0; #endif // defined(POOL_L2) res0 = POOL_OP(res0, data0); - -#if ((defined(POOL_AVG) || defined(POOL_L2))) && defined(EXCLUDE_PADDING) - filter_size++; -#endif // ((defined(POOL_AVG) || defined(POOL_L2))) && defined(EXCLUDE_PADDING) } } +#endif // POOL_SIZE_X == SRC_WIDTH && POOL_SIZE_Y == SRC_HEIGHT + #if defined(POOL_AVG) || defined(POOL_L2) - res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size; + res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size; #endif // defined(POOL_AVG) || defined(POOL_L2) #if defined(POOL_L2) @@ -778,9 +800,10 @@ __kernel void pooling_layer_MxN_nhwc( // Store result #if defined(FP_MIXED_PRECISION) - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) res_converted0 = CONVERT(res0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + res_converted0 = CONVERT(res0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); STORE_VECTOR_SELECT(res_converted, DATA_TYPE, out_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, (VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0); -#else // defined(FP_MIXED_PRECISION) +#else // defined(FP_MIXED_PRECISION) STORE_VECTOR_SELECT(res, DATA_TYPE, out_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, (VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0); #endif // defined(FP_MIXED_PRECISION) } @@ -853,23 +876,18 @@ __kernel void pooling_layer_2x2_nhwc( // If batch size != 1, the batch size dimension is collapsed over the height dimension int idx_out_h = get_global_id(2) % DST_HEIGHT; int idx_out_n = get_global_id(2) / DST_HEIGHT; -#else //SRC_BATCH_SIZE != 1 +#else //SRC_BATCH_SIZE != 1 int idx_out_h = get_global_id(2); int idx_out_n = 0; #endif // SRC_BATCH_SIZE != 1 - int idx_in_w = idx_out_w * STRIDE_X - PAD_X; - int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y; + int idx_in_w = idx_out_w * STRIDE_X - PAD_X; + int idx_in_h = idx_out_h * STRIDE_Y - PAD_Y; - __global unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes + - idx_out_c * sizeof(DATA_TYPE) + - idx_out_n * input_stride_w; + __global unsigned char *in_base_ptr = input_ptr + input_offset_first_element_in_bytes + idx_out_c * sizeof(DATA_TYPE) + idx_out_n * input_stride_w; - __global unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes + - idx_out_c * sizeof(DATA_TYPE) + - idx_out_w * output_stride_y + - idx_out_h * output_stride_z + - idx_out_n * output_stride_w; + __global unsigned char *out_base_ptr = output_ptr + output_offset_first_element_in_bytes + idx_out_c * sizeof(DATA_TYPE) + idx_out_w * output_stride_y + idx_out_h * output_stride_z + idx_out_n * + output_stride_w; int pool_x_s = max((int)0, -idx_in_w); int pool_x_e = min((int)2, 
(int)SRC_WIDTH - idx_in_w); @@ -891,11 +909,11 @@ __kernel void pooling_layer_2x2_nhwc( data1 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x1 * input_stride_y + y0 * input_stride_z)), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); data2 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x0 * input_stride_y + y1 * input_stride_z)), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); data3 = CONVERT(VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x1 * input_stride_y + y1 * input_stride_z)), VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE)); -#else // defined(FP_MIXED_PRECISION) - data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x0 * input_stride_y + y0 * input_stride_z)); - data1 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x1 * input_stride_y + y0 * input_stride_z)); - data2 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x0 * input_stride_y + y1 * input_stride_z)); - data3 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x1 * input_stride_y + y1 * input_stride_z)); +#else // defined(FP_MIXED_PRECISION) + data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x0 * input_stride_y + y0 * input_stride_z)); + data1 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x1 * input_stride_y + y0 * input_stride_z)); + data2 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x0 * input_stride_y + y1 * input_stride_z)); + data3 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)(in_base_ptr + x1 * input_stride_y + y1 * input_stride_z)); #endif // defined(FP_MIXED_PRECISION) #if !defined(POOL_MAX) @@ -931,7 +949,7 @@ __kernel void pooling_layer_2x2_nhwc( #if defined(POOL_AVG) || defined(POOL_L2) #if defined(EXCLUDE_PADDING) res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))filter_size; -#else // !defined(EXCLUDE_PADDING) +#else // !defined(EXCLUDE_PADDING) res0 /= (VEC_DATA_TYPE(ACC_DATA_TYPE, VEC_SIZE))4; #endif // defined(EXCLUDE_PADDING) #endif // defined(POOL_AVG) || defined(POOL_L2) @@ -943,9 +961,10 @@ __kernel void pooling_layer_2x2_nhwc( // Store result #if defined(FP_MIXED_PRECISION) - VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) res_converted0 = CONVERT(res0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); + VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) + res_converted0 = CONVERT(res0, VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)); STORE_VECTOR_SELECT(res_converted, DATA_TYPE, out_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, (VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0); -#else // defined(FP_MIXED_PRECISION) +#else // defined(FP_MIXED_PRECISION) STORE_VECTOR_SELECT(res, DATA_TYPE, out_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, (VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0); #endif // defined(FP_MIXED_PRECISION) @@ -955,24 +974,26 @@ __kernel void pooling_layer_2x2_nhwc( // Note: DST_CHANNELS and DST_BATCH_SIZE can be used for either the input and output tensor // note: Batch dimension does not contribute in the offset contribution - VEC_DATA_TYPE(uint, VEC_SIZE) base_index = (uint)idx_out_c; + VEC_DATA_TYPE(uint, VEC_SIZE) + base_index = (uint)idx_out_c; base_index += VEC_OFFS(uint, VEC_SIZE); - VEC_DATA_TYPE(uint, VEC_SIZE) index0 = base_index + (uint)x0 * DST_CHANNELS + (uint)y0 * (DST_CHANNELS * SRC_WIDTH); - VEC_DATA_TYPE(uint, VEC_SIZE) index1 = base_index + (uint)x1 * DST_CHANNELS + (uint)y0 * (DST_CHANNELS * SRC_WIDTH); - VEC_DATA_TYPE(uint, VEC_SIZE) index2 = base_index + (uint)x0 * DST_CHANNELS + (uint)y1 * (DST_CHANNELS * SRC_WIDTH); - VEC_DATA_TYPE(uint, VEC_SIZE) index3 = base_index + (uint)x1 * DST_CHANNELS + (uint)y1 * (DST_CHANNELS * SRC_WIDTH); 
+    VEC_DATA_TYPE(uint, VEC_SIZE)
+    index0 = base_index + (uint)x0 * DST_CHANNELS + (uint)y0 * (DST_CHANNELS * SRC_WIDTH);
+    VEC_DATA_TYPE(uint, VEC_SIZE)
+    index1 = base_index + (uint)x1 * DST_CHANNELS + (uint)y0 * (DST_CHANNELS * SRC_WIDTH);
+    VEC_DATA_TYPE(uint, VEC_SIZE)
+    index2 = base_index + (uint)x0 * DST_CHANNELS + (uint)y1 * (DST_CHANNELS * SRC_WIDTH);
+    VEC_DATA_TYPE(uint, VEC_SIZE)
+    index3 = base_index + (uint)x1 * DST_CHANNELS + (uint)y1 * (DST_CHANNELS * SRC_WIDTH);
 
     index0 = select(index1, index0, CONVERT(isgreaterequal(data0, data1), VEC_DATA_TYPE(int, VEC_SIZE)));
     index1 = select(index3, index2, CONVERT(isgreaterequal(data2, data3), VEC_DATA_TYPE(int, VEC_SIZE)));
     index0 = select(index1, index0, CONVERT(isgreaterequal(max(data0, data1), max(data2, data3)), VEC_DATA_TYPE(int, VEC_SIZE)));
 
-    __global unsigned char *idx_base_ptr = indices_ptr + indices_offset_first_element_in_bytes +
-                                           idx_out_c * sizeof(uint) +
-                                           idx_out_w * indices_stride_y +
-                                           idx_out_h * indices_stride_z +
-                                           idx_out_n * indices_stride_w;
+    __global unsigned char *idx_base_ptr = indices_ptr + indices_offset_first_element_in_bytes + idx_out_c * sizeof(uint) + idx_out_w * indices_stride_y + idx_out_h * indices_stride_z + idx_out_n *
+                                           indices_stride_w;
 
     // Store result
     STORE_VECTOR_SELECT(index, uint, idx_base_ptr, VEC_SIZE, VEC_SIZE_LEFTOVER, ((VEC_SIZE_LEFTOVER != 0) && get_global_id(0) == 0));
diff --git a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp
index 78243402bf..a432877a1d 100644
--- a/src/core/gpu/cl/kernels/ClPoolingKernel.cpp
+++ b/src/core/gpu/cl/kernels/ClPoolingKernel.cpp
@@ -173,9 +173,11 @@ std::tuple<Status, Window> validate_and_configure_window(ITenso
         }
         case DataLayout::NHWC:
         {
+            const size_t vec_size = dst->data_type() == DataType::F32 ? 2 : 4;
+
             // Initialize border size
             border_size                       = BorderSize();
-            num_elems_processed_per_iteration = adjust_vec_size(4, dst->dimension(0));
+            num_elems_processed_per_iteration = adjust_vec_size(vec_size, dst->dimension(0));
             win                               = calculate_max_window(*dst, Steps(num_elems_processed_per_iteration));
             break;
         }
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index f42c187f8f..0153e659ae 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -159,12 +159,12 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::Datase
     validate(CLAccessor(_target), _reference, tolerance_f32);
 }
 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerMixedDataLayoutFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(),
-                       combine(combine(combine(combine(datasets::PoolingTypes(),
-                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
-                       framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0) })),
-                       framework::dataset::make("ExcludePadding", { false })),
-                       framework::dataset::make("DataType", DataType::F32))),
-                       pool_data_layout_dataset))
+                                                       combine(combine(combine(combine(datasets::PoolingTypes(),
+                                                                                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
+                                                                       framework::dataset::make("PadStride", { PadStrideInfo(2, 1, 0, 0) })),
+                                                               framework::dataset::make("ExcludePadding", { false })),
+                                                       framework::dataset::make("DataType", DataType::F32))),
+                                                       pool_data_layout_dataset))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -188,6 +188,44 @@ FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<float>, fra
     validate(CLAccessor(_target_indices), _ref_indices);
 }
 
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<float>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(
+                               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U),
+                                                                        TensorShape(27U, 13U, 2U, 4U)
+                                                                      }),
+                               framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                               framework::dataset::make("PoolingSize", { Size2D(27, 13) })),
+                               framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                               framework::dataset::make("ExcludePadding", false)),
+                               framework::dataset::make("DataType", DataType::F32)),
+                               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(
+                               framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U),
+                                                                        TensorShape(79U, 37U, 11U, 4U)
+                                                                      }),
+                               framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                               framework::dataset::make("PoolingSize", { Size2D(79, 37) })),
+                               framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                               framework::dataset::make("ExcludePadding", false)),
+                               framework::dataset::make("DataType", DataType::F32)),
+                               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+
 TEST_SUITE_END() // FP32
 
 TEST_SUITE(FP16)
@@ -216,6 +254,45 @@ FIXTURE_DATA_TEST_CASE(RunSmallIndices, CLPoolingLayerIndicesFixture<half>, fram
     validate(CLAccessor(_target), _reference, tolerance_f32);
     validate(CLAccessor(_target_indices), _ref_indices);
 }
+
+TEST_SUITE(GlobalPooling)
+// *INDENT-OFF*
+// clang-format off
+FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixture<half>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(combine(combine(
+                               framework::dataset::make("InputShape", { TensorShape(27U, 13U, 2U),
+                                                                        TensorShape(27U, 13U, 2U, 4U)
+                                                                      }),
+                               framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                               framework::dataset::make("PoolingSize", { Size2D(27, 13) })),
+                               framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                               framework::dataset::make("ExcludePadding", false)),
+                               framework::dataset::make("DataType", DataType::F16)),
+                               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixture<half>, framework::DatasetMode::NIGHTLY,
+                       combine(combine(combine(combine(combine(combine(
+                               framework::dataset::make("InputShape", { TensorShape(79U, 37U, 11U),
+                                                                        TensorShape(79U, 37U, 11U, 4U)
+                                                                      }),
+                               framework::dataset::make("PoolingType", { PoolingType::AVG, PoolingType::L2, PoolingType::MAX })),
+                               framework::dataset::make("PoolingSize", { Size2D(79, 37) })),
+                               framework::dataset::make("PadStride", PadStrideInfo(1, 1, 0, 0))),
+                               framework::dataset::make("ExcludePadding", false)),
+                               framework::dataset::make("DataType", DataType::F16)),
+                               framework::dataset::make("DataLayout", DataLayout::NHWC)))
+{
+    // Validate output
+    validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+// clang-format on
+// *INDENT-ON*
+TEST_SUITE_END() // GlobalPooling
+
 TEST_SUITE_END() // FP16
 
 TEST_SUITE_END() // Float
@@ -238,14 +315,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<uint8_t>, framew
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
 }
 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
-                       combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
-                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
-                       framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
-                       framework::dataset::make("ExcludePadding", { true })),
-                       framework::dataset::make("DataType", DataType::QASYMM8))),
-                       framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })),
-                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 255.f, 10) })),
-                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 255.f, 5) })))
+                                                       combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+                                                                                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
+                                                                       framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
+                                                               framework::dataset::make("ExcludePadding", { true })),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8))),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })),
+                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 255.f, 10) })),
+                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 255.f, 5) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8);
@@ -264,14 +341,14 @@ FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture<int8_t>, framewo
     validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
 }
 FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, CLPoolingLayerQuantizedMixedDataLayoutFixture<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
-                       combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
-                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
-                       framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
-                       framework::dataset::make("ExcludePadding", { true })),
-                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
-                       framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })),
-                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })),
-                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })))
+                                                       combine(combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }),
+                                                                                       framework::dataset::make("PoolingSize", { Size2D(2, 2) })),
+                                                                       framework::dataset::make("PadStride", { PadStrideInfo(1, 2, 1, 1) })),
+                                                               framework::dataset::make("ExcludePadding", { true })),
+                                                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED))),
+                       framework::dataset::make("DataLayout", { DataLayout::NHWC, DataLayout::NCHW })),
+                       framework::dataset::make("InputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })),
+                       framework::dataset::make("OutputQuantInfo", { QuantizationInfo(1.f / 127.f, -10) })))
 {
     // Validate output
     validate(CLAccessor(_target), _reference, tolerance_qasymm8_s);
diff --git a/tests/validation/Validation.h b/tests/validation/Validation.h
index a75562bac2..f1ce0fecc7 100644
--- a/tests/validation/Validation.h
+++ b/tests/validation/Validation.h
@@ -159,11 +159,13 @@ bool compare_dimensions(const Dimensions<T> &dimensions1, const Dimensions<T> &d
     {
         // In case a 1D/2D shape becomes 3D after permutation, the permuted tensor will have two/one dimension(s) more and the first (two) value(s) will be 1
         // clang-format off
-        if((dimensions1.num_dimensions() != dimensions2.num_dimensions()) &&
-           ((dimensions1.num_dimensions() != (dimensions2.num_dimensions() + 1)) || (dimensions1.x() != 1)) &&
-           ((dimensions1.num_dimensions() != (dimensions2.num_dimensions() + 2)) || (dimensions1.x() != 1) || (dimensions1.y() != 1)))
+        const auto max_dims = std::max(dimensions1.num_dimensions(), dimensions2.num_dimensions());
+        for(unsigned int i = 3; i < max_dims; ++i)
         {
-            return false;
+            if(dimensions1[i] != dimensions2[i])
+            {
+                return false;
+            }
         }
         // clang-format on
 
@@ -171,14 +173,6 @@
         {
            return false;
        }
-
-        for(unsigned int i = 3; i < dimensions1.num_dimensions(); ++i)
-        {
-            if(dimensions1[i] != dimensions2[i])
-            {
-                return false;
-            }
-        }
     }
 
     return true;
--
cgit v1.2.1
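Note (illustrative, not part of the patch): the compare_dimensions rework above replaces the rank-equality check with a comparison of dimensions above index 2 across the larger of the two ranks, because permuting a shape between layouts can change how many dimensions it reports (1,1,2,1 in NCHW becomes 2,1,1,1 in NHWC, which collapses to rank 1). A minimal self-contained C++ sketch, assuming a Dimensions-like container whose slots beyond the reported rank read as 1, as TensorShape's do; Dims and compare_dims_above_2 are hypothetical names, not library APIs:

    #include <algorithm>
    #include <array>
    #include <cstddef>
    #include <iostream>

    // Stand-in for the fixed-capacity Dimensions container: slots beyond the
    // reported rank are stored as 1, so they always compare equal.
    struct Dims
    {
        std::array<std::size_t, 6> v;   // dimension values, trailing slots set to 1
        std::size_t                num; // rank, as num_dimensions() would report
        std::size_t operator[](std::size_t i) const { return v[i]; }
    };

    // Mirrors the shape of the new loop: compare only dimensions 3 and above,
    // over the larger rank, instead of requiring both shapes to share a rank.
    bool compare_dims_above_2(const Dims &a, const Dims &b)
    {
        const std::size_t max_dims = std::max(a.num, b.num);
        for(std::size_t i = 3; i < max_dims; ++i)
        {
            if(a[i] != b[i])
            {
                return false;
            }
        }
        return true;
    }

    int main()
    {
        const Dims nchw = { { { 1, 1, 2, 1, 1, 1 } }, 4 }; // 1,1,2,1 (NCHW)
        const Dims nhwc = { { { 2, 1, 1, 1, 1, 1 } }, 1 }; // 2,1,1,1 collapses to rank 1 (NHWC)
        std::cout << std::boolalpha << compare_dims_above_2(nchw, nhwc) << "\n"; // true
    }

Dimensions 0-2 are still validated separately against the layout permutation (the surviving if block in the hunk above); only the trailing-dimension handling changed.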