From af6204c331eed7894ec4c5fd4e98ec22b6dac676 Mon Sep 17 00:00:00 2001 From: Anton Lokhmotov Date: Wed, 8 Nov 2017 09:34:19 +0000 Subject: COMPMID-661: Add avgpool-uint8 support. Optimize avgpool-fp32 for Bifrost. (#13) Change-Id: I32ba6afbac6694ffa053dd16f03a1b3d14627a19 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/94857 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- arm_compute/core/CL/kernels/CLPoolingLayerKernel.h | 4 +- arm_compute/core/Helpers.inl | 2 +- arm_compute/core/Types.h | 2 +- arm_compute/core/Utils.h | 2 +- arm_compute/runtime/CL/functions/CLPoolingLayer.h | 4 +- src/core/CL/CLKernelLibrary.cpp | 7 +- src/core/CL/cl_kernels/pooling_layer.cl | 101 +---------------- src/core/CL/cl_kernels/pooling_layer_quantized.cl | 121 +++++++++++++++++++++ src/core/CL/kernels/CLActivationLayerKernel.cpp | 4 +- .../CL/kernels/CLDirectConvolutionLayerKernel.cpp | 11 +- src/core/CL/kernels/CLPoolingLayerKernel.cpp | 100 +++++++++-------- src/runtime/CL/functions/CLPoolingLayer.cpp | 14 ++- src/runtime/CL/functions/CLSoftmaxLayer.cpp | 2 +- tests/AssetsLibrary.h | 2 + tests/validation/CL/PoolingLayer.cpp | 59 ++++++++-- tests/validation/CPP/PoolingLayer.cpp | 10 ++ tests/validation/Helpers.cpp | 21 ++++ tests/validation/Helpers.h | 17 +++ tests/validation/fixtures/PoolingLayerFixture.h | 68 +++++++++--- 19 files changed, 362 insertions(+), 189 deletions(-) create mode 100644 src/core/CL/cl_kernels/pooling_layer_quantized.cl diff --git a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h index a9159a4bb8..ffb5d79514 100644 --- a/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h +++ b/arm_compute/core/CL/kernels/CLPoolingLayerKernel.h @@ -53,14 +53,14 @@ public: * * @note QS8 and QS16 are supported only for pool sizes 3, 5 and 7 * - * @param[in] input Source tensor. Data types supported: QS8/QS16/F16/F32. + * @param[in] input Source tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ void configure(const ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayerKernel * - * @param[in] input Source tensor info. Data types supported: QS8/QS16/F16/F32. + * @param[in] input Source tensor info. Data types supported: QS8/QASYMM8/QS16/F16/F32. * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. * diff --git a/arm_compute/core/Helpers.inl b/arm_compute/core/Helpers.inl index acdb9567db..656956d00a 100644 --- a/arm_compute/core/Helpers.inl +++ b/arm_compute/core/Helpers.inl @@ -263,7 +263,7 @@ inline bool set_fixed_point_position_if_zero(ITensorInfo &info, int fixed_point_ inline bool set_quantization_info_if_empty(ITensorInfo &info, QuantizationInfo quantization_info) { - if(info.quantization_info().empty() && (is_data_type_quantized_assymetric(info.data_type()))) + if(info.quantization_info().empty() && (is_data_type_quantized_asymmetric(info.data_type()))) { info.set_quantization_info(quantization_info); return true; diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index e8be6127a8..eaff8fb709 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -120,7 +120,7 @@ struct QuantizationInfo float dequantize(uint8_t value) const { ARM_COMPUTE_ERROR_ON_MSG(scale == 0, "QuantizationInfo::dequantize: scale == 0"); - float dequantized = (value - offset) * scale; + float dequantized = (static_cast(value) - offset) * scale; return dequantized; } diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index b2bd7bd4ab..96e99e6874 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -792,7 +792,7 @@ inline bool is_data_type_fixed_point(DataType dt) * * @return True if data type is of symmetric quantized type, else false. */ -inline bool is_data_type_quantized_assymetric(DataType dt) +inline bool is_data_type_quantized_asymmetric(DataType dt) { switch(dt) { diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h index 9c51534f78..58753c1410 100644 --- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h @@ -43,14 +43,14 @@ class CLPoolingLayer : public ICLSimpleFunction public: /** Set the input and output tensors. * - * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32. + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QASYMM8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ void configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info); /** Static function to check if given info will lead to a valid configuration of @ref CLPoolingLayer * - * @param[in] input Source tensor info. Data types supported: QS8/QS16/F16/F32. + * @param[in] input Source tensor info. Data types supported: QS8/QASYMM8/QS16/F16/F32. * @param[in] output Destination tensor info. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. * diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 6efeebd63f..6ebdf298f1 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -271,9 +271,10 @@ const std::map CLKernelLibrary::_kernel_program_map = { "pixelwise_mul_int", "pixelwise_mul_int.cl" }, { "pooling_layer_2", "pooling_layer.cl" }, { "pooling_layer_3", "pooling_layer.cl" }, - { "pooling_layer_3_optimized", "pooling_layer.cl" }, + { "pooling_layer_optimized_3", "pooling_layer.cl" }, { "pooling_layer_7", "pooling_layer.cl" }, { "pooling_layer_N", "pooling_layer.cl" }, + { "pooling_layer_N_quantized", "pooling_layer_quantized.cl" }, { "quantization_layer", "quantization_layer.cl" }, { "reduction_operation", "reduction_operation.cl" }, { "remap_nearest_neighbour", "remap.cl" }, @@ -544,6 +545,10 @@ const std::map CLKernelLibrary::_program_source_map = { "pooling_layer.cl", #include "./cl_kernels/pooling_layer.clembed" + }, + { + "pooling_layer_quantized.cl", +#include "./cl_kernels/pooling_layer_quantized.clembed" }, { "quantization_layer.cl", diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index 635c44a849..ee8ff27ab7 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -375,7 +375,7 @@ calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upp * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image */ -__kernel void pooling_layer_3_optimized( +__kernel void pooling_layer_optimized_3( TENSOR3D_DECLARATION(input), TENSOR3D_DECLARATION(output)) { @@ -403,103 +403,6 @@ __kernel void pooling_layer_3_optimized( } #endif // defined(POOLING3x3) && !defined(FIXED_POINT_POSITION) -/** Performs a pooling function of pool size equal to 7. - * - * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32; - * @note In case of average pooling the following information must be passed at compile time: - * -DPOOL_AVG or -DPOOL_L2 must be provided otherwise max pooling will be performed. - * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) - * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions - * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension - * - * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32 - * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) - * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) - * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image - * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr - * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) - * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) - * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) - * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image - */ -__kernel void pooling_layer_7( - TENSOR3D_DECLARATION(input), - TENSOR3D_DECLARATION(output)) -{ - // Get pixels pointer - Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); - Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); - - // Load data - VEC_DATA_TYPE(DATA_TYPE, 8) - data0 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 0, 0)); - VEC_DATA_TYPE(DATA_TYPE, 8) - data1 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 1, 0)); - VEC_DATA_TYPE(DATA_TYPE, 8) - data2 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 2, 0)); - VEC_DATA_TYPE(DATA_TYPE, 8) - data3 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 3, 0)); - VEC_DATA_TYPE(DATA_TYPE, 8) - data4 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 4, 0)); - VEC_DATA_TYPE(DATA_TYPE, 8) - data5 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 5, 0)); - VEC_DATA_TYPE(DATA_TYPE, 8) - data6 = vload8(0, (__global DATA_TYPE *)tensor3D_offset(&input, 0, 6, 0)); - -#if defined(POOL_L2) - // Raise to power of 2 for L2 Pooling - data0 = POW2_OP(data0, 8); - data1 = POW2_OP(data1, 8); - data2 = POW2_OP(data2, 8); - data3 = POW2_OP(data3, 8); - data4 = POW2_OP(data4, 8); - data5 = POW2_OP(data5, 8); - data6 = POW2_OP(data6, 8); -#endif /* defined(POOL_L2) */ - - // Pool operation of all rows - data0 = POOL_OP(data0, data1); - data2 = POOL_OP(data2, data3); - data4 = POOL_OP(data4, data5); - data0 = POOL_OP(data0, data2); - data4 = POOL_OP(data4, data6); - data0 = POOL_OP(data0, data4); - - // Set last element -#if defined(POOL_AVG) || defined(POOL_L2) - data0.s7 = 0; -#else /* defined(POOL_AVG) || defined(POOL_L2) */ - data0.s7 = data0.s6; -#endif /* defined(POOL_AVG) || defined(POOL_L2) */ - - // Reduce result - VEC_DATA_TYPE(DATA_TYPE, 4) - reduce4 = POOL_OP(data0.s0123, data0.s4567); - VEC_DATA_TYPE(DATA_TYPE, 2) - reduce2 = POOL_OP(reduce4.s01, reduce4.s23); - DATA_TYPE res = POOL_OP(reduce2.s0, reduce2.s1); - -#if defined(POOL_AVG) || defined(POOL_L2) - // Divide by pool region in case of average pooling - res = DIV_OP(res, calculate_avg_scale(7, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)); -#endif /* defined(POOL_AVG) || defined(POOL_L2) */ - -#if defined(POOL_L2) - // Take square root of the result in L2 pooling - res = SQRT_OP(res); -#endif /* defined(POOL_L2) */ - - // Store result - *(__global DATA_TYPE *)output.ptr = res; -} - #if defined(POOL_SIZE) // Set the initial value for the pooling operation accordingly with the data type @@ -608,4 +511,4 @@ __kernel void pooling_layer_N( // Store result *(__global DATA_TYPE *)output.ptr = res; } -#endif // defined(POOL_SIZE) \ No newline at end of file +#endif // defined(POOL_SIZE) diff --git a/src/core/CL/cl_kernels/pooling_layer_quantized.cl b/src/core/CL/cl_kernels/pooling_layer_quantized.cl new file mode 100644 index 0000000000..17448d19de --- /dev/null +++ b/src/core/CL/cl_kernels/pooling_layer_quantized.cl @@ -0,0 +1,121 @@ +/* + * Copyright (c) 2017 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "helpers.h" + +#if defined(POOL_AVG) +#define POOL_OP(x, y) ((x) + (y)) +#else /* defined(POOL_AVG) */ +#define POOL_OP(x, y) (max((x), (y))) +#endif /* defined(POOL_AVG) */ + +#define DIV_OP(x, y) (x * (1.f / y)) + +#if defined(POOL_L2) +#error "L2 pooling is not supported" +#endif /* defined(POOL_L2) */ + +int calculate_avg_scale(const int pool_size, const int upper_bound_w, const int upper_bound_h, + const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + int start_x = get_global_id(0) * stride_x - pad_x; + int start_y = get_global_id(1) * stride_y - pad_y; + const int end_x = min(start_x + pool_size, upper_bound_w); + const int end_y = min(start_y + pool_size, upper_bound_h); +#if defined(EXCLUDE_PADDING) + start_x = max(0, start_x); + start_y = max(0, start_y); +#endif /* defined(EXCLUDE_PADDING) */ + return ((end_y - start_y) * (end_x - start_x)); +} + +/** Performs a pooling function of pool size equal to N + * + * @note Pool size must be passed using -DPOOL_SIZE e.g. -DPOOL_SIZE=13; + * @note In case of average pooling the following information must be passed at compile time: + * -DPOOL_AVG must be provided otherwise max pooling will be performed. + * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) + * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions + * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension + * + * @param[in] input_ptr Pointer to the source image. Supported data types: QASYMM8 + * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] input_step_y input_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] input_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] input_step_z input_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] input_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] output_ptr Pointer to the destination image. Supported data types: same as @p input_ptr + * @param[in] output_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] output_step_y output_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] output_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] output_step_z output_stride_z * number of elements along Z processed per workitem(in bytes) + * @param[in] output_offset_first_element_in_bytes The offset of the first element in the destination image + */ +__kernel void pooling_layer_N_quantized( + TENSOR3D_DECLARATION(input), + TENSOR3D_DECLARATION(output)) +{ + // Get pixels pointer + Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input); + Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output); + + int8 vdata = 0; + int sdata = 0; + + // Load data + for(int y = 0; y < POOL_SIZE; y++) + { + int x = 0; + for(; x <= ((int)POOL_SIZE - 8); x += 8) + { + uchar8 data = vload8(0, (__global uchar *)tensor3D_offset(&input, x, y, 0)); + int8 data0 = convert_int8(data); + vdata = POOL_OP(vdata, data0); + } + + // Leftover + for(; x < (int)POOL_SIZE; ++x) + { + uchar data = *((__global uchar *)tensor3D_offset(&input, x, y, 0)); + int data0 = convert_int(data); + sdata = POOL_OP(sdata, data0); + } + } + + // Reduce result + int4 reduce4 = POOL_OP(vdata.s0123, vdata.s4567); + int2 reduce2 = POOL_OP(reduce4.s01, reduce4.s23); + int res = POOL_OP(reduce2.s0, reduce2.s1); + res = POOL_OP(res, sdata); + +#if defined(POOL_AVG) + res = DIV_OP(res, calculate_avg_scale(POOL_SIZE, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)); +#endif /* defined(POOL_AVG) */ + + // Store result + *(__global uchar *)output.ptr = convert_uchar(res); +} diff --git a/src/core/CL/kernels/CLActivationLayerKernel.cpp b/src/core/CL/kernels/CLActivationLayerKernel.cpp index 5bfc832518..adedebba53 100644 --- a/src/core/CL/kernels/CLActivationLayerKernel.cpp +++ b/src/core/CL/kernels/CLActivationLayerKernel.cpp @@ -101,7 +101,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act build_opts.emplace(("-DB_VAL=" + support::cpp11::to_string(b_const_int))); // Set scale and offset of the input and output - if(is_data_type_quantized_assymetric(dt)) + if(is_data_type_quantized_asymmetric(dt)) { float s1 = input->info()->quantization_info().scale; int o1 = input->info()->quantization_info().offset; @@ -127,7 +127,7 @@ void CLActivationLayerKernel::configure(ICLTensor *input, ICLTensor *output, Act } // Create kernel - std::string kernel_name = is_data_type_quantized_assymetric(dt) ? std::string("activation_layer_qa8") : std::string("activation_layer"); + std::string kernel_name = is_data_type_quantized_asymmetric(dt) ? std::string("activation_layer_qa8") : std::string("activation_layer"); _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); // Make sure _kernel is initialized before calling the parent's configure diff --git a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp index 53e46390c1..5f109f76af 100644 --- a/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp +++ b/src/core/CL/kernels/CLDirectConvolutionLayerKernel.cpp @@ -84,7 +84,12 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL output_shape.set(2, weights->info()->dimension(3)); // Output auto inizialitation if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + auto_init_if_empty(*output->info(), + output_shape, + 1, + input->info()->data_type(), + input->info()->fixed_point_position(), + input->info()->quantization_info()); ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); @@ -176,7 +181,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL else { bool is_quantized_fixed_point = is_data_type_fixed_point(data_type); - bool is_quantized_asymm = is_data_type_quantized_assymetric(data_type); + bool is_quantized_asymm = is_data_type_quantized_asymmetric(data_type); DataType promoted_type = (is_quantized_fixed_point) ? get_promoted_data_type(data_type) : data_type; build_options.add_option_if(is_quantized_asymm, std::string("-DKERNEL_SIZE=" + support::cpp11::to_string(kernel_size))); @@ -220,7 +225,7 @@ void CLDirectConvolutionLayerKernel::configure(const ICLTensor *input, const ICL } // Set static kernel arguments - if(is_data_type_quantized_assymetric(data_type)) + if(is_data_type_quantized_asymmetric(data_type)) { int output_multiplier = 0; int output_shift = 0; diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 2854cd8265..1317278fb5 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/AccessWindowStatic.h" #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/CL/ICLKernel.h" #include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Helpers.h" @@ -80,7 +81,12 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, output_shape.set(0, pooled_w); output_shape.set(1, pooled_h); - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->fixed_point_position()); + auto_init_if_empty(*output->info(), + output_shape, + 1, + input->info()->data_type(), + input->info()->fixed_point_position(), + input->info()->quantization_info()); } ARM_COMPUTE_ERROR_THROW_ON(CLPoolingLayerKernel::validate(input->info(), output->info(), pool_info)); @@ -94,80 +100,80 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, _pool_info = pool_info; _border_size = BorderSize(pool_pad_y, pool_pad_x); - // Set build options - std::set build_opts; - build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); - build_opts.emplace(("-DPOOL_" + string_from_pooling_type(pool_type))); - if(is_data_type_fixed_point(input->info()->data_type())) - { - build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); - } + const GPUTarget gpu_target = get_arch_from_target(get_target()); + const DataType data_type = input->info()->data_type(); - build_opts.emplace(("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x))); + // Set build options + CLBuildOptions build_opts; + build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type)); + build_opts.add_option("-DPOOL_" + string_from_pooling_type(pool_type)); + build_opts.add_option_if(is_data_type_fixed_point(data_type), + "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + build_opts.add_option("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x)); if(pool_type != PoolingType::MAX) { - if(exclude_padding) - { - build_opts.emplace("-DEXCLUDE_PADDING"); - } - build_opts.emplace(("-DMAX_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x)))); - build_opts.emplace(("-DMAX_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y)))); - build_opts.emplace(("-DSTRIDE_Y=" + support::cpp11::to_string(pool_stride_y))); - build_opts.emplace(("-DPAD_X=" + support::cpp11::to_string(pool_pad_x))); - build_opts.emplace(("-DPAD_Y=" + support::cpp11::to_string(pool_pad_y))); + build_opts.add_option_if(exclude_padding, "-DEXCLUDE_PADDING"); + build_opts.add_option("-DMAX_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0) + (exclude_padding ? 0 : pool_pad_x))); + build_opts.add_option("-DMAX_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1) + (exclude_padding ? 0 : pool_pad_y))); + build_opts.add_option("-DSTRIDE_Y=" + support::cpp11::to_string(pool_stride_y)); + build_opts.add_option("-DPAD_X=" + support::cpp11::to_string(pool_pad_x)); + build_opts.add_option("-DPAD_Y=" + support::cpp11::to_string(pool_pad_y)); } // Create kernel - if((pool_size == 2) || (pool_size == 3) || (pool_size == 7)) + if((pool_size == 3) && !is_data_type_quantized_asymmetric(data_type)) { // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenCL kernel where // each thread computes 4 output elements - const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(input->info()->data_type()); + const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(data_type); - int num_elements_read_per_iteration = (pool_size == 7) ? 8 : pool_size; + int num_elems_read_per_iteration = pool_size; if(is_pool3x3_stride_le3) { - // Change the number of elements processed and number of elements read per iteration for pooling 3x3 with stride less equal than 3 + // Change the number of elements processed and the number of elements read per iteration + // for pooling 3x3 with stride less equal than 3 _num_elems_processed_per_iteration = 4; - num_elements_read_per_iteration = pool_size * (pool_stride_x + 1); + num_elems_read_per_iteration = pool_size * (pool_stride_x + 1); } - const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elements_read_per_iteration) - input_width; + const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + num_elems_read_per_iteration) - input_width; const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; _border_size.right = std::max(upper_bound_w, pool_pad_x); _border_size.bottom = std::max(upper_bound_h, pool_pad_y); - std::string kernel_name = "pooling_layer_" + support::cpp11::to_string(pool_size); - if(is_pool3x3_stride_le3) - { - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name + "_optimized", build_opts)); - } - else - { - _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); - } + std::string kernel_name = ((is_pool3x3_stride_le3) ? "pooling_layer_optimized_" : "pooling_layer_") + + support::cpp11::to_string(pool_size); + _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); } else // Run general case { - _num_elems_processed_per_iteration = 1; - const int upper_bound_w = ((pooled_w - 1) * pool_stride_x - pool_pad_x + pool_size) - input_width; const int upper_bound_h = ((pooled_h - 1) * pool_stride_y - pool_pad_y + pool_size) - input_height; _border_size.right = std::max(upper_bound_w, pool_pad_x); _border_size.bottom = std::max(upper_bound_h, pool_pad_y); - build_opts.emplace(("-DPOOL_SIZE=" + support::cpp11::to_string(pool_size))); - if(input->info()->data_type() == DataType::F16) - { - build_opts.emplace("-DFP16"); - } - _kernel = static_cast(CLKernelLibrary::get().create_kernel("pooling_layer_N", build_opts)); + build_opts.add_option("-DPOOL_SIZE=" + support::cpp11::to_string(pool_size)); + build_opts.add_option_if(data_type == DataType::F16, "-DFP16"); + + std::string kernel_name = is_data_type_quantized_asymmetric(data_type) ? "pooling_layer_N_quantized" : "pooling_layer_N"; + _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); } // Configure kernel window - Window win = calculate_max_window(*output->info(), Steps(_num_elems_processed_per_iteration)); + Window win = calculate_max_window(*output->info(), Steps(_num_elems_processed_per_iteration)); + + // Configure the local work size (hint) from the first two dimensions of the global work size. + // On Bifrost, this works for up to 35x35xC filters, for which the pooling_layer_3_optimized + // kernel is launched with gws=(9, 33, C). In any case, the hint will be ignored if it is + // invalid (e.g. exceeds the maximum workgroup size that the kernel can be launched with). + if(gpu_target == GPUTarget::BIFROST) + { + cl::NDRange gws = ICLKernel::gws_from_window(win); + _lws_hint = cl::NDRange(gws[0], gws[1], 1); + } + AccessWindowStatic input_access(input->info(), -pool_pad_x, -pool_pad_y, input_width + _border_size.right, input_height + _border_size.bottom); AccessWindowHorizontal output_access(output->info(), 0, _num_elems_processed_per_iteration); update_window_and_padding(win, input_access, output_access); @@ -178,14 +184,16 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, Error CLPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG((is_data_type_quantized_asymmetric(input->data_type()) && pool_info.pool_type() == PoolingType::L2), + "Unsupported combination of parameters!"); int pool_pad_x = 0; int pool_pad_y = 0; int pool_size = pool_info.pool_size(); std::tie(pool_pad_x, pool_pad_y) = pool_info.pad_stride_info().pad(); ARM_COMPUTE_RETURN_ERROR_ON_MSG(((pool_pad_x >= pool_size) || (pool_pad_y >= pool_size)), - "Invalid pool size and pool pad combination"); + "Invalid pool size and pool pad combination!"); // Checks performed when output is configured if(output->total_size() != 0) @@ -230,7 +238,7 @@ void CLPoolingLayerKernel::run(const Window &window, cl::CommandQueue &queue) unsigned int idx = 0; add_3D_tensor_argument(idx, _input, in_slice); add_3D_tensor_argument(idx, _output, slice); - enqueue(queue, *this, slice); + enqueue(queue, *this, slice, _lws_hint); } while(window_collapsed.slide_window_slice_3D(slice)); } diff --git a/src/runtime/CL/functions/CLPoolingLayer.cpp b/src/runtime/CL/functions/CLPoolingLayer.cpp index 6ca1a33b0c..ac360fbb3d 100644 --- a/src/runtime/CL/functions/CLPoolingLayer.cpp +++ b/src/runtime/CL/functions/CLPoolingLayer.cpp @@ -23,21 +23,33 @@ */ #include "arm_compute/runtime/CL/functions/CLPoolingLayer.h" +#include "arm_compute/core/CL/ICLTensor.h" #include "arm_compute/core/CL/kernels/CLPoolingLayerKernel.h" +#include "arm_compute/runtime/CL/CLScheduler.h" #include "support/ToolchainSupport.h" using namespace arm_compute; void CLPoolingLayer::configure(ICLTensor *input, ICLTensor *output, const PoolingLayerInfo &pool_info) { + ARM_COMPUTE_ERROR_ON_NULLPTR(input); + // Configure pooling kernel auto k = arm_compute::support::cpp14::make_unique(); + k->set_target(CLScheduler::get().target()); k->configure(input, output, pool_info); _kernel = std::move(k); // Configure border depending on operation required BorderMode border_mode = (PoolingType::MAX == pool_info.pool_type()) ? BorderMode::REPLICATE : BorderMode::CONSTANT; - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(0)); + // Quantize border in case data type is quantized asymmetric data type + uint32_t border_value = 0; + if(is_data_type_quantized_asymmetric(input->info()->data_type()) && !pool_info.exclude_padding()) + { + border_value = static_cast(input->info()->quantization_info().quantize(0)); + } + + _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(border_value)); } Error CLPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *output, const PoolingLayerInfo &pool_info) diff --git a/src/runtime/CL/functions/CLSoftmaxLayer.cpp b/src/runtime/CL/functions/CLSoftmaxLayer.cpp index 7268d8eab5..a059f9e5fd 100644 --- a/src/runtime/CL/functions/CLSoftmaxLayer.cpp +++ b/src/runtime/CL/functions/CLSoftmaxLayer.cpp @@ -62,7 +62,7 @@ void CLSoftmaxLayer::configure(const ICLTensor *input, ICLTensor *output, float // Configure kernels // TODO (COMPMID-661): Remove legacy path once the new one is properly validated - _run_legacy_path = is_data_type_quantized_assymetric(input->info()->data_type()); + _run_legacy_path = is_data_type_quantized_asymmetric(input->info()->data_type()); if(_run_legacy_path) { _max_kernel.configure(input, &_max); diff --git a/tests/AssetsLibrary.h b/tests/AssetsLibrary.h index ee136447ee..c2eee8b616 100644 --- a/tests/AssetsLibrary.h +++ b/tests/AssetsLibrary.h @@ -477,6 +477,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t switch(tensor.data_type()) { case DataType::U8: + case DataType::QASYMM8: { std::uniform_int_distribution distribution_u8(std::numeric_limits::lowest(), std::numeric_limits::max()); fill(tensor, distribution_u8, seed_offset); @@ -564,6 +565,7 @@ void AssetsLibrary::fill_tensor_uniform(T &&tensor, std::random_device::result_t switch(tensor.data_type()) { case DataType::U8: + case DataType::QASYMM8: { ARM_COMPUTE_ERROR_ON(!(std::is_same::value)); std::uniform_int_distribution distribution_u8(low, high); diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp index 7038f2c34e..b3d56122db 100644 --- a/tests/validation/CL/PoolingLayer.cpp +++ b/tests/validation/CL/PoolingLayer.cpp @@ -43,19 +43,26 @@ namespace validation { namespace { -/** Input data set for float data types */ +/** Input data set for floating-point data types */ const auto PoolingLayerDatasetFP = combine(combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })), framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })), framework::dataset::make("ExcludePadding", { true, false })); -/** Input data set for quantized data types */ +/** Input data set for fixed-point data types */ const auto PoolingLayerDatasetQS = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })), framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })), framework::dataset::make("ExcludePadding", { true, false })); -constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for float types */ -constexpr AbsoluteTolerance tolerance_qs8(3); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */ -constexpr AbsoluteTolerance tolerance_qs16(6); /**< Tolerance value for comparing reference's output against implementation's output for quantized input */ + +/** Input data set for asymmetric data type */ +const auto PoolingLayerDatasetQASYMM8 = combine(combine(combine(framework::dataset::make("PoolingType", { PoolingType::MAX, PoolingType::AVG }), framework::dataset::make("PoolingSize", { 2, 3 })), + framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) })), + framework::dataset::make("ExcludePadding", { true, false })); + +constexpr AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for 32-bit floating-point type */ +constexpr AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit floating-point type */ +constexpr AbsoluteTolerance tolerance_qs16(6); /**< Tolerance value for comparing reference's output against implementation's output for 16-bit fixed-point type */ +constexpr AbsoluteTolerance tolerance_qs8(3); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit fixed-point type */ +constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for 8-bit asymmetric type */ } // namespace TEST_SUITE(CL) @@ -64,12 +71,13 @@ TEST_SUITE(PoolingLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Mismatching data type TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS8, 4), // Mismatching fixed point position TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QS16, 11), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32, 0), // Invalid pad/size combination + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0), // Invalid parameters }), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F16, 0), TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::F32, 0), @@ -77,6 +85,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( TensorInfo(TensorShape(25U, 11U, 2U), 1, DataType::QS16, 11), TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32, 0), TensorInfo(TensorShape(25U, 16U, 2U), 1, DataType::F32, 0), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::QASYMM8, 0), })), framework::dataset::make("PoolInfo", { PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)), @@ -84,8 +93,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( PoolingLayerInfo(PoolingType::AVG, 3, PadStrideInfo(1, 1, 0, 0)), PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 2, 0)), PoolingLayerInfo(PoolingType::AVG, 2, PadStrideInfo(1, 1, 0, 2)), + PoolingLayerInfo(PoolingType::L2, 3, PadStrideInfo(1, 1, 0, 0)), })), - framework::dataset::make("Expected", { true, false, true, false, true, true})), + framework::dataset::make("Expected", { true, false, true, false, true, true, true })), input_info, output_info, pool_info, expected) { ARM_COMPUTE_EXPECT(bool(CLPoolingLayer::validate(&input_info, &output_info, pool_info)) == expected, framework::LogLevel::ERRORS); @@ -131,7 +141,7 @@ TEST_SUITE_END() template using CLPoolingLayerFixedPointFixture = PoolingLayerValidationFixedPointFixture; -TEST_SUITE(Quantized) +TEST_SUITE(FixedPoint) TEST_SUITE(QS8) FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerFixedPointFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQS, framework::dataset::make("DataType", DataType::QS8))), @@ -167,6 +177,31 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerFixedPointFixture, frame TEST_SUITE_END() TEST_SUITE_END() +TEST_SUITE(Quantized) + +template +using CLPoolingLayerQuantizedFixture = PoolingLayerValidationQuantizedFixture; + +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLPoolingLayerQuantizedFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), combine(PoolingLayerDatasetQASYMM8, + framework::dataset::make("DataType", DataType::QASYMM8))), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255, 127), + QuantizationInfo(7.f / 255, 123) + }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLPoolingLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeShapes(), combine(PoolingLayerDatasetQASYMM8, + framework::dataset::make("DataType", DataType::QASYMM8))), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255, 0) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() +TEST_SUITE_END() + TEST_SUITE_END() TEST_SUITE_END() } // namespace validation diff --git a/tests/validation/CPP/PoolingLayer.cpp b/tests/validation/CPP/PoolingLayer.cpp index 4f755ce2c4..90a48e0c44 100644 --- a/tests/validation/CPP/PoolingLayer.cpp +++ b/tests/validation/CPP/PoolingLayer.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Types.h" #include "tests/validation/FixedPoint.h" +#include "tests/validation/Helpers.h" namespace arm_compute { @@ -277,6 +278,15 @@ SimpleTensor pooling_layer(const SimpleTensor &src, PoolingLayerInfo info) return dst; } +template <> +SimpleTensor pooling_layer(const SimpleTensor &src, PoolingLayerInfo info) +{ + SimpleTensor src_tmp = convert_from_asymmetric(src); + SimpleTensor dst_tmp = pooling_layer(src_tmp, info); + SimpleTensor dst = convert_to_asymmetric(dst_tmp, src.quantization_info()); + return dst; +} + template SimpleTensor pooling_layer(const SimpleTensor &src, PoolingLayerInfo info); template SimpleTensor pooling_layer(const SimpleTensor &src, PoolingLayerInfo info); template SimpleTensor pooling_layer(const SimpleTensor &src, PoolingLayerInfo info); diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp index 23ad62a6c3..3ef5fc1cc5 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -112,6 +112,27 @@ HarrisCornersParameters harris_corners_parameters() return params; } + +SimpleTensor convert_from_asymmetric(const SimpleTensor &src) +{ + const QuantizationInfo &quantization_info = src.quantization_info(); + SimpleTensor dst{ src.shape(), DataType::F32, 1, 0 }; + for(int i = 0; i < src.num_elements(); ++i) + { + dst[i] = quantization_info.dequantize(src[i]); + } + return dst; +} + +SimpleTensor convert_to_asymmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info) +{ + SimpleTensor dst{ src.shape(), DataType::QASYMM8, 1, 0, quantization_info }; + for(int i = 0; i < src.num_elements(); ++i) + { + dst[i] = quantization_info.quantize(src[i]); + } + return dst; +} } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index eecf976a13..6b1c4b9026 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -201,6 +201,23 @@ std::pair get_batchnormalization_layer_test_bounds(int fixed_point_positio return bounds; } + +/** Convert quantized simple tensor into float using tensor quantization information. + * + * @param[in] src Quantized tensor. + * + * @return Float tensor. +*/ +SimpleTensor convert_from_asymmetric(const SimpleTensor &src); + +/** Convert float simple tensor into quantized using specified quantization information. + * + * @param[in] src Float tensor. + * @param[in] quantization_info Quantification information. + * + * @return Quantized tensor. +*/ +SimpleTensor convert_to_asymmetric(const SimpleTensor &src, const QuantizationInfo &quantization_info); } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/fixtures/PoolingLayerFixture.h b/tests/validation/fixtures/PoolingLayerFixture.h index 09b9e0ef1a..d6190e2977 100644 --- a/tests/validation/fixtures/PoolingLayerFixture.h +++ b/tests/validation/fixtures/PoolingLayerFixture.h @@ -43,28 +43,34 @@ namespace test namespace validation { template -class PoolingLayerValidationFixedPointFixture : public framework::Fixture +class PoolingLayerValidationGenericFixture : public framework::Fixture { public: template - void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, int fractional_bits) + void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, + DataType data_type, int fractional_bits, QuantizationInfo quantization_info) { - _fractional_bits = fractional_bits; + _fractional_bits = fractional_bits; + _quantization_info = quantization_info; PoolingLayerInfo info(pool_type, pool_size, pad_stride_info, exclude_padding); - _target = compute_target(shape, info, data_type, fractional_bits); - _reference = compute_reference(shape, info, data_type, fractional_bits); + _target = compute_target(shape, info, data_type, fractional_bits, quantization_info); + _reference = compute_reference(shape, info, data_type, fractional_bits, quantization_info); } protected: template void fill(U &&tensor) { - if(_fractional_bits == 0) + if(!is_data_type_quantized(tensor.data_type())) { std::uniform_real_distribution<> distribution(-1.f, 1.f); library->fill(tensor, distribution, 0); } + else if(is_data_type_quantized_asymmetric(tensor.data_type())) + { + library->fill_tensor_uniform(tensor, 0); + } else { const int one_fixed = 1 << _fractional_bits; @@ -73,10 +79,11 @@ protected: } } - TensorType compute_target(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, int fixed_point_position = 0) + TensorType compute_target(const TensorShape &shape, PoolingLayerInfo info, + DataType data_type, int fixed_point_position, QuantizationInfo quantization_info) { // Create tensors - TensorType src = create_tensor(shape, data_type, 1, fixed_point_position); + TensorType src = create_tensor(shape, data_type, 1, fixed_point_position, quantization_info); TensorType dst; // Create and configure function @@ -102,10 +109,11 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &shape, PoolingLayerInfo info, DataType data_type, int fixed_point_position = 0) + SimpleTensor compute_reference(const TensorShape &shape, PoolingLayerInfo info, + DataType data_type, int fixed_point_position, QuantizationInfo quantization_info) { // Create reference - SimpleTensor src{ shape, data_type, 1, fixed_point_position }; + SimpleTensor src{ shape, data_type, 1, fixed_point_position, quantization_info }; // Fill reference fill(src); @@ -113,30 +121,56 @@ protected: return reference::pooling_layer(src, info); } - TensorType _target{}; - SimpleTensor _reference{}; - int _fractional_bits{}; + TensorType _target{}; + SimpleTensor _reference{}; + int _fractional_bits{}; + QuantizationInfo _quantization_info{}; }; template -class PoolingLayerValidationFixture : public PoolingLayerValidationFixedPointFixture +class PoolingLayerValidationFixture : public PoolingLayerValidationGenericFixture { public: template void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type) { - PoolingLayerValidationFixedPointFixture::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding, data_type, 0); + PoolingLayerValidationGenericFixture::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding, + data_type, 0, QuantizationInfo()); + } +}; + +template +class PoolingLayerValidationFixedPointFixture : public PoolingLayerValidationGenericFixture +{ +public: + template + void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, int fractional_bits) + { + PoolingLayerValidationGenericFixture::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding, + data_type, fractional_bits, QuantizationInfo()); + } +}; + +template +class PoolingLayerValidationQuantizedFixture : public PoolingLayerValidationGenericFixture +{ +public: + template + void setup(TensorShape shape, PoolingType pool_type, int pool_size, PadStrideInfo pad_stride_info, bool exclude_padding, DataType data_type, QuantizationInfo quantization_info) + { + PoolingLayerValidationGenericFixture::setup(shape, pool_type, pool_size, pad_stride_info, exclude_padding, + data_type, 0, quantization_info); } }; template -class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationFixedPointFixture +class GlobalPoolingLayerValidationFixture : public PoolingLayerValidationFixture { public: template void setup(TensorShape shape, PoolingType pool_type, DataType data_type) { - PoolingLayerValidationFixedPointFixture::setup(shape, pool_type, shape.x(), PadStrideInfo(1, 1, 0, 0), true, data_type, 0); + PoolingLayerValidationFixture::setup(shape, pool_type, shape.x(), PadStrideInfo(1, 1, 0, 0), true, data_type); } }; } // namespace validation -- cgit v1.2.1