From 0c7614f7178b255c6c3d5b09aeee259e219fd8c8 Mon Sep 17 00:00:00 2001 From: steniu01 Date: Fri, 23 Jun 2017 17:00:26 +0100 Subject: COMPMID-431 Port OpenCL pooling layer to use fixed point Change-Id: I6a73cd6582097aaefa83588aad789bdefdc74406 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79967 Tested-by: Kaizen Reviewed-by: Michalis Spyrou --- arm_compute/core/CL/kernels/CLFillBorderKernel.h | 2 +- arm_compute/runtime/CL/functions/CLFillBorder.h | 2 +- arm_compute/runtime/CL/functions/CLPoolingLayer.h | 2 +- src/core/CL/cl_kernels/activation_layer.cl | 2 +- src/core/CL/cl_kernels/fill_border.cl | 6 ++ src/core/CL/cl_kernels/fixed_point.h | 31 +++++---- src/core/CL/cl_kernels/normalization_layer.cl | 2 +- src/core/CL/cl_kernels/pooling_layer.cl | 78 ++++++++++++++--------- src/core/CL/cl_kernels/softmax_layer.cl | 2 +- src/core/CL/kernels/CLFillBorderKernel.cpp | 4 ++ src/core/CL/kernels/CLPoolingLayerKernel.cpp | 11 +++- tests/validation/CL/PoolingLayer.cpp | 73 ++++++++++++++++++--- 12 files changed, 157 insertions(+), 58 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h index 797f86dae8..3881b427f7 100644 --- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h +++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h @@ -51,7 +51,7 @@ public: /** Initialise the kernel's input, output and border mode. * - * @param[in,out] tensor Tensor to process Data types supported: U8, S16, S32, F32. + * @param[in,out] tensor Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F32. * @param[in] border_size Size of the border to fill in elements. * @param[in] border_mode Border mode to use for the convolution. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h index b4855475c3..80a8cf20e3 100644 --- a/arm_compute/runtime/CL/functions/CLFillBorder.h +++ b/arm_compute/runtime/CL/functions/CLFillBorder.h @@ -38,7 +38,7 @@ class CLFillBorder : public ICLSimpleFunction public: /** Initialize the function * - * @param[in,out] tensor Source tensor. Data types supported: U8, S16 + * @param[in,out] tensor Source tensor. Data types supported: QS8/U8/S16/QS16 * @param[in] border_width The border width * @param[in] border_mode Strategy to use for borders. * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT. diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h index f92860e5b2..b9675b94e0 100644 --- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h +++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h @@ -42,7 +42,7 @@ class CLPoolingLayer : public ICLSimpleFunction public: /** Set the input and output tensors. * - * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: F16, F32. + * @param[in,out] input Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32. * @param[out] output Destination tensor. Data types supported: Same as @p input. * @param[in] pool_info Contains pooling operation information described in @ref PoolingLayerInfo. */ diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl index 7527b1ce37..119879afd5 100644 --- a/src/core/CL/cl_kernels/activation_layer.cl +++ b/src/core/CL/cl_kernels/activation_layer.cl @@ -34,7 +34,7 @@ #define SUB_OP(a, b) SUB_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE) #define MUL_OP(a, b) MUL_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define MLA_OP(a, b, c) MLA_SAT_OP_EXPAND((a), (b), (c), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) -#define DIV_OP(a, b) DIV_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) +#define DIV_OP(a, b) DIV_SAT_OP_VEC_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define EXP_OP(a) EXP_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define LOG_OP(a) LOG_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define SQRT_OP(a) DIV_OP(CONST_ONE, INVSQRT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)) diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl index f511613a37..2db8c67877 100644 --- a/src/core/CL/cl_kernels/fill_border.cl +++ b/src/core/CL/cl_kernels/fill_border.cl @@ -24,6 +24,12 @@ #include "fixed_point.h" #include "helpers.h" +#if defined(FIXED_POINT_POSITION) + +#include "fixed_point.h" + +#endif /* FIXED_POINT_POSITION */ + /** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel. * * @attention The DATA_TYPE needs to be passed at the compile time. diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h index d35a46f428..478a414cad 100644 --- a/src/core/CL/cl_kernels/fixed_point.h +++ b/src/core/CL/cl_kernels/fixed_point.h @@ -290,7 +290,7 @@ MLALQ_SAT_IMPL(qs16x8, qs32x8) #define MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position) mlal_sat_##type##x##size((a), (b), (c), (position)) #define MLAL_SAT_OP_EXPAND(a, b, c, type, size, position) MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position) -/** Saturate division of two fixed point numbers +/** Saturate division of two fixed point vectors * * @param[in] stype the actual scalar data type. * @param[in] type the actual data type. @@ -298,22 +298,27 @@ MLALQ_SAT_IMPL(qs16x8, qs32x8) * * @return The result of the fixed point division. The result is saturated in case of overflow */ -#define DIVQ_SAT_IMPL(stype, type, itype) \ - inline type div_sat_##type(type VopA, type VopB, int fixed_point_position) \ - { \ - itype conv_a = CONVERT((VopA), itype); \ - itype denominator = CONVERT((VopB), itype); \ - itype numerator = conv_a << (itype)(fixed_point_position); \ - itype res = select(numerator / denominator, select((itype)stype##_MAX, (itype)stype##_MIN, conv_a < (itype)0), denominator == (itype)0); \ - return CONVERT_SAT((res), type); \ +#define DIVQ_SAT_IMPL(stype, type, itype) \ + inline type div_sat_##type(type VopA, type VopB, int fixed_point_position) \ + { \ + itype conv_a = CONVERT((VopA), itype); \ + itype denominator = CONVERT((VopB), itype); \ + itype numerator = conv_a << (itype)(fixed_point_position); \ + itype res = select((itype)(numerator / denominator), select((itype)stype##_MAX, (itype)stype##_MIN, (itype)(conv_a < (itype)0)), (itype)(denominator == (itype)0)); \ + return CONVERT_SAT((res), type); \ } DIVQ_SAT_IMPL(qs8, qs8x16, qs16x16) DIVQ_SAT_IMPL(qs16, qs16x8, qs32x8) DIVQ_SAT_IMPL(qs16, qs16x16, qs32x16) +DIVQ_SAT_IMPL(qs8, qs8, qs16) +DIVQ_SAT_IMPL(qs16, qs16, qs32) -#define DIV_SAT_OP_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position)) -#define DIV_SAT_OP_EXPAND(a, b, type, size, position) DIV_SAT_OP_EXPAND_STR(a, b, type, size, position) +#define DIV_SAT_OP_EXPAND_STR(a, b, type, position) div_sat_##type((a), (b), (position)) +#define DIV_SAT_OP_EXPAND(a, b, type, position) DIV_SAT_OP_EXPAND_STR(a, b, type, position) + +#define DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position)) +#define DIV_SAT_OP_VEC_EXPAND(a, b, type, size, position) DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position) /** Saturate exponential of a fixed point vector * @@ -372,7 +377,7 @@ EXPQ_IMPL(qs16, qs16x16, 16) type B = -(type)(0x56AE >> (15 - fixed_point_position)); /* -0.6771900 */ \ type C = (type)(0x2933 >> (15 - fixed_point_position)); /* 0.3218538 */ \ type D = -(type)(0x0AA7 >> (15 - fixed_point_position)); /* -0.0832229 */ \ - type inter_a = select(VopA, DIV_SAT_OP_EXPAND(const_one, VopA, stype, size, fixed_point_position), VopA < const_one); \ + type inter_a = select(VopA, DIV_SAT_OP_VEC_EXPAND(const_one, VopA, stype, size, fixed_point_position), VopA < const_one); \ type shift_val = (type)(15 - stype##_SHIFT) - clz(inter_a >> (type)fixed_point_position); \ inter_a = inter_a >> shift_val; \ inter_a = sub_sat(inter_a, const_one); \ @@ -444,7 +449,7 @@ INVSQRTQ_IMPL(qs16, qs16x8, 8) type exp2x = EXP_OP_EXPAND(MUL_SAT_OP_EXPAND(const_two, VopA, stype, size, fixed_point_position), stype, size, fixed_point_position); \ type num = SUB_SAT_OP_EXPAND(exp2x, const_one, stype, size); \ type den = ADD_SAT_OP_EXPAND(exp2x, const_one, stype, size); \ - return DIV_SAT_OP_EXPAND(num, den, stype, size, fixed_point_position); \ + return DIV_SAT_OP_VEC_EXPAND(num, den, stype, size, fixed_point_position); \ } TANHQ_IMPL(qs8, qs8x16, 16) diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl index 598b734c26..e2a5c4079a 100644 --- a/src/core/CL/cl_kernels/normalization_layer.cl +++ b/src/core/CL/cl_kernels/normalization_layer.cl @@ -28,7 +28,7 @@ #include "fixed_point.h" #define MUL_OP(x, y) MUL_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define ADD_OP(x, y) ADD_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE) -#define DIV_OP(x, y) DIV_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) +#define DIV_OP(x, y) DIV_SAT_OP_VEC_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define EXP_OP(x) EXP_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define LOG_OP(x) LOG_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION) #define POW_OP(x, y) EXP_OP(MUL_OP(LOG_OP((x)), (y))) diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl index 06989aa15e..18ad4a69a8 100644 --- a/src/core/CL/cl_kernels/pooling_layer.cl +++ b/src/core/CL/cl_kernels/pooling_layer.cl @@ -23,12 +23,31 @@ */ #include "helpers.h" -#ifdef POOL_AVG +#ifdef FIXED_POINT_POSITION + +#include "fixed_point.h" + +#if defined(POOL_AVG) +#define POOL_OP(x, y) add_sat(x, y) +#else /* POOL_AVG */ +#define POOL_OP(x, y) (max((x), (y))) +#endif /* POOL_AVG */ + +#define DIV_OP1(x, y) DIV_SAT_OP_EXPAND((x), y, DATA_TYPE, FIXED_POINT_POSITION) +#define DIV_OP(x, y) DIV_OP1(x, y << FIXED_POINT_POSITION) + +#else /* FIXED_POINT_POSITION */ + +#if defined(POOL_AVG) #define POOL_OP(x, y) ((x) + (y)) #else /* POOL_AVG */ #define POOL_OP(x, y) (fmax((x), (y))) #endif /* POOL_AVG */ +#define DIV_OP(x, y) (x * (1.f / y)) + +#endif /* FIXED_POINT_POSITION */ + #if STRIDE_X == 1 #define POOLING3x3(res, input, output) POOLING3x3_STRIDE1(res, input, output) #elif STRIDE_X == 2 /* STRIDE_X == 1 */ @@ -37,9 +56,6 @@ #define POOLING3x3(res, input, output) POOLING3x3_STRIDE3(res, input, output) #endif /* STRIDE_X == 3 */ -#define CONVERT_OP(data_type) convert_##data_type##4 -#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type) - #define POOLING3x3_STRIDE1(res, input, output) \ ({ \ VEC_DATA_TYPE(DATA_TYPE, 4) \ @@ -142,30 +158,19 @@ DATA_TYPE calculate_avg_scale(const int pool_size, const int upper_bound_w, cons const int start_y = get_global_id(1) * stride_y - pad_y; const int end_x = min(start_x + pool_size, upper_bound_w); const int end_y = min(start_y + pool_size, upper_bound_h); - return 1.f / ((end_y - start_y) * (end_x - start_x)); -} - -VEC_DATA_TYPE(DATA_TYPE, 4) -calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upper_bound_h, - const int pad_x, const int pad_y, const int stride_x, const int stride_y) -{ - const int4 start_x = ((int4)get_global_id(0) * 4 + (int4)(0, 1, 2, 3)) * (int4)stride_x - (int4)pad_x; - const int start_y = get_global_id(1) * stride_y - pad_y; - const int4 end_x = min(start_x + (int4)pool_size, (int4)upper_bound_w); - const int end_y = min(start_y + pool_size, upper_bound_h); - return (VEC_DATA_TYPE(DATA_TYPE, 4))(1.f) / CONVERT_VECTOR4(DATA_TYPE)(((int4)(end_y - start_y)) * (end_x - start_x)); + return ((end_y - start_y) * (end_x - start_x)); } /** Performs a pooling function of pool size equal to 2. * - * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32; + * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32; * @note In case of average pooling the following information must be passed at compile time: * -DPOOL_AVG must be provided otherwise max pooling will be performed. * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension * - * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 + * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) @@ -202,7 +207,7 @@ __kernel void pooling_layer_2( // Divide by pool region in case of average pooling #ifdef POOL_AVG - res *= calculate_avg_scale(2, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); + res = DIV_OP(res, calculate_avg_scale(2, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)); #endif /* POOL_AVG */ // Store result @@ -211,14 +216,14 @@ __kernel void pooling_layer_2( /** Performs a pooling function of pool size equal to 3 * - * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32; + * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32; * @note In case of average pooling the following information must be passed at compile time: * -DPOOL_AVG must be provided otherwise max pooling will be performed. * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension * - * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 + * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) @@ -258,17 +263,32 @@ __kernel void pooling_layer_3( // Divide by pool region in case of average pooling #ifdef POOL_AVG - res *= calculate_avg_scale(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); -#endif //POOL_AVG + res = DIV_OP(res, calculate_avg_scale(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)); +#endif /* POOL_AVG */ // Store result *(__global DATA_TYPE *)output.ptr = res; } -#if defined(POOLING3x3) +#if defined(POOLING3x3) && !defined(FIXED_POINT_POSITION) + +#define CONVERT_OP(data_type) convert_##data_type##4 +#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type) + +VEC_DATA_TYPE(DATA_TYPE, 4) +calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upper_bound_h, + const int pad_x, const int pad_y, const int stride_x, const int stride_y) +{ + const int4 start_x = ((int4)get_global_id(0) * 4 + (int4)(0, 1, 2, 3)) * (int4)stride_x - (int4)pad_x; + const int start_y = get_global_id(1) * stride_y - pad_y; + const int4 end_x = min(start_x + (int4)pool_size, (int4)upper_bound_w); + const int end_y = min(start_y + pool_size, upper_bound_h); + return (VEC_DATA_TYPE(DATA_TYPE, 4))(1.f) / CONVERT_VECTOR4(DATA_TYPE)(((int4)(end_y - start_y)) * (end_x - start_x)); +} + /** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3 * - * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32; + * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32; * @note In case of average pooling the following information must be passed at compile time: * -DPOOL_AVG must be provided otherwise max pooling will be performed. * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) @@ -313,18 +333,18 @@ __kernel void pooling_layer_3_optimized( vstore4(res, 0, (__global DATA_TYPE *)output.ptr); } -#endif // defined(POOLING3x3) +#endif // defined(POOLING3x3) && !defined(FIXED_POINT_POSITION) /** Performs a pooling function of pool size equal to 7. * - * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32; + * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32; * @note In case of average pooling the following information must be passed at compile time: * -DPOOL_AVG must be provided otherwise max pooling will be performed. * -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad) * -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions * -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension * - * @param[in] input_ptr Pointer to the source image. Supported data types: F16/F32 + * @param[in] input_ptr Pointer to the source image. Supported data types: QS8/QS16/F16/F32 * @param[in] input_stride_x Stride of the source image in X dimension (in bytes) * @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] input_stride_y Stride of the source image in Y dimension (in bytes) @@ -389,7 +409,7 @@ __kernel void pooling_layer_7( // Divide by pool region in case of average pooling #ifdef POOL_AVG - res *= calculate_avg_scale(7, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y); + res = DIV_OP(res, calculate_avg_scale(7, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y)); #endif /* POOL_AVG */ // Store result diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl index e895bc1eae..9b24380393 100644 --- a/src/core/CL/cl_kernels/softmax_layer.cl +++ b/src/core/CL/cl_kernels/softmax_layer.cl @@ -29,7 +29,7 @@ #define MAX_OP(x, y, type, size) MAX_OP_EXPAND(x, y, type, size) #define ADD_OP(x, y, type, size) ADD_SAT_OP_EXPAND((x), (y), type, size) #define SUB_OP(x, y, type, size) SUB_SAT_OP_EXPAND((x), (y), type, size) -#define DIV_OP(x, y, type, size) DIV_SAT_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION) +#define DIV_OP(x, y, type, size) DIV_SAT_OP_VEC_EXPAND((x), (y), type, size, FIXED_POINT_POSITION) #define EXP_OP(x, type, size) EXP_OP_EXPAND((x), type, size, FIXED_POINT_POSITION) #define MIN_VAL_EXPAND(type) type##_MIN diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp index 7667491710..6ff152113b 100644 --- a/src/core/CL/kernels/CLFillBorderKernel.cpp +++ b/src/core/CL/kernels/CLFillBorderKernel.cpp @@ -90,6 +90,10 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo build_opts.emplace(("-DBORDER_SIZE_BOTTOM=" + support::cpp11::to_string(border_size.bottom))); build_opts.emplace(("-DBORDER_SIZE_LEFT=" + support::cpp11::to_string(border_size.left))); build_opts.emplace(("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right))); + if(is_data_type_fixed_point(tensor->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION"); + } // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts)); diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp index 6b2e881e68..3ef4725df9 100644 --- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp +++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp @@ -67,10 +67,12 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, static const std::set supported_pool_sizes = { 2, 3, 7 }; ARM_COMPUTE_UNUSED(supported_pool_sizes); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_NULLPTR(output); ARM_COMPUTE_ERROR_ON(supported_pool_sizes.find(pool_size) == supported_pool_sizes.end()); ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); // Check output dimensions std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0), @@ -94,7 +96,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenCL kernel where // each thread computes 4 output elements - const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3); + const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(input->info()->data_type()); int num_elements_read_per_iteration = (pool_size == 7) ? 8 : pool_size; if(is_pool3x3_stride_le3) @@ -120,6 +122,11 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output, std::set build_opts; build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type()))); build_opts.emplace(("-DPOOL_" + ((PoolingType::MAX == pool_type) ? std::string("MAX") : std::string("AVG")))); + if(is_data_type_fixed_point(input->info()->data_type())) + { + build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position())); + } + build_opts.emplace(("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x))); if(pool_type == PoolingType::AVG) { diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp index 114325bc6f..286b1d98df 100644 --- a/tests/validation/CL/PoolingLayer.cpp +++ b/tests/validation/CL/PoolingLayer.cpp @@ -39,21 +39,24 @@ using namespace arm_compute::test::validation; namespace { -const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */ +const float tolerance_qs8 = 3; /**< Tolerance value for comparing reference's output against implementation's output for quantized input */ +const float tolerance_qs16 = 6; /**< Tolerance value for comparing reference's output against implementation's output for quantized input */ +const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */ /** Compute CL pooling layer function. * - * @param[in] shape Shape of the input and output tensors. - * @param[in] dt Data type of input and output tensors. - * @param[in] pool_info Pooling Layer information. + * @param[in] shape Shape of the input and output tensors. + * @param[in] dt Data type of input and output tensors. + * @param[in] pool_info Pooling Layer information. + * @param[in] fixed_point_position The fixed point position. * * @return Computed output tensor. */ -CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info) +CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info, int fixed_point_position = 0) { // Create tensors - CLTensor src = create_tensor(shape_in, dt); - CLTensor dst = create_tensor(shape_out, dt); + CLTensor src = create_tensor(shape_in, dt, 1, fixed_point_position); + CLTensor dst = create_tensor(shape_out, dt, 1, fixed_point_position); // Create and configure function CLPoolingLayer pool; @@ -67,7 +70,24 @@ CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &s BOOST_TEST(!dst.info()->is_resizable()); // Fill tensors - std::uniform_real_distribution<> distribution(-1, 1); + // Fill tensors + int min = 0; + int max = 0; + switch(dt) + { + case DataType::F32: + min = -1; + max = 1; + break; + case DataType::QS8: + case DataType::QS16: + min = -(1 << fixed_point_position); + max = (1 << fixed_point_position); + break; + default: + ARM_COMPUTE_ERROR("DataType not supported."); + } + std::uniform_real_distribution<> distribution(min, max); library->fill(CLAccessor(src), distribution, 0); // Compute function @@ -113,6 +133,43 @@ BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes() * PoolingType } BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE(Quantized) + +BOOST_AUTO_TEST_SUITE(QS8) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RandomDataset, + RandomPoolingLayerDataset() * boost::unit_test::data::xrange(1, 5), + obj, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS8, obj.info, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS8, obj.info, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_qs8, 0); +} +BOOST_AUTO_TEST_SUITE_END() + +BOOST_AUTO_TEST_SUITE(QS16) +BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit")) +BOOST_DATA_TEST_CASE(RandomDataset, + RandomPoolingLayerDataset() * boost::unit_test::data::xrange(1, 12), + obj, fixed_point_position) +{ + // Compute function + CLTensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS16, obj.info, fixed_point_position); + + // Compute reference + RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS16, obj.info, fixed_point_position); + + // Validate output + validate(CLAccessor(dst), ref_dst, tolerance_qs16, 0); +} +BOOST_AUTO_TEST_SUITE_END() +BOOST_AUTO_TEST_SUITE_END() + BOOST_AUTO_TEST_SUITE_END() BOOST_AUTO_TEST_SUITE_END() #endif /* DOXYGEN_SKIP_THIS */ -- cgit v1.2.1