COMPMID-431 Port OpenCL pooling layer to use fixed point

Change-Id: I6a73cd6582097aaefa83588aad789bdefdc74406 Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79967 Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
author: steniu01 <steven.niu@arm.com> 2017-06-23 17:00:26 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit: 0c7614f7178b255c6c3d5b09aeee259e219fd8c8 (patch)
tree: 304472ef6be1a2c5bfc774d40461213ff6733122
parent: c57a06aee8e610bbae53b5f67e8f76f6cdcd78bf (diff)
download: ComputeLibrary-0c7614f7178b255c6c3d5b09aeee259e219fd8c8.tar.gz
12 files changed, 157 insertions, 58 deletions
diff --git a/arm_compute/core/CL/kernels/CLFillBorderKernel.h b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
index 797f86dae8..3881b427f7 100644
--- a/arm_compute/core/CL/kernels/CLFillBorderKernel.h
+++ b/arm_compute/core/CL/kernels/CLFillBorderKernel.h
@@ -51,7 +51,7 @@ public:
 
     /** Initialise the kernel's input, output and border mode.
      *
-     * @param[in,out] tensor                Tensor to process Data types supported: U8, S16, S32, F32.
+     * @param[in,out] tensor                Tensor to process Data types supported: U8/QS8/S16/QS16/S32/F32.
      * @param[in]     border_size           Size of the border to fill in elements.
      * @param[in]     border_mode           Border mode to use for the convolution.
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
index b4855475c3..80a8cf20e3 100644
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ b/arm_compute/runtime/CL/functions/CLFillBorder.h
@@ -38,7 +38,7 @@ class CLFillBorder : public ICLSimpleFunction
 public:
     /** Initialize the function
      *
-     * @param[in,out] tensor                Source tensor. Data types supported: U8, S16
+     * @param[in,out] tensor                Source tensor. Data types supported: QS8/U8/S16/QS16
      * @param[in]     border_width          The border width
      * @param[in]     border_mode           Strategy to use for borders.
      * @param[in]     constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
diff --git a/arm_compute/runtime/CL/functions/CLPoolingLayer.h b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
index f92860e5b2..b9675b94e0 100644
--- a/arm_compute/runtime/CL/functions/CLPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLPoolingLayer.h
@@ -42,7 +42,7 @@ class CLPoolingLayer : public ICLSimpleFunction
 public:
     /** Set the input and output tensors.
      *
-     * @param[in,out] input     Source tensor. (Written to only when padding != 0) Data types supported: F16, F32.
+     * @param[in,out] input     Source tensor. (Written to only when padding != 0) Data types supported: QS8/QS16/F16/F32.
      * @param[out]    output    Destination tensor. Data types supported: Same as @p input.
      * @param[in]     pool_info Contains pooling operation information described in @ref PoolingLayerInfo.
      */
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl
index 7527b1ce37..119879afd5 100644
--- a/src/core/CL/cl_kernels/activation_layer.cl
+++ b/src/core/CL/cl_kernels/activation_layer.cl
@@ -34,7 +34,7 @@
 #define SUB_OP(a, b) SUB_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE)
 #define MUL_OP(a, b) MUL_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define MLA_OP(a, b, c) MLA_SAT_OP_EXPAND((a), (b), (c), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
-#define DIV_OP(a, b) DIV_SAT_OP_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define DIV_OP(a, b) DIV_SAT_OP_VEC_EXPAND((a), (b), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define EXP_OP(a) EXP_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define LOG_OP(a) LOG_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define SQRT_OP(a) DIV_OP(CONST_ONE, INVSQRT_OP_EXPAND((a), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION))
diff --git a/src/core/CL/cl_kernels/fill_border.cl b/src/core/CL/cl_kernels/fill_border.cl
index f511613a37..2db8c67877 100644
--- a/src/core/CL/cl_kernels/fill_border.cl
+++ b/src/core/CL/cl_kernels/fill_border.cl
@@ -24,6 +24,12 @@
 #include "fixed_point.h"
 #include "helpers.h"
 
+#if defined(FIXED_POINT_POSITION)
+
+#include "fixed_point.h"
+
+#endif /* FIXED_POINT_POSITION */
+
 /** Fill N pixel of the padding edge of a single channel image by replicating the closest valid pixel.
  *
  * @attention  The DATA_TYPE needs to be passed at the compile time.
diff --git a/src/core/CL/cl_kernels/fixed_point.h b/src/core/CL/cl_kernels/fixed_point.h
index d35a46f428..478a414cad 100644
--- a/src/core/CL/cl_kernels/fixed_point.h
+++ b/src/core/CL/cl_kernels/fixed_point.h
@@ -290,7 +290,7 @@ MLALQ_SAT_IMPL(qs16x8, qs32x8)
 #define MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position) mlal_sat_##type##x##size((a), (b), (c), (position))
 #define MLAL_SAT_OP_EXPAND(a, b, c, type, size, position) MLAL_SAT_OP_EXPAND_STR(a, b, c, type, size, position)
 
-/** Saturate division of two fixed point numbers
+/** Saturate division of two fixed point vectors
   *
   * @param[in] stype the actual scalar data type.
   * @param[in] type  the actual data type.
@@ -298,22 +298,27 @@ MLALQ_SAT_IMPL(qs16x8, qs32x8)
   *
   * @return The result of the fixed point division. The result is saturated in case of overflow
   */
-#define DIVQ_SAT_IMPL(stype, type, itype)                                                                                                                \
-    inline type div_sat_##type(type VopA, type VopB, int fixed_point_position)                                                                           \
-    {                                                                                                                                                    \
-        itype conv_a      = CONVERT((VopA), itype);                                                                                                      \
-        itype denominator = CONVERT((VopB), itype);                                                                                                      \
-        itype numerator   = conv_a << (itype)(fixed_point_position);                                                                                     \
-        itype res         = select(numerator / denominator, select((itype)stype##_MAX, (itype)stype##_MIN, conv_a < (itype)0), denominator == (itype)0); \
-        return CONVERT_SAT((res), type);                                                                                                                 \
+#define DIVQ_SAT_IMPL(stype, type, itype)                                                                                                                                           \
+    inline type div_sat_##type(type VopA, type VopB, int fixed_point_position)                                                                                                      \
+    {                                                                                                                                                                               \
+        itype conv_a      = CONVERT((VopA), itype);                                                                                                                                 \
+        itype denominator = CONVERT((VopB), itype);                                                                                                                                 \
+        itype numerator   = conv_a << (itype)(fixed_point_position);                                                                                                                \
+        itype res         = select((itype)(numerator / denominator), select((itype)stype##_MAX, (itype)stype##_MIN, (itype)(conv_a < (itype)0)), (itype)(denominator == (itype)0)); \
+        return CONVERT_SAT((res), type);                                                                                                                                            \
     }
 
 DIVQ_SAT_IMPL(qs8, qs8x16, qs16x16)
 DIVQ_SAT_IMPL(qs16, qs16x8, qs32x8)
 DIVQ_SAT_IMPL(qs16, qs16x16, qs32x16)
+DIVQ_SAT_IMPL(qs8, qs8, qs16)
+DIVQ_SAT_IMPL(qs16, qs16, qs32)
 
-#define DIV_SAT_OP_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position))
-#define DIV_SAT_OP_EXPAND(a, b, type, size, position) DIV_SAT_OP_EXPAND_STR(a, b, type, size, position)
+#define DIV_SAT_OP_EXPAND_STR(a, b, type, position) div_sat_##type((a), (b), (position))
+#define DIV_SAT_OP_EXPAND(a, b, type, position) DIV_SAT_OP_EXPAND_STR(a, b, type, position)
+
+#define DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position) div_sat_##type##x##size((a), (b), (position))
+#define DIV_SAT_OP_VEC_EXPAND(a, b, type, size, position) DIV_SAT_OP_VEC_EXPAND_STR(a, b, type, size, position)
 
 /** Saturate exponential of a fixed point vector
   *
@@ -372,7 +377,7 @@ EXPQ_IMPL(qs16, qs16x16, 16)
         type B         = -(type)(0x56AE >> (15 - fixed_point_position)); /* -0.6771900 */                                                  \
         type C         = (type)(0x2933 >> (15 - fixed_point_position));  /* 0.3218538 */                                                   \
         type D         = -(type)(0x0AA7 >> (15 - fixed_point_position)); /* -0.0832229 */                                                  \
-        type inter_a   = select(VopA, DIV_SAT_OP_EXPAND(const_one, VopA, stype, size, fixed_point_position), VopA < const_one);            \
+        type inter_a   = select(VopA, DIV_SAT_OP_VEC_EXPAND(const_one, VopA, stype, size, fixed_point_position), VopA < const_one);        \
         type shift_val = (type)(15 - stype##_SHIFT) - clz(inter_a >> (type)fixed_point_position);                                          \
         inter_a        = inter_a >> shift_val;                                                                                             \
         inter_a        = sub_sat(inter_a, const_one);                                                                                      \
@@ -444,7 +449,7 @@ INVSQRTQ_IMPL(qs16, qs16x8, 8)
         type exp2x     = EXP_OP_EXPAND(MUL_SAT_OP_EXPAND(const_two, VopA, stype, size, fixed_point_position), stype, size, fixed_point_position); \
         type num       = SUB_SAT_OP_EXPAND(exp2x, const_one, stype, size);                                                                        \
         type den       = ADD_SAT_OP_EXPAND(exp2x, const_one, stype, size);                                                                        \
-        return DIV_SAT_OP_EXPAND(num, den, stype, size, fixed_point_position);                                                                    \
+        return DIV_SAT_OP_VEC_EXPAND(num, den, stype, size, fixed_point_position);                                                                \
     }
 
 TANHQ_IMPL(qs8, qs8x16, 16)
diff --git a/src/core/CL/cl_kernels/normalization_layer.cl b/src/core/CL/cl_kernels/normalization_layer.cl
index 598b734c26..e2a5c4079a 100644
--- a/src/core/CL/cl_kernels/normalization_layer.cl
+++ b/src/core/CL/cl_kernels/normalization_layer.cl
@@ -28,7 +28,7 @@
 #include "fixed_point.h"
 #define MUL_OP(x, y) MUL_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define ADD_OP(x, y) ADD_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE)
-#define DIV_OP(x, y) DIV_SAT_OP_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
+#define DIV_OP(x, y) DIV_SAT_OP_VEC_EXPAND((x), (y), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define EXP_OP(x) EXP_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define LOG_OP(x) LOG_OP_EXPAND((x), DATA_TYPE, VEC_SIZE, FIXED_POINT_POSITION)
 #define POW_OP(x, y) EXP_OP(MUL_OP(LOG_OP((x)), (y)))
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 06989aa15e..18ad4a69a8 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -23,12 +23,31 @@
  */
 #include "helpers.h"
 
-#ifdef POOL_AVG
+#ifdef FIXED_POINT_POSITION
+
+#include "fixed_point.h"
+
+#if defined(POOL_AVG)
+#define POOL_OP(x, y) add_sat(x, y)
+#else /* POOL_AVG */
+#define POOL_OP(x, y) (max((x), (y)))
+#endif /* POOL_AVG */
+
+#define DIV_OP1(x, y) DIV_SAT_OP_EXPAND((x), y, DATA_TYPE, FIXED_POINT_POSITION)
+#define DIV_OP(x, y) DIV_OP1(x, y << FIXED_POINT_POSITION)
+
+#else /* FIXED_POINT_POSITION */
+
+#if defined(POOL_AVG)
 #define POOL_OP(x, y) ((x) + (y))
 #else /* POOL_AVG */
 #define POOL_OP(x, y) (fmax((x), (y)))
 #endif /* POOL_AVG */
 
+#define DIV_OP(x, y) (x * (1.f / y))
+
+#endif /* FIXED_POINT_POSITION */
+
 #if STRIDE_X == 1
 #define POOLING3x3(res, input, output) POOLING3x3_STRIDE1(res, input, output)
 #elif STRIDE_X == 2 /* STRIDE_X == 1 */
@@ -37,9 +56,6 @@
 #define POOLING3x3(res, input, output) POOLING3x3_STRIDE3(res, input, output)
 #endif /* STRIDE_X == 3 */
 
-#define CONVERT_OP(data_type) convert_##data_type##4
-#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type)
-
 #define POOLING3x3_STRIDE1(res, input, output)                                                                                               \
     ({                                                                                                                                       \
         VEC_DATA_TYPE(DATA_TYPE, 4)                                                                                                          \
@@ -142,30 +158,19 @@ DATA_TYPE calculate_avg_scale(const int pool_size, const int upper_bound_w, cons
     const int start_y = get_global_id(1) * stride_y - pad_y;
     const int end_x   = min(start_x + pool_size, upper_bound_w);
     const int end_y   = min(start_y + pool_size, upper_bound_h);
-    return 1.f / ((end_y - start_y) * (end_x - start_x));
-}
-
-VEC_DATA_TYPE(DATA_TYPE, 4)
-calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upper_bound_h,
-                     const int pad_x, const int pad_y, const int stride_x, const int stride_y)
-{
-    const int4 start_x = ((int4)get_global_id(0) * 4 + (int4)(0, 1, 2, 3)) * (int4)stride_x - (int4)pad_x;
-    const int  start_y = get_global_id(1) * stride_y - pad_y;
-    const int4 end_x   = min(start_x + (int4)pool_size, (int4)upper_bound_w);
-    const int  end_y   = min(start_y + pool_size, upper_bound_h);
-    return (VEC_DATA_TYPE(DATA_TYPE, 4))(1.f) / CONVERT_VECTOR4(DATA_TYPE)(((int4)(end_y - start_y)) * (end_x - start_x));
+    return ((end_y - start_y) * (end_x - start_x));
 }
 
 /** Performs a pooling function of pool size equal to 2.
  *
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
  * @note In case of average pooling the following information must be passed at compile time:
  *       -DPOOL_AVG must be provided otherwise max pooling will be performed.
  *       -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
  *       -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
  *       -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
  *
- * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16/F32
+ * @param[in]  input_ptr                            Pointer to the source image. Supported data types: QS8/QS16/F16/F32
  * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
@@ -202,7 +207,7 @@ __kernel void pooling_layer_2(
 
     // Divide by pool region in case of average pooling
 #ifdef POOL_AVG
-    res *= calculate_avg_scale(2, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y);
+    res = DIV_OP(res, calculate_avg_scale(2, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
 #endif /* POOL_AVG */
 
     // Store result
@@ -211,14 +216,14 @@ __kernel void pooling_layer_2(
 
 /** Performs a pooling function of pool size equal to 3
  *
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
  * @note In case of average pooling the following information must be passed at compile time:
  *       -DPOOL_AVG must be provided otherwise max pooling will be performed.
  *       -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
  *       -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
  *       -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
  *
- * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16/F32
+ * @param[in]  input_ptr                            Pointer to the source image. Supported data types: QS8/QS16/F16/F32
  * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
@@ -258,17 +263,32 @@ __kernel void pooling_layer_3(
 
     // Divide by pool region in case of average pooling
 #ifdef POOL_AVG
-    res *= calculate_avg_scale(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y);
-#endif //POOL_AVG
+    res = DIV_OP(res, calculate_avg_scale(3, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
+#endif /* POOL_AVG */
 
     // Store result
     *(__global DATA_TYPE *)output.ptr = res;
 }
 
-#if defined(POOLING3x3)
+#if defined(POOLING3x3) && !defined(FIXED_POINT_POSITION)
+
+#define CONVERT_OP(data_type) convert_##data_type##4
+#define CONVERT_VECTOR4(data_type) CONVERT_OP(data_type)
+
+VEC_DATA_TYPE(DATA_TYPE, 4)
+calculate_avg_scale4(const int pool_size, const int upper_bound_w, const int upper_bound_h,
+                     const int pad_x, const int pad_y, const int stride_x, const int stride_y)
+{
+    const int4 start_x = ((int4)get_global_id(0) * 4 + (int4)(0, 1, 2, 3)) * (int4)stride_x - (int4)pad_x;
+    const int  start_y = get_global_id(1) * stride_y - pad_y;
+    const int4 end_x   = min(start_x + (int4)pool_size, (int4)upper_bound_w);
+    const int  end_y   = min(start_y + pool_size, upper_bound_h);
+    return (VEC_DATA_TYPE(DATA_TYPE, 4))(1.f) / CONVERT_VECTOR4(DATA_TYPE)(((int4)(end_y - start_y)) * (end_x - start_x));
+}
+
 /** Performs an optimized pooling function of pool size equal to 3 when the stride_x is less equal than 3
  *
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
  * @note In case of average pooling the following information must be passed at compile time:
  *       -DPOOL_AVG must be provided otherwise max pooling will be performed.
  *       -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
@@ -313,18 +333,18 @@ __kernel void pooling_layer_3_optimized(
 
     vstore4(res, 0, (__global DATA_TYPE *)output.ptr);
 }
-#endif // defined(POOLING3x3)
+#endif // defined(POOLING3x3) && !defined(FIXED_POINT_POSITION)
 
 /** Performs a pooling function of pool size equal to 7.
  *
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
  * @note In case of average pooling the following information must be passed at compile time:
  *       -DPOOL_AVG must be provided otherwise max pooling will be performed.
  *       -DMAX_WIDTH and -DMAX_HEIGHT which are the maximum accessible indeces in x and y dimensions (width + pad)
  *       -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
  *       -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
  *
- * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16/F32
+ * @param[in]  input_ptr                            Pointer to the source image. Supported data types: QS8/QS16/F16/F32
  * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
@@ -389,7 +409,7 @@ __kernel void pooling_layer_7(
 
     // Divide by pool region in case of average pooling
 #ifdef POOL_AVG
-    res *= calculate_avg_scale(7, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y);
+    res = DIV_OP(res, calculate_avg_scale(7, MAX_WIDTH, MAX_HEIGHT, PAD_X, PAD_Y, STRIDE_X, STRIDE_Y));
 #endif /* POOL_AVG */
 
     // Store result
diff --git a/src/core/CL/cl_kernels/softmax_layer.cl b/src/core/CL/cl_kernels/softmax_layer.cl
index e895bc1eae..9b24380393 100644
--- a/src/core/CL/cl_kernels/softmax_layer.cl
+++ b/src/core/CL/cl_kernels/softmax_layer.cl
@@ -29,7 +29,7 @@
 #define MAX_OP(x, y, type, size) MAX_OP_EXPAND(x, y, type, size)
 #define ADD_OP(x, y, type, size) ADD_SAT_OP_EXPAND((x), (y), type, size)
 #define SUB_OP(x, y, type, size) SUB_SAT_OP_EXPAND((x), (y), type, size)
-#define DIV_OP(x, y, type, size) DIV_SAT_OP_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
+#define DIV_OP(x, y, type, size) DIV_SAT_OP_VEC_EXPAND((x), (y), type, size, FIXED_POINT_POSITION)
 #define EXP_OP(x, type, size) EXP_OP_EXPAND((x), type, size, FIXED_POINT_POSITION)
 
 #define MIN_VAL_EXPAND(type) type##_MIN
diff --git a/src/core/CL/kernels/CLFillBorderKernel.cpp b/src/core/CL/kernels/CLFillBorderKernel.cpp
index 7667491710..6ff152113b 100644
--- a/src/core/CL/kernels/CLFillBorderKernel.cpp
+++ b/src/core/CL/kernels/CLFillBorderKernel.cpp
@@ -90,6 +90,10 @@ void CLFillBorderKernel::configure(ICLTensor *tensor, BorderSize border_size, Bo
     build_opts.emplace(("-DBORDER_SIZE_BOTTOM=" + support::cpp11::to_string(border_size.bottom)));
     build_opts.emplace(("-DBORDER_SIZE_LEFT=" + support::cpp11::to_string(border_size.left)));
     build_opts.emplace(("-DBORDER_SIZE_RIGHT=" + support::cpp11::to_string(border_size.right)));
+    if(is_data_type_fixed_point(tensor->info()->data_type()))
+    {
+        build_opts.emplace("-DFIXED_POINT_POSITION");
+    }
 
     // Create kernel
     _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts));
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 6b2e881e68..3ef4725df9 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -67,10 +67,12 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     static const std::set<int> supported_pool_sizes = { 2, 3, 7 };
     ARM_COMPUTE_UNUSED(supported_pool_sizes);
 
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
     ARM_COMPUTE_ERROR_ON(supported_pool_sizes.find(pool_size) == supported_pool_sizes.end());
     ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size);
+    ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
 
     // Check output dimensions
     std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
@@ -94,7 +96,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
 
     // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenCL kernel where
     // each thread computes 4 output elements
-    const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3);
+    const bool is_pool3x3_stride_le3 = (pool_size == 3) && (pool_stride_x <= 3) && !is_data_type_fixed_point(input->info()->data_type());
 
     int num_elements_read_per_iteration = (pool_size == 7) ? 8 : pool_size;
     if(is_pool3x3_stride_le3)
@@ -120,6 +122,11 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     std::set<std::string> build_opts;
     build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
     build_opts.emplace(("-DPOOL_" + ((PoolingType::MAX == pool_type) ? std::string("MAX") : std::string("AVG"))));
+    if(is_data_type_fixed_point(input->info()->data_type()))
+    {
+        build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(input->info()->fixed_point_position()));
+    }
+
     build_opts.emplace(("-DSTRIDE_X=" + support::cpp11::to_string(pool_stride_x)));
     if(pool_type == PoolingType::AVG)
     {
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 114325bc6f..286b1d98df 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -39,21 +39,24 @@ using namespace arm_compute::test::validation;
 
 namespace
 {
-const float tolerance_f = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */
+const float tolerance_qs8  = 3;     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+const float tolerance_qs16 = 6;     /**< Tolerance value for comparing reference's output against implementation's output for quantized input */
+const float tolerance_f    = 1e-05; /**< Tolerance value for comparing reference's output against implementation's output for float input */
 
 /** Compute CL pooling layer function.
  *
- * @param[in] shape     Shape of the input and output tensors.
- * @param[in] dt        Data type of input and output tensors.
- * @param[in] pool_info Pooling Layer information.
+ * @param[in] shape                Shape of the input and output tensors.
+ * @param[in] dt                   Data type of input and output tensors.
+ * @param[in] pool_info            Pooling Layer information.
+ * @param[in] fixed_point_position The fixed point position.
  *
  * @return Computed output tensor.
  */
-CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info)
+CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &shape_out, DataType dt, PoolingLayerInfo pool_info, int fixed_point_position = 0)
 {
     // Create tensors
-    CLTensor src = create_tensor<CLTensor>(shape_in, dt);
-    CLTensor dst = create_tensor<CLTensor>(shape_out, dt);
+    CLTensor src = create_tensor<CLTensor>(shape_in, dt, 1, fixed_point_position);
+    CLTensor dst = create_tensor<CLTensor>(shape_out, dt, 1, fixed_point_position);
 
     // Create and configure function
     CLPoolingLayer pool;
@@ -67,7 +70,24 @@ CLTensor compute_pooling_layer(const TensorShape &shape_in, const TensorShape &s
     BOOST_TEST(!dst.info()->is_resizable());
 
     // Fill tensors
-    std::uniform_real_distribution<> distribution(-1, 1);
+    // Fill tensors
+    int min = 0;
+    int max = 0;
+    switch(dt)
+    {
+        case DataType::F32:
+            min = -1;
+            max = 1;
+            break;
+        case DataType::QS8:
+        case DataType::QS16:
+            min = -(1 << fixed_point_position);
+            max = (1 << fixed_point_position);
+            break;
+        default:
+            ARM_COMPUTE_ERROR("DataType not supported.");
+    }
+    std::uniform_real_distribution<> distribution(min, max);
     library->fill(CLAccessor(src), distribution, 0);
 
     // Compute function
@@ -113,6 +133,43 @@ BOOST_DATA_TEST_CASE(RunSmall, SmallShapes() * CNNFloatDataTypes() * PoolingType
 }
 BOOST_AUTO_TEST_SUITE_END()
 
+BOOST_AUTO_TEST_SUITE(Quantized)
+
+BOOST_AUTO_TEST_SUITE(QS8)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RandomDataset,
+                     RandomPoolingLayerDataset() * boost::unit_test::data::xrange(1, 5),
+                     obj, fixed_point_position)
+{
+    // Compute function
+    CLTensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS8, obj.info, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS8, obj.info, fixed_point_position);
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst, tolerance_qs8, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+
+BOOST_AUTO_TEST_SUITE(QS16)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RandomDataset,
+                     RandomPoolingLayerDataset() * boost::unit_test::data::xrange(1, 12),
+                     obj, fixed_point_position)
+{
+    // Compute function
+    CLTensor dst = compute_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS16, obj.info, fixed_point_position);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_pooling_layer(obj.src_shape, obj.dst_shape, DataType::QS16, obj.info, fixed_point_position);
+
+    // Validate output
+    validate(CLAccessor(dst), ref_dst, tolerance_qs16, 0);
+}
+BOOST_AUTO_TEST_SUITE_END()
+BOOST_AUTO_TEST_SUITE_END()
+
 BOOST_AUTO_TEST_SUITE_END()
 BOOST_AUTO_TEST_SUITE_END()
 #endif /* DOXYGEN_SKIP_THIS */
author	steniu01 <steven.niu@arm.com>	2017-06-23 17:00:26 +0100
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:35:24 +0000
commit	0c7614f7178b255c6c3d5b09aeee259e219fd8c8 (patch)
tree	304472ef6be1a2c5bfc774d40461213ff6733122
parent	c57a06aee8e610bbae53b5f67e8f76f6cdcd78bf (diff)
download	ComputeLibrary-0c7614f7178b255c6c3d5b09aeee259e219fd8c8.tar.gz