3 files changed, 4 insertions, 5 deletions
diff --git a/src/core/CL/cl_kernels/pooling_layer.cl b/src/core/CL/cl_kernels/pooling_layer.cl
index 99d7e6e01b..5304c71adf 100644
--- a/src/core/CL/cl_kernels/pooling_layer.cl
+++ b/src/core/CL/cl_kernels/pooling_layer.cl
@@ -515,7 +515,7 @@ __kernel void pooling_layer_7(
 
 /** Performs a pooling function of pool size equal to N
  *
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16/F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are QS8/QS16/F16/F32;
  * @note -DFP16 must be passed at compile time if half float data type is used
  * @note Pool size must be passed using -DPOOL_SIZE e.g. -DPOOL_SIZE=13;
  * @note In case of average pooling the following information must be passed at compile time:
@@ -524,7 +524,7 @@ __kernel void pooling_layer_7(
  *       -DSTRIDE_X and -DSTRIDE_Y which are the steps of the window along the x and y directions
  *       -DPAD_X and -DPAD_Y which are the pooling paddings in x and y dimension
  *
- * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16/F32
+ * @param[in]  input_ptr                            Pointer to the source image. Supported data types: QS8/QS16/F16/F32
  * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
diff --git a/src/core/CL/kernels/CLPoolingLayerKernel.cpp b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
index 497e87b2b5..542d5dcf9f 100644
--- a/src/core/CL/kernels/CLPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLPoolingLayerKernel.cpp
@@ -67,7 +67,6 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
     ARM_COMPUTE_ERROR_ON(pool_pad_x >= pool_size || pool_pad_y >= pool_size);
-    ARM_COMPUTE_ERROR_ON(pool_size > 7 && is_data_type_fixed_point(input->info()->data_type()));
 
     // Check output dimensions
     std::tie(pooled_w, pooled_h) = scaled_dimensions(input->info()->dimension(0),
@@ -118,7 +117,7 @@ void CLPoolingLayerKernel::configure(const ICLTensor *input, ICLTensor *output,
     }
 
     // Create kernel
-    if(pool_size <= 7)
+    if((pool_size == 2) || (pool_size == 3) || (pool_size == 7))
     {
         // Check if we have pool3x3 with stride_x less equal than 3. In these cases, run an optimized OpenCL kernel where
         // each thread computes 4 output elements
diff --git a/tests/validation/CL/PoolingLayer.cpp b/tests/validation/CL/PoolingLayer.cpp
index 44617f624c..809c80f28c 100644
--- a/tests/validation/CL/PoolingLayer.cpp
+++ b/tests/validation/CL/PoolingLayer.cpp
@@ -44,7 +44,7 @@ namespace validation
 namespace
 {
 /** Input data set for float data types */
-const auto PoolingLayerDatasetFP = combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 7, 9 })),
+const auto PoolingLayerDatasetFP = combine(combine(datasets::PoolingTypes(), framework::dataset::make("PoolingSize", { 2, 3, 4, 7, 9 })),
                                            framework::dataset::make("PadStride", { PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 1, 0, 0), PadStrideInfo(1, 2, 1, 1), PadStrideInfo(2, 2, 1, 0) }));
 
 /** Input data set for quantized data types */