6 files changed, 279 insertions, 54 deletions
diff --git a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
index fded3006ad..3b7804c2b7 100644
--- a/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
+++ b/arm_compute/runtime/CL/functions/CLROIPoolingLayer.h
@@ -43,7 +43,7 @@ class CLROIPoolingLayer : public ICLSimpleFunction
 public:
     /** Set the input and output tensors.
      *
-     * @param[in]  input     Source tensor. Data types supported: F16/F32.
+     * @param[in]  input     Source tensor. Data types supported: F16/F32/QASYMM8.
      * @param[in]  rois      ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
      *                       as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
      * @param[out] output    Destination tensor. Data types supported: Same as @p input.
@@ -58,7 +58,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: F16/F32.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32/QASYMM8.
      * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
      *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
      * @param[out] output          Destination tensor. Data types supported: Same as @p input.
@@ -69,7 +69,23 @@ public:
      * @note The z dimensions of @p output tensor and @p input tensor must be the same.
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
-    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+
+    /** Static function to check if given info will lead to a valid configuration of @ref CLROIPoolingLayer
+     *
+     * @param[in] input     Source tensor info. Data types supported: QASYMM8/F16/F32.
+     * @param[in] rois      ROIs tensor info. Data types supported: U16
+     * @param[in] output    Destination tensor info. Data types supported: Same as @p input.
+     * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled
+     * width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     *
+     * @return a Status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
 };
 }
 #endif /* ARM_COMPUTE_CLROIPOOLINGLAYER_H */
diff --git a/src/core/CL/cl_kernels/roi_pooling_layer.cl b/src/core/CL/cl_kernels/roi_pooling_layer.cl
index ac193e8fb6..6899b952e0 100644
--- a/src/core/CL/cl_kernels/roi_pooling_layer.cl
+++ b/src/core/CL/cl_kernels/roi_pooling_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "helpers.h"
+#include "helpers_asymm.h"
 
 #if DATA_SIZE == 32
 #define VEC_SIZE 4
@@ -29,24 +30,41 @@
 #elif DATA_SIZE == 16
 #define VEC_SIZE 8
 #define VEC_MAX vec8_max
-#else /* DATA_SIZE not equals 32 or 16 */
+#elif DATA_SIZE == 8
+#define VEC_SIZE 16
+#define VEC_MAX vec16_max
+#else /* DATA_SIZE is not 8, 16 or 32 */
 #error "Unsupported data size"
 #endif /* DATA_SIZE == 32 */
 
+// Select the element-wise maximum for the build: max() when OFFSET_OUT and SCALE_OUT are defined (quantized data), fmax() otherwise (float data)
+#if defined(OFFSET_OUT) && defined(SCALE_OUT)
+#define MAX(x, y) max(x, y)
+#else // !(defined(OFFSET_OUT) && defined(SCALE_OUT))
+#define MAX(x, y) fmax(x, y)
+#endif // defined(OFFSET_OUT) && defined(SCALE_OUT)
+
 inline DATA_TYPE vec4_max(VEC_DATA_TYPE(DATA_TYPE, 4) vec)
 {
     VEC_DATA_TYPE(DATA_TYPE, 2)
-    temp = fmax(vec.lo, vec.hi);
-    return fmax(temp.x, temp.y);
+    temp = MAX(vec.lo, vec.hi);
+    return MAX(temp.x, temp.y);
 }
 
 inline DATA_TYPE vec8_max(VEC_DATA_TYPE(DATA_TYPE, 8) vec)
 {
     VEC_DATA_TYPE(DATA_TYPE, 4)
-    temp = fmax(vec.lo, vec.hi);
+    temp = MAX(vec.lo, vec.hi);
     return vec4_max(temp);
 }
 
+// Returns the largest of the 16 lanes: fold the low and high halves with MAX, then reduce the 8-lane result with vec8_max
+inline DATA_TYPE vec16_max(VEC_DATA_TYPE(DATA_TYPE, 16) vec)
+{
+    const VEC_DATA_TYPE(DATA_TYPE, 8) halves_max = MAX(vec.lo, vec.hi);
+    return vec8_max(halves_max);
+}
+
 /** Performs a roi pooling on a single output pixel.
  *
  * @param[in] input          Pointer to input Tensor3D struct.
@@ -69,7 +87,8 @@ inline DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int reg
     {
         int num_iter = (int)((region_end_x - region_start_x) / VEC_SIZE);
         VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
-        curr_max = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))(-FLT_MAX);
+        curr_max = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))(MIN_VALUE);
+
         for(int j = region_start_y; j < region_end_y; ++j)
         {
             int i = region_start_x;
@@ -77,27 +96,34 @@ inline DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int reg
             {
                 VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
                 val      = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(input, i, j, pz));
-                curr_max = fmax(val, curr_max);
+                curr_max = MAX(val, curr_max);
             }
             for(; i < region_end_x; ++i)
             {
                 DATA_TYPE val = *(__global DATA_TYPE *)tensor3D_offset(input, i, j, pz);
-                curr_max      = fmax(curr_max, val);
+                curr_max      = MAX(curr_max, val);
             }
         }
-        return (DATA_TYPE)VEC_MAX(curr_max);
+
+        const DATA_TYPE temp = (DATA_TYPE)VEC_MAX(curr_max);
+
+#if defined(OFFSET_OUT) && defined(SCALE_OUT)
+        return QUANTIZE(temp, OFFSET_OUT, SCALE_OUT, DATA_TYPE, 1);
+#endif /* if quantized, requantize and return */
+
+        return temp;
     }
 }
 
 /** Performs a roi pooling function.
  *
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32, QASYMM8;
  * @note Datasize must be passed using -DDATA_SIZE e.g. -DDATA_SIZE=32;
  * @note Input dimensions must be passed using -DMAX_DIM_X, -DMAX_DIM_Y and -DMAX_DIM_Z;
  * @note Pooled region dimensions must be passed using -DPOOLED_DIM_X and -DPOOLED_DIM_Y;
  * @note Spatial scale must be passed using -DSPATIAL_SCALE;
  *
- * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16, F32
+ * @param[in]  input_ptr                            Pointer to the source image. Supported data types: F16, F32, QASYMM8
  * @param[in]  input_stride_x                       Stride of the source image in X dimension (in bytes)
  * @param[in]  input_step_x                         input_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  input_stride_y                       Stride of the source image in Y dimension (in bytes)
@@ -111,7 +137,7 @@ inline DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int reg
  * @param[in]  rois_stride_y                        Stride of the ROIs tensor in Y dimension (in bytes)
  * @param[in]  rois_step_y                          Step of the ROIs tensor in Y dimension (in bytes)
  * @param[in]  rois_offset_first_element_in_bytes   The offset of the first element in the ROIs tensor
- * @param[out] output_ptr                           Pointer to the destination image. Supported data types: F16, F32
+ * @param[out] output_ptr                           Pointer to the destination image. Supported data types: same as input
  * @param[in]  output_stride_x                      Stride of the destination image in X dimension (in bytes)
  * @param[in]  output_step_x                        output_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in]  output_stride_y                      Stride of the destination image in Y dimension (in bytes)
@@ -139,9 +165,9 @@ __kernel void roi_pooling_layer(
 
     // Load roi parameters
     // roi is laid out as follows { batch_index, x1, y1, x2, y2 }
-    const ushort roi_batch = (ushort) * ((__global DATA_TYPE *)offset(&rois, 0, pw));
-    const VEC_DATA_TYPE(DATA_TYPE, 4)
-    roi               = vload4(0, (__global DATA_TYPE *)offset(&rois, 1, pw));
+    const ushort roi_batch = (ushort) * ((__global ushort *)offset(&rois, 0, pw));
+    const VEC_DATA_TYPE(ushort, 4)
+    roi               = vload4(0, (__global ushort *)offset(&rois, 1, pw));
     const int2 roi_anchor = convert_int2_sat(round(convert_float2(roi.s01) * (float)SPATIAL_SCALE));
     const int2 roi_dims   = convert_int2_sat(fmax(round(convert_float2(roi.s23 - roi.s01) * (float)SPATIAL_SCALE), 1.f));
 
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
index 5867cde3bd..2deb8fac81 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.cpp
@@ -36,6 +36,7 @@
 #include "src/core/helpers/WindowHelpers.h"
 #include "support/StringSupport.h"
 
+#include <float.h>
 #include <cmath>
 #include <set>
 #include <string>
@@ -44,13 +45,13 @@ namespace arm_compute
 {
 namespace
 {
-std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
 {
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
 
     // Output auto initialization if not yet initialized
     TensorShape output_shape(pool_info.pooled_width(), pool_info.pooled_height(), input->dimension(2), rois->dimension(1));
-    auto_init_if_empty((*output), output_shape, 1, input->data_type());
+    auto_init_if_empty((*output), output_shape, 1, input->data_type(), output->quantization_info());
 
     // Configure kernel window
     constexpr unsigned int num_elems_processed_per_iteration = 1;
@@ -70,31 +71,38 @@ CLROIPoolingLayerKernel::CLROIPoolingLayerKernel()
 {
 }
 
+Status CLROIPoolingLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *rois, const ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
+{
+    // Reject null tensor infos once, up front: every check below dereferences them
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, rois, output);
+
+    // ROIs: U16, 5 values per ROI, at most 2 dimensions. Input: F32/F16/QASYMM8. Pooled size must be non-zero
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(rois, 1, DataType::U16);
+    ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(0) != 5);
+    ARM_COMPUTE_RETURN_ERROR_ON(rois->num_dimensions() > 2);
+    ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0));
+
+    if(output->total_size() != 0) // Output checks apply only when its total size is non-zero
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
+        ARM_COMPUTE_RETURN_ERROR_ON((output->dimension(0) != pool_info.pooled_width()) || (output->dimension(1) != pool_info.pooled_height()));
+        ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(2) != output->dimension(2));
+        ARM_COMPUTE_RETURN_ERROR_ON(rois->dimension(1) != output->dimension(3));
+    }
+
+    return Status{};
+}
+
 void CLROIPoolingLayerKernel::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
 }
 
-void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, rois, output);
-
-    //Validate arguments
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input->info(), rois->info(), output->info());
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(rois, 1, DataType::U16);
-    ARM_COMPUTE_ERROR_ON(rois->info()->dimension(0) != 5);
-    ARM_COMPUTE_ERROR_ON(rois->info()->num_dimensions() > 2);
-    ARM_COMPUTE_ERROR_ON_F16_UNSUPPORTED(input);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16);
-    ARM_COMPUTE_ERROR_ON((pool_info.pooled_width() == 0) || (pool_info.pooled_height() == 0));
-
-    if(output->info()->total_size() != 0)
-    {
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-        ARM_COMPUTE_ERROR_ON((output->info()->dimension(0) != pool_info.pooled_width()) || (output->info()->dimension(1) != pool_info.pooled_height()));
-        ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(2));
-        ARM_COMPUTE_ERROR_ON(rois->info()->dimension(1) != output->info()->dimension(3));
-    }
+    ARM_COMPUTE_ERROR_THROW_ON(CLROIPoolingLayerKernel::validate(input->info(), rois->info(), output->info(), pool_info));
 
     // Configure kernel window
     auto win_config = validate_and_configure_window(input->info(), rois->info(), output->info(), pool_info);
@@ -106,20 +114,39 @@ void CLROIPoolingLayerKernel::configure(const CLCompileContext &compile_context,
     _output    = output;
     _pool_info = pool_info;
 
+    const DataType data_type = input->info()->data_type();
+    const bool     is_qasymm = is_data_type_quantized_asymmetric(data_type);
+
     // Set build options
-    std::set<std::string> build_opts;
-    build_opts.emplace(("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())));
-    build_opts.emplace(("-DDATA_SIZE=" + get_data_size_from_data_type(input->info()->data_type())));
-    build_opts.emplace(("-DMAX_DIM_X=" + support::cpp11::to_string(_input->info()->dimension(Window::DimX))));
-    build_opts.emplace(("-DMAX_DIM_Y=" + support::cpp11::to_string(_input->info()->dimension(Window::DimY))));
-    build_opts.emplace(("-DMAX_DIM_Z=" + support::cpp11::to_string(_input->info()->dimension(Window::DimZ))));
-    build_opts.emplace(("-DPOOLED_DIM_X=" + support::cpp11::to_string(pool_info.pooled_width())));
-    build_opts.emplace(("-DPOOLED_DIM_Y=" + support::cpp11::to_string(pool_info.pooled_height())));
-    build_opts.emplace(("-DSPATIAL_SCALE=" + support::cpp11::to_string(pool_info.spatial_scale())));
+    CLBuildOptions build_opts;
+    build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(data_type));
+    build_opts.add_option("-DDATA_SIZE=" + get_data_size_from_data_type(data_type));
+    build_opts.add_option("-DMAX_DIM_X=" + support::cpp11::to_string(_input->info()->dimension(Window::DimX)));
+    build_opts.add_option("-DMAX_DIM_Y=" + support::cpp11::to_string(_input->info()->dimension(Window::DimY)));
+    build_opts.add_option("-DMAX_DIM_Z=" + support::cpp11::to_string(_input->info()->dimension(Window::DimZ)));
+    build_opts.add_option("-DPOOLED_DIM_X=" + support::cpp11::to_string(pool_info.pooled_width()));
+    build_opts.add_option("-DPOOLED_DIM_Y=" + support::cpp11::to_string(pool_info.pooled_height()));
+    build_opts.add_option("-DSPATIAL_SCALE=" + support::cpp11::to_string(pool_info.spatial_scale()));
+
+    if(is_qasymm)
+    {
+        // Determine quantization info scale, offset
+        UniformQuantizationInfo uqinfo = UniformQuantizationInfo();
+        uqinfo                         = compute_requantization_scale_offset(_input->info()->quantization_info().uniform(), _output->info()->quantization_info().uniform());
+        build_opts.add_option("-DOFFSET_OUT=" + float_to_string_with_full_precision(uqinfo.offset));
+        build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(uqinfo.scale));
+
+        // Specify minimum possible value of datatype
+        build_opts.add_option("-DMIN_VALUE=" + support::cpp11::to_string(0));
+    }
+    else{
+        // Specify min value of F32 datatype
+        build_opts.add_option("-DMIN_VALUE=" + support::cpp11::to_string(-FLT_MAX));
+    }
 
     // Create kernel
     std::string kernel_name = "roi_pooling_layer";
-    _kernel                 = create_kernel(compile_context, kernel_name, build_opts);
+    _kernel                 = create_kernel(compile_context, kernel_name, build_opts.options());
 
     // Set static kernel arguments
     unsigned int idx = 2 * num_arguments_per_3D_tensor() + num_arguments_per_1D_array();
diff --git a/src/core/CL/kernels/CLROIPoolingLayerKernel.h b/src/core/CL/kernels/CLROIPoolingLayerKernel.h
index 124ae3f268..7b7b457632 100644
--- a/src/core/CL/kernels/CLROIPoolingLayerKernel.h
+++ b/src/core/CL/kernels/CLROIPoolingLayerKernel.h
@@ -63,7 +63,7 @@ public:
     /** Set the input and output tensors.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Source tensor. Data types supported: F16/F32.
+     * @param[in]  input           Source tensor. Data types supported: F16/F32/QASYMM8.
      * @param[in]  rois            ROIs tensor, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
      *                             as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
      * @param[out] output          Destination tensor. Data types supported: Same as @p input.
@@ -74,15 +74,30 @@ public:
      * @note The z dimensions of @p output tensor and @p input tensor must be the same.
      * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
      */
-    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
+    void configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
 
+    /** Static function to check if the given info will lead to a valid configuration of @ref CLROIPoolingLayerKernel
+     *
+     * @param[in] input     Source tensor info. Data types supported: F16/F32/QASYMM8.
+     * @param[in] rois      ROIs tensor info, it is a 2D tensor of size [5, N] (where N is the number of ROIs) containing top left and bottom right corner
+     *                      as coordinate of an image and batch_id of ROI [ batch_id, x1, y1, x2, y2 ]. Data types supported: U16
+     * @param[in] output    Destination tensor info. Data types supported: Same as @p input.
+     * @param[in] pool_info Contains pooling operation information described in @ref ROIPoolingLayerInfo.
+     *
+     * @note The x and y dimensions of @p output tensor must be the same as @p pool_info 's pooled width and pooled height.
+     * @note The z dimensions of @p output tensor and @p input tensor must be the same.
+     * @note The fourth dimension of @p output tensor must be the same as the number of elements in @p rois array.
+     * @return a Status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *rois, const ITensorInfo *output, const ROIPoolingLayerInfo &pool_info);
+
 private:
     const ICLTensor    *_input;
     const ICLTensor    *_rois;
-    ICLTensor          *_output;
+    const ICLTensor    *_output;
     ROIPoolingLayerInfo _pool_info;
 };
 } // namespace arm_compute
diff --git a/src/runtime/CL/functions/CLROIPoolingLayer.cpp b/src/runtime/CL/functions/CLROIPoolingLayer.cpp
index debc5eb24c..cf7d4bcbc3 100644
--- a/src/runtime/CL/functions/CLROIPoolingLayer.cpp
+++ b/src/runtime/CL/functions/CLROIPoolingLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -27,12 +27,17 @@
 
 using namespace arm_compute;
 
+Status CLROIPoolingLayer::validate(const ITensorInfo *input, const ITensorInfo *rois, ITensorInfo *output, const ROIPoolingLayerInfo &pool_info)
+{
+    return CLROIPoolingLayerKernel::validate(input, rois, output, pool_info); // Pure forwarder: the kernel's validate performs every check
+}
+
 void CLROIPoolingLayer::configure(const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
     configure(CLKernelLibrary::get().get_compile_context(), input, rois, output, pool_info);
 }
 
-void CLROIPoolingLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
+void CLROIPoolingLayer::configure(const CLCompileContext &compile_context, const ICLTensor *input, const ICLTensor *rois, const ICLTensor *output, const ROIPoolingLayerInfo &pool_info)
 {
     // Configure ROI pooling kernel
     auto k = std::make_unique<CLROIPoolingLayerKernel>();
diff --git a/tests/validation/CL/ROIPoolingLayer.cpp b/tests/validation/CL/ROIPoolingLayer.cpp
new file mode 100644
index 0000000000..eb16c1baec
--- /dev/null
+++ b/tests/validation/CL/ROIPoolingLayer.cpp
@@ -0,0 +1,136 @@
+/*
+ * Copyright (c) 2021 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/runtime/CL/CLScheduler.h"
+#include "arm_compute/runtime/CL/functions/CLROIPoolingLayer.h"
+#include "tests/CL/CLAccessor.h"
+#include "tests/Globals.h"
+#include "tests/datasets/ROIDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/ROIPoolingLayerFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+const RelativeTolerance<float> relative_tolerance_f32(0.01f);  // Relative tolerance used when comparing F32 results
+const AbsoluteTolerance<float> absolute_tolerance_f32(0.001f); // Absolute tolerance used when comparing F32 results
+
+constexpr AbsoluteTolerance<uint8_t> tolerance_qasymm8(1); // Absolute tolerance used when comparing QASYMM8 results
+} // end namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(RoiPooling)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+               framework::dataset::make("InputInfo", { TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Successful test
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::QASYMM8), // Successful test (quantized)
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Incorrect rois type
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching data type input/output
+                                                       TensorInfo(TensorShape(250U, 128U, 2U), 1, DataType::F32), // Mismatching depth size input/output
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching number of rois and output batch size
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Invalid number of values per ROIS
+                                                       TensorInfo(TensorShape(250U, 128U, 3U), 1, DataType::F32), // Mismatching height and width input/output
+
+                                                     }),
+               framework::dataset::make("RoisInfo", { TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::F16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 10U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(4, 4U), 1, DataType::U16),
+                                                      TensorInfo(TensorShape(5, 4U), 1, DataType::U16),
+                                                    })),
+               framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F16),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(7U, 7U, 3U, 4U), 1, DataType::F32),
+                                                       TensorInfo(TensorShape(5U, 5U, 3U, 4U), 1, DataType::F32),
+                                                     })),
+               framework::dataset::make("PoolInfo", { ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      ROIPoolingLayerInfo(7U, 7U, 1./8),
+                                                      })),
+               framework::dataset::make("Expected", { true, true, false, false, false, false, false, false })), // One flag per row of the datasets above (8 rows)
+               input_info, rois_info, output_info, pool_info, expected)
+{
+    ARM_COMPUTE_EXPECT(bool(CLROIPoolingLayer::validate(&input_info.clone()->set_is_resizable(true), &rois_info.clone()->set_is_resizable(true), &output_info.clone()->set_is_resizable(true), pool_info)) == expected, framework::LogLevel::ERRORS);
+}
+
+using CLROIPoolingLayerFloatFixture = ROIPoolingLayerFixture<CLTensor, CLAccessor, CLROIPoolingLayer, float>;
+
+TEST_SUITE(Float)
+FIXTURE_DATA_TEST_CASE(Small, CLROIPoolingLayerFloatFixture, framework::DatasetMode::ALL,
+                       framework::dataset::combine(framework::dataset::combine(datasets::SmallROIDataset(),
+                                                    framework::dataset::make("DataType", { DataType::F32 })),
+                                                    framework::dataset::make("DataLayout", { DataLayout::NCHW })))
+{
+    // Check _target against _reference with the F32 tolerances. NOTE(review): .02f is presumably the permitted ratio of mismatching elements - confirm against the validate() overload
+    validate(CLAccessor(_target), _reference, relative_tolerance_f32, .02f, absolute_tolerance_f32);
+}
+
+TEST_SUITE_END() // Float test suite end
+
+// Begin quantized tests
+template <typename T>
+using CLROIPoolingLayerQuantizedFixture = ROIPoolingLayerQuantizedFixture<CLTensor, CLAccessor, CLROIPoolingLayer, T>;
+
+TEST_SUITE(QASYMM8)
+
+FIXTURE_DATA_TEST_CASE(Small, CLROIPoolingLayerQuantizedFixture<uint8_t>, framework::DatasetMode::ALL,
+                       combine(combine(combine(combine(datasets::SmallROIDataset(),
+                                                       framework::dataset::make("DataType", { DataType::QASYMM8 })),
+                                               framework::dataset::make("DataLayout", { DataLayout::NCHW })),
+                                       framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(1.f / 255.f, 127) })),
+                               framework::dataset::make("OutputQuantizationInfo", { QuantizationInfo(2.f / 255.f, 120) })))
+{
+    // Check _target against _reference using the absolute tolerance tolerance_qasymm8; input and output use different quantization infos
+    validate(CLAccessor(_target), _reference, tolerance_qasymm8);
+}
+
+TEST_SUITE_END() // end qasymm8 tests
+
+TEST_SUITE_END() // RoiPooling
+TEST_SUITE_END() // CL
+
+} // validation namespace end
+} // test namespace end
+} // arm_compute namespace end