aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/roi_pooling_layer.cl
diff options
context:
space:
mode:
authorSuhail Munshi <MohammedSuhail.Munshi@arm.com>2021-03-22 13:13:55 +0000
committerMohmun02 <MohammedSuhail.Munshi@arm.com>2021-04-01 12:53:32 +0000
commit4ed7b39dbbe8ccc6267a9eacefca51717c3b3e10 (patch)
tree57848f8a31ace7c0ca06d17abac3e975b5997812 /src/core/CL/cl_kernels/roi_pooling_layer.cl
parent33f41fabd30fb444aaa0cf3e65b61794d498d151 (diff)
downloadComputeLibrary-4ed7b39dbbe8ccc6267a9eacefca51717c3b3e10.tar.gz
Added Qasymm8 datatype support to CLROIPoolingLayer with Tests
Also fixes RoiPoolingLayer not matching reference with Float32 datatype Issue Tests added to check ROIPooling Layer against reference with both Float32 and Qasymm8 input. Resolves : COMPMID-2320 Change-Id: Ib86d2e6b3803e74f922a545ea573da02c28e54cc Signed-off-by: Suhail Munshi <MohammedSuhail.Munshi@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5332 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/roi_pooling_layer.cl')
-rw-r--r--src/core/CL/cl_kernels/roi_pooling_layer.cl56
1 files changed, 41 insertions, 15 deletions
diff --git a/src/core/CL/cl_kernels/roi_pooling_layer.cl b/src/core/CL/cl_kernels/roi_pooling_layer.cl
index ac193e8fb6..6899b952e0 100644
--- a/src/core/CL/cl_kernels/roi_pooling_layer.cl
+++ b/src/core/CL/cl_kernels/roi_pooling_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "helpers.h"
+#include "helpers_asymm.h"
#if DATA_SIZE == 32
#define VEC_SIZE 4
@@ -29,24 +30,41 @@
#elif DATA_SIZE == 16
#define VEC_SIZE 8
#define VEC_MAX vec8_max
-#else /* DATA_SIZE not equals 32 or 16 */
+#elif DATA_SIZE == 8
+#define VEC_SIZE 16
+#define VEC_MAX vec16_max
+#else /* DATA_SIZE not equals 8, 16, 32 */
#error "Unsupported data size"
#endif /* DATA_SIZE == 32 */
+// Define whether to use max (Quantized datatype) or fmax (Float) functions
+#if defined(OFFSET_OUT) && defined(SCALE_OUT)
+#define MAX(x, y) max(x, y)
+#else // !(defined(OFFSET_OUT) && defined(SCALE_OUT)
+#define MAX(x, y) fmax(x, y)
+#endif // defined(OFFSET_OUT) && defined(SCALE_OUT)
+
inline DATA_TYPE vec4_max(VEC_DATA_TYPE(DATA_TYPE, 4) vec)
{
VEC_DATA_TYPE(DATA_TYPE, 2)
- temp = fmax(vec.lo, vec.hi);
- return fmax(temp.x, temp.y);
+ temp = MAX(vec.lo, vec.hi);
+ return MAX(temp.x, temp.y);
}
inline DATA_TYPE vec8_max(VEC_DATA_TYPE(DATA_TYPE, 8) vec)
{
VEC_DATA_TYPE(DATA_TYPE, 4)
- temp = fmax(vec.lo, vec.hi);
+ temp = MAX(vec.lo, vec.hi);
return vec4_max(temp);
}
+inline DATA_TYPE vec16_max(VEC_DATA_TYPE(DATA_TYPE, 16) vec)
+{
+ VEC_DATA_TYPE(DATA_TYPE, 8)
+ temp = MAX(vec.lo, vec.hi);
+ return vec8_max(temp);
+}
+
/** Performs a roi pooling on a single output pixel.
*
* @param[in] input Pointer to input Tensor3D struct.
@@ -69,7 +87,8 @@ inline DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int reg
{
int num_iter = (int)((region_end_x - region_start_x) / VEC_SIZE);
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
- curr_max = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))(-FLT_MAX);
+ curr_max = (VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE))(MIN_VALUE);
+
for(int j = region_start_y; j < region_end_y; ++j)
{
int i = region_start_x;
@@ -77,27 +96,34 @@ inline DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int reg
{
VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)
val = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)tensor3D_offset(input, i, j, pz));
- curr_max = fmax(val, curr_max);
+ curr_max = MAX(val, curr_max);
}
for(; i < region_end_x; ++i)
{
DATA_TYPE val = *(__global DATA_TYPE *)tensor3D_offset(input, i, j, pz);
- curr_max = fmax(curr_max, val);
+ curr_max = MAX(curr_max, val);
}
}
- return (DATA_TYPE)VEC_MAX(curr_max);
+
+ const DATA_TYPE temp = (DATA_TYPE)VEC_MAX(curr_max);
+
+#if defined(OFFSET_OUT) && defined(SCALE_OUT)
+ return QUANTIZE(temp, OFFSET_OUT, SCALE_OUT, DATA_TYPE, 1);
+#endif /* if quantized, requantize and return */
+
+ return temp;
}
}
/** Performs a roi pooling function.
*
- * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32;
+ * @note Datatype must be passed using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types are F16, F32, QASYMM8;
* @note Datasize must be passed using -DDATA_SIZE e.g. -DDATA_SIZE=32;
* @note Input dimensions must be passed using -DMAX_DIM_X, -DMAX_DIM_Y and -DMAX_DIM_Z;
* @note Pooled region dimensions must be passed using -DPOOLED_DIM_X and -DPOOLED_DIM_Y;
* @note Spatial scale must be passed using -DSPATIAL_SCALE;
*
- * @param[in] input_ptr Pointer to the source image. Supported data types: F16, F32
+ * @param[in] input_ptr Pointer to the source image. Supported data types: F16, F32, QASYMM8
* @param[in] input_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] input_step_x input_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] input_stride_y Stride of the source image in Y dimension (in bytes)
@@ -111,7 +137,7 @@ inline DATA_TYPE roi_pool_1x1(const Tensor3D *input, int region_start_x, int reg
* @param[in] rois_stride_y Stride of the ROIs tensor in Y dimension (in bytes)
* @param[in] rois_step_y Step of the ROIs tensor in Y dimension (in bytes)
* @param[in] rois_offset_first_element_in_bytes The offset of the first element in the ROIs tensor
- * @param[out] output_ptr Pointer to the destination image. Supported data types: F16, F32
+ * @param[out] output_ptr Pointer to the destination image. Supported data types: same as input
* @param[in] output_stride_x Stride of the destination image in X dimension (in bytes)
* @param[in] output_step_x output_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] output_stride_y Stride of the destination image in Y dimension (in bytes)
@@ -139,9 +165,9 @@ __kernel void roi_pooling_layer(
// Load roi parameters
// roi is laid out as follows { batch_index, x1, y1, x2, y2 }
- const ushort roi_batch = (ushort) * ((__global DATA_TYPE *)offset(&rois, 0, pw));
- const VEC_DATA_TYPE(DATA_TYPE, 4)
- roi = vload4(0, (__global DATA_TYPE *)offset(&rois, 1, pw));
+ const ushort roi_batch = (ushort) * ((__global ushort *)offset(&rois, 0, pw));
+ const VEC_DATA_TYPE(ushort, 4)
+ roi = vload4(0, (__global ushort *)offset(&rois, 1, pw));
const int2 roi_anchor = convert_int2_sat(round(convert_float2(roi.s01) * (float)SPATIAL_SCALE));
const int2 roi_dims = convert_int2_sat(fmax(round(convert_float2(roi.s23 - roi.s01) * (float)SPATIAL_SCALE), 1.f));