aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/scale_quantized.cl
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2020-12-03 16:26:35 +0000
committerManuel Bottini <manuel.bottini@arm.com>2020-12-18 13:36:07 +0000
commit0c58f99495844c6ace629116451dae00e8c27418 (patch)
tree709fe9b3088626a135aec253ca20ede21d2c870f /src/core/CL/cl_kernels/scale_quantized.cl
parent2567adfc881049fefada9be523347b4f384e6d27 (diff)
downloadComputeLibrary-0c58f99495844c6ace629116451dae00e8c27418.tar.gz
Remove OpenCL padding CLScaleKernel
Resolves COMPMID-3918 Change-Id: I970b1eaf2ae6f2f5a8cfc318cd1a3dfd3ba36fdb Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4668 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/scale_quantized.cl')
-rw-r--r--src/core/CL/cl_kernels/scale_quantized.cl49
1 files changed, 31 insertions, 18 deletions
diff --git a/src/core/CL/cl_kernels/scale_quantized.cl b/src/core/CL/cl_kernels/scale_quantized.cl
index 2aa7f185c6..010e4ed57a 100644
--- a/src/core/CL/cl_kernels/scale_quantized.cl
+++ b/src/core/CL/cl_kernels/scale_quantized.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -93,6 +93,7 @@ __kernel void scale_bilinear_quantized_nchw(
* @note Offset value for QASYMM8 data type to used is passed as -DOFFSET=<VALUE> e.g. -DOFFSET=1
* @note If border mode replicate is used, is should be passed as -DBORDER_MODE_REPLICATE
* @note Output tensor's depth should be given as a preprocessor argument using -DDEPTH_OUT=size. e.g. -DDEPTH=16
+ * @note The value to be used at the edges of the images shoud be given as a preprocessor argument using -DCONSTANT_VALUE=value.
*
* @param[in] in_ptr Pointer to the source image. Supported data types: QASYMM8.
* @param[in] in_stride_x Stride of the source image in X dimension (in bytes)
@@ -114,6 +115,7 @@ __kernel void scale_bilinear_quantized_nchw(
* @param[in] input_height Input image height
* @param[in] scale_x The scale factor along x dimension
* @param[in] scale_y The scale factor along y dimension
+ * @param[in] constant_border_value Constant border value to use
*/
__kernel void scale_bilinear_quantized_nhwc(
TENSOR4D_DECLARATION(in),
@@ -136,27 +138,38 @@ __kernel void scale_bilinear_quantized_nhwc(
#error("Unsupported sampling policy");
#endif /* SAMPLING_POLICY */
- const float new_xf = floor(new_x);
- const float new_yf = floor(new_y);
- float clamped_x = clamp(new_xf, 0.0f, input_width - 1);
- float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);
- float clamped_x_ = clamped_x;
- float clamped_x1_ = clamped_x1;
- const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);
- const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);
+ const float new_xf = floor(new_x);
+ const float new_yf = floor(new_y);
+ const float clamped_x = clamp(new_xf, 0.0f, input_width - 1);
+ const float clamped_x1 = clamp(new_xf + 1, 0.0f, input_width - 1);
+ const float clamped_y = clamp(new_yf, 0.0f, input_height - 1);
+ const float clamped_y1 = clamp(new_yf + 1, 0.0f, input_height - 1);
#ifndef BORDER_MODE_REPLICATE
- clamped_x1 = select(clamped_x1, 0.0f - BORDER_SIZE, new_yf + 1 < 0.f || new_yf + 1 > input_height - 1 || new_xf + 1 < 0.f || new_xf + 1 > input_width - 1);
- clamped_x_ = select(clamped_x_, 0.0f - BORDER_SIZE, new_yf + 1 > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
- clamped_x = select(clamped_x, 0.0f - BORDER_SIZE, new_yf < 0.f || new_yf > input_height - 1 || new_xf < 0.f || new_xf > input_width - 1);
- clamped_x1_ = select(clamped_x1_, 0.0f - BORDER_SIZE, new_xf + 1 < 0.f || new_xf + 1 > input_width - 1 || new_yf < 0.f || new_yf > input_height - 1);
+ const bool check_x = (0.f <= new_xf && new_xf < input_width);
+ const bool check_x1 = (-1.f <= new_xf && new_xf < input_width - 1);
+ const bool check_y = (0.f <= new_yf && new_yf < input_height);
+ const bool check_y1 = (-1.f <= new_yf && new_yf < input_height - 1);
+ const int ins_0 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x && check_y);
+ const int ins_1 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x1 && check_y);
+ const int ins_2 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x && check_y1);
+ const int ins_3 = select((int)(CONSTANT_VALUE), (int)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1),
+ (get_global_id(2) / DEPTH_OUT)))),
+ check_x1 && check_y1);
+ int4 ins = (int4)(ins_0, ins_1, ins_2, ins_3);
+#else /* BORDER_MODE_REPLICATE */
+ int4 ins = (int4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
+ *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
#endif /* BORDER_MODE_REPLICATE */
- int4 ins = (int4)(*((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1_), convert_int(clamped_y), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x_), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))),
- *((__global DATA_TYPE *)tensor4D_offset(&in, get_global_id(0), convert_int(clamped_x1), convert_int(clamped_y1), (get_global_id(2) / DEPTH_OUT))));
-
const float a = new_x - new_xf;
const float b = 1.f - a;
const float a1 = new_y - new_yf;