aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/nhwc/scale.cl
diff options
context:
space:
mode:
author Gunes Bayir <gunes.bayir@arm.com> 2022-12-26 16:24:04 +0000
committer Gunes Bayir <gunes.bayir@arm.com> 2022-12-29 17:27:33 +0000
commit b7e8626717b2ef81b0d03284c8f6ffdbe9cd2245 (patch)
tree 8fb8197a4f69fb5024a5330e031d3d5c0b26bdad /src/core/CL/cl_kernels/nhwc/scale.cl
parent a5cb79f18685292bf5b63a0c484a58945320823d (diff)
download ComputeLibrary-b7e8626717b2ef81b0d03284c8f6ffdbe9cd2245.tar.gz
Optimize CL Scale/Resize Quantized by removing (de)quant. code
This patch removes the quantization/dequantization code in CLScale and in the Resize operator in dynamic fusion. Different quantization information for input and output is not supported, so the quantization and dequantization steps are not necessary. The very same optimization was delivered for CPU. The patch also moves the SCALE_X and SCALE_Y arguments from build options to the look-up table in the template writer of Resize.
Change-Id: Icd043c8671220c8feea935dd4b24a5b17c6c4ea4
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8888
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/nhwc/scale.cl')
-rw-r--r-- src/core/CL/cl_kernels/nhwc/scale.cl 29
1 files changed, 6 insertions, 23 deletions
diff --git a/src/core/CL/cl_kernels/nhwc/scale.cl b/src/core/CL/cl_kernels/nhwc/scale.cl
index bccfd6543a..f6a3e0971b 100644
--- a/src/core/CL/cl_kernels/nhwc/scale.cl
+++ b/src/core/CL/cl_kernels/nhwc/scale.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2021 Arm Limited.
+ * Copyright (c) 2016-2022 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -218,34 +218,17 @@ __kernel void scale_bilinear_nhwc(
// Calculate the output
out[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1));
#else // defined(IS_FLOATING_POINT)
- TILE(float, 1, N0, out_f);
- TILE(float, 1, N0, in00_f);
- TILE(float, 1, N0, in01_f);
- TILE(float, 1, N0, in10_f);
- TILE(float, 1, N0, in11_f);
const float a = (xi_f - (float)xi);
const float b = (1.f - a);
const float a1 = (yi_f - (float)yi);
const float b1 = (1.f - a1);
- // Dequantize
- LOOP_UNROLLING(int, n0, 0, 1, N0,
- {
- in00_f[0].s[n0] = ((float)in00[0].s[n0] - (float)OFFSET) * (float)SCALE;
- in01_f[0].s[n0] = ((float)in01[0].s[n0] - (float)OFFSET) * (float)SCALE;
- in10_f[0].s[n0] = ((float)in10[0].s[n0] - (float)OFFSET) * (float)SCALE;
- in11_f[0].s[n0] = ((float)in11[0].s[n0] - (float)OFFSET) * (float)SCALE;
- })
-
- // Calculate the output in the floating-point domain
- out_f[0].v = ((in00_f[0].v * b * b1) + (in01_f[0].v * a * b1) + (in10_f[0].v * b * a1) + (in11_f[0].v * a * a1));
-
- // Quantize
- LOOP_UNROLLING(int, n0, 0, 1, N0,
- {
- out[0].s[n0] = CONVERT_SAT(out_f[0].s[n0] / (float)SCALE + (float)OFFSET, DST_DATA_TYPE);
- })
+ out[0].v = CONVERT_SAT((CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
+ (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
+ (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
+ (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1),
+ VEC_DATA_TYPE(DST_DATA_TYPE, N0));
#endif // defined(IS_FLOATING_POINT)
TILE(uint, 1, 1, dst_indirect_y);