aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2018-11-01 15:56:33 +0000
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:55:45 +0000
commit5784624cb10c56cf3665a08cae02aa5a3b1244d9 (patch)
treed9911e812be6032769406b292a4a88b58f32ed39
parent01bbd5fd0af0847ac847c78c7064b3c9810a924a (diff)
downloadComputeLibrary-5784624cb10c56cf3665a08cae02aa5a3b1244d9.tar.gz
COMPMID-1739: Fix broadcast CLArithmeticAddition for QASYMM8
Commit 16121924 `COMPMID-1673: Collapse window in CLArithmeticAddition when one operand is a vector` changed the number of elements processed per iteration to 8, but didn't update the quantized kernel to reflect that. Change-Id: I49a2fbcee81f5bbc1b210b4a5c6d63b94eafdcec Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/156355 Reviewed-by: Isabella Gottardi <isabella.gottardi@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: bsgcomp <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/arithmetic_op_quantized.cl34
1 files changed, 24 insertions, 10 deletions
diff --git a/src/core/CL/cl_kernels/arithmetic_op_quantized.cl b/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
index 5f31c56250..fc7fa771f3 100644
--- a/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
+++ b/src/core/CL/cl_kernels/arithmetic_op_quantized.cl
@@ -31,7 +31,17 @@
#define SUB(x, y) (x) - (y)
#endif /* SATURATE */
+#define CONVERT_RTE(x, type) (convert_##type##_rte((x)))
+#define CONVERT_DOWN(x, type) CONVERT_RTE(x, type)
+
#if defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT)
+
+#if defined(VEC_SIZE)
+
+#define VEC_FLOAT VEC_DATA_TYPE(float, VEC_SIZE)
+#define VEC_INT VEC_DATA_TYPE(int, VEC_SIZE)
+#define VEC_UCHAR VEC_DATA_TYPE(uchar, VEC_SIZE)
+
/** This function adds two tensors.
*
* @note The quantization offset of the first operand must be passed at compile time using -DOFFSET_IN1, i.e. -DOFFSET_IN1=10
@@ -41,6 +51,7 @@
* @note The quantization scale of the second operand must be passed at compile time using -DSCALE_IN2, i.e. -DSCALE_IN2=10
* @note The quantization scale of the output must be passed at compile time using -DSCALE_OUT, i.e. -DSCALE_OUT=10
* @note To perform saturating operation -DSATURATE has to be passed to the compiler otherwise wrapping policy will be used.
+ * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size. e.g. -DVEC_SIZE=16
*
* @param[in] in1_ptr Pointer to the source tensor. Supported data types: QASYMM8
* @param[in] in1_stride_x Stride of the source tensor in X dimension (in bytes)
@@ -77,20 +88,23 @@ __kernel void arithmetic_add_quantized(
Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
- int16 in_a = CONVERT(vload16(0, (__global uchar *)in1.ptr), int16);
- int16 in_b = CONVERT(vload16(0, (__global uchar *)in2.ptr), int16);
+ VEC_INT in_a = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in1.ptr), VEC_INT);
+ VEC_INT in_b = CONVERT(VLOAD(VEC_SIZE)(0, (__global uchar *)in2.ptr), VEC_INT);
- in_a = SUB(in_a, (int16)((int)OFFSET_IN1));
- in_b = SUB(in_b, (int16)((int)OFFSET_IN2));
+ in_a = SUB(in_a, (VEC_INT)((int)OFFSET_IN1));
+ in_b = SUB(in_b, (VEC_INT)((int)OFFSET_IN2));
- const float16 in1f32 = convert_float16(in_a) * (float16)((float)SCALE_IN1);
- const float16 in2f32 = convert_float16(in_b) * (float16)((float)SCALE_IN2);
- const float16 qresf32 = (in1f32 + in2f32) / ((float16)(float)SCALE_OUT) + ((float16)((float16)OFFSET_OUT));
- const uchar16 res = convert_uchar16_sat(convert_int16_rte(qresf32));
+ const VEC_FLOAT in1f32 = CONVERT(in_a, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN1);
+ const VEC_FLOAT in2f32 = CONVERT(in_b, VEC_FLOAT) * (VEC_FLOAT)((float)SCALE_IN2);
+
+ const VEC_FLOAT qresf32 = (in1f32 + in2f32) / ((VEC_FLOAT)(float)SCALE_OUT) + ((VEC_FLOAT)((float)OFFSET_OUT));
+ const VEC_UCHAR res = CONVERT_SAT(CONVERT_DOWN(qresf32, VEC_INT), VEC_UCHAR);
// Store result
- vstore16(res, 0, (__global uchar *)out.ptr);
+ VSTORE(VEC_SIZE)
+ (res, 0, (__global uchar *)out.ptr);
}
+#endif /* defined(VEC_SIZE) */
/** This function subtracts two tensors.
*
@@ -151,4 +165,4 @@ __kernel void arithmetic_sub_quantized(
// Store result
vstore16(res, 0, (__global uchar *)out.ptr);
}
-#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) */ \ No newline at end of file
+#endif /* defined(OFFSET_IN1) && defined(OFFSET_IN2) && defined(OFFSET_OUT) && defined(SCALE_IN1) && defined(SCALE_IN2) && defined(SCALE_OUT) */