From d2191150736dde66d79eb97e0c8ee506eef3c8fc Mon Sep 17 00:00:00 2001 From: Gunes Bayir Date: Tue, 19 Mar 2024 22:10:04 +0000 Subject: Make Cpu/Gpu/Ref scalar/vectoral S32 division consistent - Neon(TM) implementation converts integers to float and performs the division because there is no vector integer division instructions. However, leftover loop still uses integer division, which makes results inconsistent depending on where we are in the tensor. - SVE path does it in integer domain. - OpenCL(TM) does it similar to Neon(TM) vector path. - Reference implementation does it in integer domain. These differences cause intermittent mismatches. This patch ensures all follow the same logic. On the other hand, the provided Neon(TM) implementation is faster than the Fp32 converted version. Resolves: COMPMID-6925 Change-Id: Ia12606d57f40a7d331b9b698f87fd4321496b275 Signed-off-by: Gunes Bayir Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11316 Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez Tello Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/core/CL/cl_kernels/common/elementwise_operation.cl | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'src/core') diff --git a/src/core/CL/cl_kernels/common/elementwise_operation.cl b/src/core/CL/cl_kernels/common/elementwise_operation.cl index 45dcbfc6e2..91e51d9d1a 100644 --- a/src/core/CL/cl_kernels/common/elementwise_operation.cl +++ b/src/core/CL/cl_kernels/common/elementwise_operation.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,11 +46,7 @@ #define PRELU(x, y) (select(y * x, x, CONVERT((x > (DATA_TYPE)0), SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)))) #endif // VEC_SIZE_OUT == 1 -#if defined(S32) -#define DIV(x, y) CONVERT(floor(CONVERT(x, VEC_DATA_TYPE(float, VEC_SIZE_OUT)) / CONVERT(y, VEC_DATA_TYPE(float, VEC_SIZE_OUT))), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)); -#else /* S32 */ #define DIV(x, y) (x / y) -#endif /* S32 */ #define AND(x, y) (CONVERT((x && y), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)) & ((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT))1)) #define OR(x, y) (CONVERT((x || y), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)) & ((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT))1)) -- cgit v1.2.1