diff options
Diffstat (limited to 'src')
-rw-r--r-- | src/core/CL/cl_kernels/common/elementwise_operation.cl | 6 | ||||
-rw-r--r-- | src/cpu/kernels/elementwise_binary/generic/neon/impl.h | 26 |
2 files changed, 14 insertions, 18 deletions
diff --git a/src/core/CL/cl_kernels/common/elementwise_operation.cl b/src/core/CL/cl_kernels/common/elementwise_operation.cl index 45dcbfc6e2..91e51d9d1a 100644 --- a/src/core/CL/cl_kernels/common/elementwise_operation.cl +++ b/src/core/CL/cl_kernels/common/elementwise_operation.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2021 Arm Limited. + * Copyright (c) 2018-2021, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -46,11 +46,7 @@ #define PRELU(x, y) (select(y * x, x, CONVERT((x > (DATA_TYPE)0), SELECT_VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)))) #endif // VEC_SIZE_OUT == 1 -#if defined(S32) -#define DIV(x, y) CONVERT(floor(CONVERT(x, VEC_DATA_TYPE(float, VEC_SIZE_OUT)) / CONVERT(y, VEC_DATA_TYPE(float, VEC_SIZE_OUT))), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)); -#else /* S32 */ #define DIV(x, y) (x / y) -#endif /* S32 */ #define AND(x, y) (CONVERT((x && y), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)) & ((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT))1)) #define OR(x, y) (CONVERT((x || y), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT)) & ((VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE_OUT))1)) diff --git a/src/cpu/kernels/elementwise_binary/generic/neon/impl.h b/src/cpu/kernels/elementwise_binary/generic/neon/impl.h index 98f7e8b949..78e3baf74b 100644 --- a/src/cpu/kernels/elementwise_binary/generic/neon/impl.h +++ b/src/cpu/kernels/elementwise_binary/generic/neon/impl.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2021-2022 Arm Limited. + * Copyright (c) 2021-2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_CORE_NEON_KERNELS_ELEMENTWISE_IMPL_H -#define SRC_CORE_NEON_KERNELS_ELEMENTWISE_IMPL_H +#ifndef ACL_SRC_CPU_KERNELS_ELEMENTWISE_BINARY_GENERIC_NEON_IMPL_H +#define ACL_SRC_CPU_KERNELS_ELEMENTWISE_BINARY_GENERIC_NEON_IMPL_H #include "src/core/NEON/NEAsymm.h" @@ -198,14 +198,6 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar case ArithmeticOperation::DIV: { res = a / b; - if (std::is_integral<ScalarType>::value) - { - res = (b == 0) ? 0 : res; - if (static_cast<int32_t>(a) % static_cast<int32_t>(b) != 0 && ((a < 0) != (b < 0))) - { - --res; - } - } break; } case ArithmeticOperation::POWER: @@ -224,7 +216,15 @@ inline int32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<int32_t, 4>>(const int32x4_t &a, const int32x4_t &b) { - return vcvtq_s32_f32(vfloorq_f32(wrapper::vdiv(vcvtq_f32_s32(a), vcvtq_f32_s32(b)))); + int32x4_t result; + + // Neon(TM) does not have vector integer division + result[0] = a[0] / b[0]; + result[1] = a[1] / b[1]; + result[2] = a[2] / b[2]; + result[3] = a[3] / b[3]; + + return result; } template <> @@ -1313,4 +1313,4 @@ void elementwise_comp_op_quantized_signed(const ITensor *in1, const ITensor *in2 } // namespace cpu } // namespace arm_compute -#endif /* SRC_CORE_NEON_KERNELS_ELEMENTWISE_IMPL_H */ +#endif // ACL_SRC_CPU_KERNELS_ELEMENTWISE_BINARY_GENERIC_NEON_IMPL_H |