From 1813422f5095983fa802e96cb8e9e0d980de82c7 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 3 Sep 2020 21:00:23 +0100 Subject: COMPMID-3583: Add S32 support to NEElementwiseDivision Division follows the flooring division approach where for example 5/2=2 while -5/2=-3 Signed-off-by: Georgios Pinitas Change-Id: I65756e0b31fe8d97f743a4c13dc5f96304722f75 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3929 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/NEElementwiseOperationKernel.cpp | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'src/core/NEON/kernels/NEElementwiseOperationKernel.cpp') diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index db4f5923bc..da53a523e6 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -142,6 +142,14 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar case ArithmeticOperation::DIV: { res = a / b; + if(std::is_integral<ScalarType>::value) + { + res = (b == 0) ? 
0 : res; + if(static_cast<int32_t>(a) % static_cast<int32_t>(b) != 0 && ((a < 0) != (b < 0))) + { + --res; + } + } break; } case ArithmeticOperation::POWER: @@ -207,6 +215,12 @@ inline typename VectorType::type elementwise_arithm_op(const typename VectorType return res; } +template <> +inline int32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<int32_t, 4>>(const int32x4_t &a, const int32x4_t &b) +{ + return vcvtq_s32_f32(vfloorq_f32(wrapper::vdiv(vcvtq_f32_s32(a), vcvtq_f32_s32(b)))); +} + template <> inline float32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<float, 4>>(const float32x4_t &a, const float32x4_t &b) { @@ -1259,7 +1273,7 @@ void NEDivisionOperationKernel::configure(const ITensorInfo *input1, const ITens Status NEDivisionOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32); return NEArithmeticOperationKernel::validate_arguments(input1, input2, output); } -- cgit v1.2.1