diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-09-03 21:00:23 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-09-10 09:57:52 +0000 |
commit | 1813422f5095983fa802e96cb8e9e0d980de82c7 (patch) | |
tree | a58e9d9f0fc8a2eb98a055f28ff700eb7e0bdd5e /src/core/NEON/kernels/NEElementwiseOperationKernel.cpp | |
parent | 5489394cdfe10afb469171cf521f26f923eeb2e2 (diff) | |
download | ComputeLibrary-1813422f5095983fa802e96cb8e9e0d980de82c7.tar.gz |
COMPMID-3583: Add S32 support to NEElementwiseDivision
Integer division follows the flooring approach (the quotient is rounded
towards negative infinity): for example, 5/2=2 while -5/2=-3.
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I65756e0b31fe8d97f743a4c13dc5f96304722f75
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3929
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEElementwiseOperationKernel.cpp')
-rw-r--r-- | src/core/NEON/kernels/NEElementwiseOperationKernel.cpp | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index db4f5923bc..da53a523e6 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -142,6 +142,14 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar
         case ArithmeticOperation::DIV:
         {
             res = a / b;
+            if(std::is_integral<ScalarType>::value)
+            {
+                res = (b == 0) ? 0 : res;
+                if(static_cast<int32_t>(a) % static_cast<int32_t>(b) != 0 && ((a < 0) != (b < 0)))
+                {
+                    --res;
+                }
+            }
             break;
         }
         case ArithmeticOperation::POWER:
@@ -208,6 +216,12 @@ inline typename VectorType::type elementwise_arithm_op(const typename VectorType
 }
 
 template <>
+inline int32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<int32_t, 4>>(const int32x4_t &a, const int32x4_t &b)
+{
+    return vcvtq_s32_f32(vfloorq_f32(wrapper::vdiv(vcvtq_f32_s32(a), vcvtq_f32_s32(b))));
+}
+
+template <>
 inline float32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<float, 4>>(const float32x4_t &a, const float32x4_t &b)
 {
     return wrapper::vdiv(a, b);
@@ -1259,7 +1273,7 @@ void NEDivisionOperationKernel::configure(const ITensorInfo *input1, const ITens
 
 Status NEDivisionOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32);
     return NEArithmeticOperationKernel::validate_arguments(input1, input2, output);
 }
 