aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2020-09-03 21:00:23 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2020-09-10 09:57:52 +0000
commit1813422f5095983fa802e96cb8e9e0d980de82c7 (patch)
treea58e9d9f0fc8a2eb98a055f28ff700eb7e0bdd5e /src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
parent5489394cdfe10afb469171cf521f26f923eeb2e2 (diff)
downloadComputeLibrary-1813422f5095983fa802e96cb8e9e0d980de82c7.tar.gz
COMPMID-3583: Add S32 support to NEElementwiseDivision
Division follows the flooring division approach where for example 5/2=2 while -5/2=-3 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I65756e0b31fe8d97f743a4c13dc5f96304722f75 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3929 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEElementwiseOperationKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEElementwiseOperationKernel.cpp16
1 files changed, 15 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index db4f5923bc..da53a523e6 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -142,6 +142,14 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar
case ArithmeticOperation::DIV:
{
res = a / b;
+ if(std::is_integral<ScalarType>::value)
+ {
+ res = (b == 0) ? 0 : res;
+ if(static_cast<int32_t>(a) % static_cast<int32_t>(b) != 0 && ((a < 0) != (b < 0)))
+ {
+ --res;
+ }
+ }
break;
}
case ArithmeticOperation::POWER:
@@ -208,6 +216,12 @@ inline typename VectorType::type elementwise_arithm_op(const typename VectorType
}
template <>
+inline int32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<int32_t, 4>>(const int32x4_t &a, const int32x4_t &b)
+{
+ return vcvtq_s32_f32(vfloorq_f32(wrapper::vdiv(vcvtq_f32_s32(a), vcvtq_f32_s32(b))));
+}
+
+template <>
inline float32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<float, 4>>(const float32x4_t &a, const float32x4_t &b)
{
return wrapper::vdiv(a, b);
@@ -1259,7 +1273,7 @@ void NEDivisionOperationKernel::configure(const ITensorInfo *input1, const ITens
Status NEDivisionOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32);
return NEArithmeticOperationKernel::validate_arguments(input1, input2, output);
}