author    Georgios Pinitas <georgios.pinitas@arm.com>    2020-09-03 21:00:23 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com>    2020-09-10 09:57:52 +0000
commit    1813422f5095983fa802e96cb8e9e0d980de82c7 (patch)
tree      a58e9d9f0fc8a2eb98a055f28ff700eb7e0bdd5e
parent    5489394cdfe10afb469171cf521f26f923eeb2e2 (diff)
COMPMID-3583: Add S32 support to NEElementwiseDivision
Division follows the flooring division approach where, for example, 5/2 = 2 while -5/2 = -3.

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I65756e0b31fe8d97f743a4c13dc5f96304722f75
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3929
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
 arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h | 16
 arm_compute/runtime/NEON/functions/NEElementwiseOperations.h |  8
 docs/00_introduction.dox                                      |  2
 src/core/NEON/kernels/NEElementwiseOperationKernel.cpp        | 16
 tests/validation/NEON/ElementwiseDivision.cpp                 | 13
 tests/validation/fixtures/ElementwiseOperationsFixture.h      | 25
 tests/validation/reference/ElementwiseOperations.cpp          | 11
 7 files changed, 68 insertions(+), 23 deletions(-)
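The flooring semantics described in the commit message can be illustrated with a small standalone example (not part of the patch). The helper name floor_div is hypothetical; it simply mirrors the scalar S32 logic added below in NEElementwiseOperationKernel.cpp: truncating division followed by a correction when the remainder is non-zero and the operand signs differ.

#include <cstdint>
#include <iostream>

// Hypothetical helper mirroring the scalar S32 path of the patch:
// truncating C++ division, then a -1 correction when the remainder is
// non-zero and the signs of the operands differ.
int32_t floor_div(int32_t a, int32_t b)
{
    if(b == 0)
    {
        return 0; // the kernel maps integer division by zero to 0
    }
    int32_t res = a / b; // C++ integer division truncates towards zero
    if((a % b != 0) && ((a < 0) != (b < 0)))
    {
        --res; // adjust the quotient towards minus infinity
    }
    return res;
}

int main()
{
    std::cout << floor_div(5, 2) << '\n';  // prints 2
    std::cout << floor_div(-5, 2) << '\n'; // prints -3 (truncation would give -2)
    return 0;
}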
diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
index 47b8c3b7c8..7dae25c22c 100644
--- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
@@ -97,7 +97,7 @@ public:
/** Default constructor */
NEArithmeticOperationKernel() = default;
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ /** Configure kernel
*
* @param[in] op Arithmetic operation to be executed.
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
@@ -128,17 +128,17 @@ public:
/** Default constructor */
NEDivisionOperationKernel() = default;
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ /** Configure kernel
*
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[out] output Output tensor info. Data types supported: Same as @p input1.
*/
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel
*
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[in] output Output tensor info. Data types supported: Same as @p input1.
*
@@ -157,7 +157,7 @@ public:
/** Default constructor */
NEPowerOperationKernel() = default;
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ /** Configure kernel
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -165,7 +165,7 @@ public:
*/
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+ /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel
*
* @param[in] input1 First tensor input info. Data types supported: F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -186,7 +186,7 @@ public:
/** Default constructor */
NEComparisonOperationKernel() = default;
- /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+ /** Configure kernel
*
* @param[in] op Comparison operation to be executed.
* @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index 66d8db5d0d..5c755e96ac 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -456,22 +456,22 @@ public:
/** Basic function to run @ref NEArithmeticOperationKernel for division
*
- * @note The tensor data type for the inputs must be F16/F32.
- * @note The function performs a squared different operation between two tensors (i.e., out[i] = in1[i] / in2[i])
+ * @note The tensor data type for the inputs must be S32/F16/F32.
+ * @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i])
*/
class NEElementwiseDivision : public INEOperator
{
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in, out] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in, out] input1 First tensor input info. Data types supported: S32/F16/F32.
* @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[out] output Output tensor info. Data types supported: Same as @p input1.
*/
void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division
*
- * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+ * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
* @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
* @param[in] output Output tensor info. Data types supported: Same as @p input1.
*
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index bfe5799362..97d5ffec70 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -243,6 +243,8 @@ v20.11 Public major release
- @ref NEArithmeticSubtractionKernel
- @ref NEPixelWiseMultiplication
- @ref NEPixelWiseMultiplicationKernel
+ - @ref NEElementwiseDivision
+ - @ref NEDivisionOperationKernel
- Interface change
- Properly support softmax axis to have the same meaning as other major frameworks. That is, axis now defines the dimension
on which Softmax/Logsoftmax is performed. E.g. for input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5.
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index db4f5923bc..da53a523e6 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -142,6 +142,14 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar
case ArithmeticOperation::DIV:
{
res = a / b;
+ if(std::is_integral<ScalarType>::value)
+ {
+ res = (b == 0) ? 0 : res;
+ if(static_cast<int32_t>(a) % static_cast<int32_t>(b) != 0 && ((a < 0) != (b < 0)))
+ {
+ --res;
+ }
+ }
break;
}
case ArithmeticOperation::POWER:
@@ -208,6 +216,12 @@ inline typename VectorType::type elementwise_arithm_op(const typename VectorType
}
template <>
+inline int32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<int32_t, 4>>(const int32x4_t &a, const int32x4_t &b)
+{
+ return vcvtq_s32_f32(vfloorq_f32(wrapper::vdiv(vcvtq_f32_s32(a), vcvtq_f32_s32(b))));
+}
+
+template <>
inline float32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<float, 4>>(const float32x4_t &a, const float32x4_t &b)
{
return wrapper::vdiv(a, b);
@@ -1259,7 +1273,7 @@ void NEDivisionOperationKernel::configure(const ITensorInfo *input1, const ITens
Status NEDivisionOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
{
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32);
return NEArithmeticOperationKernel::validate_arguments(input1, input2, output);
}
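The vector specialisation added above handles S32 by dividing in floating point, flooring, and converting back. Below is a minimal standalone sketch of that idea, assuming an AArch64 target where the plain NEON intrinsics vdivq_f32 and vrndmq_f32 (round towards minus infinity) are available; the patch itself goes through the library's wrapper layer and vfloorq_f32 instead.

#include <arm_neon.h>

// Sketch only: convert S32 lanes to F32, divide, floor, convert back.
static inline int32x4_t floor_div_s32x4(int32x4_t a, int32x4_t b)
{
    float32x4_t fa = vcvtq_f32_s32(a);              // widen to float
    float32x4_t fb = vcvtq_f32_s32(b);
    float32x4_t q  = vrndmq_f32(vdivq_f32(fa, fb)); // divide, round towards -inf
    return vcvtq_s32_f32(q);                        // convert back to S32
}

Dividing through F32 keeps the kernel branch-free per lane, at the cost of exactness for quotients whose magnitude exceeds what a 24-bit mantissa can represent.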
diff --git a/tests/validation/NEON/ElementwiseDivision.cpp b/tests/validation/NEON/ElementwiseDivision.cpp
index f6e0a65e84..4b613cbd0a 100644
--- a/tests/validation/NEON/ElementwiseDivision.cpp
+++ b/tests/validation/NEON/ElementwiseDivision.cpp
@@ -44,6 +44,9 @@ namespace
{
RelativeTolerance<float> tolerance_fp32(0.000001f);
/** Input data sets **/
+const auto ElementwiseDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32),
+ framework::dataset::make("DataType", DataType::S32)),
+ framework::dataset::make("DataType", DataType::S32));
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
RelativeTolerance<half> tolerance_fp16(static_cast<half>(0.01f));
const auto ElementwiseDivisionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -135,6 +138,16 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseDivisionBroadcastFixture<
TEST_SUITE_END() // F32
TEST_SUITE_END() // Float
+TEST_SUITE(Integer)
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<int32_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionS32Dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+TEST_SUITE_END() // Integer
+
TEST_SUITE_END() // ElementwiseDivision
TEST_SUITE_END() // NEON
} // namespace validation
diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h
index ebc52d5083..dcb408c801 100644
--- a/tests/validation/fixtures/ElementwiseOperationsFixture.h
+++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h
@@ -59,16 +59,23 @@ protected:
template <typename U>
void fill(U &&tensor, int i)
{
- switch(_op)
+ if(is_data_type_float(tensor.data_type()))
{
- case ArithmeticOperation::DIV:
- library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) });
- break;
- case ArithmeticOperation::POWER:
- library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f);
- break;
- default:
- library->fill_tensor_uniform(tensor, i);
+ switch(_op)
+ {
+ case ArithmeticOperation::DIV:
+ library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) });
+ break;
+ case ArithmeticOperation::POWER:
+ library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f);
+ break;
+ default:
+ library->fill_tensor_uniform(tensor, i);
+ }
+ }
+ else
+ {
+ library->fill_tensor_uniform(tensor, i);
}
}
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index aab9d9d00c..f22c84e153 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -74,6 +74,15 @@ T arithm_op(ArithmeticOperation op, T src1, T src2, ConvertPolicy convert_policy
case ArithmeticOperation::DIV:
{
val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2));
+ if(std::is_integral<T>::value)
+ {
+ // Implement flooring division
+ val = (src2 == 0) ? 0 : val;
+ if(static_cast<int32_t>(src1) % static_cast<int32_t>(src2) != 0 && ((src1 < 0) != (src2 < 0)))
+ {
+ --val;
+ }
+ }
break;
}
case ArithmeticOperation::POWER: