From 1813422f5095983fa802e96cb8e9e0d980de82c7 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 3 Sep 2020 21:00:23 +0100
Subject: COMPMID-3583: Add S32 support to NEElementwiseDivision

Division follows the flooring division approach where for example
5/2=2 while -5/2=-3

Signed-off-by: Georgios Pinitas
Change-Id: I65756e0b31fe8d97f743a4c13dc5f96304722f75
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3929
Tested-by: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 .../NEON/kernels/NEElementwiseOperationKernel.h    | 16 +++++++-------
 .../NEON/functions/NEElementwiseOperations.h       |  8 +++----
 docs/00_introduction.dox                           |  2 ++
 .../NEON/kernels/NEElementwiseOperationKernel.cpp  | 16 +++++++++++++-
 tests/validation/NEON/ElementwiseDivision.cpp      | 13 +++++++++++
 .../fixtures/ElementwiseOperationsFixture.h        | 25 ++++++++++++++++---------
 .../validation/reference/ElementwiseOperations.cpp | 11 +++++++++-
 7 files changed, 68 insertions(+), 23 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
index 47b8c3b7c8..7dae25c22c 100644
--- a/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEElementwiseOperationKernel.h
@@ -97,7 +97,7 @@ public:
     /** Default constructor */
     NEArithmeticOperationKernel() = default;
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+    /** Configure kernel
      *
      * @param[in]  op     Arithmetic operation to be executed.
      * @param[in]  input1 First tensor input info. Data types supported: QASYMM8/S16/F16/S32/F32.
@@ -128,17 +128,17 @@ public:
     /** Default constructor */
     NEDivisionOperationKernel() = default;
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+    /** Configure kernel
      *
-     * @param[in]  input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in]  input1 First tensor input info. Data types supported: S32/F16/F32.
      * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
      * @param[out] output Output tensor info. Data types supported: Same as @p input1.
      */
     void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDivisionOperationKernel
      *
-     * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
      * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
      * @param[in] output Output tensor info. Data types supported: Same as @p input1.
      *
@@ -157,7 +157,7 @@ public:
     /** Default constructor */
     NEPowerOperationKernel() = default;
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+    /** Configure kernel
      *
      * @param[in]  input1 First tensor input info. Data types supported: F16/F32.
      * @param[in]  input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -165,7 +165,7 @@ public:
      */
     void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel
+    /** Static function to check if given info will lead to a valid configuration of @ref NEPowerOperationKernel
      *
      * @param[in] input1 First tensor input info. Data types supported: F16/F32.
      * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
@@ -186,7 +186,7 @@ public:
     /** Default constructor */
     NEComparisonOperationKernel() = default;
 
-    /** Static function to check if given info will lead to a valid configuration of @ref NEComparisonOperationKernel
+    /** Configure kernel
      *
      * @param[in] op     Comparison operation to be executed.
      * @param[in] input1 First tensor input info. Data types supported: QASYMM8/QASYMM8_SIGNED/S16/F16/S32/F32.
diff --git a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
index 66d8db5d0d..5c755e96ac 100644
--- a/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
+++ b/arm_compute/runtime/NEON/functions/NEElementwiseOperations.h
@@ -456,22 +456,22 @@ public:
 
 /** Basic function to run @ref NEArithmeticOperationKernel for division
  *
- * @note The tensor data type for the inputs must be F16/F32.
- * @note The function performs a squared different operation between two tensors (i.e., out[i] = in1[i] / in2[i])
+ * @note The tensor data type for the inputs must be S32/F16/F32.
+ * @note The function performs a division operation between two tensors (i.e., out[i] = in1[i] / in2[i])
  */
 class NEElementwiseDivision : public INEOperator
 {
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in, out] input1 First tensor input info. Data types supported: S32/F16/F32.
      * @param[in, out] input2 Second tensor input info. Data types supported: Same as @p input1.
      * @param[out]     output Output tensor info. Data types supported: Same as @p input1.
      */
     void configure(const ITensorInfo *input1, const ITensorInfo *input2, ITensorInfo *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticOperationKernel for division
      *
-     * @param[in] input1 First tensor input info. Data types supported: F16/F32.
+     * @param[in] input1 First tensor input info. Data types supported: S32/F16/F32.
      * @param[in] input2 Second tensor input info. Data types supported: Same as @p input1.
      * @param[in] output Output tensor info. Data types supported: Same as @p input1.
      *
diff --git a/docs/00_introduction.dox b/docs/00_introduction.dox
index bfe5799362..97d5ffec70 100644
--- a/docs/00_introduction.dox
+++ b/docs/00_introduction.dox
@@ -243,6 +243,8 @@ v20.11 Public major release
     - @ref NEArithmeticSubtractionKernel
     - @ref NEPixelWiseMultiplication
     - @ref NEPixelWiseMultiplicationKernel
+    - @ref NEElementwiseDivision
+    - @ref NEDivisionOperationKernel
 - Interface change
   - Properly support softmax axis to have the same meaning as other major frameworks. That is, axis now defines the dimension on which Softmax/Logsoftmax is performed.
     E.g. for input of shape 4x5x6 and axis=1, softmax will be applied to 4x6=24 vectors of size 5.
diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
index db4f5923bc..da53a523e6 100644
--- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
+++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp
@@ -142,6 +142,14 @@ inline ScalarType elementwise_arithm_op_scalar(const ScalarType &a, const Scalar
         case ArithmeticOperation::DIV:
         {
             res = a / b;
+            if(std::is_integral<ScalarType>::value)
+            {
+                res = (b == 0) ? 0 : res;
+                if(static_cast<int32_t>(a) % static_cast<int32_t>(b) != 0 && ((a < 0) != (b < 0)))
+                {
+                    --res;
+                }
+            }
             break;
         }
         case ArithmeticOperation::POWER:
@@ -207,6 +215,12 @@ inline typename VectorType::type elementwise_arithm_op(const typename VectorType
     return res;
 }
 
+template <>
+inline int32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<int32_t, 4>>(const int32x4_t &a, const int32x4_t &b)
+{
+    return vcvtq_s32_f32(vfloorq_f32(wrapper::vdiv(vcvtq_f32_s32(a), vcvtq_f32_s32(b))));
+}
+
 template <>
 inline float32x4_t elementwise_arithm_op<ArithmeticOperation::DIV, typename wrapper::traits::neon_vector<float, 4>>(const float32x4_t &a, const float32x4_t &b)
 {
@@ -1259,7 +1273,7 @@ void NEDivisionOperationKernel::configure(const ITensorInfo *input1, const ITens
 
 Status NEDivisionOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::S32, DataType::F16, DataType::F32);
     return NEArithmeticOperationKernel::validate_arguments(input1, input2, output);
 }
 
diff --git a/tests/validation/NEON/ElementwiseDivision.cpp b/tests/validation/NEON/ElementwiseDivision.cpp
index f6e0a65e84..4b613cbd0a 100644
--- a/tests/validation/NEON/ElementwiseDivision.cpp
+++ b/tests/validation/NEON/ElementwiseDivision.cpp
@@ -44,6 +44,9 @@ namespace
 {
 RelativeTolerance<float> tolerance_fp32(0.000001f);
 /** Input data sets **/
+const auto ElementwiseDivisionS32Dataset = combine(combine(framework::dataset::make("DataType", DataType::S32),
+                                                           framework::dataset::make("DataType", DataType::S32)),
+                                                   framework::dataset::make("DataType", DataType::S32));
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 RelativeTolerance<half> tolerance_fp16(static_cast<half>(0.01f));
 const auto ElementwiseDivisionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
@@ -135,6 +138,16 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcast, NEElementwiseDivisionBroadcastFixture<
 TEST_SUITE_END() // F32
 TEST_SUITE_END() // Float
 
+TEST_SUITE(Integer)
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEElementwiseDivisionFixture<int32_t>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ElementwiseDivisionS32Dataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+TEST_SUITE_END() // Integer
+
 TEST_SUITE_END() // ElementwiseDivision
 TEST_SUITE_END() // NEON
 } // namespace validation
diff --git a/tests/validation/fixtures/ElementwiseOperationsFixture.h b/tests/validation/fixtures/ElementwiseOperationsFixture.h
index ebc52d5083..dcb408c801 100644
--- a/tests/validation/fixtures/ElementwiseOperationsFixture.h
+++ b/tests/validation/fixtures/ElementwiseOperationsFixture.h
@@ -59,16 +59,23 @@ protected:
     template <typename U>
     void fill(U &&tensor, int i)
     {
-        switch(_op)
+        if(is_data_type_float(tensor.data_type()))
         {
-            case ArithmeticOperation::DIV:
-                library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) });
-                break;
-            case ArithmeticOperation::POWER:
-                library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f);
-                break;
-            default:
-                library->fill_tensor_uniform(tensor, i);
+            switch(_op)
+            {
+                case ArithmeticOperation::DIV:
+                    library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) });
+                    break;
+                case ArithmeticOperation::POWER:
+                    library->fill_tensor_uniform(tensor, i, 0.0f, 5.0f);
+                    break;
+                default:
+                    library->fill_tensor_uniform(tensor, i);
+            }
+        }
+        else
+        {
+            library->fill_tensor_uniform(tensor, i);
         }
     }
 
diff --git a/tests/validation/reference/ElementwiseOperations.cpp b/tests/validation/reference/ElementwiseOperations.cpp
index aab9d9d00c..f22c84e153 100644
--- a/tests/validation/reference/ElementwiseOperations.cpp
+++ b/tests/validation/reference/ElementwiseOperations.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 Arm Limited.
+ * Copyright (c) 2018-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -74,6 +74,15 @@ T arithm_op(ArithmeticOperation op, T src1, T src2, ConvertPolicy convert_policy
         case ArithmeticOperation::DIV:
         {
             val = (static_cast<intermediate_type>(src1) / static_cast<intermediate_type>(src2));
+            if(std::is_integral<T>::value)
+            {
+                // Implement flooring division
+                val = (src2 == 0) ? 0 : val;
+                if(static_cast<int32_t>(src1) % static_cast<int32_t>(src2) != 0 && ((src1 < 0) != (src2 < 0)))
+                {
+                    --val;
+                }
+            }
             break;
         }
        case ArithmeticOperation::POWER:
-- 
cgit v1.2.1
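
Note (not part of the patch above): the commit message describes the flooring-division rule that both the scalar kernel path and the test reference implement — the quotient is rounded towards negative infinity, so 5/2 gives 2 while -5/2 gives -3, and division by zero is mapped to 0 by the (b == 0) guard. The standalone C++ sketch below illustrates that rule under those assumptions; the helper name flooring_div is chosen here purely for illustration and does not exist in the library.

// Standalone sketch of the flooring-division behaviour added by this patch.
#include <cassert>
#include <cstdint>

int32_t flooring_div(int32_t a, int32_t b)
{
    if(b == 0)
    {
        return 0; // the kernel maps division by zero to 0
    }
    int32_t res = a / b;                     // C++ integer division truncates towards zero
    if((a % b != 0) && ((a < 0) != (b < 0))) // non-exact result with operands of opposite sign
    {
        --res;                               // round towards negative infinity instead
    }
    return res;
}

int main()
{
    assert(flooring_div(5, 2) == 2);   // matches the commit message example
    assert(flooring_div(-5, 2) == -3); // truncation would give -2; flooring gives -3
    assert(flooring_div(-4, 2) == -2); // exact division needs no correction
    assert(flooring_div(7, 0) == 0);   // division by zero guard
    return 0;
}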