From 11c562c94fa6a0399aff798bfd970ed8c1942730 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Wed, 10 Jun 2020 16:34:50 +0100
Subject: COMPMID-3527: QLSTM should allow nullptr for optional projection bias

Change-Id: I5568ca1e9383f3960886f211d8320fc4525a8804
Signed-off-by: Michele Di Giorgio
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3326
Reviewed-by: James Conroy
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../core/NEON/kernels/NEArithmeticAdditionKernel.h | 21 ++++++++---------
 arm_compute/core/NEON/wrapper/scalar/add.h         |  9 +++++++-
 .../runtime/NEON/functions/NEArithmeticAddition.h  | 26 +++++++++++++++++-----
 .../CL/kernels/CLElementwiseOperationKernel.cpp    | 12 +++++++---
 .../NEON/kernels/NEArithmeticAdditionKernel.cpp    | 15 +++++++++++--
 src/runtime/CL/functions/CLQLSTMLayer.cpp          | 16 ++++++++++---
 src/runtime/NEON/functions/NEQLSTMLayer.cpp        | 15 ++++++++++---
 tests/validation/NEON/ArithmeticAddition.cpp       | 15 +++++++++++--
 .../validation/reference/ArithmeticOperations.cpp  |  1 +
 9 files changed, 100 insertions(+), 30 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
index 36d257b886..bff34dfda2 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,23 +61,24 @@ public:
      *   - (S16,U8)                        -> S16
      *   - (U8,S16)                        -> S16
      *   - (S16,S16)                       -> S16
+     *   - (S32,S32)                       -> S32
      *   - (F16,F16)                       -> F16
      *   - (F32,F32)                       -> F32
      *   - (QASYMM8,QASYMM8)               -> QASYMM8
      *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
      *   - (QSYMM16,QSYMM16)               -> QSYMM16
      *
-     * @param[in]  input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[in]  input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in]  input1 First input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in]  input2 Second input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in]  policy Overflow policy.
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAdditionKernel
      *
-     * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[in] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32.
+     * @param[in] input1 First input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in] input2 Second input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32.
      * @param[in] policy Overflow policy.
      *
      * @return a status
@@ -90,9 +91,9 @@ public:
 private:
     /** Common signature for all the specialised add functions
      *
-     * @param[in]  input1 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
-     * @param[in]  input2 An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
-     * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32.
+     * @param[in]  input1 First input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
+     * @param[in]  input2 Second input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32
+     * @param[out] output The output tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/S32/F32.
      * @param[in]  policy Overflow policy.
      * @param[in]  window Region on which to execute the kernel.
      */
diff --git a/arm_compute/core/NEON/wrapper/scalar/add.h b/arm_compute/core/NEON/wrapper/scalar/add.h
index 5a04fe20fa..e0fc9ab103 100644
--- a/arm_compute/core/NEON/wrapper/scalar/add.h
+++ b/arm_compute/core/NEON/wrapper/scalar/add.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -44,6 +44,13 @@ inline int16_t add_sat(const int16_t &a, const int16_t &b)
     return vget_lane_s16(vqadd_s16(va, vb), 0);
 }
 
+inline int32_t add_sat(const int32_t &a, const int32_t &b)
+{
+    const int32x2_t va = { a, 0 };
+    const int32x2_t vb = { b, 0 };
+    return vget_lane_s32(vqadd_s32(va, vb), 0);
+}
+
 inline float add_sat(const float &a, const float &b)
 {
     // No notion of saturation exists in floating point
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
index 6cab5b3547..bf8f2cc14b 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticAddition.h
@@ -37,18 +37,32 @@ class NEArithmeticAddition : public INESimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in]  input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[in]  input2   Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[out] output   Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
+     * Valid configurations (Input1,Input2) -> Output :
+     *
+     *   - (U8,U8)                         -> U8
+     *   - (U8,U8)                         -> S16
+     *   - (S16,U8)                        -> S16
+     *   - (U8,S16)                        -> S16
+     *   - (S16,S16)                       -> S16
+     *   - (S32,S32)                       -> S32
+     *   - (F16,F16)                       -> F16
+     *   - (F32,F32)                       -> F32
+     *   - (QASYMM8,QASYMM8)               -> QASYMM8
+     *   - (QASYMM8_SIGNED,QASYMM8_SIGNED) -> QASYMM8_SIGNED
+     *   - (QSYMM16,QSYMM16)               -> QSYMM16
+     *
+     * @param[in]  input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in]  input2   Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[out] output   Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
      * @param[in]  policy   Policy to use to handle overflow.
      * @param[in]  act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
      */
     void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticAddition
      *
-     * @param[in] input1   First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[in] input2   Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
-     * @param[in] output   Output tensor. Data types supported: U8/SQASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
+     * @param[in] input1   First tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in] input2   Second tensor input info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
+     * @param[in] output   Output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/S32/F32
      * @param[in] policy   Policy to use to handle overflow
      * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Currently not supported.
      *
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
index 00a97d50e9..4e7d3b3753 100644
--- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
+++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp
@@ -93,9 +93,13 @@ Status validate_arguments_with_float_only_supported_rules(const ITensorInfo &inp
 Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+                                                         DataType::S16, DataType::QSYMM16, DataType::F16,
+                                                         DataType::S32, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input2);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+                                                         DataType::S16, DataType::QSYMM16, DataType::F16,
+                                                         DataType::S32, DataType::F32);
 
     const bool is_quantized = is_data_type_quantized(input1.data_type()) || is_data_type_quantized(input2.data_type());
     if(is_quantized)
@@ -119,7 +123,9 @@ Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const
     if(output.total_size() > 0)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&output);
-        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+        ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+                                                             DataType::S16, DataType::QSYMM16, DataType::F16,
+                                                             DataType::S32, DataType::F32);
         ARM_COMPUTE_RETURN_ERROR_ON_MSG((output.data_type() == DataType::U8) && ((input1.data_type() != DataType::U8) || (input2.data_type() != DataType::U8)),
                                         "Output can only be U8 if both inputs are U8");
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0),
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index f8ee578ef8..3878c764a6 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -815,8 +815,12 @@ Status
 validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output, ConvertPolicy policy)
 {
     ARM_COMPUTE_UNUSED(policy);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+                                                         DataType::S16, DataType::QSYMM16, DataType::F16,
+                                                         DataType::S32, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+                                                         DataType::S16, DataType::QSYMM16, DataType::F16,
+                                                         DataType::S32, DataType::F32);
 
     const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
@@ -834,6 +838,7 @@ Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2,
            && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
            && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
            && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
+           && !(input1.data_type() == DataType::S32 && input2.data_type() == DataType::S32 && output.data_type() == DataType::S32)
            && !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32 && output.data_type() == DataType::F32)
            && !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16 && output.data_type() == DataType::F16)
            && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8)
@@ -862,6 +867,10 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo &input1, ITe
     {
         set_format_if_unknown(output, Format::S16);
     }
+    else if(input1.data_type() == DataType::S32 || input2.data_type() == DataType::S32)
+    {
+        set_format_if_unknown(output, Format::S32);
+    }
     else if(input1.data_type() == DataType::F16 || input2.data_type() == DataType::F16)
     {
         set_format_if_unknown(output, Format::F16);
     }
@@ -926,6 +935,8 @@ void NEArithmeticAdditionKernel::configure(const ITensor *input1, const ITensor
         { "add_saturate_U8_U8_S16", &add_U8_U8_S16 },
         { "add_wrap_S16_S16_S16", &add_same<int16_t, false> },
         { "add_saturate_S16_S16_S16", &add_same<int16_t, true> },
+        { "add_wrap_S32_S32_S32", &add_same<int32_t, false> },
+        { "add_saturate_S32_S32_S32", &add_same<int32_t, true> },
         { "add_wrap_F32_F32_F32", &add_same<float, false> },
         { "add_saturate_F32_F32_F32", &add_same<float, true> },
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
diff --git a/src/runtime/CL/functions/CLQLSTMLayer.cpp b/src/runtime/CL/functions/CLQLSTMLayer.cpp
index 524c7b3aae..f063410972 100644
--- a/src/runtime/CL/functions/CLQLSTMLayer.cpp
+++ b/src/runtime/CL/functions/CLQLSTMLayer.cpp
@@ -211,6 +211,10 @@ void CLQLSTMLayer::configure(const CLCompileContext &compile_context, const ICLT
     if(_has_projection)
     {
         _projection_reduction.configure(compile_context, _projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+        if(_projection_bias != nullptr)
+        {
+            _projection_bias_add.configure(compile_context, ArithmeticOperation::ADD, _projection_bias, &_projection_eff_bias,
+                                           &_projection_eff_bias, ConvertPolicy::SATURATE);
+        }
     }
 
     // Pre-transpose weights to be used in GEMM.
@@ -640,6 +644,12 @@ Status CLQLSTMLayer::validate(const ITensorInfo *input,
         ARM_COMPUTE_RETURN_ON_ERROR(CLGEMMLowpMatrixAReductionKernel::validate(lstm_params.projection_weights(), &projection_eff_bias_info,
                                                                                GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)));
+        if(lstm_params.projection_bias() != nullptr)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
+            ARM_COMPUTE_RETURN_ON_ERROR(CLSaturatedArithmeticOperationKernel::validate(ArithmeticOperation::ADD, lstm_params.projection_bias(), &projection_eff_bias_info,
+                                                                                       &projection_eff_bias_info, ConvertPolicy::SATURATE));
+        }
     }
 
     const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1, input_to_forget_weights->data_type(), input_to_forget_weights->quantization_info());
@@ -832,7 +842,6 @@ Status CLQLSTMLayer::validate(const ITensorInfo *input,
     if(lstm_params.has_projection())
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_forget_weights, lstm_params.projection_weights());
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, lstm_params.projection_bias());
         ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);
 
         const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
@@ -1095,10 +1104,11 @@ void CLQLSTMLayer::prepare()
     if(_has_projection)
     {
+        _projection_eff_bias.allocator()->allocate();
+        CLScheduler::get().enqueue(_projection_reduction);
         if(_projection_bias != nullptr)
         {
-            _projection_eff_bias.allocator()->allocate();
-            CLScheduler::get().enqueue(_projection_reduction);
+            CLScheduler::get().enqueue(_projection_bias_add);
             _projection_bias->mark_as_unused();
         }
 
diff --git a/src/runtime/NEON/functions/NEQLSTMLayer.cpp b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
index 083e3fddb4..a22c669ca7 100644
--- a/src/runtime/NEON/functions/NEQLSTMLayer.cpp
+++ b/src/runtime/NEON/functions/NEQLSTMLayer.cpp
@@ -189,6 +189,10 @@ void NEQLSTMLayer::configure(const ITensor *input,
     if(_has_projection)
     {
         _projection_reduction.configure(_projection_weights, &_projection_eff_bias, GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true));
+        if(_projection_bias != nullptr)
+        {
+            _projection_bias_add.configure(_projection_bias, &_projection_eff_bias, &_projection_eff_bias, ConvertPolicy::SATURATE);
+        }
     }
 
     // Pre-transpose weights to be used in GEMM.
@@ -612,6 +616,11 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
         ARM_COMPUTE_RETURN_ON_ERROR(NEGEMMLowpMatrixAReductionKernel::validate(lstm_params.projection_weights(), &projection_eff_bias_info,
                                                                                GEMMLowpReductionKernelInfo(output_size, false, lstm_params.hidden_state_zero(), true)));
+        if(lstm_params.projection_bias() != nullptr)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lstm_params.projection_bias(), 1, DataType::S32);
+            ARM_COMPUTE_RETURN_ON_ERROR(NEArithmeticAdditionKernel::validate(lstm_params.projection_bias(), &projection_eff_bias_info, &projection_eff_bias_info, ConvertPolicy::SATURATE));
+        }
     }
 
     const TensorInfo input_weights_transposed(TensorShape(num_units, input_size), 1, input_to_forget_weights->data_type(), input_to_forget_weights->quantization_info());
@@ -804,7 +813,6 @@ Status NEQLSTMLayer::validate(const ITensorInfo *input,
     if(lstm_params.has_projection())
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(recurrent_to_forget_weights, lstm_params.projection_weights());
-        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(forget_gate_bias, lstm_params.projection_bias());
         ARM_COMPUTE_RETURN_ERROR_ON(qoutput_state_in.scale == 0);
 
         const UniformQuantizationInfo qprojection = lstm_params.projection_weights()->quantization_info().uniform();
@@ -1065,10 +1073,11 @@ void NEQLSTMLayer::prepare()
     if(_has_projection)
     {
+        _projection_eff_bias.allocator()->allocate();
+        NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
         if(_projection_bias != nullptr)
         {
-            _projection_eff_bias.allocator()->allocate();
-            NEScheduler::get().schedule(&_projection_reduction, Window::DimY);
+            NEScheduler::get().schedule(&_projection_bias_add, Window::DimY);
             _projection_bias->mark_as_unused();
         }
 
diff --git a/tests/validation/NEON/ArithmeticAddition.cpp b/tests/validation/NEON/ArithmeticAddition.cpp
index d1b6ce24dc..72993172fd 100644
--- a/tests/validation/NEON/ArithmeticAddition.cpp
+++ b/tests/validation/NEON/ArithmeticAddition.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -52,6 +52,8 @@ const auto ArithmeticAdditionU8Dataset = combine(combine(framework::dataset::mak
                                                  DataType::U8));
 const auto ArithmeticAdditionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)),
                                                   framework::dataset::make("DataType", DataType::S16));
+const auto ArithmeticAdditionS32Dataset = combine(combine(framework::dataset::make("DataType", { DataType::S32 }), framework::dataset::make("DataType", DataType::S32)),
+                                                  framework::dataset::make("DataType", DataType::S32));
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
 const auto ArithmeticAdditionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)),
                                                    framework::dataset::make("DataType", DataType::F16));
@@ -61,7 +63,7 @@ const auto ArithmeticAdditionFP32Dataset = combine(combine(framework::dataset::m
 const auto ArithmeticAdditionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)),
                                                       framework::dataset::make("DataType", DataType::QASYMM8));
 const auto ArithmeticAdditionQASYMM8SIGNEDDataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
-                                                    framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
+                                                            framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
 const auto ArithmeticAdditionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16), framework::dataset::make("DataType", DataType::QSYMM16)),
                                                       framework::dataset::make("DataType", DataType::QSYMM16));
 } // namespace
@@ -130,6 +132,15 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEArithmeticAdditionFixture<int16_t>, framework
     validate(Accessor(_target), _reference);
 }
 TEST_SUITE_END() // S16
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticAdditionFixture<int32_t>, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), ArithmeticAdditionS32Dataset),
+                                                                                                            framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
 TEST_SUITE_END() // Integer
 
 TEST_SUITE(Float)
diff --git a/tests/validation/reference/ArithmeticOperations.cpp b/tests/validation/reference/ArithmeticOperations.cpp
index fd32f45cfa..40f4e64fbf 100644
--- a/tests/validation/reference/ArithmeticOperations.cpp
+++ b/tests/validation/reference/ArithmeticOperations.cpp
@@ -193,6 +193,7 @@ SimpleTensor<T> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<
     return dst;
 }
 
+template SimpleTensor<int32_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int32_t> &src1, const SimpleTensor<int32_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<int16_t> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<half> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<half> &src1, const SimpleTensor<half> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
 template SimpleTensor<float> arithmetic_operation(ArithmeticOperation op, const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, DataType dst_data_type, ConvertPolicy convert_policy);
--
cgit v1.2.1
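
Note: the add.h hunk above implements the scalar saturating add for int32_t by performing
the operation in lane 0 of a NEON register via vqadd_s32. For readers without the NEON
intrinsics reference to hand, here is a minimal portable sketch of the same semantics
(plain standard C++; the function name add_sat_s32 is ours, not part of the library):

    #include <cstdint>
    #include <limits>

    // Saturating 32-bit add: clamp to the int32_t range on overflow instead of
    // wrapping, which is what vqadd_s32 does per lane.
    int32_t add_sat_s32(int32_t a, int32_t b)
    {
        const int64_t sum = static_cast<int64_t>(a) + static_cast<int64_t>(b);
        if(sum > std::numeric_limits<int32_t>::max())
        {
            return std::numeric_limits<int32_t>::max();
        }
        if(sum < std::numeric_limits<int32_t>::min())
        {
            return std::numeric_limits<int32_t>::min();
        }
        return static_cast<int32_t>(sum);
    }

ConvertPolicy::SATURATE selects this behaviour; ConvertPolicy::WRAP keeps ordinary
two's-complement wrap-around.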
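
Note: the QLSTM changes fold the optional projection bias into the effective bias with an
in-place saturated S32 addition (eff_bias += bias), which is exactly the (S32,S32) -> S32
path enabled above. A hedged caller-side sketch of that operation using the public
NEArithmeticAddition function follows; the tensor shape and variable names are
illustrative only, not taken from the patch:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void accumulate_projection_bias_sketch()
    {
        // Stand-ins for the projection bias and the effective bias produced by
        // the GEMMLowp reduction kernel; both are 1D S32 tensors.
        Tensor bias{};
        Tensor eff_bias{};
        bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));
        eff_bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::S32));

        // eff_bias += bias, clamping on overflow rather than wrapping.
        NEArithmeticAddition add{};
        add.configure(&bias, &eff_bias, &eff_bias, ConvertPolicy::SATURATE);

        bias.allocator()->allocate();
        eff_bias.allocator()->allocate();
        // ... fill both tensors, then:
        add.run();
    }

When lstm_params.projection_bias() is nullptr, the addition is simply never configured or
scheduled, which is the point of this patch.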
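
Note: both new validate() branches follow the library's usual probe-before-configure
pattern: validation operates on tensor infos only, so support can be checked without
allocating any memory. A hedged sketch of caller-side use (shapes illustrative, the
helper function is ours):

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h"

    using namespace arm_compute;

    bool s32_saturated_add_supported()
    {
        const TensorInfo in1(TensorShape(16U), 1, DataType::S32);
        const TensorInfo in2(TensorShape(16U), 1, DataType::S32);
        const TensorInfo out(TensorShape(16U), 1, DataType::S32);

        // validate() returns a Status instead of asserting, so a caller (such as
        // NEQLSTMLayer::validate above) can propagate the error status instead.
        const Status s = NEArithmeticAddition::validate(&in1, &in2, &out, ConvertPolicy::SATURATE);
        return s.error_code() == ErrorCode::OK;
    }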