From 52ea9c24607648acce37cda960e4fbaa59c9a263 Mon Sep 17 00:00:00 2001
From: Pablo Tello
Date: Tue, 10 Dec 2019 11:28:53 +0000
Subject: COMPMID-2811: QASYMM8_SIGNED support in NEPixelwiseMultiplication.

Change-Id: I4e52bd55fc9804796f47fab04859961d846f4ceb
Signed-off-by: Pablo Tello
Reviewed-on: https://review.mlplatform.org/c/2459
Comments-Addressed: Arm Jenkins
Reviewed-by: Michele Di Giorgio
Tested-by: Arm Jenkins
---
 .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 16 ++--
 .../NEON/functions/NEPixelWiseMultiplication.h     | 16 ++--
 .../kernels/NEPixelWiseMultiplicationKernel.cpp    | 60 ++++++++++-----
 tests/validation/NEON/PixelWiseMultiplication.cpp  | 88 ++++++++++++++++++----
 .../reference/PixelWiseMultiplication.cpp          | 28 +++++++
 5 files changed, 160 insertions(+), 48 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
index 612177152b..9b71ac81cf 100644
--- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
@@ -56,12 +56,12 @@ public:
      * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
      *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
      *
-     * @param[in]  input1          An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
-     * @param[in]  input2          An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
-     * @param[out] output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @param[in]  input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
+     * @param[in]  input2          An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[out] output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in]  scale           Scale to apply after multiplication.
      *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in]  overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
+     * @param[in]  overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
      * @param[in]  rounding_policy Rounding policy.
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
@@ -70,12 +70,12 @@ public:
      * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
      *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
      *
-     * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QSYMM16/S16/F16/F32
-     * @param[in] input2          An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
-     * @param[in] output          Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+     * @param[in] input2          An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[in] output          Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in] scale           Scale to apply after multiplication.
      *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
+     * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
      * @param[in] rounding_policy Rounding policy.
      *
      * @return a status
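The scale constraint repeated in these comments is easy to misread: only 1/255 (handled with round to nearest even) and 1/2^n for n in [0, 15] (handled with round to zero) are accepted. A minimal standalone check of that documented rule, as a sketch; the helper name is hypothetical and not part of the library:

    // Hypothetical helper (not part of this patch): accepts exactly the scale
    // values documented above, i.e. 1/255 or 1/2^n with 0 <= n <= 15.
    bool is_supported_pixel_mul_scale(float scale)
    {
        if(scale == 1.f / 255.f)
        {
            return true; // handled with round-to-nearest-even (round half up)
        }
        for(int n = 0; n <= 15; ++n)
        {
            // 1/2^n is exactly representable as a float, so == is safe here.
            if(scale == 1.f / static_cast<float>(1 << n))
            {
                return true; // handled with round-to-zero
            }
        }
        return false;
    }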
diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index f2ea77d7e0..25f409871b 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -40,14 +40,14 @@ public:
      * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
      *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
      *
-     * @param[in, out] input1          An input tensor. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
+     * @param[in, out] input1          An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
      *                                 This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[in, out] input2          An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if @p input1 is QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
      *                                 This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @param[out]     output          Output tensor. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in]      scale           Scale to apply after multiplication.
      *                                 Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in]      overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
+     * @param[in]      overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
      * @param[in]      rounding_policy Rounding policy.
      */
     void configure(ITensor *input1, ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
@@ -56,12 +56,12 @@ public:
      * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
      *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
      *
-     * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/S16/QSYMM16/F16/F32
-     * @param[in] input2          An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
-     * @param[in] output          Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/QSYMM16/F16/F32
+     * @param[in] input2          An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[in] output          Output tensor info. Data types supported: U8 (Only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), QASYMM8_SIGNED (only if @p input1 is QASYMM8_SIGNED), S16, QSYMM16 (only if both inputs are QSYMM16), F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in] scale           Scale to apply after multiplication.
      *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8 or QSYMM16.
+     * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8, QASYMM8_SIGNED or QSYMM16.
      * @param[in] rounding_policy Rounding policy.
      *
      * @return a status
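Driving the extended function from user code follows the usual Arm Compute Library runtime pattern; a sketch assuming the standard Tensor/TensorAllocator flow, with a made-up shape and made-up quantization parameters:

    #include "arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void example_qasymm8_signed_mul()
    {
        const TensorShape shape(32U, 13U, 2U);

        Tensor input1, input2, output;
        input1.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
        input2.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.25f, -3)));
        output.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.1f, 0)));

        NEPixelWiseMultiplication mul;
        // SATURATE is required here: WRAP is rejected for quantized data types.
        mul.configure(&input1, &input2, &output, 1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO);

        input1.allocator()->allocate();
        input2.allocator()->allocate();
        output.allocator()->allocate();

        // ... fill input1 and input2 with quantized int8 data ...

        mul.run();
    }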
diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index 4bd03e959e..7ec52f788b 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -64,26 +64,18 @@ inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *i
     ARM_COMPUTE_UNUSED(rounding_policy);

     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::QSYMM16, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
                                     "Output can only be U8 if both inputs are U8");

-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() == DataType::QASYMM8 && input2->data_type() != DataType::QASYMM8,
-                                    "Input2 must be QASYMM8 if input1 is QASYMM8");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() != DataType::QASYMM8 && input2->data_type() == DataType::QASYMM8,
-                                    "Input1 must be QASYMM8 if input2 is QASYMM8");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() == DataType::QSYMM16 && input2->data_type() != DataType::QSYMM16,
-                                    "Input2 must be QSYMM16 if input1 is QSYMM16");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() != DataType::QSYMM16 && input2->data_type() == DataType::QSYMM16,
-                                    "Input1 must be QSYMM16 if input2 is QSYMM16");
-
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_data_type_quantized(input1->data_type()) && overflow_policy == ConvertPolicy::WRAP,
-                                    "ConvertPolicy cannot be WRAP if datatype is quantized");
+    if(is_data_type_quantized(input1->data_type()) ||
+       is_data_type_quantized(input2->data_type()))
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2);
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(overflow_policy == ConvertPolicy::WRAP, "ConvertPolicy cannot be WRAP if datatype is quantized");
+    }

     if(output->total_size() > 0)
     {
@@ -142,6 +134,10 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo *inpu
     {
         set_data_type_if_unknown(*output, DataType::QASYMM8);
     }
+    else if(input1->data_type() == DataType::QASYMM8_SIGNED)
+    {
+        set_data_type_if_unknown(*output, DataType::QASYMM8_SIGNED);
+    }
     else if(input1->data_type() == DataType::QSYMM16)
     {
         set_data_type_if_unknown(*output, DataType::QSYMM16);
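Collapsing the four per-type checks into a single ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES means any mix of two different quantized types, including QASYMM8 with QASYMM8_SIGNED, is now rejected by one generic rule. Roughly, as an illustration mirroring rows 9-13 of the Validate test further down (note that an OK Status converts to true):

    const TensorInfo s8_info(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED);
    const TensorInfo u8_info(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8);

    // Accepted: both inputs QASYMM8_SIGNED with the SATURATE policy.
    const bool ok = bool(NEPixelWiseMultiplication::validate(&s8_info, &s8_info, &s8_info,
                                                             1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); // true

    // Rejected: mixed QASYMM8 / QASYMM8_SIGNED inputs.
    const bool mixed = bool(NEPixelWiseMultiplication::validate(&s8_info, &u8_info, &s8_info,
                                                                1.f, ConvertPolicy::SATURATE, RoundingPolicy::TO_ZERO)); // false

    // Rejected: WRAP is not allowed once an input is quantized.
    const bool wrap = bool(NEPixelWiseMultiplication::validate(&s8_info, &s8_info, &s8_info,
                                                               1.f, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO)); // false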
@@ -238,6 +234,32 @@ inline void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n_opt(const void *__restrict in
     vst1q_u8(output, vcombine_u8(pa, pb));
 }

+inline void mul_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr,
+                                                                        float scale, const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info,
+                                                                        const UniformQuantizationInfo &output_qua_info)
+{
+    const auto input1 = static_cast<const qasymm8_signed_t *>(input1_ptr);
+    const auto input2 = static_cast<const qasymm8_signed_t *>(input2_ptr);
+    const auto output = static_cast<qasymm8_signed_t *>(output_ptr);
+
+    const qasymm8x16_signed_t input1_q = vld1q_s8(input1);
+    const qasymm8x16_signed_t input2_q = vld1q_s8(input2);
+
+    // Dequantize inputs
+    const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info);
+    const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info);
+
+    const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset };
+
+    const float32x4x4_t out_f32x4x4 =
+    {
+        vmulq_f32(in1_f32x4x4.val[0], in2_f32x4x4.val[0]),
+        vmulq_f32(in1_f32x4x4.val[1], in2_f32x4x4.val[1]),
+        vmulq_f32(in1_f32x4x4.val[2], in2_f32x4x4.val[2]),
+        vmulq_f32(in1_f32x4x4.val[3], in2_f32x4x4.val[3]),
+    };
+
+    const int8x16_t result = vquantize_signed(out_f32x4x4, tmp_qua_info);
+    vst1q_s8(output, result);
+}
+
 void mul_saturate_QSYMM16_QSYMM16_QSYMM16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
                                             const UniformQuantizationInfo &input1_qua_info, const UniformQuantizationInfo &input2_qua_info,
                                             const UniformQuantizationInfo &output_qua_info)
 {
@@ -604,6 +626,10 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
     {
         _run_optimized_qasymm8 = true;
     }
+    else if(dt_input1 == DataType::QASYMM8_SIGNED && dt_input2 == DataType::QASYMM8_SIGNED)
+    {
+        _func_quantized = &mul_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED_n;
+    }
     else if(dt_input1 == DataType::QSYMM16 && dt_input2 == DataType::QSYMM16)
     {
         _func_quantized = &mul_saturate_QSYMM16_QSYMM16_QSYMM16_n;
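The new routine processes 16 lanes at a time: load, dequantize into four float32x4_t vectors, multiply, then requantize with a temporary UniformQuantizationInfo whose scale is output_scale / scale, which folds the @p scale factor into the requantization step. A scalar model of a single lane (hypothetical helper, not part of the patch; rounding details simplified relative to vquantize_signed):

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // One-element model of the vectorized routine above.
    // Requantizing with scale (so / s) computes round(v * s / so) + oo in one step.
    int8_t pixel_mul_one(int8_t a, int8_t b, float s,   // s is the @p scale factor
                         float s1, int32_t o1,          // input1 (scale, offset)
                         float s2, int32_t o2,          // input2 (scale, offset)
                         float so, int32_t oo)          // output (scale, offset)
    {
        const float va = (a - o1) * s1; // dequantize input1
        const float vb = (b - o2) * s2; // dequantize input2
        const float v  = va * vb;       // multiply in float, as the NEON path does

        // Requantize using the temporary quantization info { so / s, oo }
        const int q = static_cast<int>(std::lround(v / (so / s))) + oo;

        // Saturate to the int8 range, as vquantize_signed does
        return static_cast<int8_t>(std::min(127, std::max(-128, q)));
    }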
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
index af3dd58059..fd54e42083 100644
--- a/tests/validation/NEON/PixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -128,8 +128,9 @@ void validate_configuration(TensorShape shape, DataType dt1, DataType dt2, float
 }
 } // namespace

-using NEPixelWiseMultiplicationQASYMM8Fixture = PixelWiseMultiplicationValidationQuantizedFixture<Tensor, Accessor, NEPixelWiseMultiplication, uint8_t, uint8_t>;
-using NEPixelWiseMultiplicationQSYMM16Fixture = PixelWiseMultiplicationValidationQuantizedFixture<Tensor, Accessor, NEPixelWiseMultiplication, int16_t, int16_t>;
+using NEPixelWiseMultiplicationQASYMM8Fixture       = PixelWiseMultiplicationValidationQuantizedFixture<Tensor, Accessor, NEPixelWiseMultiplication, uint8_t, uint8_t>;
+using NEPixelWiseMultiplicationQASYMM8SignedFixture = PixelWiseMultiplicationValidationQuantizedFixture<Tensor, Accessor, NEPixelWiseMultiplication, int8_t, int8_t>;
+using NEPixelWiseMultiplicationQSYMM16Fixture       = PixelWiseMultiplicationValidationQuantizedFixture<Tensor, Accessor, NEPixelWiseMultiplication, int16_t, int16_t>;
 template <typename T>
 using NEPixelWiseMultiplicationToU8Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, uint8_t>;
 template <typename T>
@@ -146,15 +147,20 @@ TEST_SUITE(PixelWiseMultiplication)

 // *INDENT-OFF*
 // clang-format off
-DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
-               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
-                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),      // Window shrink
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid scale
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching data type
-                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Mismatching data type
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
+               framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),             //1 Ok
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),             //2 Ok
+                                                        TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8),             //3 Window shrink
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),             //4 Invalid scale
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),             //5 Invalid data type combination
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),            //6 Mismatching shapes
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),            //7 Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),        //8 Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), //9 Ok
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), //10 Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),        //11 Mismatching data type
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),        //12 Ok
+                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), //13 Quantized cannot do WRAP
                                        }),
                framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
@@ -164,6 +170,11 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
                                        })),
                framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
@@ -173,18 +184,65 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                                                        TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
                                                        TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+                                                       TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
                                        })),
-               framework::dataset::make("Scale",{ scale_unity, scale_unity, scale_unity, -1.f, scale_unity, scale_unity, scale_unity})),
-               framework::dataset::make("Expected", { true, true, false, false, false, false, false, false })),
-               input1_info, input2_info, output_info, scale, expected)
+               framework::dataset::make("Scale",{ scale_unity,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  -1.f,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  scale_unity,
+                                                  scale_unity})),
+               framework::dataset::make("OverflowPolicy",{
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::SATURATE,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::WRAP,
+                                                  ConvertPolicy::SATURATE,
+                                                  ConvertPolicy::WRAP,
+                                                  })),
+
+               framework::dataset::make("Expected", { true, true, false, false, false, false, false, false, true, false, false, true, false })),
+               input1_info, input2_info, output_info, scale, policy, expected)
 {
-    bool has_error = bool(NEPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO));
+    bool has_error = bool(NEPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, policy, RoundingPolicy::TO_ZERO));
     ARM_COMPUTE_EXPECT(has_error == expected, framework::LogLevel::ERRORS);
 }
 // clang-format on
 // *INDENT-ON*

 TEST_SUITE(Quantized)
+TEST_SUITE(QASYMM8_SIGNED)
+TEST_SUITE(Scale255)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8SignedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+                       framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
+                       framework::dataset::make("Scale", { scale_unity })),
+                       PixelWiseMultiplicationPolicySTZDataset),
+                       PixelWiseMultiplicationQASYMM8QuantDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // Scale255
+TEST_SUITE_END() // QASYMM8_SIGNED
+
 TEST_SUITE(QASYMM8)
 TEST_SUITE(Scale255)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp
index d9895e5ed9..2b4c849c39 100644
--- a/tests/validation/reference/PixelWiseMultiplication.cpp
+++ b/tests/validation/reference/PixelWiseMultiplication.cpp
@@ -177,6 +177,34 @@ SimpleTensor<uint8_t> pixel_wise_multiplication(const SimpleTensor<uint8_t> &src
     return dst;
 }

+template <>
+SimpleTensor<int8_t> pixel_wise_multiplication(const SimpleTensor<int8_t> &src1, const SimpleTensor<int8_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
+                                               const QuantizationInfo &qout)
+{
+    SimpleTensor<int8_t> dst(TensorShape::broadcast_shape(src1.shape(), src2.shape()), src2.data_type(), 1, qout);
+
+    if(src1.data_type() == DataType::QASYMM8_SIGNED && src2.data_type() == DataType::QASYMM8_SIGNED)
+    {
+        SimpleTensor<float> src1_tmp = convert_from_asymmetric(src1);
+        SimpleTensor<float> src2_tmp = convert_from_asymmetric(src2);
+        SimpleTensor<float> dst_tmp  = pixel_wise_multiplication<float>(src1_tmp, src2_tmp, scale, convert_policy, rounding_policy, qout);
+        dst = convert_to_asymmetric<int8_t>(dst_tmp, qout);
+    }
+    else
+    {
+        if(scale < 0)
+        {
+            ARM_COMPUTE_ERROR("Scale of pixel-wise multiplication must be non-negative");
+        }
+
+        Coordinates id_src1{};
+        Coordinates id_src2{};
+        Coordinates id_dst{};
+
+        BroadcastUnroll<Coordinates::num_max_dimensions>::unroll(src1, src2, dst, scale, convert_policy, rounding_policy, id_src1, id_src2, id_dst);
+    }
+    return dst;
+}
+
 template <>
 SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy,
                                                 const QuantizationInfo &qout)
-- 
cgit v1.2.1
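To sanity-check the reference path added above (dequantize with convert_from_asymmetric, multiply in float, requantize against the output QuantizationInfo), a worked example with made-up quantization parameters:

    // input1: raw  6, scale 0.5,  offset  0  ->  (6 - 0)    * 0.5  = 3.0
    // input2: raw  4, scale 0.25, offset -4  ->  (4 - (-4)) * 0.25 = 2.0
    // float product with scale = 1.0         ->  3.0 * 2.0        = 6.0
    // output: scale 0.1, offset 0            ->  round(6.0 / 0.1) = 60
    //
    // The destination element is 60, well inside [-128, 127], so no saturation
    // occurs; a product outside that range would be clamped by the int8 cast.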