From 79fa9a22022824735986f74557bf38095eb2284d Mon Sep 17 00:00:00 2001
From: Manuel Bottini
Date: Fri, 22 Feb 2019 17:54:22 +0000
Subject: COMPMID-2009: Add support for QASYMM8 in NEPixelWiseMultiplicationKernel

Change-Id: I58536e945d069c96a065b82cc14960f54afc6e1a
Signed-off-by: Manuel Bottini
Reviewed-on: https://review.mlplatform.org/c/781
Tested-by: Arm Jenkins
Reviewed-by: Pablo Marquez
---
 .../NEON/kernels/NEPixelWiseMultiplicationKernel.h | 38 ++++++---
 .../NEON/functions/NEPixelWiseMultiplication.h     | 31 +++++---
 .../kernels/NEPixelWiseMultiplicationKernel.cpp    | 90 ++++++++++++++++++----
 tests/validation/CL/PixelWiseMultiplication.cpp    |  4 +-
 tests/validation/NEON/PixelWiseMultiplication.cpp  | 48 +++++++++++-
 .../fixtures/PixelWiseMultiplicationFixture.h      | 15 +++-
 .../reference/PixelWiseMultiplication.cpp          |  4 +-
 7 files changed, 182 insertions(+), 48 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
index 41ea91495f..2a8e36b1de 100644
--- a/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
+++ b/arm_compute/core/NEON/kernels/NEPixelWiseMultiplicationKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -56,12 +56,12 @@ public:
      * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
      *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
      *
-     * @param[in]  input1          An input tensor. Data types supported: U8/S16/F16/F32
-     * @param[in]  input2          An input tensor. Data types supported: U8, S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
-     * @param[out] output          The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @param[in]  input1          An input tensor. Data types supported: U8/QASYMM8/S16/F16/F32
+     * @param[in]  input2          An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[out] output          Output tensor. Data types supported: U8 (only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in]  scale           Scale to apply after multiplication.
      *                             Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in]  overflow_policy Overflow policy.
+     * @param[in]  overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
      * @param[in]  rounding_policy Rounding policy.
      */
     void configure(const ITensor *input1, const ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
@@ -70,12 +70,12 @@ public:
      * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
      *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
      *
-     * @param[in] input1          An input tensor. Data types supported: U8/S16/F16/F32
-     * @param[in] input2          An input tensor. Data types supported: U8, S16/F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
-     * @param[in] output          The output tensor. Data types supported: U8 (Only if both inputs are U8), S16/F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/S16/F16/F32
+     * @param[in] input2          An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[in] output          Output tensor info. Data types supported: U8 (only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in] scale           Scale to apply after multiplication.
      *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in] overflow_policy Overflow policy.
+     * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
      * @param[in] rounding_policy Rounding policy.
      *
      * @return a status
@@ -92,6 +92,7 @@ private:
      * @param[in]  input1_ptr Pointer to the first input tensor.
      * @param[in]  input2_ptr Pointer to the second input tensor.
      * @param[out] output_ptr Pointer to the output tensor.
+     * @param[in]  scale      Integer scale factor.
      */
     using MulFunctionInt = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int scale);
     /** Common signature for all the specialised multiplication functions with float scaling factor
@@ -99,11 +100,26 @@ private:
      * @param[in]  input1_ptr Pointer to the first input tensor.
      * @param[in]  input2_ptr Pointer to the second input tensor.
      * @param[out] output_ptr Pointer to the output tensor.
+     * @param[in]  scale      Float scale factor.
      */
     using MulFunctionFloat = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale);
+    /** Common signature for all the specialised QASYMM8 multiplication functions with float scaling factor
+     *
+     * @param[in]  input1_ptr      Pointer to the first input tensor.
+     * @param[in]  input2_ptr      Pointer to the second input tensor.
+     * @param[out] output_ptr      Pointer to the output tensor.
+     * @param[in]  scale           Float scale factor.
+     * @param[in]  input1_qua_info Quantization info of tensor input1.
+     * @param[in]  input2_qua_info Quantization info of tensor input2.
+     * @param[in]  output_qua_info Quantization info of tensor output.
+     */
+    using MulFunctionQASYMM8 = void(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
+                                    const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info);
 
-    MulFunctionFloat *_func_float;
-    MulFunctionInt   *_func_int;
+    MulFunctionFloat   *_func_float;
+    MulFunctionInt     *_func_int;
+    MulFunctionQASYMM8 *_func_qasymm8;
 
 private:
     const ITensor *_input1;

diff --git a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
index 371bb2e13e..869dd4e1d5 100644
--- a/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
+++ b/arm_compute/runtime/NEON/functions/NEPixelWiseMultiplication.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -37,24 +37,31 @@ class NEPixelWiseMultiplication : public INESimpleFunction
 public:
     /** Initialise the kernel's inputs, output and conversion policy.
      *
-     * @param[in, out] input1          An input tensor. Data types supported: U8/S16/F16/F32.
-     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[in, out] input2          An input tensor. Data types supported: same as @p input1.
-     *                                 The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
-     * @param[out]     output          Output tensor. Data types supported: U8/S16/F16/F32.
+     * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
+     *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+     *
+     * @param[in, out] input1          An input tensor. Data types supported: U8/QASYMM8/S16/F16/F32
+     *                                 This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[in, out] input2          An input tensor. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     *                                 This input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+     * @param[out]     output          Output tensor. Data types supported: U8 (only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
      * @param[in]      scale           Scale to apply after multiplication.
      *                                 Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
-     * @param[in]      overflow_policy Overflow policy.
+     * @param[in]      overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
      * @param[in]      rounding_policy Rounding policy.
      */
     void configure(ITensor *input1, ITensor *input2, ITensor *output, float scale, ConvertPolicy overflow_policy, RoundingPolicy rounding_policy);
     /** Static function to check if given info will lead to a valid configuration of @ref NEPixelWiseMultiplication
      *
-     * @param[in] input1          First tensor info input. Data types supported: U8/S16/F16/F32.
-     * @param[in] input2          Second tensor info input. Data types supported: U8/S16/F16/F32.
-     * @param[in] output          Output tensor info. Data types supported: U8/S16/F16/F32.
-     * @param[in] scale           Scale to apply after multiplication. Must be positive.
-     * @param[in] overflow_policy Overflow policy.
+     * @note For @p scale equal to 1/255 only round to nearest even (implemented as round half up) is supported.
+     *       For all other scale values only round to zero (implemented as round towards minus infinity) is supported.
+     *
+     * @param[in] input1          An input tensor info. Data types supported: U8/QASYMM8/S16/F16/F32
+     * @param[in] input2          An input tensor info. Data types supported: U8, QASYMM8 (only if @p input1 is QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if @p input1 is F32).
+     * @param[in] output          Output tensor info. Data types supported: U8 (only if both inputs are U8), QASYMM8 (only if both inputs are QASYMM8), S16, F16 (only if @p input1 is F16), F32 (only if both inputs are F32).
+     * @param[in] scale           Scale to apply after multiplication.
+     *                            Scale must be positive and its value must be either 1/255 or 1/2^n where n is between 0 and 15.
+     * @param[in] overflow_policy Overflow policy. ConvertPolicy cannot be WRAP if datatype is QASYMM8.
      * @param[in] rounding_policy Rounding policy.
     *
     * @return a status

diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
index a4f51436b4..e3166e02b6 100644
--- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
+++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -28,8 +28,10 @@
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/IAccessWindow.h"
 #include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
 #include "arm_compute/core/NEON/NEFixedPoint.h"
 #include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
 #include "arm_compute/core/Validate.h"
 
 #include <arm_neon.h>
@@ -42,12 +44,9 @@
 #include <arm_fp16.h> // needed for float16_t
 #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
 
-using namespace arm_compute;
-
 namespace arm_compute
 {
 class Coordinates;
-} // namespace arm_compute
 
 namespace
 {
@@ -63,15 +62,29 @@ inline Status validate_arguments(const ITensorInfo *input1, const ITensorInfo *i
     ARM_COMPUTE_UNUSED(rounding_policy);
 
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input1);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->data_type() == DataType::U8 && (input1->data_type() != DataType::U8 || input2->data_type() != DataType::U8),
                                     "Output can only be U8 if both inputs are U8");
 
-    const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
-    ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() == DataType::QASYMM8 && input2->data_type() != DataType::QASYMM8,
+                                    "Input2 must be QASYMM8 if input1 is QASYMM8");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input1->data_type() == DataType::QASYMM8 && input2->data_type() == DataType::QASYMM8 && overflow_policy == ConvertPolicy::WRAP,
+                                    "ConvertPolicy cannot be WRAP if datatype is QASYMM8");
+
+    if(output->total_size() > 0)
+    {
+        if(output->data_type() == DataType::QASYMM8)
+        {
+            ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2, output);
+        }
+
+        const TensorShape &out_shape = TensorShape::broadcast_shape(input1->tensor_shape(), input2->tensor_shape());
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output->tensor_shape(), 0), "Wrong shape for output");
+        ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
+    }
 
     if(std::abs(scale - scale255_constant) < 0.00001f)
     {
@@ -159,6 +172,34 @@ inline uint16x8_t scale255_U16_U16(uint16x8_t in)
     return vreinterpretq_u16_s16(vcombine_s16(vmovn_s32(tmp_s2), vmovn_s32(tmp_s1)));
 }
 
+void mul_saturate_QASYMM8_QASYMM8_QASYMM8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale,
+                                            const QuantizationInfo &input1_qua_info, const QuantizationInfo &input2_qua_info, const QuantizationInfo &output_qua_info)
+{
+    const auto input1 = static_cast<const qasymm8_t *__restrict>(input1_ptr);
+    const auto input2 = static_cast<const qasymm8_t *__restrict>(input2_ptr);
+    const auto output = static_cast<qasymm8_t *__restrict>(output_ptr);
+
+    const qasymm8x16_t input1_q = vld1q_u8(input1);
+    const qasymm8x16_t input2_q = vld1q_u8(input2);
+
+    // Dequantize inputs
+    const float32x4x4_t in1_f32x4x4 = vdequantize(input1_q, input1_qua_info);
+    const float32x4x4_t in2_f32x4x4 = vdequantize(input2_q, input2_qua_info);
+
+    const QuantizationInfo tmp_qua_info = QuantizationInfo(output_qua_info.scale / scale, output_qua_info.offset); // fold @p scale into the output quantization step
+
+    const float32x4x4_t out_f32x4x4 =
+    {
+        vmulq_f32(in1_f32x4x4.val[0], in2_f32x4x4.val[0]),
+        vmulq_f32(in1_f32x4x4.val[1], in2_f32x4x4.val[1]),
+        vmulq_f32(in1_f32x4x4.val[2], in2_f32x4x4.val[2]),
+        vmulq_f32(in1_f32x4x4.val[3], in2_f32x4x4.val[3])
+    };
+
+    const uint8x16_t result = vquantize(out_f32x4x4, tmp_qua_info); // requantize: round to nearest, saturate to [0, 255]
+    vst1q_u8(output, result);
+}
+
 template <bool is_scale255, bool is_sat>
 void mul_U8_U8_U8_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, int n)
 {
@@ -291,7 +332,6 @@ void mul_S16_S16_S16_n(const void *__restrict input1_ptr, const void *__restrict
     vst2q_s16(output, result);
 }
 
-template <bool is_scale255, bool is_sat>
 void mul_F32_F32_F32_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale)
 {
     const auto input1 = static_cast<const float *__restrict>(input1_ptr);
@@ -313,7 +353,6 @@ void mul_F32_F32_F32_n(const void *__restrict input1_ptr, const void *__restrict
     vst4q_f32(output, result);
 }
 
-template <bool is_scale255, bool is_sat>
 void mul_F16_F16_F16_n(const void *__restrict input1_ptr, const void *__restrict input2_ptr, void *__restrict output_ptr, float scale)
 {
 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -419,7 +458,7 @@ void mul_U8_S16_S16_n(const void *__restrict input1_ptr, const void *__restrict
 } // namespace
 
 NEPixelWiseMultiplicationKernel::NEPixelWiseMultiplicationKernel()
-    : _func_float(nullptr), _func_int(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _scale{ 0 }, _scale_exponent{ 0 }
+    : _func_float(nullptr), _func_int(nullptr), _func_qasymm8(nullptr), _input1(nullptr), _input2(nullptr), _output(nullptr), _scale{ 0 }, _scale_exponent{ 0 }
 {
 }
 
@@ -439,6 +478,7 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
     _output         = output;
     _scale          = scale;
     _scale_exponent = 0;
+    _func_qasymm8   = nullptr;
     _func_int       = nullptr;
     _func_float     = nullptr;
 
@@ -464,7 +504,11 @@ void NEPixelWiseMultiplicationKernel::configure(const ITensor *input1, const ITe
     const DataType dt_output = output->info()->data_type();
     const bool     is_sat    = (overflow_policy == ConvertPolicy::SATURATE);
 
-    if(DataType::U8 == dt_input1 && DataType::U8 == dt_input2 && DataType::U8 == dt_output)
+    if(dt_input1 == DataType::QASYMM8 && dt_input2 == DataType::QASYMM8)
+    {
+        _func_qasymm8 = &mul_saturate_QASYMM8_QASYMM8_QASYMM8_n;
+    }
+    else if(DataType::U8 == dt_input1 && DataType::U8 == dt_input2 && DataType::U8 == dt_output)
     {
         if(is_scale255)
         {
@@ -521,12 +565,12 @@
     }
    else if(DataType::F16 == 
dt_input2 && DataType::F16 == dt_output)
    {
-        _func_float = &mul_F16_F16_F16_n<is_scale255, is_sat>;
+        _func_float = &mul_F16_F16_F16_n;
         _func_int   = nullptr;
     }
     else if(DataType::F32 == dt_input1 && DataType::F32 == dt_input2 && DataType::F32 == dt_output)
     {
-        _func_float = &mul_F32_F32_F32_n<is_scale255, is_sat>;
+        _func_float = &mul_F32_F32_F32_n;
         _func_int   = nullptr;
     }
     else
@@ -581,7 +625,18 @@ void NEPixelWiseMultiplicationKernel::run(const Window &window, const ThreadInfo
     Iterator input2(_input2, slice_input2);
     Iterator output(_output, slice);
 
-    if(_func_int != nullptr)
+    if(_func_qasymm8 != nullptr)
+    {
+        execute_window_loop(collapsed, [&](const Coordinates & id)
+        {
+            (*_func_qasymm8)(input1.ptr(), input2.ptr(), output.ptr(), _scale,
+                             _input1->info()->quantization_info(), _input2->info()->quantization_info(), _output->info()->quantization_info());
+            collapsed.slide_window_slice_3D(slice_input1);
+            collapsed.slide_window_slice_3D(slice_input2);
+        },
+        input1, input2, output);
+    }
+    else if(_func_int != nullptr)
     {
         execute_window_loop(collapsed, [&](const Coordinates & id)
         {
@@ -610,3 +665,4 @@ BorderSize NEPixelWiseMultiplicationKernel::border_size() const
     const unsigned int border = std::min(num_elems_processed_per_iteration - 1U, replicateSize);
     return BorderSize(0, border, 0, 0);
 }
+} // namespace arm_compute

diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
index b61ec394ea..03ce4c9639 100644
--- a/tests/validation/CL/PixelWiseMultiplication.cpp
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -129,7 +129,7 @@ TEST_SUITE(QASYMM8)
 FIXTURE_DATA_TEST_CASE(RunSmall, CLPixelWiseMultiplicationQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(),
                        framework::dataset::make("DataType", DataType::QASYMM8)),
                        framework::dataset::make("Scale", { 1.f, 2.f })),
-                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                       framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
                        framework::dataset::make("RoundingPolicy", RoundingPolicy::TO_NEAREST_EVEN)),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })),
                        framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })),

diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp
index 77da473e0d..0cc97a2c26 100644
--- a/tests/validation/NEON/PixelWiseMultiplication.cpp
+++ b/tests/validation/NEON/PixelWiseMultiplication.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,6 +43,7 @@ const float scale_255 = 1.f / 255.f;
 const float scale_other = 1.f / 32768.f;
 
 #define DEFAULT_VALIDATE validate(Accessor(_target), _reference);
+#define QASYMM8_VALIDATE validate(Accessor(_target), _reference, AbsoluteTolerance<uint8_t>(1), 0.f);
 #define VALIDATE(TYPE, TOLERANCE) validate(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
 #define WRAP_VALIDATE(TYPE, TOLERANCE) validate_wrap(Accessor(_target), _reference, AbsoluteTolerance<TYPE>(TOLERANCE), 0.f);
 
@@ -74,6 +75,24 @@ const float scale_other = 1.f / 32768.f;
     { \
         VALIDATE \
     }
+
+#define PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(TEST_NAME, FIXTURE, MODE, SHAPES, DT1, DT2, SCALE, RP, VALIDATE) \
+    FIXTURE_DATA_TEST_CASE(TEST_NAME, NEPixelWiseMultiplication##FIXTURE, framework::DatasetMode::MODE, \
+                           combine(combine(combine(combine(combine(combine(combine(combine( \
+                           datasets::SHAPES, \
+                           framework::dataset::make("DataType1", DataType::DT1)), \
+                           framework::dataset::make("DataType2", DataType::DT2)), \
+                           framework::dataset::make("Scale", std::move(SCALE))), \
+                           framework::dataset::make("ConvertPolicy", ConvertPolicy::SATURATE)), \
+                           framework::dataset::make("RoundingPolicy", RoundingPolicy::RP)), \
+                           framework::dataset::make("QuantizationInfoIn1", QuantizationInfo(1.0, 0))), \
+                           framework::dataset::make("QuantizationInfoIn2", QuantizationInfo(1.0, 0))), \
+                           framework::dataset::make("QuantizationInfoOut", QuantizationInfo(100.0, 10)))) \
+    { \
+        VALIDATE \
+    }
+
+
 // *INDENT-ON*
 // clang-format on
 
@@ -105,6 +124,7 @@ void validate_configuration(TensorShape shape, DataType dt1, DataType dt2, float
 }
 } // namespace
 
+using NEPixelWiseMultiplicationToQASYMM8Fixture = PixelWiseMultiplicationQuatizedValidationFixture;
 template <typename T>
 using NEPixelWiseMultiplicationToU8Fixture = PixelWiseMultiplicationValidationFixture;
 template <typename T>
@@ -128,7 +148,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
                TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid scale
                TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),      // Invalid data type combination
                TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching shapes
-               TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching data type
+               TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),     // Mismatching data type
+               TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Mismatching data type
        }),
        framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
               TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
               TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
               TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
               TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+              TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
       })),
       framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
              TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
              TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
              TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
              TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+             TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
      })),
      framework::dataset::make("Scale",{ scale_unity, scale_unity, scale_unity, -1.f, scale_unity, scale_unity, scale_unity})),
-     framework::dataset::make("Expected", { true, true, false, false, false, false, false })),
framework::dataset::make("Expected", { true, true, false, false, false, false, false })), + framework::dataset::make("Expected", { true, true, false, false, false, false, false, false })), input1_info, input2_info, output_info, scale, expected) { bool has_error = bool(NEPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, ConvertPolicy::WRAP, RoundingPolicy::TO_ZERO)); @@ -156,6 +179,25 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( // clang-format on // *INDENT-ON* +TEST_SUITE(QASYMM8toQASYMM8) + +TEST_SUITE(Scale255) +PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1)) +PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_255, TO_NEAREST_UP, WRAP_VALIDATE(uint8_t, 1)) +TEST_SUITE_END() // Scale255 + +TEST_SUITE(ScaleUnity) +PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_unity, TO_ZERO, QASYMM8_VALIDATE) +PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_unity, TO_ZERO, QASYMM8_VALIDATE) +TEST_SUITE_END() // ScaleUnity + +TEST_SUITE(ScaleOther) +PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunSmall, ToQASYMM8Fixture, PRECOMMIT, SmallShapes(), QASYMM8, QASYMM8, scale_other, TO_ZERO, QASYMM8_VALIDATE) +PIXEL_WISE_MULTIPLICATION_FIXTURE_QASYMM8_DATA_TEST_CASE(RunLarge, ToQASYMM8Fixture, NIGHTLY, LargeShapes(), QASYMM8, QASYMM8, scale_other, TO_ZERO, QASYMM8_VALIDATE) +TEST_SUITE_END() // ScaleOther + +TEST_SUITE_END() // QASYMM8toQASYMM8 + TEST_SUITE(U8toU8) TEST_SUITE(Scale255) diff --git a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h index 9927b75032..efdf5d078e 100644 --- a/tests/validation/fixtures/PixelWiseMultiplicationFixture.h +++ b/tests/validation/fixtures/PixelWiseMultiplicationFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -121,6 +121,19 @@ protected: SimpleTensor _reference{}; }; +template +class PixelWiseMultiplicationQuatizedValidationFixture : public PixelWiseMultiplicationGenericValidationFixture +{ +public: + template + void setup(const TensorShape &shape, DataType dt_in1, DataType dt_in2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, + QuantizationInfo in1_qua_info, QuantizationInfo in2_qua_info, QuantizationInfo out_qua_info) + { + PixelWiseMultiplicationGenericValidationFixture::setup(shape, shape, dt_in1, dt_in2, scale, convert_policy, rounding_policy, + in1_qua_info, in2_qua_info, out_qua_info); + } +}; + template class PixelWiseMultiplicationValidationFixture : public PixelWiseMultiplicationGenericValidationFixture { diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp index d86f8aae0d..3470de241a 100644 --- a/tests/validation/reference/PixelWiseMultiplication.cpp +++ b/tests/validation/reference/PixelWiseMultiplication.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. 
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -18,7 +18,7 @@
  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * dst OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
 #include "PixelWiseMultiplication.h"
-- 
cgit v1.2.1
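
A note on the quantized path (an illustrative sketch, not part of the commit): mul_saturate_QASYMM8_QASYMM8_QASYMM8_n dequantizes 16 lanes of each input to float, multiplies lane-wise, and requantizes with the user-supplied scale folded into the output quantization step (the tmp_qua_info above). The scalar sketch below mirrors that arithmetic one element at a time. QuantInfo and mul_qasymm8_scalar are hypothetical names, not library API, and the usual asymmetric convention real = qscale * (q - qoffset) is assumed.

    #include <algorithm>
    #include <cmath>
    #include <cstdint>

    // Hypothetical stand-in for arm_compute::QuantizationInfo.
    struct QuantInfo
    {
        float qscale;  // quantization step
        int   qoffset; // zero point
    };

    // Scalar model of the kernel: dequantize, multiply, requantize.
    uint8_t mul_qasymm8_scalar(uint8_t a, uint8_t b, float scale,
                               const QuantInfo &in1, const QuantInfo &in2, const QuantInfo &out)
    {
        // Dequantize both operands: real = qscale * (q - qoffset).
        const float fa = in1.qscale * (static_cast<int>(a) - in1.qoffset);
        const float fb = in2.qscale * (static_cast<int>(b) - in2.qoffset);

        // Fold the user scale into the output step, as tmp_qua_info does in the
        // kernel: round(x / (s / scale)) == round(x * scale / s), so quantizing
        // with step s / scale is the same as scaling the product first.
        const float step = out.qscale / scale;

        // Requantize with round-to-nearest and saturation to [0, 255]; the
        // QASYMM8 path only accepts ConvertPolicy::SATURATE.
        const long q = std::lround(fa * fb / step) + out.qoffset;
        return static_cast<uint8_t>(std::min(std::max(q, 0L), 255L));
    }

For example, with both inputs quantized as { 0.5f, 0 }, an output of { 1.0f, 0 } and scale = 1.f, inputs a = b = 6 dequantize to 3.0 each, the product is 9.0, and the function returns 9. This also shows why the patch rejects ConvertPolicy::WRAP for QASYMM8: the requantization step always saturates.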