diff options
author | SiCong Li <sicong.li@arm.com> | 2020-08-28 11:18:47 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2020-09-09 12:01:59 +0000 |
commit | bb88f89b7a12e83eea2fc701f1f82aabf7dfcf7a (patch) | |
tree | dc9339328346fc539f45ee2b7b39a0786cadbc3a /tests/validation | |
parent | d64444ba197c2f95dcf4d205f50a196d5a29cdeb (diff) | |
download | ComputeLibrary-bb88f89b7a12e83eea2fc701f1f82aabf7dfcf7a.tar.gz |
COMPMID-3581 Add S32 support to NEPixelWiseMultiplication
* Add S32 support to NEPixelWiseMultiplication and NEPixelWiseMultiplicationKernel
* Scale == 1/255 is not supported for S32, as on non-aarch64 the
precision requirement is not met, and scale is a non-standard
parameter anyway.
* Fix the data type validation logic to also test for all invalid data
type combinations.
* Add validation tests for S32 NEON PixelWiseMultiplication
* The wrap tolerance for ScaleOther (scale == 1/2^n) cases is set to
1 instead of 0 because the reference uses floating point division
followed by rounding, which isn't bit accurate.
Change-Id: I28839afda7a4f98c985d1763620e08d98f740142
Signed-off-by: SiCong Li <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3923
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests/validation')
-rw-r--r-- | tests/validation/NEON/PixelWiseMultiplication.cpp | 31 | ||||
-rw-r--r-- | tests/validation/reference/PixelWiseMultiplication.cpp | 87 |
2 files changed, 112 insertions, 6 deletions
diff --git a/tests/validation/NEON/PixelWiseMultiplication.cpp b/tests/validation/NEON/PixelWiseMultiplication.cpp index 0b88628912..a66f6f192f 100644 --- a/tests/validation/NEON/PixelWiseMultiplication.cpp +++ b/tests/validation/NEON/PixelWiseMultiplication.cpp @@ -111,6 +111,8 @@ using NEPixelWiseMultiplicationToU8Fixture = PixelWiseMultiplicationValidationFi template <typename T> using NEPixelWiseMultiplicationToS16Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, int16_t>; template <typename T> +using NEPixelWiseMultiplicationToS32Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, int32_t>; +template <typename T> using NEPixelWiseMultiplicationToF16Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, half_float::half>; template <typename T> using NEPixelWiseMultiplicationToF32Fixture = PixelWiseMultiplicationValidationFixture<Tensor, Accessor, NEPixelWiseMultiplication, T, float>; @@ -139,6 +141,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), //11 Mismatching data type TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), //12 Ok TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), //13 Quantized cannot do WRAP + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), //14 S32 does not support scale255 }), framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), @@ -153,6 +156,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), })), 
framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), @@ -160,13 +164,14 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), })), framework::dataset::make("Scale",{ scale_unity, scale_unity, @@ -180,7 +185,8 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( scale_unity, scale_unity, scale_unity, - scale_unity})), + scale_unity, + scale_255})), framework::dataset::make("OverflowPolicy",{ ConvertPolicy::WRAP, ConvertPolicy::WRAP, @@ -195,9 +201,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( ConvertPolicy::WRAP, ConvertPolicy::SATURATE, ConvertPolicy::WRAP, + ConvertPolicy::SATURATE, })), - framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, true , false, false, true, false })), + framework::dataset::make("Expected", { true, true, true, false, false, false, false, false, true , false, false, true, false, false})), input1_info, input2_info, output_info, scale, policy, expected) { bool has_error = bool(NEPixelWiseMultiplication::validate(&input1_info.clone()->set_is_resizable(false), 
&input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), scale, policy, RoundingPolicy::TO_ZERO)); @@ -260,7 +267,7 @@ TEST_SUITE_END() // InPlaceValidate TEST_SUITE(Quantized) TEST_SUITE(QASYMM8_SIGNED) -TEST_SUITE(Scale255) +TEST_SUITE(ScaleUnity) FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(combine(combine(combine(datasets::SmallShapes(), framework::dataset::make("DataTypeIn1", DataType::QASYMM8_SIGNED)), framework::dataset::make("DataTypeIn2", DataType::QASYMM8_SIGNED)), @@ -273,8 +280,8 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEPixelWiseMultiplicationQASYMM8SignedFixture, // Validate output validate(Accessor(_target), _reference, tolerance_qasymm8); } -TEST_SUITE_END() // Scale255 -TEST_SUITE_END() // QASYMM8 +TEST_SUITE_END() // ScaleUnity +TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE(QASYMM8) TEST_SUITE(Scale255) @@ -476,6 +483,18 @@ TEST_SUITE_END() // ScaleOther TEST_SUITE_END() // S16toS16 +TEST_SUITE(S32toS32) + +TEST_SUITE(ScaleUnity) +PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS32Fixture<int32_t>, ALL, SmallShapes(), S32, S32, S32, scale_unity, TO_ZERO, InPlaceDataSet, WRAP_VALIDATE(int32_t, 1)) +TEST_SUITE_END() // ScaleUnity + +TEST_SUITE(ScaleOther) +PIXEL_WISE_MULTIPLICATION_FIXTURE_DATA_TEST_CASE(RunSmall, ToS32Fixture<int32_t>, ALL, SmallShapes(), S32, S32, S32, scale_other, TO_ZERO, InPlaceDataSet, WRAP_VALIDATE(int32_t, 1)) +TEST_SUITE_END() // ScaleOther + +TEST_SUITE_END() // S32toS32 + #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC TEST_SUITE(F16toF16) diff --git a/tests/validation/reference/PixelWiseMultiplication.cpp b/tests/validation/reference/PixelWiseMultiplication.cpp index 9f70b1c2af..0450991f61 100644 --- a/tests/validation/reference/PixelWiseMultiplication.cpp +++ b/tests/validation/reference/PixelWiseMultiplication.cpp @@ -43,6 +43,8 @@ struct is_floating_point namespace { +constexpr 
float scale1_constant = 1.f; + /** Compute the result of `src1 * src2 * scale`. The result type always matches the type of @p src2. * * @param[in] src1 An input value. Data types supported: U8/S16/F16/F32. @@ -89,6 +91,90 @@ T3 mul(const T1 src1, const T2 src2, float scale, ConvertPolicy convert_policy, } } +template <> +int32_t mul(const int32_t src1, const int32_t src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy) +{ + const int64_t intermediate_val = static_cast<int64_t>(src1) * static_cast<int64_t>(src2); + + if(std::abs(scale - scale1_constant) < 0.00001f) + { + // Use bit-accurate integer arithmetic for scale == 1 + // Apply conversion + if(convert_policy == ConvertPolicy::SATURATE) + { + return saturate_cast<int32_t>(intermediate_val); + } + else + { + // Correct wrapping behaviour for int32_t + const auto i32_hi = static_cast<int64_t>(std::numeric_limits<int32_t>::max()); + const auto i32_lo = static_cast<int64_t>(std::numeric_limits<int32_t>::lowest()); + const auto i32_wi = static_cast<int64_t>(1) << 32; + int64_t wrapped_rounded_val = intermediate_val - i32_wi * static_cast<int64_t>(support::cpp11::trunc(static_cast<double>(intermediate_val) / i32_wi)); + if(wrapped_rounded_val <= i32_hi) + { + return static_cast<int32_t>(wrapped_rounded_val); + } + else + { + // Values beyond i32_hi wrap around to negatives + return static_cast<int32_t>((wrapped_rounded_val - i32_hi) + i32_lo - 1); + } + } + } + else + { + // Use double arithmetic for scale != 1; may not be bit-accurate + // Apply scaling + // scale == 1 / 2^scale_exponent + int scale_exponent = 0; + std::frexp(scale, &scale_exponent); + // Store the positive exponent. 
We know that we compute 1/2^n + // Additionally we need to subtract 1 to compensate that frexp used a mantissa of 0.5 + scale_exponent = std::abs(scale_exponent - 1); + const double scale_inv = static_cast<int64_t>(1) << scale_exponent; + const double val = intermediate_val / scale_inv; + // Apply rounding + double rounded_val = 0; + switch(rounding_policy) + { + case(RoundingPolicy::TO_ZERO): + rounded_val = support::cpp11::trunc(val); + break; + case(RoundingPolicy::TO_NEAREST_UP): + rounded_val = round_half_up(val); + break; + case(RoundingPolicy::TO_NEAREST_EVEN): + rounded_val = round_half_even(val); + break; + default: + ARM_COMPUTE_ERROR("Unsupported rounding policy"); + } + // Apply conversion + if(convert_policy == ConvertPolicy::SATURATE) + { + return saturate_cast<int32_t>(rounded_val); + } + else + { + // Correct wrapping behaviour for int32_t + const auto i32_hi = static_cast<double>(std::numeric_limits<int32_t>::max()); + const auto i32_lo = static_cast<double>(std::numeric_limits<int32_t>::lowest()); + const auto i32_wi = static_cast<double>(static_cast<int64_t>(1) << 32); + double wrapped_rounded_val = rounded_val - i32_wi * std::floor(rounded_val / i32_wi); + if(wrapped_rounded_val <= i32_hi) + { + return static_cast<int32_t>(wrapped_rounded_val); + } + else + { + // Values beyond i32_hi wrap around to negatives + return static_cast<int32_t>((wrapped_rounded_val - i32_hi) + i32_lo - 1); + } + } + } +} + template <size_t dim> struct BroadcastUnroll { @@ -264,6 +350,7 @@ SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<int16_t> &src // clang-format off template SimpleTensor<int16_t> pixel_wise_multiplication(const SimpleTensor<uint8_t> &src1, const SimpleTensor<int16_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, DataType dt_out, const QuantizationInfo &qout); template SimpleTensor<int32_t> pixel_wise_multiplication(const SimpleTensor<int16_t> &src1, const SimpleTensor<int16_t> &src2, float 
scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, DataType dt_out, const QuantizationInfo &qout); +template SimpleTensor<int32_t> pixel_wise_multiplication(const SimpleTensor<int32_t> &src1, const SimpleTensor<int32_t> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, DataType dt_out, const QuantizationInfo &qout); template SimpleTensor<float> pixel_wise_multiplication(const SimpleTensor<float> &src1, const SimpleTensor<float> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, DataType dt_out, const QuantizationInfo &qout); template SimpleTensor<half_float::half> pixel_wise_multiplication(const SimpleTensor<half_float::half> &src1, const SimpleTensor<half_float::half> &src2, float scale, ConvertPolicy convert_policy, RoundingPolicy rounding_policy, DataType dt_out, const QuantizationInfo &qout); // clang-format on |