diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2022-09-11 15:59:19 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2022-09-16 12:38:59 +0000 |
commit | c4f2743951473f8d97f5a43767fdbb31a4df967c (patch) | |
tree | 26c49b1af8113bb169931f3af5e502904d455a25 /tests/validation | |
parent | 0d05b6690fe69c57f63ca43d59b551f074613062 (diff) | |
download | ComputeLibrary-c4f2743951473f8d97f5a43767fdbb31a4df967c.tar.gz |
Optimize Quantized/Integer Bilinear Scale for Neon™
This patch introduces several performance optimizations regarding the Bilinear Scale operator with REPLICATE Border mode. Changes apply only to NHWC.
This patch
- Reduces the memory footprint by disabling precomputation of indices and weights when they're not used
- Rewrites the kernels for QASYMM8/QASYMM8_SIGNED/U8(Uint8)
- Adds S8(Int8) Bilinear Scale for Border mode REPLICATE
- Removes Bilinear Scale SVE kernels for Quantized and Integer types and adjust the heuristics to choose the Neon™ implementation
- Adds new test cases where the input and output of the Bilinear Scale operator have different quantization scale and offset
Resolves: COMPMID-5453, COMPMID-5454
Change-Id: I3d251e76e0c6978fd5a0a1795ec62ab536bec93c
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8250
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests/validation')
-rw-r--r-- | tests/validation/NEON/Scale.cpp | 72 | ||||
-rw-r--r-- | tests/validation/fixtures/ScaleFixture.h | 63 | ||||
-rw-r--r-- | tests/validation/reference/Scale.cpp | 20 | ||||
-rw-r--r-- | tests/validation/reference/Scale.h | 4 |
4 files changed, 110 insertions, 49 deletions
diff --git a/tests/validation/NEON/Scale.cpp b/tests/validation/NEON/Scale.cpp index e386d804ca..9927e21490 100644 --- a/tests/validation/NEON/Scale.cpp +++ b/tests/validation/NEON/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,16 +22,10 @@ * SOFTWARE. */ #include "arm_compute/core/Helpers.h" -#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/functions/NEScale.h" -#include "arm_compute/runtime/Tensor.h" -#include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" -#include "tests/PaddingCalculator.h" #include "tests/datasets/ScaleValidationDataset.h" -#include "tests/framework/Asserts.h" #include "tests/framework/Macros.h" -#include "tests/validation/Helpers.h" #include "tests/validation/Validation.h" #include "tests/validation/fixtures/ScaleFixture.h" @@ -51,7 +45,7 @@ using datasets::ScaleAlignCornersSamplingPolicySet; /** We consider vector size in byte 64 since the maximum size of * a vector used by the kernel is currently 64-byte (float32x4x4). - * There are possibility to reduce test time further by using + * There is possibility to reduce test time further by using * smaller vector sizes for different data types where applicable. */ constexpr uint32_t vector_byte = 64; @@ -62,26 +56,31 @@ constexpr uint32_t num_elements_per_vector() return vector_byte / sizeof(T); } -/** Scale data types */ -const auto ScaleDataTypes = framework::dataset::make("DataType", +/** Quantization information data set */ +const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo", { - DataType::U8, - DataType::S16, - DataType::F32, + QuantizationInfo(0.5f, -10), }); /** Quantization information data set */ -const auto QuantizationInfoSet = framework::dataset::make("QuantizationInfo", +const auto InputQuantizationInfoSet = framework::dataset::make("InputQuantizationInfo", { QuantizationInfo(0.5f, -10), }); +/** Quantization information data set */ +const auto OutputQuantizationInfoSet = framework::dataset::make("OutputQuantizationInfo", +{ + QuantizationInfo(0.2f, 20), +}); + /** Tolerance */ constexpr AbsoluteTolerance<uint8_t> tolerance_u8(1); +constexpr AbsoluteTolerance<int8_t> tolerance_s8(1); constexpr AbsoluteTolerance<int16_t> tolerance_s16(1); RelativeTolerance<float> tolerance_f32(0.05); #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC -constexpr float abs_tolerance_f16(0.01f); +constexpr float abs_tolerance_f16(0.01f); RelativeTolerance<half> tolerance_f16(half(0.1)); #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ @@ -321,6 +320,8 @@ using NEScaleMixedDataLayoutFixture = ScaleValidationFixture<Tensor, Accessor, N template <typename T> using NEScaleQuantizedFixture = ScaleValidationQuantizedFixture<Tensor, Accessor, NEScale, T>; template <typename T> +using NEScaleDifferentOutputQuantizedFixture = ScaleValidationDifferentOutputQuantizedFixture<Tensor, Accessor, NEScale, T>; +template <typename T> using NEScaleQuantizedMixedDataLayoutFixture = ScaleValidationQuantizedFixture<Tensor, Accessor, NEScale, T, true>; TEST_SUITE(Float) @@ -457,6 +458,27 @@ FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<uint8_t>, framework: validate(Accessor(_target), _reference, valid_region, tolerance_u8); } TEST_SUITE_END() // U8 +TEST_SUITE(S8) +const auto s8_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<int8_t>())), framework::dataset::make("DataType", DataType::S8)); +FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_S8_DATASET(s8_shape, ScaleSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_s8); +} +FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_S8_DATASET(s8_shape, ScaleAlignCornersSamplingPolicySet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_s8); +} +TEST_SUITE_END() // S8 TEST_SUITE(S16) const auto s16_shape = combine((SCALE_SHAPE_DATASET(num_elements_per_vector<int16_t>())), framework::dataset::make("DataType", DataType::S16)); FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleFixture<int16_t>, framework::DatasetMode::ALL, ASSEMBLE_DATASET(s16_shape, ScaleSamplingPolicySet)) @@ -492,6 +514,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<uint8_t>, framework::Da // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_u8); } +FIXTURE_DATA_TEST_CASE(RunSmallDifferentOutputQuantization, NEScaleDifferentOutputQuantizedFixture<uint8_t>, framework::DatasetMode::ALL, + ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(qasymm8_shape, ScaleSamplingPolicySet, InputQuantizationInfoSet, OutputQuantizationInfoSet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_u8); +} FIXTURE_DATA_TEST_CASE(RunMixedDataLayout, NEScaleQuantizedMixedDataLayoutFixture<uint8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_shape, ScaleSamplingPolicySet, QuantizationInfoSet)) { @@ -525,6 +557,16 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEScaleQuantizedFixture<int8_t>, framework::Dat // Validate output validate(Accessor(_target), _reference, valid_region, tolerance_qasymm8_signed); } +FIXTURE_DATA_TEST_CASE(RunSmallDifferentOutputQuantization, NEScaleDifferentOutputQuantizedFixture<int8_t>, framework::DatasetMode::ALL, + ASSEMBLE_DIFFERENTLY_QUANTIZED_DATASET(qasymm8_signed_shape, ScaleSamplingPolicySet, InputQuantizationInfoSet, OutputQuantizationInfoSet)) +{ + //Create valid region + TensorInfo src_info(_shape, 1, _data_type); + ValidRegion valid_region = calculate_valid_region_scale(src_info, _reference.shape(), _policy, _sampling_policy, (_border_mode == BorderMode::UNDEFINED)); + + // Validate output + validate(Accessor(_target), _reference, valid_region, tolerance_qasymm8_signed); +} FIXTURE_DATA_TEST_CASE(RunSmallAlignCorners, NEScaleQuantizedFixture<int8_t>, framework::DatasetMode::ALL, ASSEMBLE_QUANTIZED_DATASET(qasymm8_signed_shape, ScaleAlignCornersSamplingPolicySet, QuantizationInfoSet)) { diff --git a/tests/validation/fixtures/ScaleFixture.h b/tests/validation/fixtures/ScaleFixture.h index c0b44bcb5f..72feb62016 100644 --- a/tests/validation/fixtures/ScaleFixture.h +++ b/tests/validation/fixtures/ScaleFixture.h @@ -24,12 +24,6 @@ #ifndef ARM_COMPUTE_TEST_SCALE_FIXTURE #define ARM_COMPUTE_TEST_SCALE_FIXTURE -#include "arm_compute/core/TensorShape.h" -#include "arm_compute/core/Types.h" -#include "tests/AssetsLibrary.h" -#include "tests/Globals.h" -#include "tests/IAccessor.h" -#include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/Permute.h" #include "tests/validation/reference/Scale.h" @@ -46,16 +40,17 @@ class ScaleValidationGenericFixture : public framework::Fixture public: template <typename...> void setup(TensorShape shape, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout, InterpolationPolicy policy, BorderMode border_mode, SamplingPolicy sampling_policy, - bool align_corners, bool mixed_layout) + bool align_corners, bool mixed_layout, QuantizationInfo output_quantization_info) { - _shape = shape; - _policy = policy; - _border_mode = border_mode; - _sampling_policy = sampling_policy; - _data_type = data_type; - _quantization_info = quantization_info; - _align_corners = align_corners; - _mixed_layout = mixed_layout; + _shape = shape; + _policy = policy; + _border_mode = border_mode; + _sampling_policy = sampling_policy; + _data_type = data_type; + _input_quantization_info = quantization_info; + _output_quantization_info = output_quantization_info; + _align_corners = align_corners; + _mixed_layout = mixed_layout; generate_scale(shape); @@ -144,7 +139,7 @@ protected: } // Create tensors - TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _quantization_info, data_layout); + TensorType src = create_tensor<TensorType>(shape, _data_type, 1, _input_quantization_info, data_layout); const int idx_width = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int idx_height = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); @@ -152,7 +147,7 @@ protected: TensorShape shape_scaled(shape); shape_scaled.set(idx_width, shape[idx_width] * _scale_x, /* apply_dim_correction = */ false); shape_scaled.set(idx_height, shape[idx_height] * _scale_y, /* apply_dim_correction = */ false); - TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, _quantization_info, data_layout); + TensorType dst = create_tensor<TensorType>(shape_scaled, _data_type, 1, _output_quantization_info, data_layout); // Create and configure function FunctionType scale; @@ -188,12 +183,12 @@ protected: SimpleTensor<T> compute_reference(const TensorShape &shape) { // Create reference - SimpleTensor<T> src{ shape, _data_type, 1, _quantization_info }; + SimpleTensor<T> src{ shape, _data_type, 1, _input_quantization_info }; // Fill reference fill(src); - return reference::scale<T>(src, _scale_x, _scale_y, _policy, _border_mode, _constant_border_value, _sampling_policy, /* ceil_policy_scale */ false, _align_corners); + return reference::scale<T>(src, _scale_x, _scale_y, _policy, _border_mode, _constant_border_value, _sampling_policy, /* ceil_policy_scale */ false, _align_corners, _output_quantization_info); } TensorType _target{}; @@ -204,7 +199,8 @@ protected: T _constant_border_value{}; SamplingPolicy _sampling_policy{}; DataType _data_type{}; - QuantizationInfo _quantization_info{}; + QuantizationInfo _input_quantization_info{}; + QuantizationInfo _output_quantization_info{}; bool _align_corners{ false }; bool _mixed_layout{ false }; float _scale_x{ 1.f }; @@ -227,7 +223,29 @@ public: border_mode, sampling_policy, align_corners, - mixed_layout); + mixed_layout, + quantization_info); + } +}; +template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> +class ScaleValidationDifferentOutputQuantizedFixture : public ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T> +{ +public: + template <typename...> + void setup(TensorShape shape, DataType data_type, QuantizationInfo input_quantization_info, QuantizationInfo output_quantization_info, DataLayout data_layout, InterpolationPolicy policy, + BorderMode border_mode, SamplingPolicy sampling_policy, + bool align_corners) + { + ScaleValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(shape, + data_type, + input_quantization_info, + data_layout, + policy, + border_mode, + sampling_policy, + align_corners, + mixed_layout, + output_quantization_info); } }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, bool mixed_layout = false> @@ -245,7 +263,8 @@ public: border_mode, sampling_policy, align_corners, - mixed_layout); + mixed_layout, + QuantizationInfo()); } }; } // namespace validation diff --git a/tests/validation/reference/Scale.cpp b/tests/validation/reference/Scale.cpp index 71e98fd776..2f429cb29b 100644 --- a/tests/validation/reference/Scale.cpp +++ b/tests/validation/reference/Scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -25,7 +25,6 @@ #include "Scale.h" #include "Utils.h" -#include "arm_compute/core/utils/misc/Utility.h" #include "src/core/utils/ScaleUtils.h" #include "support/Rounding.h" @@ -183,14 +182,15 @@ SimpleTensor<T> scale_core(const SimpleTensor<T> &in, float scale_x, float scale template <typename T> SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { + ARM_COMPUTE_UNUSED(output_quantization_info); return scale_core<T>(src, scale_x, scale_y, policy, border_mode, constant_border_value, sampling_policy, ceil_policy_scale, align_corners); } template <> SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, uint8_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { SimpleTensor<uint8_t> dst; if(src.quantization_info().uniform().scale != 0.f) @@ -198,7 +198,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo SimpleTensor<float> src_tmp = convert_from_asymmetric(src); float constant_border_value_f = dequantize_qasymm8(constant_border_value, src.quantization_info()); SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners); - dst = convert_to_asymmetric<uint8_t>(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric<uint8_t>(dst_tmp, output_quantization_info); } else { @@ -209,7 +209,7 @@ SimpleTensor<uint8_t> scale(const SimpleTensor<uint8_t> &src, float scale_x, flo template <> SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int8_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners) + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info) { SimpleTensor<int8_t> dst; if(src.quantization_info().uniform().scale != 0.f) @@ -217,7 +217,7 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float SimpleTensor<float> src_tmp = convert_from_asymmetric(src); float constant_border_value_f = dequantize_qasymm8_signed(constant_border_value, src.quantization_info()); SimpleTensor<float> dst_tmp = scale_core<float>(src_tmp, scale_x, scale_y, policy, border_mode, constant_border_value_f, sampling_policy, ceil_policy_scale, align_corners); - dst = convert_to_asymmetric<int8_t>(dst_tmp, src.quantization_info()); + dst = convert_to_asymmetric<int8_t>(dst_tmp, output_quantization_info); } else { @@ -227,11 +227,11 @@ SimpleTensor<int8_t> scale(const SimpleTensor<int8_t> &src, float scale_x, float } template SimpleTensor<int16_t> scale(const SimpleTensor<int16_t> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, int16_t constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); template SimpleTensor<half> scale(const SimpleTensor<half> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, half constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); template SimpleTensor<float> scale(const SimpleTensor<float> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, float constant_border_value, - SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners); + SamplingPolicy sampling_policy, bool ceil_policy_scale, bool align_corners, QuantizationInfo output_quantization_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/Scale.h b/tests/validation/reference/Scale.h index c66af8d94e..c32c07d1c0 100644 --- a/tests/validation/reference/Scale.h +++ b/tests/validation/reference/Scale.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -37,7 +37,7 @@ namespace reference { template <typename T> SimpleTensor<T> scale(const SimpleTensor<T> &src, float scale_x, float scale_y, InterpolationPolicy policy, BorderMode border_mode, T constant_border_value = 0, - SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false); + SamplingPolicy sampling_policy = SamplingPolicy::CENTER, bool ceil_policy_scale = false, bool align_corners = false, QuantizationInfo output_quantization_info = QuantizationInfo()); } // namespace reference } // namespace validation } // namespace test |