From a668f9f8a4eab405df0fe8dd58e7d9425bcf9640 Mon Sep 17 00:00:00 2001
From: Jonathan Deakin
Date: Wed, 24 Jan 2024 09:15:38 +0000
Subject: Add s8f32 kernels and dynamic QuantizationInfo

- Add support for QASYMM8_SIGNED*QASYMM8_SIGNED->F32 in CpuGemmLowpMatrixMultiplyCore.
- Add an s8f32 kernel that reuses the existing s8->s32 kernels with a new DequantizeFloat OutputStage; its structure is similar to Requantize32, but it works in the opposite direction.
- Add SME s8f32 kernels with integrated support for DequantizeFloat.
- Add scale to CpuGemmLowpOffsetContributionKernel.
- Add a virtual dequantize scale to gemm_common, currently only implemented for gemm_interleaved.
- Update year to 2024 in generate_build_files.
- Add a dynamic flag to QuantizationInfo which signals to operators that it can change after configuration.
- Add support for dynamic quantization in NEGEMMLowpMatrixMultiplyCore.
- Add a dynamic quantization fixture by extending GEMMLowpGenericMatrixMultiplyCoreValidationFixture.
- Add GEMMLowpDequantizedMatrixMultiplyValidationFixture.
- Store k (the number of columns of A) rather than k_offset in the offset contribution kernels, so that it can be recomputed when the other offsets change.

Relates to: ONCPUML-1444, MLINFSW-439

Co-authored-by: Milos Puzovic
Co-authored-by: David Mansell
Change-Id: I58a3acf2c09289a303e52eea6b336a696a5bc8da
Signed-off-by: Jonathan Deakin
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11022
Reviewed-by: Gunes Bayir
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 tests/validation/NEON/GEMMLowp.cpp               |  34 +++++++
 tests/validation/fixtures/GEMMLowpFixture.h      | 121 +++++++++++++++++++----
 tests/validation/reference/QuantizationLayer.cpp |  12 ++-
 3 files changed, 146 insertions(+), 21 deletions(-)

(limited to 'tests')

diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp
index 514d156983..1231c21ddb 100644
--- a/tests/validation/NEON/GEMMLowp.cpp
+++ b/tests/validation/NEON/GEMMLowp.cpp
@@ -53,6 +53,7 @@ namespace
 {
 constexpr AbsoluteTolerance tolerance_batched(1);
 constexpr AbsoluteTolerance tolerance_quant(1);
+constexpr AbsoluteTolerance tolerance_dequantized(0.01f);
 } // namespace
 
@@ -62,6 +63,11 @@ TEST_SUITE(MatrixMultiplyCore)
 using NEGEMMLowpMatrixMultiplyCoreFixture = GEMMLowpMatrixMultiplyCoreValidationFixture;
 using NEGEMMLowpMatrixMultiplyCoreAccumulateFixture = GEMMLowpMatrixMultiplyAccumulateValidationFixture;
+using NEGEMMLowpBatchedMatMulFixture = GEMMLowpMatrixMultiplyCoreValidationFixture;
+using NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture = GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture;
+using NEGEMMLowpDequantizedMatrixMultiplyValidationFixture = GEMMLowpDequantizedMatrixMultiplyValidationFixture;
+
+using framework::dataset::make;
 
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallGEMMLowpDataset(), datasets::LargeGEMMLowpDataset()),
                shape_a, shape_b, shape_c, a_offset, b_offset)
@@ -337,6 +343,34 @@ TEST_SUITE_END() // S32
 TEST_SUITE_END() // ACCUMULATION
 #endif // __arch64__
 
+TEST_SUITE(DynamicQuantization)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // DynamicQuantization
+
+TEST_SUITE(Dequant)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_dequantized);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset())
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_dequantized);
+}
+TEST_SUITE_END() // Dequant
+
 TEST_SUITE_END() // MatrixMultiplyCore
 TEST_SUITE_END() // GEMMLowp
 TEST_SUITE_END() // NEON
diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h
index 0f6908a468..11a491faa7 100644
--- a/tests/validation/fixtures/GEMMLowpFixture.h
+++ b/tests/validation/fixtures/GEMMLowpFixture.h
@@ -31,6 +31,7 @@
 #include "tests/validation/Validation.h"
 #include "tests/validation/reference/GEMMLowp.h"
 #include "tests/validation/reference/ArithmeticOperations.h"
+#include "tests/validation/reference/QuantizationLayer.h"
 #include 
 #include 
 
@@ -57,11 +58,21 @@ void fill_quantized(U &&tensor, int i)
 }
 
 template 
-void fill_s32(U &&tensor, int i, int32_t min, int32_t max)
+void fill(U &&tensor, int i, int32_t min, int32_t max)
 {
-    ARM_COMPUTE_ASSERT(tensor.data_type() == DataType::S32);
-    std::uniform_int_distribution distribution(min, max);
-    library->fill(tensor, distribution, i);
+    if (tensor.data_type() == DataType::S32) {
+        std::uniform_int_distribution distribution(min, max);
+        library->fill(tensor, distribution, i);
+    }
+    else if(tensor.data_type() == DataType::F32)
+    {
+        std::uniform_real_distribution distribution((float)min, (float)max);
+        library->fill(tensor, distribution, i);
+    }
+    else
+    {
+        ARM_COMPUTE_ERROR("NOT SUPPORTED!");
+    }
 }
 
 /** Information about how to fill tensors */
@@ -83,22 +94,25 @@ template 
-    TensorType a = create_tensor(shape_a, data_type_a, 1, a_qinfo);
-    TensorType b = create_tensor(shape_b, data_type_b, 1, b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
+    // Create tensors
+    TensorType a = create_tensor(shape_a, data_type_a, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : a_qinfo);
+    TensorType b = create_tensor(shape_b, data_type_b, 1, dynamic_qinfo ? QuantizationInfo(1.0,0,true) : b_qinfo); // gemm output before output stage mismatch if i pass data_layout_output here. to be investigated
     TensorType output = create_tensor(shape_output, data_type_output, 1, output_qinfo /* output_qinfo will be ignored when output stage type is None */);
 
     TensorType bias;
     if(is_fused)
     {
         TensorShape bias_shape(shape_b[0]);
-        bias = create_tensor(bias_shape, DataType::S32, 1);
+        bias = create_tensor(bias_shape,data_type_output == DataType::F32 ? DataType::F32 : DataType::S32, 1);
     }
 
     // Create and configure function
@@ -109,6 +123,13 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
                             arm_compute::ActivationLayerInfo(), false /* fixed_format */, arm_compute::WeightFormat::UNSPECIFIED,
                             false /* pretranspose_B */, accumulate));
 
+    // If the QuantizationInfo is dynamic, it needs to be settable after configure (note that we also force it to be dynamic)
+    if (dynamic_qinfo)
+    {
+        a.info()->set_quantization_info(QuantizationInfo(a_qinfo.scale(), a_qinfo.offset(), true));
+        b.info()->set_quantization_info(QuantizationInfo(b_qinfo.scale(), b_qinfo.offset(), true));
+    }
+
     ARM_COMPUTE_ASSERT(a.info()->is_resizable());
     ARM_COMPUTE_ASSERT(b.info()->is_resizable());
     ARM_COMPUTE_ASSERT(output.info()->is_resizable());
@@ -131,7 +152,7 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
     if (accumulate)
     {
         ARM_COMPUTE_ASSERT(accumulate != run_twice);
-        fill_s32(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output);
+        fill(AccessorType(output), 6 + finfo.hash, finfo.min_output, finfo.max_output);
     }
 
     if(is_fused)
@@ -139,7 +160,7 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
         ARM_COMPUTE_ASSERT(bias.info()->is_resizable());
         bias.allocator()->allocate();
         ARM_COMPUTE_ASSERT(!bias.info()->is_resizable());
-        fill_s32(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
+        fill(AccessorType(bias), 2 + finfo.hash, finfo.min_bias, finfo.max_bias);
     }
 
     // Run with variable inputs.
@@ -150,7 +171,7 @@ TensorType compute_gemmlowp_target(const TensorShape &shape_a, const TensorShape
         fill_quantized(AccessorType(b), 4 + finfo.hash);
         if(is_fused)
         {
-            fill_s32(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
+            fill(AccessorType(bias), 5 + finfo.hash, finfo.min_bias, finfo.max_bias);
         }
     }
 
@@ -225,21 +246,20 @@ template 
-        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo,
-            DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate);
+        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8, DataType::QASYMM8, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo);
     }
 
     SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate)
@@ -250,7 +270,7 @@ protected:
     if (accumulate)
     {
         SimpleTensor output{ shape_output, DataType::S32, 1 };
-        fill_s32(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
+        fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
         reference::arithmetic_operation(reference::ArithmeticOperation::ADD, output, ref_output, output, ConvertPolicy::SATURATE);
         return output;
     }
@@ -282,6 +302,16 @@ public:
     }
 };
 
+template 
+class GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture : protected GEMMLowpGenericMatrixMultiplyCoreValidationFixture
+{
+public:
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
+    {
+        GEMMLowpGenericMatrixMultiplyCoreValidationFixture::setup(shape_a, shape_b, shape_output, a_offset, b_offset, false /* accumulate */, true /* dynamic_qinfo */);
+    }
+};
+
 template 
 class GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public framework::Fixture
 {
@@ -417,7 +447,7 @@ protected:
         TensorShape bias_shape(shape_b[0]);
         SimpleTensor bias{ bias_shape, DataType::S32, 1 };
-        (run_twice) ? fill_s32(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill_s32(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target
+        (run_twice) ? fill(bias, 5 + finfo.hash, finfo.min_bias, finfo.max_bias) : fill(bias, 2 + finfo.hash, finfo.min_bias, finfo.max_bias); // Fill bias with same seed as last run of gemmlowp_target
 
         switch(output_stage.type)
         {
@@ -438,6 +468,57 @@ protected:
     SimpleTensor _reference{};
 };
 
+template 
+class GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture
+{
+public:
+    void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset)
+    {
+        // Accumulation is supported for Int8/UInt8 only in aarch64
+        bool accumulate = true;
+        // Accumulation is not supported for Int8/UInt8 in aarch32
+#ifdef __arm__
+        accumulate = false;
+#endif //__arm__
+        bool dynamic_qinfo = false;
+        const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset);
+        const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset);
+        TensorFillInfo finfo;
+        _target = compute_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate, dynamic_qinfo);
+        _reference = compute_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, finfo, accumulate);
+    }
+
+protected:
+    TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, const bool accumulate, const bool dynamic_qinfo)
+    {
+        const auto output_qinfo = QuantizationInfo();
+        return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_qinfo, b_qinfo, output_qinfo, DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, GEMMLowpOutputStageInfo(), false, finfo, accumulate, dynamic_qinfo, DataType::F32);
+    }
+
+    SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, const QuantizationInfo& a_qinfo, const QuantizationInfo& b_qinfo, const TensorFillInfo& finfo, bool accumulate)
+    {
+        SimpleTensor s32_ref_output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_qinfo, b_qinfo,
+            DataType::QASYMM8_SIGNED, DataType::QASYMM8_SIGNED, finfo);
+
+        SimpleTensor f32_ref_output(s32_ref_output.shape(), DataType::F32);
+        QuantizationInfo dst_quant_info = QuantizationInfo(a_qinfo.uniform().scale * b_qinfo.uniform().scale, 0);
+        f32_ref_output = reference::quantization_layer(s32_ref_output, DataType::F32, dst_quant_info);
+
+        if (accumulate)
+        {
+            SimpleTensor output{ shape_output, DataType::F32, 1 };
+            fill(output, 6 + finfo.hash, finfo.min_output, finfo.max_output);
+            reference::arithmetic_operation(reference::ArithmeticOperation::ADD, output, f32_ref_output, output, ConvertPolicy::SATURATE);
+            return output;
+        }
+
+        return f32_ref_output;
+    }
+
+    TensorType _target{};
+    SimpleTensor _reference{};
+};
+
 template 
 class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture : public GEMMLowpGenericMatrixMultiplyCoreFusedOffsetOutputValidationFixture
 {
diff --git a/tests/validation/reference/QuantizationLayer.cpp b/tests/validation/reference/QuantizationLayer.cpp
index 27665375c3..b76263bf95 100644
--- a/tests/validation/reference/QuantizationLayer.cpp
+++ b/tests/validation/reference/QuantizationLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2020, 2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -80,6 +80,15 @@ SimpleTensor quantization_layer(const SimpleTensor &src, DataType out
             dst[i] = quantize_qasymm16((src[i]), qinfo, rounding_policy);
         }
         break;
+        case DataType::F32:
+#if defined(_OPENMP)
+        #pragma omp parallel for
+#endif /* _OPENMP */
+            for(int i = 0; i < src.num_elements(); ++i)
+            {
+                dst[i] = dequantize_s32((src[i]), qinfo);
+            }
+            break;
         default:
             ARM_COMPUTE_ERROR("Unsupported output data type");
     }
@@ -127,6 +136,7 @@ template SimpleTensor quantization_layer(const SimpleTensor &src,
 template SimpleTensor quantization_layer(const SimpleTensor &src, DataType output_data_type, const QuantizationInfo &quantization_info);
 template SimpleTensor quantization_layer(const SimpleTensor &src, DataType output_data_type, const QuantizationInfo &quantization_info);
 template SimpleTensor quantization_layer(const SimpleTensor &src, DataType output_data_type, const QuantizationInfo &quantization_info);
+template SimpleTensor quantization_layer(const SimpleTensor &src, DataType output_data_type, const QuantizationInfo &quantization_info);
 } // namespace reference
 } // namespace validation
 } // namespace test
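
For context, the dynamic-quantization flow that the new GEMMLowpMatrixMultiplyCoreDynamicQuantizationFixture exercises is: configure the operator with placeholder QuantizationInfo that is flagged as dynamic, then set the real scales and offsets on the tensor infos after configure() and before run(). Below is a minimal sketch of that flow, distilled from compute_gemmlowp_target() above. The shapes, the set-up without an explicit output stage, and the QuantizationInfo(scale, offset, is_dynamic) overload are assumptions based on this patch, not a documented recipe.

// Sketch only: dynamic QuantizationInfo with NEGEMMLowpMatrixMultiplyCore,
// modelled on the test fixture in this patch.
#include "arm_compute/core/QuantizationInfo.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void run_dynamic_qinfo_gemm(float a_scale, int32_t a_offset, float b_scale, int32_t b_offset)
{
    Tensor a, b, dst;

    // Configure with placeholder quantization info whose third argument marks
    // it as dynamic, as the fixture does with QuantizationInfo(1.0, 0, true).
    a.allocator()->init(TensorInfo(TensorShape(32U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.0f, 0, true)));
    b.allocator()->init(TensorInfo(TensorShape(64U, 32U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(1.0f, 0, true)));
    dst.allocator()->init(TensorInfo(TensorShape(64U, 16U), 1, DataType::F32));

    NEGEMMLowpMatrixMultiplyCore gemm;
    gemm.configure(&a, &b, nullptr, &dst);

    // The real scales/offsets become known only later; because the info was
    // flagged dynamic they may be updated after configure() and before run().
    a.info()->set_quantization_info(QuantizationInfo(a_scale, a_offset, true));
    b.info()->set_quantization_info(QuantizationInfo(b_scale, b_offset, true));

    a.allocator()->allocate();
    b.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill a and b with QASYMM8_SIGNED data ...

    gemm.run();
}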
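
Likewise, the F32 branch added to the reference quantization_layer() above amounts to plain dequantization of the S32 accumulators: the fixture builds dst_quant_info with scale = a_scale * b_scale and a zero offset, so each output element is simply acc * a_scale * b_scale. A small self-contained check of that arithmetic follows; dequantize_s32_ref is an illustrative stand-in, not the library's dequantize_s32.

#include <cstdint>
#include <iostream>

// Dequantize an S32 GEMM accumulator with the combined scale used by the
// dequantized-matmul reference (offset is zero in dst_quant_info).
static float dequantize_s32_ref(int32_t acc, float a_scale, float b_scale)
{
    return static_cast<float>(acc) * (a_scale * b_scale);
}

int main()
{
    const float   a_scale = 1.0f / 255; // matches the fixture's a_qinfo
    const float   b_scale = 5.0f / 255; // matches the fixture's b_qinfo
    const int32_t acc     = 1000;       // an example S32 accumulator value
    std::cout << dequantize_s32_ref(acc, a_scale, b_scale) << std::endl; // ~0.0769
    return 0;
}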