From 9c7c2d2d23693877867bb3284c577b33cfbff471 Mon Sep 17 00:00:00 2001 From: Viet-Hoa Do Date: Tue, 11 Apr 2023 17:16:27 +0100 Subject: Add quantized support for CPU MatMul Resolves: COMPMID-5899 Signed-off-by: Viet-Hoa Do Change-Id: I89d96e292c3492ba9b1900a3e5683f9dcd11dfc6 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9440 Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- arm_compute/core/utils/quantization/AsymmHelpers.h | 15 ++- src/core/utils/quantization/AsymmHelpers.cpp | 34 ++++- src/cpu/operators/CpuFullyConnected.cpp | 34 +---- src/cpu/operators/CpuMatMul.cpp | 52 ++++++- tests/SimpleTensor.h | 18 ++- tests/datasets/SmallMatMulDataset.h | 11 ++ tests/validation/NEON/MatMul.cpp | 149 +++++++++++++++++++-- tests/validation/fixtures/MatMulFixture.h | 102 +++++++++++--- 8 files changed, 348 insertions(+), 67 deletions(-) diff --git a/arm_compute/core/utils/quantization/AsymmHelpers.h b/arm_compute/core/utils/quantization/AsymmHelpers.h index c9d0930c3a..9e1e9668fb 100644 --- a/arm_compute/core/utils/quantization/AsymmHelpers.h +++ b/arm_compute/core/utils/quantization/AsymmHelpers.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -81,6 +81,19 @@ Status calculate_quantized_multipliers(const QuantizationInfo &iq_info, * @return min and max values for the quantized data type */ std::pair get_min_max_values_from_quantized_data_type(DataType data_type); + +/** Get minimum and maximum output of the activation function after quantization. + * + * Only ReLU, upper bounded ReLU and lower+upper bounded ReLU are supported. + * + * @param[in] q_info Output quantization info. + * @param[in] act_info Activation function information. + * @param[in] data_type Output data type (either QASYMM8 or QASYMM8_SIGNED). + * + * @return The minimum and maximum output of the activation function after quantization. + */ +std::tuple get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type); + /** Compute quantized per-channel multipliers and shifts. As many multipliers * and shifts as output channels are computed. If weights are not quantized * per-channel, multipliers and shifts will end up being the same for each diff --git a/src/core/utils/quantization/AsymmHelpers.cpp b/src/core/utils/quantization/AsymmHelpers.cpp index eb008639b1..ba9a97aef9 100644 --- a/src/core/utils/quantization/AsymmHelpers.cpp +++ b/src/core/utils/quantization/AsymmHelpers.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2022 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -176,6 +176,38 @@ std::pair get_min_max_values_from_quantized_data_type(DataType data_ty } return std::make_pair(min_quant_val, max_quant_val); } + +std::tuple get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type) +{ + PixelValue type_min{}; + PixelValue type_max{}; + std::tie(type_min, type_max) = get_min_max(data_type); + const UniformQuantizationInfo q_unif = q_info.uniform(); + + if(act_info.enabled()) + { + switch(act_info.activation()) + { + case ActivationLayerInfo::ActivationFunction::RELU: + type_min = PixelValue(q_unif.offset); + break; + case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: + type_min = PixelValue(q_unif.offset); + type_max = PixelValue(act_info.a(), data_type, q_info); + break; + case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU: + type_min = PixelValue(act_info.b(), data_type, q_info); + type_max = PixelValue(act_info.a(), data_type, q_info); + break; + default: + ARM_COMPUTE_ERROR("Activation function not supported."); + break; + } + } + + return std::make_pair(type_min, type_max); +} + void compute_quantized_multipliers_and_shifts(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, diff --git a/src/cpu/operators/CpuFullyConnected.cpp b/src/cpu/operators/CpuFullyConnected.cpp index 70584a64f8..460257902d 100644 --- a/src/cpu/operators/CpuFullyConnected.cpp +++ b/src/cpu/operators/CpuFullyConnected.cpp @@ -48,38 +48,6 @@ using namespace arm_compute::misc::shape_calculator; namespace { -// Get min, max bound of a quantized asymmetric dst tensor, with the effect of fused activation -std::pair get_quantized_asymmetric_output_min_max(const QuantizationInfo &q_info, const ActivationLayerInfo &act_info, DataType data_type) -{ - PixelValue type_min{}; - PixelValue type_max{}; - std::tie(type_min, type_max) = get_min_max(data_type); - const UniformQuantizationInfo q_unif = q_info.uniform(); - - if(act_info.enabled()) - { - switch(act_info.activation()) - { - case ActivationLayerInfo::ActivationFunction::RELU: - type_min = PixelValue(q_unif.offset); - break; - case ActivationLayerInfo::ActivationFunction::BOUNDED_RELU: - type_min = PixelValue(q_unif.offset); - type_max = PixelValue(act_info.a(), data_type, q_info); - break; - case ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU: - type_min = PixelValue(act_info.b(), data_type, q_info); - type_max = PixelValue(act_info.a(), data_type, q_info); - break; - default: - ARM_COMPUTE_ERROR("Activation function not supported."); - break; - } - } - - return std::make_pair(type_min, type_max); -} - Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const ActivationLayerInfo &act, GEMMLowpOutputStageInfo &gemmlowp_output_stage_info) { @@ -97,7 +65,7 @@ Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo PixelValue type_min{}; PixelValue type_max{}; - std::tie(type_min, type_max) = get_quantized_asymmetric_output_min_max(oq_info, act, data_type); + std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type); gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier; gemmlowp_output_stage_info.gemmlowp_shift = output_shift; diff --git a/src/cpu/operators/CpuMatMul.cpp b/src/cpu/operators/CpuMatMul.cpp index b5359e51af..369466b669 100644 --- a/src/cpu/operators/CpuMatMul.cpp +++ b/src/cpu/operators/CpuMatMul.cpp @@ -23,7 
+23,9 @@ */ #include "src/cpu/operators/CpuMatMul.h" +#include "arm_compute/core/Types.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/core/experimental/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -40,6 +42,40 @@ namespace arm_compute { namespace cpu { +namespace +{ + +Status get_gemmlowp_output_stage_info(const ITensorInfo *src, const ITensorInfo *weights, const ITensorInfo *dst, const ActivationLayerInfo &act, + GEMMLowpOutputStageInfo &gemmlowp_output_stage_info) +{ + const auto data_type = src->data_type(); + const QuantizationInfo oq_info = dst->quantization_info(); + const UniformQuantizationInfo iq_unif = src->quantization_info().uniform(); + const UniformQuantizationInfo wq_unif = weights->quantization_info().uniform(); + const UniformQuantizationInfo oq_unif = oq_info.uniform(); + + float multiplier = (iq_unif.scale * wq_unif.scale) / oq_unif.scale; + int32_t output_multiplier; + int32_t output_shift; + + ARM_COMPUTE_RETURN_ON_ERROR(quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift)); + + PixelValue type_min{}; + PixelValue type_max{}; + std::tie(type_min, type_max) = quantization::get_quantized_asymmetric_output_min_max(oq_info, act, data_type); + + gemmlowp_output_stage_info.gemmlowp_multiplier = output_multiplier; + gemmlowp_output_stage_info.gemmlowp_shift = output_shift; + gemmlowp_output_stage_info.gemmlowp_offset = oq_unif.offset; + gemmlowp_output_stage_info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + gemmlowp_output_stage_info.gemmlowp_min_bound = type_min.get(); + gemmlowp_output_stage_info.gemmlowp_max_bound = type_max.get(); + + return Status{}; +} + +} + CpuMatMul::CpuMatMul() : _transpose_kernel_lhs(), _transpose_kernel_rhs(), _asm_glue(), _lhs_transposed(), _rhs_transposed(), _original_lhs_shape(), _original_rhs_shape(), _original_dst_shape() { @@ -47,8 +83,8 @@ CpuMatMul::CpuMatMul() Status CpuMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &info, const CpuMatMulSettings &settings) { - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs, dst); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->are_values_constant(), "LHS Tensor must be dynamic."); ARM_COMPUTE_RETURN_ERROR_ON_MSG(rhs->are_values_constant(), "RHS Tensor must be dynamic."); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(lhs); @@ -96,6 +132,12 @@ Status CpuMatMul::validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs_to_use->dimension(i) != rhs_to_use->dimension(i), "Broadcasting in Batch dimension is unsupported by this operator."); } + // Quantized-specific configuration + if(is_data_type_quantized(lhs->data_type())) + { + ARM_COMPUTE_RETURN_ON_ERROR(get_gemmlowp_output_stage_info(lhs_to_use, rhs_to_use, dst, gemm_info.activation_info, gemm_info.output_stage)); + } + cpu::CpuGemmAssemblyDispatch::validate(lhs_to_use, rhs_to_use, nullptr, dst, gemm_info); return Status{}; @@ -157,6 +199,12 @@ void CpuMatMul::configure(ITensorInfo *lhs, ITensorInfo *rhs, ITensorInfo *dst, lhs_to_use = (_adj_lhs) ? 
_lhs_transposed : lhs_to_use; rhs_to_use = (_adj_rhs) ? _rhs_transposed : rhs_to_use; + // Quantized-specific configuration + if(is_data_type_quantized(lhs->data_type())) + { + get_gemmlowp_output_stage_info(&lhs_to_use, &rhs_to_use, &dst_to_use, _gemm_info.activation_info, _gemm_info.output_stage); + } + // Configure Asm Kernel _asm_glue = std::make_unique(); _asm_glue->configure(&lhs_to_use, &rhs_to_use, nullptr, &dst_to_use, _gemm_info); // c is nullptr as bias not supported in MatMul diff --git a/tests/SimpleTensor.h b/tests/SimpleTensor.h index c1bd7f87b5..9ea171d492 100644 --- a/tests/SimpleTensor.h +++ b/tests/SimpleTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2020, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -173,6 +173,15 @@ public: */ QuantizationInfo quantization_info() const override; + /** Set the quantization information of the tensor. + * + * This function does not have any effect on the raw quantized data of the tensor. + * It simply changes the quantization information, hence changes the dequantized values. + * + * @return A reference to the current object. + */ + SimpleTensor &quantization_info(const QuantizationInfo &qinfo); + /** Constant pointer to the underlying buffer. * * @return a constant pointer to the data. @@ -334,6 +343,13 @@ QuantizationInfo SimpleTensor::quantization_info() const return _quantization_info; } +template +SimpleTensor &SimpleTensor::quantization_info(const QuantizationInfo &qinfo) +{ + _quantization_info = qinfo; + return *this; +} + template size_t SimpleTensor::size() const { diff --git a/tests/datasets/SmallMatMulDataset.h b/tests/datasets/SmallMatMulDataset.h index 52ef01da7b..bb4cdad54b 100644 --- a/tests/datasets/SmallMatMulDataset.h +++ b/tests/datasets/SmallMatMulDataset.h @@ -47,6 +47,17 @@ public: } }; +class SmallerMatMulDataset final : public MatMulDataset +{ +public: + SmallerMatMulDataset() + { + add_config(TensorShape(9U, 6U), TensorShape(5U, 9U), TensorShape(5U, 6U)); + add_config(TensorShape(8U, 4U, 2U), TensorShape(16U, 8U, 2U), TensorShape(16U, 4U, 2U)); + add_config(TensorShape(32U, 2U), TensorShape(17U, 32U), TensorShape(17U, 2U)); + } +}; + class TinyMatMulDataset final : public MatMulDataset { public: diff --git a/tests/validation/NEON/MatMul.cpp b/tests/validation/NEON/MatMul.cpp index 3bfbc16e71..1a23697092 100644 --- a/tests/validation/NEON/MatMul.cpp +++ b/tests/validation/NEON/MatMul.cpp @@ -43,8 +43,10 @@ namespace validation TEST_SUITE(NEON) TEST_SUITE(MatMul) -constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ -const AbsoluteTolerance tolerance_fp16(half(0.1f)); +constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for FP32 data types */ +const AbsoluteTolerance tolerance_fp16(half(0.1f)); +constexpr AbsoluteTolerance tolerance_qasymm8(0); +constexpr AbsoluteTolerance tolerance_qasymm8_signed(0); // clang-format off // *INDENT-OFF* @@ -57,6 +59,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(9U, 6U), 1, DataType::F32), TensorInfo(TensorShape(9U, 6U , 12U) , 1 , DataType::F32), TensorInfo(TensorShape(9U, 6U , 12U) , 1 , DataType::F32), // Tensors are not dynamic + TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(9U, 6U), 
1, DataType::QASYMM8_SIGNED), // Mismatching data type }), framework::dataset::make("InputBInfo",{ TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8), TensorInfo(TensorShape(5U, 9U), 1, DataType::S32), @@ -65,6 +70,9 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(5U, 9U), 1, DataType::F32), TensorInfo(TensorShape(5U, 9U, 12U), 1, DataType::F32), TensorInfo(TensorShape(5U, 9U, 12U), 1, DataType::F32), + TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8_SIGNED), })), framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(5U, 6U), 1, DataType::F32), TensorInfo(TensorShape(5U, 6U), 1, DataType::S32), @@ -73,9 +81,12 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( TensorInfo(TensorShape(5U, 6U), 1, DataType::F32), TensorInfo(TensorShape(5U, 6U, 12U) , 1, DataType::F32), TensorInfo(TensorShape(5U, 6U, 12U) , 1, DataType::F32), + TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8), })), - framework::dataset::make( "TensorIsConst", {false, false, false, false, false , false, true} )), - framework::dataset::make("Expected", { false, false, false, false, true, true, false })), + framework::dataset::make( "TensorIsConst", {false, false, false, false, false , false, true, false, false, false} )), + framework::dataset::make("Expected", { false, false, false, false, true, true, false, true, true, false })), a_info, b_info, output_info, are_tensors_const, expected) { TensorInfo a{a_info}; @@ -103,6 +114,9 @@ using NEMatMulFastMathFixture = MatMulGenericValidationFixture using NEMatMulDynamicTensorsFixture = MatMulValidationWithDynamicTensorsFixture; +template +using NEQuantizedMatMulFixture = QuantizedMatMulValidationFixture; + TEST_SUITE(Float) TEST_SUITE(FP32) FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallMatMulDataset(), @@ -149,13 +163,18 @@ TEST_SUITE_END() // FP32 /* Note : MatMul BF16 is enabled by specifying FP32 datatype and enabling the fast math setting */ constexpr AbsoluteTolerance tolerance_bf16(0.001f); TEST_SUITE(BF16) -FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFastMathFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(datasets::SmallMatMulDataset(), - framework::dataset::make("TransposeA", { false, true })), - framework::dataset::make("TransposeB", { false, true })), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })), - framework::dataset::make("RunTimes", { 0 })), - framework::dataset::make("Settings", { CpuMatMulSettings().fast_math(true) }))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFastMathFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine(combine( + datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::F32)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo() })), + framework::dataset::make("RunTimes", { 0 })), + framework::dataset::make("Settings", { CpuMatMulSettings().fast_math(true) })), + framework::dataset::make("LhsQInfo", { 
QuantizationInfo() })), + framework::dataset::make("RhsQInfo", { QuantizationInfo() })), + framework::dataset::make("OutQInfo", { QuantizationInfo() })) +) { // Validate output validate(Accessor(_target), _reference, tolerance_bf16); @@ -198,6 +217,114 @@ TEST_SUITE_END() // FP16 TEST_SUITE_END() // Float +TEST_SUITE(Quantized) + +TEST_SUITE(QASYMM8) + +FIXTURE_DATA_TEST_CASE(RunSmall, NEQuantizedMatMulFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) })), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f, 2) })) +) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunSmallExtraActivation, NEQuantizedMatMulFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::SmallerMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) })), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 50, 1) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 30, -1) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f, 2) })) +) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEQuantizedMatMulFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) })), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 100, 1) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 200, -1) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f, 2) })) +) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} + +TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) + +FIXTURE_DATA_TEST_CASE(RunSmall, NEQuantizedMatMulFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::SmallMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), 
+ framework::dataset::make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) })), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 40, -2) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 50, 1) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f, 1) })) +) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} + +FIXTURE_DATA_TEST_CASE(RunSmallExtraActivation, NEQuantizedMatMulFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::SmallerMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) })), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 40, -2) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 50, 1) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f, 1) })) +) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} + +FIXTURE_DATA_TEST_CASE(RunLarge, NEQuantizedMatMulFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(combine(combine(combine( + datasets::LargeMatMulDataset(), + framework::dataset::make("TransposeA", { false, true })), + framework::dataset::make("TransposeB", { false, true })), + framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)), + framework::dataset::make("ActivationInfo", { ActivationLayerInfo(), ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) })), + framework::dataset::make("NumberOfExtraRuns", { 0, 1 })), + framework::dataset::make("LhsQInfo", { QuantizationInfo(1.f / 150, -2) })), + framework::dataset::make("RhsQInfo", { QuantizationInfo(1.f / 250, 1) })), + framework::dataset::make("OutQInfo", { QuantizationInfo(1.f, 1) })) +) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8_signed); +} + +TEST_SUITE_END() // QASYMM8_SIGNED + +TEST_SUITE_END() // Quantized + TEST_SUITE_END() // MatMul TEST_SUITE_END() // NEON } // namespace validation diff --git a/tests/validation/fixtures/MatMulFixture.h b/tests/validation/fixtures/MatMulFixture.h index bb4a1cd7be..f8f038af3f 100644 --- a/tests/validation/fixtures/MatMulFixture.h +++ b/tests/validation/fixtures/MatMulFixture.h @@ -25,12 +25,17 @@ #define TESTS_VALIDATION_FIXTURES_MATMULFIXTURE #include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "tests/framework/Fixture.h" #include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/GEMM.h" +#include "tests/validation/reference/GEMMLowp.h" #include "tests/validation/reference/Permute.h" #include "tests/validation/reference/ReshapeLayer.h" +#include #include +#include namespace arm_compute { @@ -44,7 +49,7 @@ class MatMulGenericValidationFixture : public framework::Fixture public: template void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool transpose_a, bool 
transpose_b, DataType data_type, ActivationLayerInfo act_info, int num_extra_runs, - Settings settings) + Settings settings, QuantizationInfo a_qinfo = QuantizationInfo(), QuantizationInfo b_qinfo = QuantizationInfo(), QuantizationInfo o_qinfo = QuantizationInfo()) { // For brevity, the input shapes are assumed to be not-transposed for both a and b matrices. if(transpose_a) @@ -56,8 +61,8 @@ public: permute(shape_b, PermutationVector(1U, 0U)); } - _target = compute_target(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, settings); - _reference = compute_reference(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info); + _target = compute_target(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, settings, a_qinfo, b_qinfo, o_qinfo); + _reference = compute_reference(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, a_qinfo, b_qinfo, o_qinfo); } protected: @@ -78,23 +83,29 @@ protected: library->fill(tensor, distribution, i); break; } - default: + case DataType::QASYMM8: + case DataType::QASYMM8_SIGNED: { library->fill_tensor_uniform(tensor, i); + break; + } + default: + { + ARM_COMPUTE_ERROR("Unsupported data type."); } } } TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &output_shape, bool transpose_a, bool transpose_b, DataType data_type, - ActivationLayerInfo act_info, int num_extra_runs, const Settings &settings) + ActivationLayerInfo act_info, int num_extra_runs, const Settings &settings, QuantizationInfo a_qinfo, QuantizationInfo b_qinfo, QuantizationInfo o_qinfo) { // 1. Create Classes and configure function // ---------------------------------------------------- // Create tensors // Configure relevant classes and matmul function - TensorType a = create_tensor(shape_a, data_type, 1); - TensorType b = create_tensor(shape_b, data_type, 1); - TensorType dst = create_tensor(output_shape, data_type, 1); + TensorType a = create_tensor(shape_a, data_type, 1, a_qinfo); + TensorType b = create_tensor(shape_b, data_type, 1, b_qinfo); + TensorType dst = create_tensor(output_shape, data_type, 1, o_qinfo); FunctionType matmul; @@ -149,18 +160,61 @@ protected: return dst; } + template + typename std::enable_if::value, SimpleTensor>::type + compute_reference_gemm(const SimpleTensor &a, const SimpleTensor &b, const SimpleTensor &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo) + { + ARM_COMPUTE_UNUSED(act_info, o_qinfo); + + return reference::gemm(a, b, c, alpha, beta); + } + + template + typename std::enable_if::value, SimpleTensor>::type + compute_reference_gemm(const SimpleTensor &a, const SimpleTensor &b, const SimpleTensor &c, float alpha, float beta, const ActivationLayerInfo &act_info, const QuantizationInfo &o_qinfo) + { + ARM_COMPUTE_UNUSED(alpha, beta); + + const auto aq = a.quantization_info().uniform(); + const auto bq = b.quantization_info().uniform(); + const auto oq = o_qinfo.uniform(); + + const auto multiplier = aq.scale * bq.scale / oq.scale; + + int32_t output_multiplier = 0; + int32_t output_shift = 0; + quantization::calculate_quantized_multiplier(multiplier, &output_multiplier, &output_shift); + std::vector output_multipliers{ output_multiplier }; + std::vector output_shifts{ output_shift }; + + PixelValue output_min{}; + PixelValue output_max{}; + std::tie(output_min, output_max) = quantization::get_quantized_asymmetric_output_min_max( + 
o_qinfo, act_info, a.data_type()); + + const auto tmp = reference::gemmlowp_matrix_multiply_core( + a, b, c.shape(), aq.offset, bq.offset); + + auto output = reference::gemmlowp_quantize_down_scale_by_fixedpoint( + tmp, output_multipliers, output_shifts, oq.offset, + output_min.get(), output_max.get()); + output.quantization_info(o_qinfo); + + return output; + } + SimpleTensor compute_reference(const TensorShape &a_shape, const TensorShape &b_shape, const TensorShape &output_shape, bool transpose_a, bool transpose_b, DataType data_type, - ActivationLayerInfo act_info) + ActivationLayerInfo act_info, QuantizationInfo a_qinfo, QuantizationInfo b_qinfo, QuantizationInfo o_qinfo) { - // We collapse dimensions > 3 onto dimension 3, i.e. 5D+ tensors will look like 4D - // This is necessary unless we choose to extend gemm reference for 5D+ tensors - TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimW); - TensorShape a_shape_collapsed = a_shape.collapsed_from(Window::DimW); - TensorShape b_shape_collapsed = b_shape.collapsed_from(Window::DimW); + // We collapse dimensions > 2 onto dimension 2, i.e. 4D+ tensors will look like 3D + // This is necessary unless we choose to extend gemm reference for 4D+ tensors + TensorShape output_shape_collapsed = output_shape.collapsed_from(Window::DimZ); + TensorShape a_shape_collapsed = a_shape.collapsed_from(Window::DimZ); + TensorShape b_shape_collapsed = b_shape.collapsed_from(Window::DimZ); // Create reference - SimpleTensor a{ a_shape_collapsed, data_type, 1 }; - SimpleTensor b{ b_shape_collapsed, data_type, 1 }; + SimpleTensor a{ a_shape_collapsed, data_type, 1, a_qinfo }; + SimpleTensor b{ b_shape_collapsed, data_type, 1, b_qinfo }; SimpleTensor c{ output_shape_collapsed, data_type, 1 }; // Fill reference @@ -199,8 +253,9 @@ protected: // Setting beta to 0 will effectively disable C for the // computation of the reference: alpha * A * B + 0 * C // Use transposed tensors if boolean enabled else use original tensors - SimpleTensor result = reference::gemm((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f); - result = reference::activation_layer(result, act_info, QuantizationInfo()); + auto result = compute_reference_gemm((transpose_a) ? a_transposed : a, (transpose_b) ? b_transposed : b, c, 1.0f, 0.f, act_info, o_qinfo); + + result = reference::activation_layer(result, act_info, o_qinfo); // We reshape the gemm output back if the tensor is high dimensional if(output_shape_collapsed != output_shape) @@ -249,6 +304,17 @@ public: } }; +template +class QuantizedMatMulValidationFixture : public MatMulGenericValidationFixture +{ +public: + template + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape output_shape, bool transpose_a, bool transpose_b, DataType data_type, ActivationLayerInfo act_info, int num_extra_runs, QuantizationInfo a_qinfo, QuantizationInfo b_qinfo, QuantizationInfo o_qinfo) + { + MatMulGenericValidationFixture::setup(shape_a, shape_b, output_shape, transpose_a, transpose_b, data_type, act_info, num_extra_runs, Settings(), a_qinfo, b_qinfo, o_qinfo); + } +}; + } // namespace validation } // namespace test } // namespace arm_compute -- cgit v1.2.1
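
For context, the end-to-end effect of this patch is that the NEON MatMul runtime function now accepts QASYMM8/QASYMM8_SIGNED tensors. A minimal sketch of driving that path is below; it assumes the NEMatMul front end of this release, i.e. configure(lhs, rhs, dst, MatMulInfo, CpuMatMulSettings), and reuses the quantization parameters and shapes from the QASYMM8 precommit tests above, so treat the exact signatures and values as illustrative rather than authoritative.

#include "arm_compute/runtime/NEON/functions/NEMatMul.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Quantized LHS (9x6), RHS (5x9) and DST (5x6) tensors, matching the first
    // SmallMatMulDataset configuration; quantization parameters mirror the
    // QASYMM8 precommit tests (illustrative values only).
    Tensor lhs, rhs, dst;
    lhs.allocator()->init(TensorInfo(TensorShape(9U, 6U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 50, 1)));
    rhs.allocator()->init(TensorInfo(TensorShape(5U, 9U), 1, DataType::QASYMM8, QuantizationInfo(1.f / 30, -1)));
    dst.allocator()->init(TensorInfo(TensorShape(5U, 6U), 1, DataType::QASYMM8, QuantizationInfo(1.f, 2)));

    // Assumed front-end signature at the time of this change:
    // configure(lhs, rhs, dst, MatMulInfo, CpuMatMulSettings).
    NEMatMul matmul;
    matmul.configure(&lhs, &rhs, &dst, MatMulInfo(), CpuMatMulSettings());

    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill lhs/rhs with quantized input data ...

    matmul.run();
    return 0;
}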
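
The requantization parameters that get_gemmlowp_output_stage_info() fills in (previously in CpuFullyConnected, now also in CpuMatMul) boil down to expressing the real rescale factor (lhs_scale * rhs_scale) / dst_scale as a Q31 fixed-point multiplier plus a shift, and translating the fused ReLU bounds into the output's quantized domain. The standalone snippet below is a simplified analogue of what quantization::calculate_quantized_multiplier() computes, written in plain C++ for illustration only; it is not Compute Library code and glosses over the library's edge-case handling.

#include <cmath>
#include <cstdint>
#include <iostream>

// Decompose a real rescale factor into a Q31 multiplier and a right shift so
// that real_multiplier ~= quantized_multiplier * 2^(-31 - right_shift).
static void quantize_multiplier(double real_multiplier, int32_t &quantized_multiplier, int &right_shift)
{
    int exponent = 0;
    const double q = std::frexp(real_multiplier, &exponent); // q in [0.5, 1), real = q * 2^exponent
    int64_t q_fixed = static_cast<int64_t>(std::llround(q * (1ll << 31)));
    if(q_fixed == (1ll << 31)) // rounding pushed the mantissa to 2^31: renormalise
    {
        q_fixed /= 2;
        ++exponent;
    }
    quantized_multiplier = static_cast<int32_t>(q_fixed);
    right_shift          = -exponent;
}

int main()
{
    // Scales/offsets mirroring the QASYMM8 precommit tests above (illustrative).
    const double  lhs_scale  = 1.0 / 50, rhs_scale = 1.0 / 30, dst_scale = 1.0;
    const int32_t dst_offset = 2;

    int32_t multiplier = 0;
    int     shift      = 0;
    quantize_multiplier((lhs_scale * rhs_scale) / dst_scale, multiplier, shift);

    // Fused ReLU expressed in the quantized output domain: the lower clamp moves
    // up to the output zero point, the upper clamp stays at the type maximum (QASYMM8).
    const int32_t out_min = dst_offset;
    const int32_t out_max = 255;

    std::cout << "multiplier=" << multiplier << " shift=" << shift
              << " clamp=[" << out_min << ", " << out_max << "]\n";
    return 0;
}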