From 97a609bb9b2ad22a2ebd54493cd1374f0992eb52 Mon Sep 17 00:00:00 2001 From: Mohammed Suhail Munshi Date: Fri, 21 Oct 2022 11:15:54 +0100 Subject: Fix GemmLowp BatchMatMul Tests to use quantized Outputs - Fix includes int8/uint8 quantized inputs - Bias S32 value is limited to better allow detection of mismatches in gemmlowp kernel Resolves: [COMPMID-5659] Signed-off-by: Mohammed Suhail Munshi Change-Id: Ie9cca430c6ab66253fe1d5252bd2c5396c7f38cf Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8514 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Gunes Bayir Benchmark: Arm Jenkins --- tests/datasets/GEMMLowpFusedOffsetOutputDataset.h | 34 +++++++++++++++++- tests/validation/CL/GEMMLowp.cpp | 32 +++++++++++++---- tests/validation/NEON/GEMMLowp.cpp | 42 ++++++++++++++++++----- tests/validation/fixtures/GEMMLowpFixture.h | 19 +++++++--- 4 files changed, 106 insertions(+), 21 deletions(-) diff --git a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h index b87b2fa947..8c90efcbdd 100644 --- a/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h +++ b/tests/datasets/GEMMLowpFusedOffsetOutputDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2021 Arm Limited. + * Copyright (c) 2019-2022 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -179,6 +179,38 @@ public: } }; +class SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned final : public GEMMLowpFusedOffsetOutputDataset +{ +public: + SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, 0, 254)); + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, 0, 254)); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, 20, 210)); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, 14, 210)); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, 0, 127)); + add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, 10, 210)); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), -3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 3, 10, 210)); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, 50, 225)); + } +}; + +class SmallGEMMLowpFusedBatchedMatMulDatasetSigned final : public GEMMLowpFusedOffsetOutputDataset +{ +public: + SmallGEMMLowpFusedBatchedMatMulDatasetSigned() + { + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 5, 1 << 25, 5, -128, 127)); + add_config(TensorShape(4U, 3U), TensorShape(2U, 4U), TensorShape(2U, 3U), 0, 0, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 100, 1 << 25, 3, -128, 127)); + add_config(TensorShape(12U, 15U), TensorShape(7U, 12U), TensorShape(7U, 15U), -3, 15, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, 0, 1 << 19, 0, -108, 127)); + add_config(TensorShape(59U, 17U), TensorShape(36U, 59U), TensorShape(36U, 17U), -2, 13, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -30, 2, 1 << 25, -98, 107)); + add_config(TensorShape(2U, 4U, 3U), TensorShape(5U, 2U, 3U), TensorShape(5U, 4U, 3U), -5, 12, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -20, 1 << 25, 4, -127, 64)); + add_config(TensorShape(15U, 7U, 3U), TensorShape(29U, 15U, 3U), TensorShape(29U, 7U, 3U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -10, 1 << 25, 6, -64, 127)); + add_config(TensorShape(56U, 17U, 32U), TensorShape(5U, 56U, 32U), TensorShape(5U, 17U, 32U), 3, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -127, 110)); + add_config(TensorShape(13U, 256U, 32U), TensorShape(19U, 13U, 32U), TensorShape(19U, 256U, 32U), 5, 2, OutputStageInfo(GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT, -15, 1 << 25, 6, -77, 115)); + } +}; + class SmallGEMMLowpFusedOffsetOutputOutput3DUint8Dataset final : public GEMMLowpFusedOffsetOutputDataset { public: diff --git a/tests/validation/CL/GEMMLowp.cpp b/tests/validation/CL/GEMMLowp.cpp index 19e8eeb0f5..0b057b9dce 100644 --- a/tests/validation/CL/GEMMLowp.cpp +++ b/tests/validation/CL/GEMMLowp.cpp @@ -66,13 +66,33 @@ FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMLowpMatrixMultiplyCoreFixture, framework: // Validate output validate(CLAccessor(_target), _reference); } -TEST_SUITE(BATCHED_MATMUL) -FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpBatchedMatMulFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpBatchedMatMulDataset()) + +using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; +TEST_SUITE(BatchedMatMul) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, + combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(), + framework::dataset::make("DataType", { DataType::QASYMM8 })), + framework::dataset::make("bool", { false }))) { - // Validate output - validate(CLAccessor(_target), _reference); + validate(CLAccessor(_target), _reference, tolerance_quant); +} +TEST_SUITE_END() // QASYMM8 + +using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, + combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(), + framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })), + framework::dataset::make("bool", { false }))) +{ + validate(CLAccessor(_target), _reference, tolerance_quant); } -TEST_SUITE_END() // BATCHED_MATMUL +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // BatchedMatMul + TEST_SUITE(FusedOffsetOutput) TEST_SUITE(QASYMM8) using CLGEMMLowpMatrixMultiplyCoreFusedOffsetOutputUint8Fixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; @@ -264,4 +284,4 @@ TEST_SUITE_END() // GEMMLowp TEST_SUITE_END() // CL } // namespace validation } // namespace test -} // namespace arm_compute \ No newline at end of file +} // namespace arm_compute diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 2dcc740b97..17c6d060cb 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -211,13 +211,6 @@ TEST_CASE(MultipleExecutionWithConfigure, framework::DatasetMode::ALL) } } -TEST_SUITE(BatchedMatMul) -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpBatchedMatMulFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpBatchedMatMulDataset()) -{ - validate(Accessor(_target), _reference); -} -TEST_SUITE_END() // BatchedMatMul - FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) { // Validate output @@ -230,20 +223,51 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFixture, framework: validate(Accessor(_target), _reference); } +constexpr AbsoluteTolerance tolerance_batched(2); + +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned = + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; + +TEST_SUITE(BatchedMatMul) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedUnsigned, framework::DatasetMode::ALL, + combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetUnsigned(), + framework::dataset::make("DataType", { DataType::QASYMM8 })), + framework::dataset::make("bool", { false }))) +{ + validate(Accessor(_target), _reference, tolerance_batched); +} +TEST_SUITE_END() // QASYMM8 + +using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned = + GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture; +TEST_SUITE(QASYMM8_SIGNED) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixtureBatchedSigned, framework::DatasetMode::ALL, + combine(combine(datasets::SmallGEMMLowpFusedBatchedMatMulDatasetSigned(), + framework::dataset::make("DataType", { DataType::QASYMM8_SIGNED })), + framework::dataset::make("bool", { false }))) +{ + validate(Accessor(_target), _reference, tolerance_batched); +} +TEST_SUITE_END() // QASYMM8_SIGNED +TEST_SUITE_END() // BatchedMatMul + using NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture = GEMMLowpMatrixMultiplyCoreFusedOffsetOutputValidationFixture; +constexpr AbsoluteTolerance tolerance_quant(1); + TEST_SUITE(FusedOffsetOutput) FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::ALL, combine(datasets::SmallGEMMLowpFusedOffsetOutputUint8Dataset(), framework::dataset::make("DataType", { DataType::QASYMM8 }))) { // Validate output - validate(Accessor(_target), _reference); + validate(Accessor(_target), _reference, tolerance_quant); } FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpMatrixMultiplyCoreFusedOffsetOutputFixture, framework::DatasetMode::NIGHTLY, combine(datasets::LargeGEMMLowpFusedOffsetOutputUint8Dataset(), framework::dataset::make("DataType", { DataType::QASYMM8 }))) { // Validate output - validate(Accessor(_target), _reference); + validate(Accessor(_target), _reference, tolerance_quant); } TEST_SUITE_END() // FusedOffsetOutput TEST_SUITE_END() // MatrixMultiplyCore diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index f1ec81aae6..043fc9cb3c 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -68,6 +68,12 @@ void fill(U &&tensor, int i) library->fill(tensor, distribution, i); break; } + case DataType::S32: + { + std::uniform_int_distribution distribution(-20000, 20000); + library->fill(tensor, distribution, i); + break; + } case DataType::F16: { arm_compute::utils::uniform_real_distribution_16bit distribution{ -1.0f, 1.0f }; @@ -235,7 +241,8 @@ public: protected: TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset) { - return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset, b_offset); + return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset, + b_offset); } SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset) @@ -247,7 +254,7 @@ protected: SimpleTensor _reference{}; }; -template +template class GEMMLowpMatrixMultiplyCoreFusedOffsetOutputGenericValidationFixture : public framework::Fixture { public: @@ -286,18 +293,20 @@ protected: TensorType compute_target(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo, bool reshape_b_only_on_first_run = false) { - return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset, b_offset, + return compute_gemmlowp_target(shape_a, shape_b, shape_output, a_offset, + b_offset, output_stage, data_type_a, data_type_b, b_qinfo, reshape_b_only_on_first_run); } SimpleTensor compute_reference(const TensorShape &shape_a, const TensorShape &shape_b, const TensorShape &shape_output, int32_t a_offset, int32_t b_offset, GEMMLowpOutputStageInfo output_stage, DataType data_type_a, DataType data_type_b, QuantizationInfo b_qinfo) { - SimpleTensor output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b, b_qinfo); + SimpleTensor output = compute_gemmlowp_reference(shape_a, shape_b, shape_output, a_offset, b_offset, data_type_a, data_type_b, + b_qinfo); TensorShape bias_shape(shape_b[0]); SimpleTensor bias{ bias_shape, DataType::S32, 1 }; - fill(bias, 2); + (run_twice) ? fill(bias, 5) : fill(bias, 2); // Fill bias with same seed as last run of gemmlowp_target switch(output_stage.type) { -- cgit v1.2.1