diff options
author | Gunes Bayir <gunes.bayir@arm.com> | 2024-04-26 16:51:54 +0100 |
---|---|---|
committer | Gunes Bayir <gunes.bayir@arm.com> | 2024-04-29 09:31:44 +0000 |
commit | e5ef8c159a14872dda5e36e320f07b0963858d8c (patch) | |
tree | 2d359eeff83b54b3e7e65484abc37e531aedbdf2 | |
parent | 499b5bca1a897461d4105ba52e4c766ddb5f564a (diff) | |
download | ComputeLibrary-e5ef8c159a14872dda5e36e320f07b0963858d8c.tar.gz |
Disable SME2 Gemmlowp s8f32 kernel selection in case results need to be accumulated
Similar to https://review.mlplatform.org/c/ml/ComputeLibrary/+/11500, s8f32 kernels do not support accumulate mode. This patch modifies the kernel selection and also adds more tests to stress these test cases better.
Partially Resolves: COMPMID-6995
Change-Id: I40e19446c012eb7334e4511e254cce0d635aa234
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11503
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Radu Salavat <radu.salavat@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemm_s8fp32.cpp | 6 | ||||
-rw-r--r-- | tests/validation/NEON/GEMMLowp.cpp | 12 | ||||
-rw-r--r-- | tests/validation/fixtures/GEMMLowpFixture.h | 10 |
3 files changed, 15 insertions, 13 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_s8fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_s8fp32.cpp index 782399df8c..38d9b763f6 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_s8fp32.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_s8fp32.cpp @@ -55,7 +55,7 @@ static const GemmImplementation<int8_t, float, DequantizeFloat> gemm_s8fp32_meth { GemmMethod::GEMM_INTERLEAVED, "sme2_interleaved_nomerge_s8qfp32_mopa_1VLx4VL.hpp", - [](const GemmArgs &args, const DequantizeFloat &) { return args._ci->has_sme2(); }, + [](const GemmArgs &args, const DequantizeFloat &) { return args._ci->has_sme2() && !args._accumulate; }, [](const GemmArgs &args, const DequantizeFloat &) { const auto VL = sme::get_vector_length<float>(); return args._Msize <= VL || (2*VL < args._Msize && args._Msize <= 3*VL); }, [](const GemmArgs &args, const DequantizeFloat &dq) { return new GemmInterleavedNoMergeDequantized<cls_sme2_interleaved_nomerge_s8qfp32_mopa_1VLx4VL, int8_t, float>(args, dq); } @@ -63,7 +63,7 @@ static const GemmImplementation<int8_t, float, DequantizeFloat> gemm_s8fp32_meth { GemmMethod::GEMM_INTERLEAVED, "sme2_interleaved_nomerge_s8qfp32_mopa_4Vx1VL.hpp", - [](const GemmArgs &args, const DequantizeFloat &) { return args._ci->has_sme2(); }, + [](const GemmArgs &args, const DequantizeFloat &) { return args._ci->has_sme2() && !args._accumulate; }, [](const GemmArgs &args, const DequantizeFloat &) { const auto VL = sme::get_vector_length<float>(); return args._Nsize <= VL || (2*VL < args._Nsize && args._Nsize <= 3*VL); }, [](const GemmArgs &args, const DequantizeFloat &dq) { return new GemmInterleavedNoMergeDequantized<cls_sme2_interleaved_nomerge_s8qfp32_mopa_4VLx1VL, int8_t, float>(args, dq); } @@ -71,7 +71,7 @@ static const GemmImplementation<int8_t, float, DequantizeFloat> gemm_s8fp32_meth { GemmMethod::GEMM_INTERLEAVED, "sme2_interleaved_nomerge_s8qfp32_mopa_2Vx2VL.hpp", - [](const GemmArgs &args, const DequantizeFloat &) { return args._ci->has_sme2(); }, + [](const 
GemmArgs &args, const DequantizeFloat &) { return args._ci->has_sme2() && !args._accumulate; }, nullptr, [](const GemmArgs &args, const DequantizeFloat &dq) { return new GemmInterleavedNoMergeDequantized<cls_sme2_interleaved_nomerge_s8qfp32_mopa_2VLx2VL, int8_t, float>(args, dq); } }, diff --git a/tests/validation/NEON/GEMMLowp.cpp b/tests/validation/NEON/GEMMLowp.cpp index 9b1da61ed7..d25f43a330 100644 --- a/tests/validation/NEON/GEMMLowp.cpp +++ b/tests/validation/NEON/GEMMLowp.cpp @@ -360,13 +360,21 @@ TEST_SUITE_END() // DynamicQuantization // Deqaunt tests involve returning F32 from the MatrixMultiplyCore kernels and is only implemented in aarch64 TEST_SUITE(Dequant) constexpr AbsoluteTolerance<float> tolerance_dequantized(0.01f); -FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, datasets::SmallGEMMLowpDataset()) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::ALL, + combine( + datasets::SmallGEMMLowpDataset(), + make("accumulate", {true, false}) + )) { // Validate output validate(Accessor(_target), _reference, tolerance_dequantized); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, datasets::LargeGEMMLowpDataset()) +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMLowpDequantizedMatrixMultiplyValidationFixture, framework::DatasetMode::NIGHTLY, + combine( + datasets::LargeGEMMLowpDataset(), + make("accumulate", {false}) + )) { // Validate output validate(Accessor(_target), _reference, tolerance_dequantized); diff --git a/tests/validation/fixtures/GEMMLowpFixture.h b/tests/validation/fixtures/GEMMLowpFixture.h index 6b7cbba92e..aa4eedb75d 100644 --- a/tests/validation/fixtures/GEMMLowpFixture.h +++ b/tests/validation/fixtures/GEMMLowpFixture.h @@ -472,15 +472,9 @@ template <typename TensorType, typename AccessorType, typename FunctionType, boo class 
GEMMLowpDequantizedMatrixMultiplyValidationFixture : public framework::Fixture { public: - void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset) + void setup(TensorShape shape_a, TensorShape shape_b, TensorShape shape_output, int32_t a_offset, int32_t b_offset, bool accumulate) { - // Accumulation is supported for Int8/UInt8 only in aarch64 - bool accumulate = true; - // Accumulation is not supported for Int8/UInt8 in aarch32 -#ifdef __arm__ - accumulate = false; -#endif //__arm__ - bool dynamic_qinfo = false; + const bool dynamic_qinfo = false; const auto a_qinfo = QuantizationInfo(1.0f / 255, a_offset); const auto b_qinfo = QuantizationInfo(5.0f / 255, b_offset); TensorFillInfo finfo; |