From 2aec5f1870b6cd5edd7de6403b5cf75530eb77f5 Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Tue, 23 Jan 2024 17:19:44 +0000
Subject: Fix tolerance issue in BF16 MatMul tests

BF16 kernels are not expected to meet the same accuracy standards as
full float kernels. The reference implementation is a standard
floating-point implementation, so small mismatches are expected.

We increase the tolerance of the MatMul BF16 tests and add more tests
to cover more shapes. Previously, the only bf16 kernel exercised was
a64_hybrid_fp32bf16fp32_mmla_4x24. With the new shapes, the heuristics
also select a64_hybrid_fp32bf16fp32_mmla_6x16 and stress that kernel as
well, covering every implementation.

Resolves: COMPMID-6654
Signed-off-by: Gunes Bayir
Change-Id: I15342606912013c123b94c7e0ea2e6bbb25680d7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11014
Benchmark: Arm Jenkins
Reviewed-by: Jakub Sujak
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 tests/validation/NEON/MatMul.cpp | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tests/validation/NEON/MatMul.cpp b/tests/validation/NEON/MatMul.cpp
index 8cc20211f2..5577a9bb98 100644
--- a/tests/validation/NEON/MatMul.cpp
+++ b/tests/validation/NEON/MatMul.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -177,7 +177,7 @@ TEST_SUITE_END() // FP32
 
 #ifdef ARM_COMPUTE_ENABLE_BF16
 /* Note : MatMul BF16 is enabled by specifying FP32 datatype and enabling the fast math setting */
-constexpr AbsoluteTolerance<float> tolerance_bf16(0.001f);
+constexpr AbsoluteTolerance<float> tolerance_bf16(0.02f);
 TEST_SUITE(BF16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFastMathFixture<float>, framework::DatasetMode::PRECOMMIT,
                        combine(
@@ -196,6 +196,24 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFastMathFixture, framework::Data
     // Validate output
     validate(Accessor(_target), _reference, tolerance_bf16);
 }
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEMatMulFastMathFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(
+                           datasets::LargeMatMulDataset(),
+                           make("TransposeA", { false, true }),
+                           make("TransposeB", { false, true }),
+                           make("DataType", DataType::F32),
+                           make("ActivationInfo", { ActivationLayerInfo() }),
+                           make("RunTimes", { 0 }),
+                           make("Settings", { CpuMatMulSettings().fast_math(true) }),
+                           make("LhsQInfo", { QuantizationInfo() }),
+                           make("RhsQInfo", { QuantizationInfo() }),
+                           make("OutQInfo", { QuantizationInfo() }))
+)
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_bf16, 0.01 /* tolerance_num */);
+}
 TEST_SUITE_END() // BF16
 #endif /* ARM_COMPUTE_ENABLE_BF16 */
 
-- 
cgit v1.2.1
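
Why the tolerance moves from 0.001f to 0.02f: bfloat16 stores only 8 bits
of significand, roughly two to three significant decimal digits per
operand, and the fp32bf16fp32 fast-math kernels round their FP32 inputs
to bf16 before multiplying, while the reference computes entirely in
FP32. The rounding errors accumulate along the K dimension of the matrix
multiply. The following standalone C++ sketch (not part of the patch; the
to_bf16 helper is illustrative and is not the library's conversion
routine) shows a mismatch of a few thousandths for a single 32-element
dot product, already beyond the old 0.001 absolute tolerance:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Round an FP32 value to bfloat16 precision (round-to-nearest-even on the
// top 16 bits), then widen it back to FP32. Illustrative helper only.
static float to_bf16(float x)
{
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits += 0x7FFFu + ((bits >> 16) & 1u); // round to nearest, ties to even
    bits &= 0xFFFF0000u;                   // drop the low 16 mantissa bits
    float y;
    std::memcpy(&y, &bits, sizeof(y));
    return y;
}

int main()
{
    constexpr int K = 32; // accumulation depth of one output element
    float ref = 0.0f, bf16 = 0.0f;
    for (int k = 0; k < K; ++k)
    {
        const float a = 0.1f; // 0.1 is not exactly representable in bf16
        const float b = 1.0f; // 1.0 is exact, so only 'a' is perturbed
        ref  += a * b;                   // full FP32 reference accumulation
        bf16 += to_bf16(a) * to_bf16(b); // operands rounded to bf16 first
    }
    // Prints roughly 3.200000 vs 3.203125: an absolute gap of about 0.003,
    // larger than the old 0.001 tolerance but within the relaxed 0.02.
    std::printf("fp32 ref = %f, bf16 path = %f, diff = %f\n", ref, bf16, bf16 - ref);
    return 0;
}

The new RunLarge case additionally passes 0.01 as the fourth argument to
validate() (annotated tolerance_num in the patch); in the validation
framework this is understood to allow a small fraction of elements, here
1%, to fall outside the absolute tolerance, which absorbs occasional
outliers in the larger nightly shapes.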