From 2aec5f1870b6cd5edd7de6403b5cf75530eb77f5 Mon Sep 17 00:00:00 2001
From: Gunes Bayir
Date: Tue, 23 Jan 2024 17:19:44 +0000
Subject: Fix tolerance issue in BF16 MatMul tests

BF16 kernels are not expected to meet the same accuracy standards as
full float kernels. The reference implementation is a standard
floating-point implementation, so small mismatches are expected.

We increase the tolerance of the MatMul BF16 tests and add more tests
to cover more shapes. Previously, the only bf16 kernel exercised was
a64_hybrid_fp32bf16fp32_mmla_4x24. With the new shapes, the heuristics
also select a64_hybrid_fp32bf16fp32_mmla_6x16 and stress that kernel as
well, covering every implementation.

Resolves: COMPMID-6654
Signed-off-by: Gunes Bayir
Change-Id: I15342606912013c123b94c7e0ea2e6bbb25680d7
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11014
Benchmark: Arm Jenkins
Reviewed-by: Jakub Sujak
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 tests/validation/NEON/MatMul.cpp | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/tests/validation/NEON/MatMul.cpp b/tests/validation/NEON/MatMul.cpp
index 8cc20211f2..5577a9bb98 100644
--- a/tests/validation/NEON/MatMul.cpp
+++ b/tests/validation/NEON/MatMul.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -177,7 +177,7 @@ TEST_SUITE_END() // FP32
 
 #ifdef ARM_COMPUTE_ENABLE_BF16
 /* Note : MatMul BF16 is enabled by specifying FP32 datatype and enabling the fast math setting */
-constexpr AbsoluteTolerance<float> tolerance_bf16(0.001f);
+constexpr AbsoluteTolerance<float> tolerance_bf16(0.02f);
 TEST_SUITE(BF16)
 FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFastMathFixture<float>, framework::DatasetMode::PRECOMMIT,
                        combine(
@@ -196,6 +196,24 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEMatMulFastMathFixture, framework::Data
     // Validate output
     validate(Accessor(_target), _reference, tolerance_bf16);
 }
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEMatMulFastMathFixture<float>, framework::DatasetMode::NIGHTLY,
+                       combine(
+                           datasets::LargeMatMulDataset(),
+                           make("TransposeA", { false, true }),
+                           make("TransposeB", { false, true }),
+                           make("DataType", DataType::F32),
+                           make("ActivationInfo", { ActivationLayerInfo() }),
+                           make("RunTimes", { 0 }),
+                           make("Settings", { CpuMatMulSettings().fast_math(true) }),
+                           make("LhsQInfo", { QuantizationInfo() }),
+                           make("RhsQInfo", { QuantizationInfo() }),
+                           make("OutQInfo", { QuantizationInfo() }))
+)
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_bf16, 0.01 /* tolerance_num */);
+}
 TEST_SUITE_END() // BF16
 #endif /* ARM_COMPUTE_ENABLE_BF16 */
 
-- 
cgit v1.2.1
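
Why the tolerance moves from 0.001f to 0.02f: bfloat16 stores only 8 bits
of significand, roughly two to three significant decimal digits per
operand, and the fp32bf16fp32 fast-math kernels round their FP32 inputs
to bf16 before multiplying, while the reference computes entirely in
FP32. The rounding errors accumulate along the K dimension of the matrix
multiply. The following standalone C++ sketch (not part of the patch; the
to_bf16 helper is illustrative and is not the library's conversion
routine) shows a mismatch of a few thousandths for a single 32-element
dot product, already beyond the old 0.001 absolute tolerance:

#include <cstdint>
#include <cstdio>
#include <cstring>

// Round an FP32 value to bfloat16 precision (round-to-nearest-even on the
// top 16 bits), then widen it back to FP32. Illustrative helper only.
static float to_bf16(float x)
{
    uint32_t bits;
    std::memcpy(&bits, &x, sizeof(bits));
    bits += 0x7FFFu + ((bits >> 16) & 1u); // round to nearest, ties to even
    bits &= 0xFFFF0000u;                   // drop the low 16 mantissa bits
    float y;
    std::memcpy(&y, &bits, sizeof(y));
    return y;
}

int main()
{
    constexpr int K = 32; // accumulation depth of one output element
    float ref = 0.0f, bf16 = 0.0f;
    for (int k = 0; k < K; ++k)
    {
        const float a = 0.1f; // 0.1 is not exactly representable in bf16
        const float b = 1.0f; // 1.0 is exact, so only 'a' is perturbed
        ref  += a * b;                   // full FP32 reference accumulation
        bf16 += to_bf16(a) * to_bf16(b); // operands rounded to bf16 first
    }
    // Prints roughly 3.200000 vs 3.203125: an absolute gap of about 0.003,
    // larger than the old 0.001 tolerance but within the relaxed 0.02.
    std::printf("fp32 ref = %f, bf16 path = %f, diff = %f\n", ref, bf16, bf16 - ref);
    return 0;
}

The new RunLarge case additionally passes 0.01 as the fourth argument to
validate() (annotated tolerance_num in the patch); in the validation
framework this is understood to allow a small fraction of elements, here
1%, to fall outside the absolute tolerance, which absorbs occasional
outliers in the larger nightly shapes.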