From 905786ea0c1abb2b8df36c56eae93a97823cace1 Mon Sep 17 00:00:00 2001 From: Milos Puzovic Date: Tue, 26 Mar 2024 14:34:30 +0000 Subject: Added new NEON fixed format fast math mode hybrid kernel with maximum height of 6 for accumulation and updated heuristics Change-Id: Ib52ea6825e164f4a8b8422eab7991b50af0b0d7c Signed-off-by: Milos Puzovic Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11354 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Benchmark: Arm Jenkins --- src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp') diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp index 44a7bb894a..e85dd59425 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2023 Arm Limited. + * Copyright (c) 2017-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,6 +34,7 @@ #ifdef ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS #include "kernels/a64_ffhybrid_fp32_mla_6x16.hpp" #include "kernels/a64_ffhybrid_fp32bf16fp32_mmla_4x24.hpp" +#include "kernels/a64_ffhybrid_fp32bf16fp32_mmla_6x16.hpp" #include "kernels/a64_ffinterleaved_bf16fp32_mmla_8x12.hpp" #include "kernels/a64_ffinterleaved_fp32_mla_8x12.hpp" #endif // ARM_COMPUTE_ENABLE_FIXED_FORMAT_KERNELS @@ -350,6 +351,14 @@ GemmImplementation::with_estimate( [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat::estimate_cycles(args); }, [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat(args); } ), +GemmImplementation::with_estimate( + GemmMethod::GEMM_HYBRID, + "a64_ffhybrid_fp32bf16fp32_mmla_6x16", + KernelWeightFormat::VL256_BL64_BF16, + [](const GemmArgs &args) { return args._fast_mode && args._ci->has_bf16(); }, + [](const GemmArgs &args) { return GemmHybridIndirectFixedFormat::estimate_cycles(args); }, + [](const GemmArgs &args) { return new GemmHybridIndirectFixedFormat(args); } +), #endif // BF16 GemmImplementation::with_estimate( GemmMethod::GEMM_INTERLEAVED, -- cgit v1.2.1