From 5aa1a0b7ca5eed010e4b297a95b1c4851f741328 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 2 Jul 2020 20:02:20 +0100 Subject: COMPMID-3324: Clean GEMM kernels Signed-off-by: Georgios Pinitas Change-Id: I170de1671e061a78740caee31fb4a1b8642c1369 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3505 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio --- src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp | 30 ++++++++++++++++++++++------ 1 file changed, 24 insertions(+), 6 deletions(-) (limited to 'src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp') diff --git a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp index 8bef2b7bae..1d5b97b41a 100644 --- a/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp +++ b/src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,7 +44,9 @@ #include "kernels/a64_sgemv_trans.hpp" #include "kernels/sve_hybrid_fp32_mla_4VLx4.hpp" +#include "kernels/sve_hybrid_fp32_mmla_4VLx4.hpp" #include "kernels/sve_interleaved_fp32_mla_3VLx8.hpp" +#include "kernels/sve_interleaved_fp32_mmla_3VLx8.hpp" #include "kernels/sve_native_fp32_mla_4VLx4.hpp" #include "kernels/sve_smallK_hybrid_fp32_mla_1VLx8.hpp" @@ -75,6 +77,23 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return new GemvNativeTransposed(args); } }, +#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32) +{ + GemmMethod::GEMM_HYBRID, + "hybrid_fp32_mmla_4VLx4", + [](const GemmArgs &args) { return (args._Ksize >= 4) && !args._trA && args._pretransposed_hint; }, + [](const GemmArgs &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || ((args._nmulti > 1) && ((args._Msize / args._maxthreads) < 8)); }, + [](const GemmArgs &args) { return new GemmHybrid(args); } +}, +{ + GemmMethod::GEMM_INTERLEAVED, + "interleaved_fp32_mmla_3VLx8", 
+ [](const GemmArgs &args) { return (args._Ksize>4); }, + nullptr, + [](const GemmArgs &args) { return new GemmInterleaved(args); } +}, +#endif // __ARM_FEATURE_SVE && MMLA_FP32 + #ifdef __ARM_FEATURE_SVE // SVE smallk / native / hybrid methods { @@ -124,7 +143,7 @@ static const GemmImplementation gemm_fp32_methods[] = }, { GemmMethod::GEMM_HYBRID, - "hybrid_fp32_mla_16x4_normal", + "hybrid_fp32_mla_16x4", [](const GemmArgs &args) { return (args._Ksize >= 4) && !args._trA && args._pretransposed_hint; }, [](const GemmArgs &args) { return ((args._Ksize <= 256) && (args._Nsize <= 256)) || (args._Msize < 16) || (args._nmulti > 1); }, [](const GemmArgs &args) { return new GemmHybrid(args); } @@ -146,7 +165,7 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return new GemmInterleaved(args); } }, #endif // __ARM_FEATURE_SVE -//Pretranpose, 2D split +// Pretranposed, 2D split { GemmMethod::GEMM_INTERLEAVED_2D, "sgemm_12x8_pretranspose_2d", @@ -154,7 +173,7 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return args._maxthreads >= 8; }, [](const GemmArgs &args) { return new GemmInterleavedPretransposed2d(args); } }, -//Tranpose, 2D split, no blockmanager +// Non-pretransposed, 2D split (no buffer manager) { GemmMethod::GEMM_INTERLEAVED_2D, "sgemm_12x8_2d", @@ -162,7 +181,7 @@ static const GemmImplementation gemm_fp32_methods[] = [](const GemmArgs &args) { return (!args._pretransposed_hint) && (args._maxthreads >= 8); }, [](const GemmArgs &args) { return new GemmInterleaved2d(args); } }, -//Tranpose, 1D split, with blockmanager +// 1D split (with pretransposed or not) { GemmMethod::GEMM_INTERLEAVED, "sgemm_12x8_1d", @@ -170,7 +189,6 @@ static const GemmImplementation gemm_fp32_methods[] = nullptr, [](const GemmArgs &args) { return new GemmInterleaved(args); } }, - #endif // __aarch64__ #ifdef __arm__ -- cgit v1.2.1