From 4ee8b1599dbaf7634d25607fa5ac96ba3dc6b0f2 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Fri, 16 Jul 2021 16:16:43 +0100
Subject: Update GEMM assembly kernels

- Introduce Fp32 kernels with internal calculations in Bfloat16 when
  fast_mode is enabled
- Improve kernel selection heuristics

Signed-off-by: Georgios Pinitas
Change-Id: I68a9e7e862b6fd2721b46e0d7cc791091c4ab279
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5965
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
---
 src/core/NEON/kernels/arm_gemm/transforms/list.hpp | 29 +++++++++++++++++++---
 1 file changed, 25 insertions(+), 4 deletions(-)

(limited to 'src/core/NEON/kernels/arm_gemm/transforms/list.hpp')

diff --git a/src/core/NEON/kernels/arm_gemm/transforms/list.hpp b/src/core/NEON/kernels/arm_gemm/transforms/list.hpp
index e092c729ba..adbaa6cf2f 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/list.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/list.hpp
@@ -22,7 +22,28 @@
  * SOFTWARE.
  */
 #include "a32_transpose_interleave_8way_32bit.hpp"
-#include "a64_transpose_interleave_12way_16bit.hpp"
-#include "a64_transpose_interleave_12way_half_to_float.hpp"
-#include "a64_transpose_interleave_24way_16bit.hpp"
-#include "a64_transpose_interleave_8way_32bit.hpp"
+#include "a64_transpose_interleave_12_1x4.hpp"
+#include "a64_transpose_interleave_12_1x8.hpp"
+#include "a64_transpose_interleave_12_2x2.hpp"
+#include "a64_transpose_interleave_12_2x4_fp32bf16.hpp"
+#include "a64_transpose_interleave_12_2x4.hpp"
+#include "a64_transpose_interleave_128.hpp"
+#include "a64_transpose_interleave_12_s8s16.hpp"
+#include "a64_transpose_interleave_12_u8u16.hpp"
+#include "a64_transpose_interleave_16_1x4.hpp"
+#include "a64_transpose_interleave_16_1x8.hpp"
+#include "a64_transpose_interleave_16_2x2.hpp"
+#include "a64_transpose_interleave_16_2x4.hpp"
+#include "a64_transpose_interleave_16_2x4_fp32bf16.hpp"
+#include "a64_transpose_interleave_16.hpp"
+#include "a64_transpose_interleave_24_bf16fp32.hpp"
+#include "a64_transpose_interleave_24_fp16fp32.hpp"
+#include "a64_transpose_interleave_24_2x4_fp32bf16.hpp"
+#include "a64_transpose_interleave_24.hpp"
+#include "a64_transpose_interleave_32_1x4.hpp"
+#include "a64_transpose_interleave_32_2x2.hpp"
+#include "a64_transpose_interleave_4_1x16.hpp"
+#include "a64_transpose_interleave_4_1x4.hpp"
+#include "a64_transpose_interleave_48.hpp"
+#include "a64_transpose_interleave_64.hpp"
+#include "a64_transpose_interleave_96.hpp"
-- 
cgit v1.2.1