From c0b6f76561580414f08633a804fc548ccad65659 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 2 Nov 2020 01:37:17 +0000 Subject: COMPMID-3776: Indirect GEMM Signed-off-by: Georgios Pinitas Change-Id: I51a1b0f098bc3a8c408c50c92221e4df3061e12c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4343 Tested-by: Arm Jenkins Reviewed-by: Sang-Hoon Park Reviewed-by: Michele Di Giorgio Comments-Addressed: Arm Jenkins --- Android.bp | 129 +++++++++++++++++++++++++++++++------------------------------ 1 file changed, 66 insertions(+), 63 deletions(-) (limited to 'Android.bp') diff --git a/Android.bp b/Android.bp index 8d931c23c8..98b00cf5ba 100644 --- a/Android.bp +++ b/Android.bp @@ -367,10 +367,12 @@ cc_library_static { "src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp", "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", + "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp", "src/core/NEON/kernels/arm_gemm/mergeresults.cpp", "src/core/NEON/kernels/arm_gemm/misc.cpp", "src/core/NEON/kernels/arm_gemm/quantized.cpp", - "src/core/NEON/kernels/assembly/INEGEMMWrapperKernel.cpp", + "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp", + "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp", "src/core/NEON/kernels/convolution/common/padding.cpp", "src/core/NEON/kernels/convolution/common/qasymm8.cpp", "src/core/NEON/kernels/convolution/common/qsymm8.cpp", @@ -669,9 +671,9 @@ cc_library_static { "src/runtime/NEON/functions/NEFuseBatchNormalization.cpp", "src/runtime/NEON/functions/NEGEMM.cpp", "src/runtime/NEON/functions/NEGEMMAssemblyDispatch.cpp", + "src/runtime/NEON/functions/NEGEMMConv2d.cpp", "src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp", "src/runtime/NEON/functions/NEGEMMInterleave4x4.cpp", - "src/runtime/NEON/functions/NEGEMMLowpAssemblyMatrixMultiplyCore.cpp", "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp", "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp", "src/runtime/NEON/functions/NEGEMMTranspose1xW.cpp", @@ -727,7 +729,6 @@ cc_library_static { "src/runtime/NEON/functions/NEScale.cpp", "src/runtime/NEON/functions/NEScharr3x3.cpp", "src/runtime/NEON/functions/NESelect.cpp", - "src/runtime/NEON/functions/NESimpleAssemblyFunction.cpp", "src/runtime/NEON/functions/NESlice.cpp", "src/runtime/NEON/functions/NESobel3x3.cpp", "src/runtime/NEON/functions/NESobel5x5.cpp", @@ -779,69 +780,71 @@ cc_library_static { }, arm64: { srcs: [ - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_12x8/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_12x8/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_24x8/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_16x4/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_16x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_16x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_12x8/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a53.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/a55r1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_12x8/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_gemv_fp32_mla_32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_4x6/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_4x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_4x6/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_4x6/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_4x8/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_4x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_4x6/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_4x6/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_4x8/a55.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_4x8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_4VLx4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_4VLx4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6VLx2/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_8VLx2/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_4VLx4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_4VLx4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mmla_4VLx4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_4VLx4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_4VLx4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_3VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_1VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_1VLx8/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_1VLx8/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp", ], }, -- cgit v1.2.1