diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-07-16 16:16:43 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-07-22 02:25:50 +0000 |
commit | 4ee8b1599dbaf7634d25607fa5ac96ba3dc6b0f2 (patch) | |
tree | 2f8362d33cdad4212f4b96995681c68184c759e1 /Android.bp | |
parent | 59fd7a722e5bc7e85309d6200bc37a772721a719 (diff) | |
download | ComputeLibrary-4ee8b1599dbaf7634d25607fa5ac96ba3dc6b0f2.tar.gz |
Update GEMM assembly kernels
- Introduce Fp32 kernels with internal calculations in Bfloat16 when
fast_mode is enabled
- Improve kernel selection heuristics
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I68a9e7e862b6fd2721b46e0d7cc791091c4ab279
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5965
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'Android.bp')
-rw-r--r-- | Android.bp | 45 |
1 files changed, 43 insertions, 2 deletions
diff --git a/Android.bp b/Android.bp index 1d9ec1c9c1..e5bb7a6a80 100644 --- a/Android.bp +++ b/Android.bp @@ -198,12 +198,15 @@ cc_library_static { "src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp", "src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp", "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp", + "src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp", "src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp", "src/core/NEON/kernels/arm_gemm/mergeresults.cpp", "src/core/NEON/kernels/arm_gemm/misc.cpp", "src/core/NEON/kernels/arm_gemm/quantized.cpp", "src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp", "src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp", + "src/core/NEON/kernels/arm_gemm/transform-sve.cpp", + "src/core/NEON/kernels/arm_gemm/transform.cpp", "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp", "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp", "src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp", @@ -838,30 +841,52 @@ cc_library_static { "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/a64_gemv_fp32_mla_32/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_fp16_mla_8x24/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_fp16_mla_8x24/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_fp16_mla_8x24/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_fp32_mla_8x12/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_fp32_mla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_fp32_mla_8x12/x1.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_dot_8x12/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_dot_8x12/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_dot_8x12/a55.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_dot_8x12/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_dot_8x12/x1.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp", @@ -880,23 +905,39 @@ cc_library_static { "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp", "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp", - "src/core/NEON/kernels/arm_gemm/kernels/sve_gemv_fp32_mla_8VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32_mla_8x1VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_4x6VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp32bf16fp32_mmla_6x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qa_mmla_4x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8qs_mmla_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_s8s32_mmla_6x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_dot_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8qa_mmla_4x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_dot_6x4VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_u8u32_mmla_6x4VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_dot_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_bf16fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp16_mla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mla_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_fp32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_dot_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_s8s32_mmla_8x3VL/generic.cpp", + "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/a64fx.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_dot_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_interleaved_u8u32_mmla_8x3VL/generic.cpp", "src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_fp32_mla_8x1VL/generic.cpp", |