aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/kernels
ModeNameSize
-rw-r--r--a32_sgemm_8x6.hpp2855logplain
d---------a32_sgemm_8x6111logplain
-rw-r--r--a64_ffhybrid_bf16fp32_mmla_6x16.hpp3025logplain
d---------a64_ffhybrid_bf16fp32_mmla_6x1639logplain
-rw-r--r--a64_ffhybrid_fp16_mla_6x32.hpp2973logplain
d---------a64_ffhybrid_fp16_mla_6x3239logplain
-rw-r--r--a64_ffhybrid_fp32_mla_6x16.hpp2965logplain
d---------a64_ffhybrid_fp32_mla_6x1639logplain
-rw-r--r--a64_ffhybrid_fp32bf16fp32_mmla_4x24.hpp3110logplain
d---------a64_ffhybrid_fp32bf16fp32_mmla_4x2439logplain
-rw-r--r--a64_ffhybrid_fp32bf16fp32_mmla_6x16.hpp3105logplain
d---------a64_ffhybrid_fp32bf16fp32_mmla_6x1639logplain
-rw-r--r--a64_ffinterleaved_bf16fp32_dot_8x12.hpp2895logplain
d---------a64_ffinterleaved_bf16fp32_dot_8x1239logplain
-rw-r--r--a64_ffinterleaved_bf16fp32_mmla_8x12.hpp3173logplain
d---------a64_ffinterleaved_bf16fp32_mmla_8x1239logplain
-rw-r--r--a64_ffinterleaved_fp16_mla_8x24.hpp2848logplain
d---------a64_ffinterleaved_fp16_mla_8x2439logplain
-rw-r--r--a64_ffinterleaved_fp32_mla_8x12.hpp2842logplain
d---------a64_ffinterleaved_fp32_mla_8x1239logplain
-rw-r--r--a64_gemm_s16_8x12.hpp2499logplain
d---------a64_gemm_s16_8x1239logplain
-rw-r--r--a64_gemm_s8_4x4.hpp3110logplain
d---------a64_gemm_s8_4x439logplain
-rw-r--r--a64_gemm_s8_8x12.hpp3632logplain
d---------a64_gemm_s8_8x12110logplain
-rw-r--r--a64_gemm_u16_8x12.hpp2172logplain
d---------a64_gemm_u16_8x1239logplain
-rw-r--r--a64_gemm_u8_4x4.hpp3462logplain
d---------a64_gemm_u8_4x439logplain
-rw-r--r--a64_gemm_u8_8x12.hpp3963logplain
d---------a64_gemm_u8_8x12110logplain
-rw-r--r--a64_hgemm_8x24.hpp3193logplain
d---------a64_hgemm_8x24110logplain
-rw-r--r--a64_hybrid_bf16fp32_dot_6x16.hpp2931logplain
d---------a64_hybrid_bf16fp32_dot_6x1639logplain
-rw-r--r--a64_hybrid_bf16fp32_mmla_6x16.hpp2930logplain
d---------a64_hybrid_bf16fp32_mmla_6x1639logplain
-rw-r--r--a64_hybrid_fp16_mla_6x32.hpp3208logplain
d---------a64_hybrid_fp16_mla_6x3274logplain
-rw-r--r--a64_hybrid_fp32_mla_4x24.hpp3386logplain
d---------a64_hybrid_fp32_mla_4x2474logplain
-rw-r--r--a64_hybrid_fp32_mla_6x16.hpp3391logplain
d---------a64_hybrid_fp32_mla_6x1674logplain
-rw-r--r--a64_hybrid_fp32_mla_8x4.hpp2638logplain
d---------a64_hybrid_fp32_mla_8x474logplain
-rw-r--r--a64_hybrid_fp32bf16fp32_mmla_4x24.hpp2936logplain
d---------a64_hybrid_fp32bf16fp32_mmla_4x2439logplain
-rw-r--r--a64_hybrid_fp32bf16fp32_mmla_6x16.hpp2937logplain
d---------a64_hybrid_fp32bf16fp32_mmla_6x1639logplain
-rw-r--r--a64_hybrid_s8qa_dot_4x16.hpp3233logplain
d---------a64_hybrid_s8qa_dot_4x1674logplain
-rw-r--r--a64_hybrid_s8qa_mmla_4x16.hpp2903logplain
d---------a64_hybrid_s8qa_mmla_4x1639logplain
-rw-r--r--a64_hybrid_s8qs_dot_6x16.hpp3233logplain
d---------a64_hybrid_s8qs_dot_6x1674logplain
-rw-r--r--a64_hybrid_s8qs_mmla_6x16.hpp2903logplain
d---------a64_hybrid_s8qs_mmla_6x1639logplain
-rw-r--r--a64_hybrid_s8s32_dot_6x16.hpp3682logplain
d---------a64_hybrid_s8s32_dot_6x1674logplain
-rw-r--r--a64_hybrid_s8s32_mmla_6x16.hpp3254logplain
d---------a64_hybrid_s8s32_mmla_6x1639logplain
-rw-r--r--a64_hybrid_u8qa_dot_4x16.hpp3240logplain
d---------a64_hybrid_u8qa_dot_4x1674logplain
-rw-r--r--a64_hybrid_u8qa_mmla_4x16.hpp2910logplain
d---------a64_hybrid_u8qa_mmla_4x1639logplain
-rw-r--r--a64_hybrid_u8u32_dot_6x16.hpp3691logplain
d---------a64_hybrid_u8u32_dot_6x1674logplain
-rw-r--r--a64_hybrid_u8u32_mmla_6x16.hpp3263logplain
d---------a64_hybrid_u8u32_mmla_6x1639logplain
-rw-r--r--a64_interleaved_bf16fp32_dot_8x12.hpp2832logplain
d---------a64_interleaved_bf16fp32_dot_8x1273logplain
-rw-r--r--a64_interleaved_bf16fp32_mmla_8x12.hpp3461logplain
d---------a64_interleaved_bf16fp32_mmla_8x1275logplain
-rw-r--r--a64_interleaved_s8s32_mmla_8x12.hpp3426logplain
d---------a64_interleaved_s8s32_mmla_8x1275logplain
-rw-r--r--a64_interleaved_u8u32_mmla_8x12.hpp3433logplain
d---------a64_interleaved_u8u32_mmla_8x1275logplain
-rw-r--r--a64_sgemm_8x12.hpp4372logplain
d---------a64_sgemm_8x12180logplain
-rw-r--r--a64_sgemm_8x6.hpp2377logplain
d---------a64_sgemm_8x639logplain
-rw-r--r--a64_sgemv_pretransposed.hpp2572logplain
d---------a64_sgemv_pretransposed39logplain
-rw-r--r--a64_smallK_hybrid_fp32_mla_6x4.hpp2414logplain
d---------a64_smallK_hybrid_fp32_mla_6x439logplain
-rw-r--r--a64_smallK_hybrid_fp32_mla_8x4.hpp2414logplain
d---------a64_smallK_hybrid_fp32_mla_8x439logplain
-rw-r--r--a64_smallK_hybrid_s8s32_dot_6x4.hpp2721logplain
d---------a64_smallK_hybrid_s8s32_dot_6x474logplain
-rw-r--r--a64_smallK_hybrid_s8s32_dot_8x4.hpp2721logplain
d---------a64_smallK_hybrid_s8s32_dot_8x474logplain
-rw-r--r--a64_smallK_hybrid_u8u32_dot_6x4.hpp2735logplain
d---------a64_smallK_hybrid_u8u32_dot_6x474logplain
-rw-r--r--a64_smallK_hybrid_u8u32_dot_8x4.hpp2735logplain
d---------a64_smallK_hybrid_u8u32_dot_8x474logplain
-rw-r--r--sme2_gemv_bf16fp32_dot_16VL.hpp2362logplain
d---------sme2_gemv_bf16fp32_dot_16VL39logplain
-rw-r--r--sme2_gemv_fp16fp32fp16_dot_16VL.hpp2345logplain
d---------sme2_gemv_fp16fp32fp16_dot_16VL39logplain
-rw-r--r--sme2_gemv_fp32_mla_16VL.hpp2312logplain
d---------sme2_gemv_fp32_mla_16VL39logplain
-rw-r--r--sme2_gemv_fp32bf16fp32_dot_16VL.hpp2375logplain
d---------sme2_gemv_fp32bf16fp32_dot_16VL39logplain
-rw-r--r--sme2_gemv_s8qa_dot_16VL.hpp2341logplain
d---------sme2_gemv_s8qa_dot_16VL39logplain
-rw-r--r--sme2_gemv_u8qa_dot_16VL.hpp2347logplain
d---------sme2_gemv_u8qa_dot_16VL39logplain
-rw-r--r--sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL.hpp2815logplain
d---------sme2_interleaved_nomerge_bf16fp32_mopa_1VLx4VL39logplain
-rw-r--r--sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL.hpp2815logplain
d---------sme2_interleaved_nomerge_bf16fp32_mopa_2VLx2VL39logplain
-rw-r--r--sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL.hpp2815logplain
d---------sme2_interleaved_nomerge_bf16fp32_mopa_4VLx1VL39logplain
-rw-r--r--sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL.hpp2795logplain
d---------sme2_interleaved_nomerge_fp16fp32fp16_mopa_1VLx4VL39logplain
-rw-r--r--sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL.hpp2795logplain
d---------sme2_interleaved_nomerge_fp16fp32fp16_mopa_2VLx2VL39logplain
-rw-r--r--sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL.hpp2795logplain
d---------sme2_interleaved_nomerge_fp16fp32fp16_mopa_4VLx1VL39logplain
-rw-r--r--sme2_interleaved_nomerge_fp32_mopa_1VLx4VL.hpp2760logplain
d---------sme2_interleaved_nomerge_fp32_mopa_1VLx4VL39logplain
-rw-r--r--sme2_interleaved_nomerge_fp32_mopa_2VLx2VL.hpp2760logplain
d---------sme2_interleaved_nomerge_fp32_mopa_2VLx2VL39logplain
-rw-r--r--sme2_interleaved_nomerge_fp32_mopa_4VLx1VL.hpp2760logplain
d---------sme2_interleaved_nomerge_fp32_mopa_4VLx1VL39logplain
-rw-r--r--sme2_interleaved_nomerge_s8q_mopa_1VLx4VL.hpp2836logplain
d---------sme2_interleaved_nomerge_s8q_mopa_1VLx4VL39logplain
-rw-r--r--sme2_interleaved_nomerge_s8q_mopa_2VLx2VL.hpp2836logplain
d---------sme2_interleaved_nomerge_s8q_mopa_2VLx2VL39logplain
-rw-r--r--sme2_interleaved_nomerge_s8q_mopa_4VLx1VL.hpp2836logplain
d---------sme2_interleaved_nomerge_s8q_mopa_4VLx1VL39logplain
-rw-r--r--sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL.hpp2798logplain
d---------sme2_interleaved_nomerge_s8s32_mopa_1VLx4VL39logplain
-rw-r--r--sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL.hpp2798logplain
d---------sme2_interleaved_nomerge_s8s32_mopa_2VLx2VL39logplain
-rw-r--r--sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL.hpp2798logplain
d---------sme2_interleaved_nomerge_s8s32_mopa_4VLx1VL39logplain
-rw-r--r--sme2_interleaved_nomerge_u8q_mopa_1VLx4VL.hpp2846logplain
d---------sme2_interleaved_nomerge_u8q_mopa_1VLx4VL39logplain
-rw-r--r--sme2_interleaved_nomerge_u8q_mopa_2VLx2VL.hpp2846logplain
d---------sme2_interleaved_nomerge_u8q_mopa_2VLx2VL39logplain
-rw-r--r--sme2_interleaved_nomerge_u8q_mopa_4VLx1VL.hpp2846logplain
d---------sme2_interleaved_nomerge_u8q_mopa_4VLx1VL39logplain
-rw-r--r--sve_ffhybrid_bf16fp32_mmla_6x4VL.hpp3103logplain
d---------sve_ffhybrid_bf16fp32_mmla_6x4VL39logplain
-rw-r--r--sve_ffhybrid_fp16_mla_6x4VL.hpp3313logplain
d---------sve_ffhybrid_fp16_mla_6x4VL76logplain
-rw-r--r--sve_ffhybrid_fp32_mla_6x4VL.hpp3303logplain
d---------sve_ffhybrid_fp32_mla_6x4VL76logplain
-rw-r--r--sve_ffhybrid_fp32bf16fp32_mmla_4x6VL.hpp3189logplain
d---------sve_ffhybrid_fp32bf16fp32_mmla_4x6VL39logplain
-rw-r--r--sve_ffinterleaved_bf16fp32_mmla_8x3VL.hpp3266logplain
d---------sve_ffinterleaved_bf16fp32_mmla_8x3VL39logplain
-rw-r--r--sve_ffinterleaved_fp16_mla_8x3VL.hpp3197logplain
d---------sve_ffinterleaved_fp16_mla_8x3VL76logplain
-rw-r--r--sve_ffinterleaved_fp32_mla_8x3VL.hpp3189logplain
d---------sve_ffinterleaved_fp32_mla_8x3VL76logplain
-rw-r--r--sve_hybrid_bf16fp32_dot_6x4VL.hpp2980logplain
d---------sve_hybrid_bf16fp32_dot_6x4VL39logplain
-rw-r--r--sve_hybrid_bf16fp32_mmla_6x4VL.hpp2979logplain
d---------sve_hybrid_bf16fp32_mmla_6x4VL39logplain
-rw-r--r--sve_hybrid_fp16_mla_6x4VL.hpp3266logplain
d---------sve_hybrid_fp16_mla_6x4VL76logplain
-rw-r--r--sve_hybrid_fp32_mla_6x4VL.hpp3256logplain
d---------sve_hybrid_fp32_mla_6x4VL76logplain
-rw-r--r--sve_hybrid_fp32_mla_8x1VL.hpp2669logplain
d---------sve_hybrid_fp32_mla_8x1VL76logplain
-rw-r--r--sve_hybrid_fp32bf16fp32_mmla_4x6VL.hpp2987logplain
d---------sve_hybrid_fp32bf16fp32_mmla_4x6VL39logplain
-rw-r--r--sve_hybrid_fp32bf16fp32_mmla_6x4VL.hpp2986logplain
d---------sve_hybrid_fp32bf16fp32_mmla_6x4VL39logplain
-rw-r--r--sve_hybrid_s8qa_dot_4x4VL.hpp2882logplain
d---------sve_hybrid_s8qa_dot_4x4VL39logplain
-rw-r--r--sve_hybrid_s8qa_mmla_4x4VL.hpp2881logplain
d---------sve_hybrid_s8qa_mmla_4x4VL39logplain
-rw-r--r--sve_hybrid_s8qs_dot_6x4VL.hpp2882logplain
d---------sve_hybrid_s8qs_dot_6x4VL39logplain
-rw-r--r--sve_hybrid_s8qs_mmla_6x4VL.hpp2881logplain
d---------sve_hybrid_s8qs_mmla_6x4VL39logplain
-rw-r--r--sve_hybrid_s8s32_dot_6x4VL.hpp3729logplain
d---------sve_hybrid_s8s32_dot_6x4VL76logplain
-rw-r--r--sve_hybrid_s8s32_mmla_6x4VL.hpp3306logplain
d---------sve_hybrid_s8s32_mmla_6x4VL39logplain
-rw-r--r--sve_hybrid_u8qa_dot_4x4VL.hpp2890logplain
d---------sve_hybrid_u8qa_dot_4x4VL39logplain
-rw-r--r--sve_hybrid_u8qa_mmla_4x4VL.hpp2889logplain
d---------sve_hybrid_u8qa_mmla_4x4VL39logplain
-rw-r--r--sve_hybrid_u8u32_dot_6x4VL.hpp3734logplain
d---------sve_hybrid_u8u32_dot_6x4VL76logplain
-rw-r--r--sve_hybrid_u8u32_mmla_6x4VL.hpp3316logplain
d---------sve_hybrid_u8u32_mmla_6x4VL39logplain
-rw-r--r--sve_interleaved_bf16fp32_dot_8x3VL.hpp2885logplain
d---------sve_interleaved_bf16fp32_dot_8x3VL39logplain
-rw-r--r--sve_interleaved_bf16fp32_mmla_8x3VL.hpp3243logplain
d---------sve_interleaved_bf16fp32_mmla_8x3VL39logplain
-rw-r--r--sve_interleaved_fp16_mla_8x3VL.hpp3194logplain
d---------sve_interleaved_fp16_mla_8x3VL76logplain
-rw-r--r--sve_interleaved_fp32_mla_8x3VL.hpp3188logplain
d---------sve_interleaved_fp32_mla_8x3VL76logplain
-rw-r--r--sve_interleaved_fp32_mmla_8x3VL.hpp2170logplain
d---------sve_interleaved_fp32_mmla_8x3VL39logplain
-rw-r--r--sve_interleaved_s8s32_dot_8x3VL.hpp3656logplain
d---------sve_interleaved_s8s32_dot_8x3VL76logplain
-rw-r--r--sve_interleaved_s8s32_mmla_8x3VL.hpp3215logplain
d---------sve_interleaved_s8s32_mmla_8x3VL39logplain
-rw-r--r--sve_interleaved_u8u32_dot_8x3VL.hpp3664logplain
d---------sve_interleaved_u8u32_dot_8x3VL76logplain
-rw-r--r--sve_interleaved_u8u32_mmla_8x3VL.hpp3223logplain
d---------sve_interleaved_u8u32_mmla_8x3VL39logplain