diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-01-14 13:43:40 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-01-18 12:39:30 +0000 |
commit | 33e03074c36d85de87e9032a2583b04ce8ddcd6b (patch) | |
tree | 4442ec5b5022fa5681d689e6ccf3a6423efa8a93 /src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp | |
parent | 8d5337ef18901f1b54d0c062ae7486bc5a4c6610 (diff) | |
download | ComputeLibrary-33e03074c36d85de87e9032a2583b04ce8ddcd6b.tar.gz |
Cycle estimate-based kernel selection for dot product quantized s8/u8 kernels
Resolves: COMPMID-3990
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: If840c79209940535450f4ea1cbf6b0ec646a168e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4866
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp | 16 |
1 files changed, 10 insertions, 6 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp index 2b3e170a3b..02d9486cc6 100644 --- a/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp +++ b/src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp @@ -314,27 +314,25 @@ template void Interleave<6, 1, VLType::None>(float *, const bfloat16 *, size_t, /* AArch64 */ #ifdef __aarch64__ -/* FP64 */ -/* NEON/SVE implementation (height 8) */ -template void IndirectInterleave<8, 1, VLType::None>(double *, const double * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -template void ConvolutionInterleave<8, 1, VLType::None>(double *, const double *, size_t, const convolver<double> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); -template void Interleave<8, 1, VLType::None>(double *, const double *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); - /* FP32 */ /* NEON/SVE implementation (height 8) */ template void IndirectInterleave<8, 1, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 1, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 1, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#if defined(__ARM_FEATURE_SVE) && defined(MMLA_FP32) /* FMMLA */ template void IndirectInterleave<8, 2, VLType::None>(float *, const float * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 2, VLType::None>(float *, const float *, size_t, const convolver<float> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 2, VLType::None>(float *, const float *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#endif // SVE && MMLA_FP32 /* FP16 */ +#if defined(FP16_KERNELS) || defined(__ARM_FEATURE_FP16_VECTOR_ARITHMETIC) template void IndirectInterleave<8, 1, VLType::None>(__fp16 *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 1, VLType::None>(__fp16 *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 1, VLType::None>(__fp16 *, const __fp16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#endif // FP16_KERNELS ar __ARM_FEATURE_FP16_VECTOR_ARITHMETIC template void IndirectInterleave<8, 1, VLType::None>(float *, const __fp16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, const convolver<__fp16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); @@ -342,6 +340,7 @@ template void Interleave<8, 1, VLType::None>(float *, const __fp16 *, size_t, un /* BF16 */ /* NEON/SVE BFDOT */ +#ifdef V8P6_BF template void IndirectInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); @@ -349,6 +348,7 @@ template void Interleave<8, 2, VLType::None>(bfloat16 *, const bfloat16 *, size_ template void IndirectInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void ConvolutionInterleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, const convolver<bfloat16> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(bfloat16 *, const bfloat16 *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#endif // V8P6_BF /* NEON/SVE using FP32 kernel */ template void IndirectInterleave<8, 1, VLType::None>(float *, const bfloat16 * const * const *, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); @@ -375,10 +375,12 @@ template void IndirectInterleave<8, 4, VLType::None>(int8_t *, const int8_t * co template void ConvolutionInterleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#ifdef MMLA_INT8 /* MMLA SMMLA (height 8, block 8) */ template void IndirectInterleave<8, 8, VLType::None>(int8_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); template void ConvolutionInterleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, const convolver<int8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 8, VLType::None>(int8_t *, const int8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#endif // MMLA_INT8 /* NEON SDOT (height 8, block 1) */ template void IndirectInterleave<8, 1, VLType::None>(int16_t *, const int8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); @@ -395,10 +397,12 @@ template void IndirectInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t * template void ConvolutionInterleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 4, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#ifdef MMLA_INT8 /* MMLA SMMLA (height 8, block 8) */ template void IndirectInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); template void ConvolutionInterleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, const convolver<uint8_t> &, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); template void Interleave<8, 8, VLType::None>(uint8_t *, const uint8_t *, size_t, unsigned int, unsigned int, unsigned int, unsigned int, bool, int32_t); +#endif // MMLA_INT8 /* NEON 16-bit (height 8, block 1) */ template void IndirectInterleave<8, 1, VLType::None>(uint16_t *, const uint8_t * const * const *, unsigned int, unsigned int, unsigned int y0, unsigned int ymax, unsigned int k0, unsigned int kmax, bool, int32_t); |