From b2758f35da97319fd15722485e9b4ba7b35c8cfa Mon Sep 17 00:00:00 2001 From: David Mansell Date: Thu, 30 Mar 2023 19:10:52 +0100 Subject: Add FP16 depthwise kernels for SME2 Resolves: COMPMID-5988 Change-Id: I93e78edf31c9eec8242ccbb8c3c768f46a7c7c38 Signed-off-by: David Mansell Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9456 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/CMakeLists.txt | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'src/CMakeLists.txt') diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 336d2cd5cc..28028e85ef 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -108,6 +108,16 @@ target_sources( core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp + core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp @@ -733,6 +743,8 @@ target_sources( cpu/kernels/elementwise_unary/generic/neon/impl.cpp cpu/kernels/elementwise_unary/generic/neon/integer.cpp cpu/kernels/elementwise_unary/generic/neon/q8.cpp + cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp + cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp cpu/kernels/floor/neon/fp16.cpp cpu/kernels/floor/neon/fp32.cpp cpu/kernels/fuse_batch_normalization/generic/fp16.cpp @@ -962,4 +974,5 @@ target_sources( runtime/Tensor.cpp runtime/TensorAllocator.cpp runtime/Utils.cpp -) \ No newline at end of file +) + \ No newline at end of file -- cgit v1.2.1