From b2758f35da97319fd15722485e9b4ba7b35c8cfa Mon Sep 17 00:00:00 2001 From: David Mansell Date: Thu, 30 Mar 2023 19:10:52 +0100 Subject: Add FP16 depthwise kernels for SME2 Resolves: COMPMID-5988 Change-Id: I93e78edf31c9eec8242ccbb8c3c768f46a7c7c38 Signed-off-by: David Mansell Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9456 Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- src/BUILD.bazel | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'src/BUILD.bazel') diff --git a/src/BUILD.bazel b/src/BUILD.bazel index 26acc14a68..e0140addb7 100644 --- a/src/BUILD.bazel +++ b/src/BUILD.bazel @@ -129,6 +129,16 @@ filegroup( srcs = ["core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp", "core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp", "core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp", + "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp", "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp", "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp", "core/NEON/kernels/arm_conv/depthwise/kernels/sme2_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp", @@ -741,6 +751,8 @@ filegroup( "cpu/kernels/elementwise_unary/generic/neon/impl.cpp", "cpu/kernels/elementwise_unary/generic/neon/integer.cpp", "cpu/kernels/elementwise_unary/generic/neon/q8.cpp", + "cpu/kernels/elementwise_unary/generic/neon/qasymm8.cpp", + "cpu/kernels/elementwise_unary/generic/neon/qasymm8_signed.cpp", "cpu/kernels/floor/neon/fp16.cpp", "cpu/kernels/floor/neon/fp32.cpp", "cpu/kernels/fuse_batch_normalization/generic/fp16.cpp", -- cgit v1.2.1