aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
diff options
context:
space:
mode:
authorViet-Hoa Do <viet-hoa.do@arm.com>2022-06-01 11:47:14 +0100
committerViet-Hoa Do <viet-hoa.do@arm.com>2022-11-28 16:57:42 +0000
commit03b2971ac69a86f10a1566938d1a25afee15746c (patch)
treeaec7cfc047e1da278b4b71a706cda7b1b0faa158 /src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
parent7dc0234331f2150a6b4ac5c2b49de419870f7cf5 (diff)
downloadComputeLibrary-03b2971ac69a86f10a1566938d1a25afee15746c.tar.gz
Integrate SME2 kernels
* Add SME/SME2 detection. * Integrate SME2 implementation for: - Normal convolution - Winograd - Depthwise convolution - Pooling Resolves: COMPMID-5700 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I2f1ca1d05f8cfeee9309ed1c0a36096a4a6aad5c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8692 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp')
-rw-r--r--src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp8
1 files changed, 8 insertions, 0 deletions
diff --git a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
index 73abe8b945..a221aee5d8 100644
--- a/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
+++ b/src/core/NEON/kernels/convolution/winograd/output_transforms_fp32.cpp
@@ -29,6 +29,11 @@ namespace arm_conv {
namespace winograd {
namespace output_transform {
+#if defined(__aarch64__)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SME)
+void sme_fp32_mopa_4x4_3x3(unsigned int, const float *, size_t, const float *, float *, size_t, size_t, float, float);
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SME)
+#endif // defined(__aarch64__)
void arm_fp32_4x4_3x3(unsigned int, const float *, size_t, const float *, float *, size_t, size_t, float, float);
void arm_fp32_2x2_3x3(unsigned int, const float *, size_t, const float *, float *, size_t, size_t, float, float);
void arm_fp32_2x2_5x5(unsigned int, const float *, size_t, const float *, float *, size_t, size_t, float, float);
@@ -44,6 +49,9 @@ void arm_fp32_1x2_1x7(unsigned int, const float *, size_t, const float *, float
static const TransformImplementation<float> transforms_fp32[] = {
#if defined(__aarch64__)
+#if defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SME)
+ { IMPL(4, 4, 3, 3, sme_fp32_mopa_4x4_3x3, Unpadded), MethodConstraints::RequiresSME },
+#endif // defined(ARM_COMPUTE_ENABLE_SVE) && defined(ARM_COMPUTE_ENABLE_SME)
#endif // defined(__aarch64__)
{ IMPL(4, 4, 3, 3, arm_fp32_4x4_3x3, Unpadded), MethodConstraints::LargerShape },
{ IMPL(2, 2, 3, 3, arm_fp32_2x2_3x3, Unpadded) },