Integrate SME2 kernels

* Add SME/SME2 detection. * Integrate SME2 implementation for: - Normal convolution - Winograd - Depthwise convolution - Pooling Resolves: COMPMID-5700 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I2f1ca1d05f8cfeee9309ed1c0a36096a4a6aad5c Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8692 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Viet-Hoa Do <viet-hoa.do@arm.com> 2022-06-01 11:47:14 +0100
committer: Viet-Hoa Do <viet-hoa.do@arm.com> 2022-11-28 16:57:42 +0000
commit: 03b2971ac69a86f10a1566938d1a25afee15746c (patch)
tree: aec7cfc047e1da278b4b71a706cda7b1b0faa158 /src/core/NEON/kernels/arm_gemm/std_transforms_sme.hpp
parent: 7dc0234331f2150a6b4ac5c2b49de419870f7cf5 (diff)
download: ComputeLibrary-03b2971ac69a86f10a1566938d1a25afee15746c.tar.gz
1 files changed, 75 insertions, 0 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/std_transforms_sme.hpp b/src/core/NEON/kernels/arm_gemm/std_transforms_sme.hpp
new file mode 100644
index 0000000000..054088e0b5
--- /dev/null
+++ b/src/core/NEON/kernels/arm_gemm/std_transforms_sme.hpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2022 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#pragma once
+
+#include "interleave_indirect.hpp"
+#include "transform.hpp"
+
+namespace arm_gemm {
+
+/*
+ * Define "standard" transforms for the blocked GEMMs for SME.
+ *
+ * This assumes that A is interleaved 'height' ways, B is interleaved
+ * 'width'xVL ways and transposed, and that the merge needs to work in
+ * 'height' x 'width'xVL blocks.
+ *
+ * The optional 'block' parameter is for kernels using dot-product type
+ * instructions like UDOT and SDOT.
+ */
+template<typename TOperand, typename TResult, unsigned int height_vectors, unsigned int width_vectors, unsigned int block=1, bool integrate_sums=false>
+class StdTransformsSME // Stateless set of forwarding wrappers that pick the VLType::SME variants of the interleave/transform routines.
+{
+public:
+    template<typename TIn> // TIn is the element type of the source data; the output buffer is always TOperand.
+    void PrepareA(TOperand *out, const TIn *in, const int stride, const int y0,
+                  const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier) {
+        Interleave<height_vectors, block, VLType::SME>(out, in, stride, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier); // Arguments are forwarded unchanged; the integrate_sums template flag is passed on as a runtime argument.
+    }
+
+    template<typename TIn>
+    void PrepareA_indirect(TOperand *out, const TIn * const * const *ptr, size_t stringlen, size_t rounded_stringlen, const int y0,
+                           const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier) {
+        IndirectInterleave<height_vectors, block, VLType::SME>(out, ptr, stringlen, rounded_stringlen, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier); // Same forwarding as PrepareA, but the source is reached through the pointer table 'ptr'.
+    }
+
+    template<typename TIn>
+    void PrepareA_convolution(TOperand *out, const TIn *ptr, size_t stride, const convolver<TIn> &conv, size_t rounded_stringlen,
+                              const int y0, const int ymax, const int k0, const int kmax, int32_t row_sum_multiplier) {
+        ConvolutionInterleave<height_vectors, block, VLType::SME>(out, ptr, stride, conv, rounded_stringlen, y0, ymax, k0, kmax, integrate_sums, row_sum_multiplier); // Same forwarding as PrepareA, with the source additionally described by the convolver 'conv'.
+    }
+
+    template<typename TIn>
+    void PrepareB(TOperand *out, const TIn *in, const int stride, const int x0,
+                  const int xmax, const int k0, const int kmax) {
+        Transform<width_vectors, block,  true, VLType::SME>(out, in, stride, x0, xmax, k0, kmax); // NOTE(review): the literal 'true' presumably selects the transposed layout mentioned in the file's header comment - confirm against transform.hpp.
+    }
+
+    template<typename TOut>
+    void Merge(TOut *out, const TResult *in, int stride, int y0, int ymax, int x0, int xmax, const TOut *bias, const Activation act, bool accumulate) {
+        // Separate merge not supported for SME: this body is intentionally empty, so every argument is ignored and 'out' is left untouched.
+    }
+};
+
+} // namespace arm_gemm
author	Viet-Hoa Do <viet-hoa.do@arm.com>	2022-06-01 11:47:14 +0100
committer	Viet-Hoa Do <viet-hoa.do@arm.com>	2022-11-28 16:57:42 +0000
commit	03b2971ac69a86f10a1566938d1a25afee15746c (patch)
tree	aec7cfc047e1da278b4b71a706cda7b1b0faa158 /src/core/NEON/kernels/arm_gemm/std_transforms_sme.hpp
parent	7dc0234331f2150a6b4ac5c2b49de419870f7cf5 (diff)
download	ComputeLibrary-03b2971ac69a86f10a1566938d1a25afee15746c.tar.gz