aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/transforms
diff options
context:
space:
mode:
authorDavid Mansell <David.Mansell@arm.com>2018-07-06 14:52:52 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:10 +0000
commitd93991e290618a685b67506c78090350e6aee43f (patch)
tree1d5c3b3017cfccd3f0ec3f24e8e11334cf977ce3 /src/core/NEON/kernels/arm_gemm/transforms
parentdec32a9edd4b3c6dc55c60d7436e79af6be58c3d (diff)
downloadComputeLibrary-d93991e290618a685b67506c78090350e6aee43f.tar.gz
COMPMID-1380: Pre-work for SVE support.
This patch makes the needed infrastructure changes to allow SVE kernels to be added later on. Change-Id: Ide5bccac2f47278e93fff3d648231aee2d5f8c2e Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139070 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transforms')
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp8
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_16bit.hpp8
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_half_to_float.hpp2
-rw-r--r--src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24way_16bit.hpp8
9 files changed, 18 insertions, 18 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
index e485ca7009..492abe51ed 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a32_interleave_6way_32bit.hpp
@@ -31,7 +31,7 @@
template<>
template<typename T>
-inline void TransformImpl<6, 1, false, 4, 4>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
+inline void TransformImpl<6, 1, false, 4, 4, false>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
uint32_t *outptr = reinterpret_cast<uint32_t *>(out);
const uint32_t *inptr = reinterpret_cast<const uint32_t *>(in);
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp
index a7e17fa074..56a226fce0 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a32_transpose_interleave_8way_32bit.hpp
@@ -30,12 +30,12 @@
// Generic unblocked transposed 8x32-bit sized specialisation
template <>
template <typename T>
-inline void TransformImpl<8, 1, true, 4, 4>::Transform(
+inline void TransformImpl<8, 1, true, 4, 4, false>::Transform(
T* out, const T* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
// Redirect to a 16x uint16_t specialisation
- TransformImpl<16, 1, true, 2, 2>::Transform(
+ TransformImpl<16, 1, true, 2, 2, false>::Transform(
reinterpret_cast<uint16_t *>(out),
reinterpret_cast<const uint16_t * const>(in),
stride*2, x0*2, xmax*2, k0, kmax
@@ -45,7 +45,7 @@ inline void TransformImpl<8, 1, true, 4, 4>::Transform(
// Generic 12x16-bit sized specialisation
template <>
template <typename T>
-inline void TransformImpl<16, 1, true, 2, 2>::Transform(
+inline void TransformImpl<16, 1, true, 2, 2, false>::Transform(
T* out, const T* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
@@ -117,7 +117,7 @@ inline void TransposeInterleaveCommon<16, uint16_t, uint16_t>::moveblock_1x4(con
template <>
template <>
-inline void TransformImpl<16, 1, true, 2, 2>::Transform(
+inline void TransformImpl<16, 1, true, 2, 2, false>::Transform(
uint16_t* out, const uint16_t* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
index 7e61f425d4..8ea0483a50 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
@@ -32,7 +32,7 @@
template<>
template<typename T>
-void TransformImpl<4, 16, false, 1, 1>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
+void TransformImpl<4, 16, false, 1, 1, false>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
uint8_t *outptr = (uint8_t *)out;
const uint8_t *inptr = (uint8_t *)in;
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
index 99bb2d66bd..91ee49229b 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_16bit.hpp
@@ -31,7 +31,7 @@
template<>
template<typename T>
-void TransformImpl<8, 1, false, 2, 2>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
+void TransformImpl<8, 1, false, 2, 2, false>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
uint16_t *outptr = (uint16_t *)out;
const uint16_t *inptr = (const uint16_t *)in;
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
index 83391cc59f..7a32f331ea 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_32bit.hpp
@@ -31,7 +31,7 @@
template<>
template<typename T>
-inline void TransformImpl<8, 1, false, 4, 4>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
+inline void TransformImpl<8, 1, false, 4, 4, false>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
uint32_t *outptr = (uint32_t *)out;
const uint32_t *inptr = (uint32_t *)in;
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp
index fd812165fd..773d56d913 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_interleave_8way_half_to_float.hpp
@@ -31,7 +31,7 @@
template<>
template<>
-inline void TransformImpl<8, 1, false, 4, 2>::Transform(float *out, const __fp16 *in, int ldin, int y0, int ymax, int k0, int kmax) {
+inline void TransformImpl<8, 1, false, 4, 2, false>::Transform(float *out, const __fp16 *in, int ldin, int y0, int ymax, int k0, int kmax) {
float *outptr = out;
const __fp16 *inptr = in;
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_16bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_16bit.hpp
index 6e07064a0c..16fa31eb67 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_16bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_16bit.hpp
@@ -30,12 +30,12 @@
// Generic unblocked transposed 6x32-bit sized specialisation
template <>
template <typename T>
-inline void TransformImpl<6, 1, true, 4, 4>::Transform(
+inline void TransformImpl<6, 1, true, 4, 4, false>::Transform(
T* out, const T* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
// Redirect to a 12 x uint16_t specialisation
- TransformImpl<12, 1, true, 2, 2>::Transform(
+ TransformImpl<12, 1, true, 2, 2, false>::Transform(
reinterpret_cast<uint16_t *>(out),
reinterpret_cast<const uint16_t * const>(in),
stride*2, x0*2, xmax*2, k0, kmax
@@ -45,7 +45,7 @@ inline void TransformImpl<6, 1, true, 4, 4>::Transform(
// Generic 12x16-bit sized specialisation
template <>
template <typename T>
-inline void TransformImpl<12, 1, true, 2, 2>::Transform(
+inline void TransformImpl<12, 1, true, 2, 2, false>::Transform(
T* out, const T* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
@@ -135,7 +135,7 @@ inline void TransposeInterleaveCommon<12, uint16_t, uint16_t>::moveblock_1x4(con
template <>
template <>
-inline void TransformImpl<12, 1, true, 2, 2>::Transform(
+inline void TransformImpl<12, 1, true, 2, 2, false>::Transform(
uint16_t* out, const uint16_t* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_half_to_float.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_half_to_float.hpp
index 2f90c18ebd..46b4bf5149 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_half_to_float.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_12way_half_to_float.hpp
@@ -110,7 +110,7 @@ inline void TransposeInterleaveCommon<12, __fp16, float>::moveblock_1x4(const __
template <>
template <>
-inline void TransformImpl<12, 1, true, 4, 2>::Transform(
+inline void TransformImpl<12, 1, true, 4, 2, false>::Transform(
float* out, const __fp16* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24way_16bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24way_16bit.hpp
index b6565baa23..c39dd82119 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24way_16bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_transpose_interleave_24way_16bit.hpp
@@ -30,12 +30,12 @@
// Generic unblocked transposed 12x32-bit sized specialisation
template <>
template <typename T>
-inline void TransformImpl<12, 1, true, 4, 4>::Transform(
+inline void TransformImpl<12, 1, true, 4, 4, false>::Transform(
T* out, const T* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
// Redirect to a 24 x uint16_t specialisation
- TransformImpl<24, 1, true, 2, 2>::Transform(
+ TransformImpl<24, 1, true, 2, 2, false>::Transform(
reinterpret_cast<uint16_t *>(out),
reinterpret_cast<const uint16_t * const>(in),
stride*2, x0*2, xmax*2, k0, kmax
@@ -45,7 +45,7 @@ inline void TransformImpl<12, 1, true, 4, 4>::Transform(
// Generic 24x16-bit sized specialisation
template <>
template <typename T>
-inline void TransformImpl<24, 1, true, 2, 2>::Transform(
+inline void TransformImpl<24, 1, true, 2, 2, false>::Transform(
T* out, const T* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {
@@ -120,7 +120,7 @@ inline void TransposeInterleaveCommon<24, uint16_t, uint16_t>::moveblock_1x4(con
template <>
template <>
-inline void TransformImpl<24, 1, true, 2, 2>::Transform(
+inline void TransformImpl<24, 1, true, 2, 2, false>::Transform(
uint16_t* out, const uint16_t* const in, const int stride,
const int x0, const int xmax, const int k0, const int kmax
) {