aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/transform.hpp
diff options
context:
space:
mode:
author: David Mansell <David.Mansell@arm.com> 2018-07-06 14:52:52 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:10 +0000
commit: d93991e290618a685b67506c78090350e6aee43f (patch)
tree: 1d5c3b3017cfccd3f0ec3f24e8e11334cf977ce3 /src/core/NEON/kernels/arm_gemm/transform.hpp
parent: dec32a9edd4b3c6dc55c60d7436e79af6be58c3d (diff)
download: ComputeLibrary-d93991e290618a685b67506c78090350e6aee43f.tar.gz
COMPMID-1380: Pre-work for SVE support.
This patch makes the needed infrastructure changes to allow SVE kernels to be added later on.

Change-Id: Ide5bccac2f47278e93fff3d648231aee2d5f8c2e
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139070
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transform.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/transform.hpp | 10
1 files changed, 6 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transform.hpp b/src/core/NEON/kernels/arm_gemm/transform.hpp
index 35e61b05a4..77d0d87a4d 100644
--- a/src/core/NEON/kernels/arm_gemm/transform.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transform.hpp
@@ -34,11 +34,14 @@
* Need to cope with the work requested in either dimension not actually
* being a multiple of the block sizes.
*/
-template <unsigned IntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize>
+template <unsigned int tIntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize, bool sve>
struct TransformImpl {
template <typename TOut, typename TIn>
static void Transform(TOut* out, const TIn* const in, const int stride,
const int y0, const int ymax, const int x0, const int xmax) {
+ // For SVE cases we multiply the interleave factor by the vector length.
+ const unsigned int IntBy = tIntBy * (sve ? get_vector_length<TOut>() : 1);
+
const int n_whole_y_blocks = (ymax - y0) / IntBy;
const int y_remainders = (ymax - y0) % IntBy;
const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0);
@@ -95,17 +98,16 @@ struct TransformImpl {
};
/*****************************************************************************/
-template <unsigned int IntBy, unsigned int BlockBy, bool Transposed, typename TOut, typename TIn>
+template <unsigned int IntBy, unsigned int BlockBy, bool Transposed, bool sve=false, typename TOut, typename TIn>
void Transform(
TOut* out, const TIn* const in, const int stride,
const int k0, const int kmax, const int x0, const int xmax
) {
// Redirect to a specialised implementation predicated on argument size.
- TransformImpl<IntBy, BlockBy, Transposed, sizeof(TOut), sizeof(TIn)>::Transform(
+ TransformImpl<IntBy, BlockBy, Transposed, sizeof(TOut), sizeof(TIn), sve>::Transform(
out, in, stride, k0, kmax, x0, xmax
);
}
/*****************************************************************************/
#include "transforms/list.hpp"
-