diff options
author | David Mansell <David.Mansell@arm.com> | 2018-07-06 14:52:52 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:10 +0000 |
commit | d93991e290618a685b67506c78090350e6aee43f (patch) | |
tree | 1d5c3b3017cfccd3f0ec3f24e8e11334cf977ce3 /src/core/NEON/kernels/arm_gemm/transform.hpp | |
parent | dec32a9edd4b3c6dc55c60d7436e79af6be58c3d (diff) | |
download | ComputeLibrary-d93991e290618a685b67506c78090350e6aee43f.tar.gz |
COMPMID-1380: Pre-work for SVE support.
This patch makes the needed infrastructure changes to allow SVE
kernels to be added later on.
Change-Id: Ide5bccac2f47278e93fff3d648231aee2d5f8c2e
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139070
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transform.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/transform.hpp | 10 |
1 files changed, 6 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transform.hpp b/src/core/NEON/kernels/arm_gemm/transform.hpp
index 35e61b05a4..77d0d87a4d 100644
--- a/src/core/NEON/kernels/arm_gemm/transform.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transform.hpp
@@ -34,11 +34,14 @@
  * Need to cope with the work requested in either dimension not actually
  * being a multiple of the block sizes.
  */
-template <unsigned IntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize>
+template <unsigned int tIntBy, unsigned int BlockBy, bool Transposed, size_t TOutSize, size_t TInSize, bool sve>
 struct TransformImpl {
     template <typename TOut, typename TIn>
     static void Transform(TOut* out, const TIn* const in, const int stride,
                           const int y0, const int ymax, const int x0, const int xmax) {
+        // For SVE cases we multiply the interleave factor by the vector length.
+        const unsigned int IntBy = tIntBy * (sve ? get_vector_length<TOut>() : 1);
+
         const int n_whole_y_blocks = (ymax - y0) / IntBy;
         const int y_remainders = (ymax - y0) % IntBy;
         const int n_y_blocks = n_whole_y_blocks + (y_remainders ? 1 : 0);
@@ -95,17 +98,16 @@ struct TransformImpl {
 };
 
 /*****************************************************************************/
-template <unsigned int IntBy, unsigned int BlockBy, bool Transposed, typename TOut, typename TIn>
+template <unsigned int IntBy, unsigned int BlockBy, bool Transposed, bool sve=false, typename TOut, typename TIn>
 void Transform(
   TOut* out, const TIn* const in, const int stride,
   const int k0, const int kmax, const int x0, const int xmax
 ) {
   // Redirect to a specialised implementation predicated on argument size.
-  TransformImpl<IntBy, BlockBy, Transposed, sizeof(TOut), sizeof(TIn)>::Transform(
+  TransformImpl<IntBy, BlockBy, Transposed, sizeof(TOut), sizeof(TIn), sve>::Transform(
     out, in, stride, k0, kmax, x0, xmax
   );
 }
 
 /*****************************************************************************/
 #include "transforms/list.hpp"
-