diff options
author | David Mansell <David.Mansell@arm.com> | 2018-07-06 14:52:52 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:10 +0000 |
commit | d93991e290618a685b67506c78090350e6aee43f (patch) | |
tree | 1d5c3b3017cfccd3f0ec3f24e8e11334cf977ce3 /src/core/NEON/kernels/arm_gemm/mergeresults.hpp | |
parent | dec32a9edd4b3c6dc55c60d7436e79af6be58c3d (diff) | |
download | ComputeLibrary-d93991e290618a685b67506c78090350e6aee43f.tar.gz |
COMPMID-1380: Pre-work for SVE support.
This patch makes the needed infrastructure changes to allow SVE
kernels to be added later on.
Change-Id: Ide5bccac2f47278e93fff3d648231aee2d5f8c2e
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139070
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/mergeresults.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/mergeresults.hpp | 18 |
1 files changed, 11 insertions, 7 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/mergeresults.hpp b/src/core/NEON/kernels/arm_gemm/mergeresults.hpp index b1e2ca1daa..04d1343b1c 100644 --- a/src/core/NEON/kernels/arm_gemm/mergeresults.hpp +++ b/src/core/NEON/kernels/arm_gemm/mergeresults.hpp @@ -32,15 +32,19 @@ namespace arm_gemm { -template<unsigned int width, unsigned int height, typename Tin, typename Tout> +template<unsigned int twidth, unsigned int height, bool sve=false, typename Tin, typename Tout> inline void MergeResults(Tout * out, const Tin * in, int ldc, int y0, int ymax, int x0, int xmax, const Tout alpha, const Tout beta) { - int full_y_blocks = (ymax - y0) / height; - int y_remainder = (ymax - y0) % height; - int y_blocks = full_y_blocks + (y_remainder ? 1 : 0); + // For SVE cases, multiply the width up by the vector length. + // Use the *input* type to determine this, since this will be what the kernel operated on. + const int width = twidth * (sve ? get_vector_length<Tin>() : 1); - int full_x_blocks = (xmax - x0) / width; - int x_remainder = (xmax - x0) % width; - int x_blocks = full_x_blocks + (x_remainder ? 1 : 0); + const int full_y_blocks = (ymax - y0) / height; + const int y_remainder = (ymax - y0) % height; + const int y_blocks = full_y_blocks + (y_remainder ? 1 : 0); + + const int full_x_blocks = (xmax - x0) / width; + const int x_remainder = (xmax - x0) % width; + const int x_blocks = full_x_blocks + (x_remainder ? 1 : 0); for (int y_block = 0; y_block < y_blocks; y_block++) { int ybase = y0 + (y_block * height); |