From b3c81cb4100b3a449db5232364e18e649b26df58 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Mon, 13 Nov 2017 17:11:23 +0000 Subject: COMPMID-632: Updated Ananke_r1's SDOT product kernel. Change-Id: Ib81d1ff12017431fc4cbeb8d3069d4bb7dfc405f Reviewed-on: http://mpd-gerrit.cambridge.arm.com/95808 Tested-by: Kaizen Reviewed-by: Anthony Barbier --- .../assembly/kernels/a64_gemm_s8_12x8/a55r1.hpp | 159 ++++++++++++--------- 1 file changed, 95 insertions(+), 64 deletions(-) (limited to 'arm_compute/core/NEON') diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_12x8/a55r1.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_12x8/a55r1.hpp index 5ed930c0b0..4ac2ba4234 100644 --- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_12x8/a55r1.hpp +++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_gemm_s8_12x8/a55r1.hpp @@ -33,20 +33,20 @@ void a64_gemm_s8_12x8_a55r1(const int8_t *Apanel, const int8_t *Bpanel, int32_t assert(Apanel); assert(Bpanel); assert(Cpanel); - K/=4; - const long int row_jump=0; - const long int block_jump=0; - const int32_t *a_ptr = reinterpret_cast(Apanel); - int32_t *c_ptr = reinterpret_cast(Cpanel); + const int8_t *a_ptr = Apanel; + int32_t *c_ptr = Cpanel; + // We divide K by 4 because the sdot instruction processes 4 elements at a time. + const int W = K/4; + // Fix up for odd lengths - set a flag if K is odd, but make + // sure we round up the iteration count. + const int oddk = (W & 1); + const int init_value_k = ((W+1)/2) - 1; for (int yb=0; yb(Bpanel); + const int8_t *a_ptr0 = a_ptr; + const int8_t *b_ptr = Bpanel; for (int xb=0; xb