aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
diff options
context:
space:
mode:
author Anthony Barbier <anthony.barbier@arm.com> 2018-07-03 16:22:02 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:10 +0000
commit 5f707736413aeac77818c42838296966f8dc6761 (patch)
tree b829ed3243ea5f3085f288836132416c78bc2e72 /src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
parent 7485d5a62685cb745ab50e970adb722cb71557ac (diff)
download ComputeLibrary-5f707736413aeac77818c42838296966f8dc6761.tar.gz
COMPMID-1369: Revert accidental formatting of RSH's repo
Pulled latest fixes from David's repo:

    commit f43ebe932c84083332b0b1a0348241b69dda63a7
    Author: David Mansell <David.Mansell@arm.com>
    Date:   Tue Jul 3 18:09:01 2018 +0100

        Whitespace tidying, fixed comment in gemv_batched imported from ACL.

Change-Id: Ie37a623f44e90d88072236cb853ac55ac82d5f51
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/138530
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: David Mansell <david.mansell@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp 71
1 files changed, 33 insertions, 38 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp b/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
index 8d61f15cec..7e61f425d4 100644
--- a/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
+++ b/src/core/NEON/kernels/arm_gemm/transforms/a64_block16_interleave4_8bit.hpp
@@ -30,17 +30,15 @@
#include "../asmlib.hpp"
#include "../utils.hpp"
-template <>
-template <typename T>
-void TransformImpl<4, 16, false, 1, 1>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax)
-{
- uint8_t *outptr = (uint8_t *)out;
- const uint8_t *inptr = (uint8_t *)in;
+template<>
+template<typename T>
+void TransformImpl<4, 16, false, 1, 1>::Transform(T *out, const T *in, int ldin, int y0, int ymax, int k0, int kmax) {
+ uint8_t *outptr = (uint8_t *)out;
+ const uint8_t *inptr = (uint8_t *)in;
uint8_t zerobuff[16];
- for(int y = y0; y < ymax; y += 4)
- {
+ for (int y=y0; y<ymax; y+=4) {
const uint8_t *inptr0 = inptr + y * ldin + k0;
const uint8_t *inptr1 = inptr0 + ldin;
const uint8_t *inptr2 = inptr1 + ldin;
@@ -51,14 +49,11 @@ void TransformImpl<4, 16, false, 1, 1>::Transform(T *out, const T *in, int ldin,
prefetch_2x(inptr2);
prefetch_2x(inptr3);
- int x = (kmax - k0);
- for(; x > 15; x -= 16)
- {
+ int x=(kmax-k0);
+ for (;x>15;x-=16) {
/* Cope with ragged cases by copying from a buffer of zeroes instead */
- if((y + 3) >= ymax)
- {
- switch((y + 3) - ymax)
- {
+ if ((y + 3) >= ymax) {
+ switch ((y + 3) - ymax) {
/* Everything falls through in here */
case 2:
inptr1 = zerobuff;
@@ -73,23 +68,28 @@ void TransformImpl<4, 16, false, 1, 1>::Transform(T *out, const T *in, int ldin,
}
}
- __asm __volatile(
- "LDR q0, [%[inptr0]], #16\n" ASM_PREFETCH("[%[inptr0], #176]") "LDR q1, [%[inptr1]], #16\n" ASM_PREFETCH("[%[inptr1], #176]")
- "STP q0, q1, [%[outptr]], #32\n"
- "LDR q0, [%[inptr2]], #16\n" ASM_PREFETCH("[%[inptr2], #176]") "LDR q1, [%[inptr3]], #16\n" ASM_PREFETCH("[%[inptr3], #176]") "STP q0, q1, [%[outptr]], #32\n"
- : [inptr0] "+r"(inptr0), [inptr1] "+r"(inptr1), [inptr2] "+r"(inptr2), [inptr3] "+r"(inptr3),
- [outptr] "+r"(outptr)
+ __asm __volatile (
+ "LDR q0, [%[inptr0]], #16\n"
+ ASM_PREFETCH("[%[inptr0], #176]")
+ "LDR q1, [%[inptr1]], #16\n"
+ ASM_PREFETCH("[%[inptr1], #176]")
+ "STP q0, q1, [%[outptr]], #32\n"
+ "LDR q0, [%[inptr2]], #16\n"
+ ASM_PREFETCH("[%[inptr2], #176]")
+ "LDR q1, [%[inptr3]], #16\n"
+ ASM_PREFETCH("[%[inptr3], #176]")
+ "STP q0, q1, [%[outptr]], #32\n"
+ : [inptr0] "+r" (inptr0), [inptr1] "+r" (inptr1), [inptr2] "+r" (inptr2), [inptr3] "+r" (inptr3),
+ [outptr] "+r" (outptr)
:
- : "v0", "v1");
+ : "v0", "v1"
+ );
}
- if(x > 0)
- {
+ if (x>0) {
/* Need to duplicate this here, in case we didn't run the main loop. */
- if((y + 3) >= ymax)
- {
- switch((y + 3) - ymax)
- {
+ if ((y + 3) >= ymax) {
+ switch ((y + 3) - ymax) {
/* Everything falls through in here */
case 2:
inptr1 = zerobuff;
@@ -105,16 +105,11 @@ void TransformImpl<4, 16, false, 1, 1>::Transform(T *out, const T *in, int ldin,
}
/* We have to write out 16 values, copy as many legal values as there are and pad with 0 */
- auto f = [&outptr, x](const uint8_t *&p)
- {
- for(int i = 0; i < 16; i++)
- {
- if(i < x)
- {
+ auto f = [&outptr, x](const uint8_t *&p) {
+ for (int i=0; i<16; i++) {
+ if (i < x) {
*outptr++ = *p++;
- }
- else
- {
+ } else {
*outptr++ = 0;
}
}
@@ -128,4 +123,4 @@ void TransformImpl<4, 16, false, 1, 1>::Transform(T *out, const T *in, int ldin,
}
}
-#endif // __aarch64__
\ No newline at end of file
+#endif // __aarch64__
\ No newline at end of file