aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author Pablo Tello <pablo.tello@arm.com> 2017-10-18 16:07:22 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:35:24 +0000
commit b4276c5b76f6eda22d973bfa48ff9612e7f183e5 (patch)
tree a20ca6ec8698e71136532049651153c5ebd37d4e /arm_compute
parent 7457a8d2a8451dc8957e65c88d046cfff12b06e9 (diff)
download ComputeLibrary-b4276c5b76f6eda22d973bfa48ff9612e7f183e5.tar.gz
COMPMID-635: updated aarch64 assembly kernels.
Change-Id: I58dc459eb01d1e7328e1dbef1481027b9bde780b
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/92256
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/NEON/kernels/assembly/gemm_common.hpp | 2
-rw-r--r-- arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a53.hpp | 5
-rw-r--r-- arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/generic.hpp | 4
-rw-r--r-- arm_compute/core/NEON/kernels/assembly/merges/a64_merge_float_12x8.hpp | 2
4 files changed, 7 insertions, 6 deletions
diff --git a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
index 00974436ff..ef89e3aac3 100644
--- a/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/gemm_common.hpp
@@ -28,6 +28,6 @@ template<typename To, typename Tr>
class GemmCommon {
public:
virtual size_t get_working_size() const = 0;
- virtual void execute(const To *, const int, const To *, const int, Tr *, const int, const Tr, const Tr, void *working_space = NULL) const = 0;
+ virtual void execute(const To *, const int, const To *, const int, Tr *, const int, const Tr, const Tr, void *working_space) const = 0;
virtual ~GemmCommon() { }
};
diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a53.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a53.hpp
index e58ce66825..1c9b4b38fc 100644
--- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a53.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/a53.hpp
@@ -206,7 +206,7 @@ inline void a64_sgemm_asimd_12x8_a53(const float *Apanel, const float *Bpanel, f
// Branch here if K=1 or 2. Do the right thing for odd/even at the end.
"4:\n"
- "cbnz %[oddk], 2f\n"
+ "cbnz %w[oddk], 2f\n"
// Detached final iteration. (even K)
"ldr %d[b2], [%[b_ptr], #32]\n"
@@ -360,8 +360,9 @@ inline void a64_sgemm_asimd_12x8_a53(const float *Apanel, const float *Bpanel, f
[b0] "+w" (b0), [b1] "+w" (b1), [b2] "+w" (b2), [k] "+r" (k)
: [oddk] "r" (oddk)
: "x20", "x21", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
- "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+ "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc"
);
}
}
}
+
diff --git a/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/generic.hpp b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/generic.hpp
index 082c200646..c4a5875a31 100644
--- a/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/generic.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/kernels/a64_sgemm_12x8/generic.hpp
@@ -181,7 +181,7 @@ inline void a64_sgemm_asimd_12x8_jumps(const float *Apanel, const float *Bpanel,
"4:\n"
// Branch to alternative tail for odd K
- "cbnz %[oddk], 2f\n"
+ "cbnz %w[oddk], 2f\n"
// Detached final iteration (even K)
"fmla v8.4s , %[b0].4s, %[a0].s[0]\n"
@@ -347,7 +347,7 @@ inline void a64_sgemm_asimd_12x8_jumps(const float *Apanel, const float *Bpanel,
[b0] "+w" (b0), [b1] "+w" (b1), [b2] "+w" (b2), [k] "+r" (k)
: [oddk] "r" (oddk), [row_jump] "r" (row_jump), [block_jump] "r" (block_jump)
: "x20", "x21", "v8", "v9", "v10", "v11", "v12", "v13", "v14", "v15", "v16", "v17", "v18",
- "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31"
+ "v19", "v20", "v21", "v22", "v23", "v24", "v25", "v26", "v27", "v28", "v29", "v30", "v31", "cc"
);
}
}
diff --git a/arm_compute/core/NEON/kernels/assembly/merges/a64_merge_float_12x8.hpp b/arm_compute/core/NEON/kernels/assembly/merges/a64_merge_float_12x8.hpp
index f2c5fd86b9..e8edddb4f4 100644
--- a/arm_compute/core/NEON/kernels/assembly/merges/a64_merge_float_12x8.hpp
+++ b/arm_compute/core/NEON/kernels/assembly/merges/a64_merge_float_12x8.hpp
@@ -226,7 +226,7 @@ inline void MergeResults<12, 8>(float *out, const float *in, const int ldout, co
[outptr4] "+r" (outptr4), [outptr5] "+r" (outptr5), [outptr6] "+r" (outptr6), [outptr7] "+r" (outptr7),
[inptr] "+r" (inptr)
: [av] "w" (av), [bv] "w" (bv)
- : "q0", "q1", "q2", "q3", "q4", "q5", "q6", "q16", "q17", "q18", "q19", "q20", "q21"
+ : "v0", "v1", "v2", "v3", "v4", "v5", "v6", "v16", "v17", "v18", "v19", "v20", "v21"
);
}
}