aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp
diff options
context:
space:
mode:
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-10-14 19:03:09 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-10-23 12:08:12 +0000
commit: 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch)
tree: f857d733ccf446c704823dc7ac796a96eb55095e /src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp
parent: 1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff)
download: ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp')
-rw-r--r-- src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp 19
1 files changed, 15 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp
index 55b1f9bbe6..49681ec404 100644
--- a/src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemv_native_transposed.hpp
@@ -53,7 +53,7 @@ class GemvNativeTransposed : public GemmCommon<To, Tr> {
const unsigned int _nmultis;
- const Tr _beta;
+ const Activation _act;
const CPUInfo * const _ci;
@@ -64,8 +64,8 @@ public:
GemvNativeTransposed(GemvNativeTransposed &) = delete;
GemvNativeTransposed & operator= (GemvNativeTransposed &) = delete;
- GemvNativeTransposed(const GemmArgs<Tr> &args)
- : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _beta(args._beta), _ci(args._ci) {
+ GemvNativeTransposed(const GemmArgs &args)
+ : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _act(args._act), _ci(args._ci) {
/* For now don't do any blocking. TODO: figure out if we should. */
m_block = _Ksize;
n_block = _Nsize;
@@ -111,7 +111,18 @@ public:
strat.kernel(this->_Bptr + (multi * this->_B_multi_stride) + (m0 * this->_ldb) + n0,
this->_Aptr + (multi * this->_A_multi_stride) + m0,
this->_Cptr + (multi * this->_C_multi_stride) + n0,
- _beta, this->_ldb, (mmax-m0), (nmax-n0));
+ static_cast<Tr>(0), this->_ldb, (mmax-m0), (nmax-n0));
+
+ // Handle activation separately for now
+ if (this->_bias) {
+ activator<true>(this->_Cptr + (multi * this->_C_multi_stride) + n0, 0,
+ this->_bias + (multi * this->_bias_multi_stride) + n0,
+ _act, 1, (nmax-n0));
+ } else {
+ activator<false>(this->_Cptr + (multi * this->_C_multi_stride) + n0, 0,
+ static_cast<const Tr *>(nullptr),
+ _act, 1, (nmax-n0));
+ }
}
}
}