diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-14 19:03:09 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-23 12:08:12 +0000 |
commit | 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch) | |
tree | f857d733ccf446c704823dc7ac796a96eb55095e /src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp | |
parent | 1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff) | |
download | ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz |
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp | 21 |
1 files changed, 16 insertions, 5 deletions
```diff
diff --git a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
index 92064180a2..26fdfba8ff 100644
--- a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
@@ -26,7 +26,7 @@
 #include <stdio.h>
 
 #include "arm_gemm.hpp"
-
+#include "bias_adder.hpp"
 #include "mergeresults.hpp"
 #include "transform.hpp"
 
@@ -53,7 +53,7 @@ class GemvPretransposed : public GemmCommon<To, Tr> {
 
     const bool _trB;
 
-    const Tr _beta;
+    const Activation _act;
 
     const CPUInfo * const _ci;
 
@@ -68,8 +68,8 @@ public:
     GemvPretransposed(GemvPretransposed &) = delete;
     GemvPretransposed & operator= (GemvPretransposed &) = delete;
 
-    GemvPretransposed(const GemmArgs<Tr> &args)
-    : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _trB(args._trB), _beta(args._beta), _ci(args._ci),
+    GemvPretransposed(const GemmArgs &args)
+    : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _trB(args._trB), _act(args._act), _ci(args._ci),
       _buffer_per_multi(_Ksize * iceildiv(_Nsize, strategy::A_interleave()) * strategy::A_interleave()) {
         /* For now don't do any blocking. TODO: figure out if we should. */
         if (args._cfg && args._cfg->inner_block_size) {
@@ -128,7 +128,18 @@ public:
                                  (_Ksize * strategy::A_interleave()),
                                  this->_Aptr + (multi * this->_A_multi_stride) + m0,
                                  this->_Cptr + (multi * this->_C_multi_stride) + n,
-                                 _beta, (mmax-m0), (nmax-n));
+                                 static_cast<Tr>(0), (mmax-m0), (nmax-n));
+
+                    // Handle activation separately for now
+                    if (this->_bias) {
+                        activator<true>(this->_Cptr + (multi * this->_C_multi_stride) + n, 0,
+                                        this->_bias + (multi * this->_bias_multi_stride) + n,
+                                        _act, 1, (nmax-n));
+                    } else {
+                        activator<false>(this->_Cptr + (multi * this->_C_multi_stride) + n, 0,
+                                         static_cast<const Tr *>(nullptr),
+                                         _act, 1, (nmax-n));
+                    }
                 }
             }
         }
```