From 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 14 Oct 2019 19:03:09 +0100 Subject: COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2141 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez --- .../NEON/kernels/arm_gemm/gemv_pretransposed.hpp | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp') diff --git a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp index 92064180a2..26fdfba8ff 100644 --- a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp +++ b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp @@ -26,7 +26,7 @@ #include #include "arm_gemm.hpp" - +#include "bias_adder.hpp" #include "mergeresults.hpp" #include "transform.hpp" @@ -53,7 +53,7 @@ class GemvPretransposed : public GemmCommon { const bool _trB; - const Tr _beta; + const Activation _act; const CPUInfo * const _ci; @@ -68,8 +68,8 @@ public: GemvPretransposed(GemvPretransposed &) = delete; GemvPretransposed & operator= (GemvPretransposed &) = delete; - GemvPretransposed(const GemmArgs &args) - : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _trB(args._trB), _beta(args._beta), _ci(args._ci), + GemvPretransposed(const GemmArgs &args) + : _Nsize(args._Nsize), _Ksize(args._Ksize), _nmultis(args._nmulti), _trB(args._trB), _act(args._act), _ci(args._ci), _buffer_per_multi(_Ksize * iceildiv(_Nsize, strategy::A_interleave()) * strategy::A_interleave()) { /* For now don't do any blocking. TODO: figure out if we should. */ if (args._cfg && args._cfg->inner_block_size) { @@ -128,7 +128,18 @@ public: (_Ksize * strategy::A_interleave()), this->_Aptr + (multi * this->_A_multi_stride) + m0, this->_Cptr + (multi * this->_C_multi_stride) + n, - _beta, (mmax-m0), (nmax-n)); + static_cast(0), (mmax-m0), (nmax-n)); + + // Handle activation separately for now + if (this->_bias) { + activator(this->_Cptr + (multi * this->_C_multi_stride) + n, 0, + this->_bias + (multi * this->_bias_multi_stride) + n, + _act, 1, (nmax-n)); + } else { + activator(this->_Cptr + (multi * this->_C_multi_stride) + n, 0, + static_cast(nullptr), + _act, 1, (nmax-n)); + } } } } -- cgit v1.2.1