COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels

Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
author: Georgios Pinitas <georgios.pinitas@arm.com> 2019-10-14 19:03:09 +0100
committer: Georgios Pinitas <georgios.pinitas@arm.com> 2019-10-23 12:08:12 +0000
commit: 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch)
tree: f857d733ccf446c704823dc7ac796a96eb55095e /arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
parent: 1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff)
download: ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz
1 files changed, 4 insertions, 0 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 5b6a0dd943..12c120934e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -24,6 +24,7 @@
 #ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
 #define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
 
+#include "NEActivationLayer.h"
 #include "arm_compute/core/NEON/INEKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
@@ -46,6 +47,7 @@ class ITensor;
  *  -# @ref NEGEMMTranspose1xWKernel
  *  -# @ref NEGEMMLowpMatrixMultiplyKernel
  *  -# @ref NEGEMMLowpOffsetContributionKernel
+ *  -# @ref NEActivationLayer
  *
  * otherwise if the DOT product instruction is available:
  *
@@ -113,6 +115,7 @@ private:
     NEGEMMLowpMatrixBReductionKernel              _mtx_b_reduction_kernel;
     NEGEMMLowpOffsetContributionKernel            _offset_contribution_kernel;
     NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
+    NEActivationLayer                             _activation_func;
     Tensor                                        _vector_sum_col;
     Tensor                                        _vector_sum_row;
     Tensor                                        _tmp_a;
@@ -127,6 +130,7 @@ private:
     bool                                          _reshape_b_only_on_first_run;
     bool                                          _is_prepared;
     bool                                          _fuse_output_stage;
+    bool                                          _run_activation;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */
author	Georgios Pinitas <georgios.pinitas@arm.com>	2019-10-14 19:03:09 +0100
committer	Georgios Pinitas <georgios.pinitas@arm.com>	2019-10-23 12:08:12 +0000
commit	48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch)
tree	f857d733ccf446c704823dc7ac796a96eb55095e /arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
parent	1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff)
download	ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz