From 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 14 Oct 2019 19:03:09 +0100
Subject: COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels

Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Pablo Marquez
---
 arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h')

diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 5b6a0dd943..12c120934e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -24,6 +24,7 @@
 #ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
 #define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
 
+#include "NEActivationLayer.h"
 #include "arm_compute/core/NEON/INEKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
@@ -46,6 +47,7 @@ class ITensor;
  * -# @ref NEGEMMTranspose1xWKernel
  * -# @ref NEGEMMLowpMatrixMultiplyKernel
  * -# @ref NEGEMMLowpOffsetContributionKernel
+ * -# @ref NEActivationLayer
  *
  * otherwise if the DOT product instruction is available:
  *
@@ -113,6 +115,7 @@ private:
     NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
     NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
     NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
+    NEActivationLayer _activation_func;
     Tensor _vector_sum_col;
     Tensor _vector_sum_row;
     Tensor _tmp_a;
@@ -127,6 +130,7 @@ private:
     bool _reshape_b_only_on_first_run;
     bool _is_prepared;
     bool _fuse_output_stage;
+    bool _run_activation;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */
-- 
cgit v1.2.1