diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-14 19:03:09 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-23 12:08:12 +0000 |
commit | 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch) | |
tree | f857d733ccf446c704823dc7ac796a96eb55095e /arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | |
parent | 1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff) | |
download | ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz |
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h | 4 |
1 files changed, 4 insertions, 0 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h index 5b6a0dd943..12c120934e 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ #define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ +#include "NEActivationLayer.h" #include "arm_compute/core/NEON/INEKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h" @@ -46,6 +47,7 @@ class ITensor; * -# @ref NEGEMMTranspose1xWKernel * -# @ref NEGEMMLowpMatrixMultiplyKernel * -# @ref NEGEMMLowpOffsetContributionKernel + * -# @ref NEActivationLayer * * otherwise if the DOT product instruction is available: * @@ -113,6 +115,7 @@ private: NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel; NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; + NEActivationLayer _activation_func; Tensor _vector_sum_col; Tensor _vector_sum_row; Tensor _tmp_a; @@ -127,6 +130,7 @@ private: bool _reshape_b_only_on_first_run; bool _is_prepared; bool _fuse_output_stage; + bool _run_activation; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */ |