aboutsummaryrefslogtreecommitdiff
path: root/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-10-14 19:03:09 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2019-10-23 12:08:12 +0000
commit48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch)
treef857d733ccf446c704823dc7ac796a96eb55095e /arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
parent1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff)
downloadComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/2141 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h')
-rw-r--r--arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h4
1 files changed, 4 insertions, 0 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
index 5b6a0dd943..12c120934e 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h
@@ -24,6 +24,7 @@
#ifndef __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
#define __ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__
+#include "NEActivationLayer.h"
#include "arm_compute/core/NEON/INEKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMLowpOffsetContributionOutputStageKernel.h"
@@ -46,6 +47,7 @@ class ITensor;
* -# @ref NEGEMMTranspose1xWKernel
* -# @ref NEGEMMLowpMatrixMultiplyKernel
* -# @ref NEGEMMLowpOffsetContributionKernel
+ * -# @ref NEActivationLayer
*
* otherwise if the DOT product instruction is available:
*
@@ -113,6 +115,7 @@ private:
NEGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
NEGEMMLowpOffsetContributionKernel _offset_contribution_kernel;
NEGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
+ NEActivationLayer _activation_func;
Tensor _vector_sum_col;
Tensor _vector_sum_row;
Tensor _tmp_a;
@@ -127,6 +130,7 @@ private:
bool _reshape_b_only_on_first_run;
bool _is_prepared;
bool _fuse_output_stage;
+ bool _run_activation;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEGEMMLOWPMATRIXMULTIPLYCORE_H__ */