diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-14 19:03:09 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-10-23 12:08:12 +0000 |
commit | 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 (patch) | |
tree | f857d733ccf446c704823dc7ac796a96eb55095e /arm_compute/runtime/NEON/functions/NEGEMM.h | |
parent | 1dce3101ef8d77c8cf0af7dfd4af6595a0136b91 (diff) | |
download | ComputeLibrary-48b3ef89de5f21a0169d8416e3d54081f82c7bf8.tar.gz |
COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels
Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2141
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEGEMM.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMM.h | 36 |
1 files changed, 26 insertions, 10 deletions
```diff
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index d947be1ef9..e4d69eb93d 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -24,6 +24,7 @@
 #ifndef __ARM_COMPUTE_NEGEMM_H__
 #define __ARM_COMPUTE_NEGEMM_H__
 
+#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
 #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
@@ -33,20 +34,27 @@
 #include "arm_compute/runtime/IMemoryManager.h"
 #include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
 #include "arm_compute/runtime/Tensor.h"
 
-#include <memory>
-
 namespace arm_compute
 {
 /** Basic function to execute GEMM on NEON. This function calls the following NEON kernels:
  *
+ * If optimized assembly is available:
+ * -# @ref NEGEMMAssemblyDispatch
+ * -# @ref NEActivationLayer (if alpha != 1.0)
+ * Else:
  * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix)
  * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix)
  * -# @ref NEGEMMMatrixMultiplyKernel
- * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0)
+ * In both cases:
+ * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once)
+ * Else:
+ * -# @ref NEArithmeticAdditionKernel (if c != nullptr and is reshaped once and not optimized assembly in place)
  *
+ * -# @ref NEActivationLayer (if activation is specified in GEMMInfo)
  */
 class NEGEMM : public IFunction
 {
@@ -103,13 +111,21 @@ private:
     NEGEMMMatrixMultiplyKernel _mm_kernel;
     NEGEMMAssemblyDispatch     _asm_glue;
     NEGEMMMatrixAdditionKernel _ma_kernel;
-    Tensor         _tmp_a;
-    Tensor         _tmp_b;
-    const ITensor *_original_b;
-    bool           _run_vector_matrix_multiplication;
-    bool           _run_addition;
-    bool           _reshape_b_only_on_first_run;
-    bool           _is_prepared;
+    NEActivationLayer          _alpha_scale_func;
+    NEArithmeticAdditionKernel _add_bias_kernel;
+    NEActivationLayer          _activation_func;
+
+    Tensor         _tmp_a;
+    Tensor         _tmp_b;
+    Tensor         _tmp_d;
+    const ITensor *_original_b;
+    bool           _run_vector_matrix_multiplication;
+    bool           _run_alpha_scale;
+    bool           _run_addition;
+    bool           _run_bias_addition;
+    bool           _run_activation;
+    bool           _reshape_b_only_on_first_run;
+    bool           _is_prepared;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEGEMM_H__ */
```