From adb3291dda4e56de1af10e783b787445d6587a38 Mon Sep 17 00:00:00 2001
From: SiCong Li
Date: Mon, 17 Feb 2020 16:39:27 +0000
Subject: COMPMID-3100 Fuse bias addition with fully connected layer NEON

NEGEMM and NEGEMMLowpMatrixMultiplyCore already fuse bias addition.
Expose this fusion to NEFullyConnectedLayer.

Change-Id: I42a909565bf49de1a019a07dc4dca11ae0981ada
Signed-off-by: SiCongLi
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2769
Tested-by: Arm Jenkins
Comments-Addressed: Arm Jenkins
Reviewed-by: Gian Marco Iodice
---
 arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 14 ++++----------
 1 file changed, 4 insertions(+), 10 deletions(-)

diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 78f12daf9c..db09da45ee 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -27,13 +27,11 @@
 #include "arm_compute/runtime/IFunction.h"
 
 #include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
 #include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
@@ -107,7 +105,7 @@
  * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
  * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
  * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
+ * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
  *
  * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
  */
@@ -164,9 +162,9 @@ public:
     void prepare() override;
 
 private:
-    void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output);
-    void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
-    void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+    void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
+    void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
+    void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
 
     MemoryGroup      _memory_group;
     IWeightsManager *_weights_manager;
@@ -177,17 +175,13 @@ private:
     weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
     NEGEMM                             _mm_gemm;
     NEGEMMLowpMatrixMultiplyCore       _mm_gemmlowp;
-    NEGEMMLowpOutputStage              _gemmlowp_output_stage;
-    NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
     Tensor                             _flatten_output;
-    Tensor                             _gemmlowp_output;
     Tensor                             _converted_weights_output;
     Tensor                             _reshape_weights_output;
     const ITensor                     *_original_weights;
     bool                               _are_weights_converted;
     bool                               _are_weights_reshaped;
     bool                               _is_fc_after_conv;
-    bool                               _accumulate_biases;
     bool                               _is_quantized;
     bool                               _is_prepared;
 };
-- 
cgit v1.2.1
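
For reference, a minimal usage sketch of the fused path (not part of the patch): it assumes the public NEFullyConnectedLayer::configure(input, weights, biases, output) API declared in this header, and the tensor shapes (128 inputs, 64 outputs) are made up for illustration. With this change the biases tensor is consumed by the underlying NEGEMM / NEGEMMLowpMatrixMultiplyCore configuration, so no separate bias-accumulation kernel is run.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes only: 128 input activations, 64 output neurons.
    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));

    // Passing a non-null biases tensor: after this patch the bias addition is
    // handled inside NEGEMM / NEGEMMLowpMatrixMultiplyCore rather than by a
    // separate NEGEMMMatrixAccumulateBiasesKernel pass.
    NEFullyConnectedLayer fc;
    fc.configure(&src, &weights, &biases, &dst);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src, weights and biases with data ...

    fc.run();
    return 0;
}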