author     SiCong Li <sicong.li@arm.com>           2020-02-17 16:39:27 +0000
committer  Giorgio Arena <giorgio.arena@arm.com>   2020-03-03 09:55:55 +0000
commit     adb3291dda4e56de1af10e783b787445d6587a38 (patch)
tree       33c8e76b34839999fd8a4537c243807803a5c91b /arm_compute
parent     28287afbea9549e8e2904084ae895c04cca88e95 (diff)
download   ComputeLibrary-adb3291dda4e56de1af10e783b787445d6587a38.tar.gz
COMPMID-3100 Fuse bias addition with fully connected layer NEON
NEGEMM and NEGEMMLowpMatrixMultiplyCore already fuse the bias addition.
Expose this fused path through NEFullyConnectedLayer.
Change-Id: I42a909565bf49de1a019a07dc4dca11ae0981ada
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2769
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
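
For illustration only (not part of the patch): a minimal caller-side sketch of the fused path, using the public NEFullyConnectedLayer::configure() overload that takes a bias tensor. The shapes, data type and the main() wrapper below are made up for the example; check the class documentation for the exact weight layout the function expects.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes: 128 inputs, 64 outputs, FP32.
    Tensor src, weights, bias, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(64U, 128U), 1, DataType::F32));
    bias.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F32));

    // The bias tensor is handed to configure(); with this patch it is added
    // inside the GEMM rather than by a separate accumulate-biases kernel.
    NEFullyConnectedLayer fc;
    fc.configure(&src, &weights, &bias, &dst);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    fc.run();
    return 0;
}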
Diffstat (limited to 'arm_compute')
-rw-r--r--  arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h  14
1 file changed, 4 insertions, 10 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 78f12daf9c..db09da45ee 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -27,13 +27,11 @@
 #include "arm_compute/runtime/IFunction.h"
 
 #include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
 #include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 #include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMM.h"
 #include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
@@ -107,7 +105,7 @@ private:
  * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
  * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
  * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
+ * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
  *
  * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
  */
@@ -164,9 +162,9 @@ public:
     void prepare() override;
 
 private:
-    void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output);
-    void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
-    void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+    void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
+    void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
+    void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
 
     MemoryGroup _memory_group;
     IWeightsManager *_weights_manager;
@@ -177,17 +175,13 @@ private:
     weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
     NEGEMM _mm_gemm;
     NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
-    NEGEMMLowpOutputStage _gemmlowp_output_stage;
-    NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
     Tensor _flatten_output;
-    Tensor _gemmlowp_output;
     Tensor _converted_weights_output;
     Tensor _reshape_weights_output;
     const ITensor *_original_weights;
     bool _are_weights_converted;
     bool _are_weights_reshaped;
     bool _is_fc_after_conv;
-    bool _accumulate_biases;
     bool _is_quantized;
     bool _is_prepared;
 };
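
The header change above only updates the private configure_* signatures; the matching NEFullyConnectedLayer.cpp is outside this diffstat (limited to 'arm_compute'). As a hedged sketch of what the new configure_mm(input, weights, biases, output) signature enables, the bias operand can be forwarded straight to the already-fused GEMM functions. The free function, its extra parameters and the beta value below are assumptions for illustration, not the library's actual implementation.

#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"

using namespace arm_compute;

// Hypothetical stand-in for the new private NEFullyConnectedLayer::configure_mm().
void configure_mm_sketch(NEGEMM &mm_gemm, NEGEMMLowpMatrixMultiplyCore &mm_gemmlowp,
                         const ITensor *input, const ITensor *weights,
                         const ITensor *biases, ITensor *output,
                         bool is_quantized, const GEMMInfo &gemm_info)
{
    if(is_quantized)
    {
        // The lowp core takes the (S32) bias as its third operand and folds the
        // addition into its output stage, which is why the separate
        // NEGEMMLowpOutputStage member and _gemmlowp_output tensor are removed.
        mm_gemmlowp.configure(input, weights, biases, output, gemm_info);
    }
    else
    {
        // NEGEMM's third operand is the bias; it is accumulated into the matrix
        // product, replacing NEGEMMMatrixAccumulateBiasesKernel.
        mm_gemm.configure(input, weights, biases, output, 1.f, 1.f, gemm_info);
    }
}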