path: root/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
author:    SiCong Li <sicong.li@arm.com>    2020-02-17 16:39:27 +0000
committer: Giorgio Arena <giorgio.arena@arm.com>    2020-03-03 09:55:55 +0000
commit:    adb3291dda4e56de1af10e783b787445d6587a38 (patch)
tree:      33c8e76b34839999fd8a4537c243807803a5c91b /arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
parent:    28287afbea9549e8e2904084ae895c04cca88e95 (diff)
download:  ComputeLibrary-adb3291dda4e56de1af10e783b787445d6587a38.tar.gz
COMPMID-3100 Fuse bias addition with fully connected layer NEON
NEGEMM and NEGEMMLowpMatrixMultiplyCore already fuse bias addition into the matrix multiplication. Expose this fusion through NEFullyConnectedLayer.

Change-Id: I42a909565bf49de1a019a07dc4dca11ae0981ada
Signed-off-by: SiCongLi <sicong.li@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2769
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
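The fused call sites themselves are not part of this header-only diff, but the intent can be sketched. Below is a hedged, illustrative body for the new configure_mm overload (signature taken from the diff further down), assuming the public NEGEMM::configure and NEGEMMLowpMatrixMultiplyCore::configure overloads that accept a bias operand; the actual patch additionally wires up GEMMInfo and quantization details not shown here.

// Hedged sketch, not the verbatim patch body: hand the bias tensor straight
// to the GEMM, which then computes output = input * weights + biases in one pass.
void NEFullyConnectedLayer::configure_mm(const ITensor *input, const ITensor *weights,
                                         const ITensor *biases, ITensor *output)
{
    if(_is_quantized)
    {
        // Quantized path: NEGEMMLowpMatrixMultiplyCore takes the biases as its
        // third operand and fuses them with the requantization output stage.
        _mm_gemmlowp.configure(input, weights, biases, output);
    }
    else
    {
        // Float path: pass biases as the GEMM "c" operand with beta = 1.
        _mm_gemm.configure(input, weights, biases, output, 1.f, 1.f);
    }
}

Folding the bias into the GEMM saves a separate pass over the output tensor, which is why the standalone NEGEMMMatrixAccumulateBiasesKernel and the dedicated GEMMLowp output stage can be removed from the members below.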
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 14
1 file changed, 4 insertions(+), 10 deletions(-)
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index 78f12daf9c..db09da45ee 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -27,13 +27,11 @@
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/core/NEON/kernels/NEFlattenLayerKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEConvertFullyConnectedWeights.h"
#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
@@ -107,7 +105,7 @@ private:
* -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
* -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
+ * -# @ref NEGEMMMatrixAdditionKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
@@ -164,9 +162,9 @@ public:
void prepare() override;
private:
- void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output);
- void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
- void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+ void configure_fc_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
+ void configure_conv_fc(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
+ void configure_mm(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output);
MemoryGroup _memory_group;
IWeightsManager *_weights_manager;
@@ -177,17 +175,13 @@ private:
weights_transformations::NEFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
NEGEMM _mm_gemm;
NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
- NEGEMMLowpOutputStage _gemmlowp_output_stage;
- NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
Tensor _flatten_output;
- Tensor _gemmlowp_output;
Tensor _converted_weights_output;
Tensor _reshape_weights_output;
const ITensor *_original_weights;
bool _are_weights_converted;
bool _are_weights_reshaped;
bool _is_fc_after_conv;
- bool _accumulate_biases;
bool _is_quantized;
bool _is_prepared;
};
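For reference, a hedged usage sketch of the resulting interface. The shapes, data type, and setup below are illustrative assumptions rather than code from this patch; only the NEFullyConnectedLayer::configure(input, weights, biases, output) call reflects the header above.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes: a batch of 4 vectors with 128 features each,
    // fully connected to 32 outputs.
    Tensor src, weights, biases, dst;
    src.allocator()->init(TensorInfo(TensorShape(128U, 4U), 1, DataType::F32));
    // 32 rows of 128 weights; transposed internally since transpose_weights
    // defaults to true in FullyConnectedLayerInfo.
    weights.allocator()->init(TensorInfo(TensorShape(128U, 32U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(32U, 4U), 1, DataType::F32));

    NEFullyConnectedLayer fc;
    // With this patch, the biases are consumed by the underlying NEGEMM /
    // NEGEMMLowpMatrixMultiplyCore rather than by a separate accumulation kernel.
    fc.configure(&src, &weights, &biases, &dst);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src, weights and biases with real data here ...

    fc.run();
    return 0;
}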