From a855af10a486c53c2271361cb87f349eca64b749 Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Mon, 16 Jul 2018 17:20:38 +0100
Subject: COMPMID-1401 Implement NEFullyConnectedLayer for QASYMM8

Change-Id: I0404df6d369855e2f458f2db8f26e81c80a1ee87
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140148
Reviewed-by: Georgios Pinitas
Reviewed-by: Anthony Barbier
Reviewed-by: Gian Marco Iodice
Tested-by: Jenkins
---
 .../runtime/NEON/functions/NEFullyConnectedLayer.h | 113 ++++++++++-----------
 1 file changed, 56 insertions(+), 57 deletions(-)

(limited to 'arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h')

diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index ea0762ea79..92ca17a3a4 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -26,66 +26,47 @@
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
 #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
 #include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
 #include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
 #include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
 #include "arm_compute/runtime/Tensor.h"
 
 namespace arm_compute
 {
 /** Basic function to reshape the weights of Fully Connected layer with NEON. This function calls the following kernels:
  *
- * -# @ref NETransposeKernel (if @p transpose_weights is set to true)
- * -# @ref NEGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true)
+ * -# @ref NETransposeKernel
  *
  * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
  */
-class NEFullyConnectedLayerReshapeWeights : public IFunction
+class NEFullyConnectedLayerReshapeWeights : public INESimpleFunction
 {
 public:
-    /** Constructor */
-    NEFullyConnectedLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  input               Weights tensor. The weights must be 2 dimensional. Data types supported: F32.
-     * @param[out] output              Destination tensor. Data type supported: Same as @p input.
-     * @param[in]  transpose_weights   True if the weights must be transposed. Data types supported: Same as @p weights.
-     * @param[in]  is_batched_fc_layer True if it is a batched fully connected layer
+     * @param[in]  input  Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
+     * @param[out] output Destination tensor. Data type supported: Same as @p input.
      */
-    void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer);
+    void configure(const ITensor *input, ITensor *output);
     /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights
      *
-     * @param[in] input               Weights tensor info. The weights must be 2 dimensional. Data types supported: F32.
-     * @param[in] output              Destination tensor info. Data type supported: Same as @p input.
-     * @param[in] transpose_weights   True if the weights must be transposed. Data types supported: Same as @p weights.
-     * @param[in] is_batched_fc_layer True if it is a batched fully connected layer
+     * @param[in] input  Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
+     * @param[in] output Destination tensor info. Data type supported: Same as @p input.
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output, bool transpose_weights, bool is_batched_fc_layer);
-
-    // Inherited methods overridden:
-    void run() override;
-
-private:
-    MemoryGroup              _memory_group;
-    NETransposeKernel        _transpose_kernel;
-    NEGEMMTranspose1xWKernel _transpose1xW_kernel;
-    Tensor                   _transpose_output;
-    bool                     _transpose_weights;
-    bool                     _is_batched_fc_layer;
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
 };
 
 /** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
- *  -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
- *  -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped flag is set to false) (called once)
- *  -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
- *  -# @ref NEGEMMMatrixMultiplyKernel
- *  -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
+ *  -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
+ *  -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and @p transpose_weights is set to true) (called once)
+ *  -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ *  -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
  *
  * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
  */
@@ -104,21 +85,33 @@ public:
     NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default;
     /** Set the input and output tensors.
      *
-     * @param[in]  input   Source tensor. Data type supported: F16/F32.
-     * @param[in]  weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
+     * @param[in]  input   Source tensor. Data type supported: QASYMM8/F16/F32.
+     * @param[in]  weights Weights tensor. The weights must be 2 dimensional.
+     *                     If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 dimensions of the input.
+     *                     If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+     *                     Data type supported: Same as @p input.
      * @param[in]  biases  Bias tensor. Can be nullptr. Data type supported: Same as @p input.
-     * @param[out] output  Destination tensor. Data type supported: Same as @p input.
+     * @param[out] output  Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
+     *                     - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+     *                     - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+     *                     Data type supported: Same as @p input.
     * @param[in]  fc_info (Optional) Fully connected layer additional info
     */
    void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
                   FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
-    /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer
+    /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer
      *
-     * @param[in] input   Source tensor info. Data type supported: F16/F32.
-     * @param[in] weights Weights tensor info. The weights must be 2 dimensional. Data type supported: Same as @p input
-     * @param[in] biases  Bias tensor info. It can be nullptr. Data type supported:Same as @p input.
-     * @param[in] output  Destination tensor info. Data type supported: Same as @p input.
-     * @param[in] fc_info (Optional) Fully connected layer additional info
+     * @param[in] input   Source tensor info. Data type supported: QASYMM8/F16/F32.
+     * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+     *                    If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first 3 dimensions of the input.
+     *                    If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+     *                    Data type supported: Same as @p input.
+     * @param[in] biases  Bias tensor info. Can be nullptr. Data type supported: Same as @p input.
+     * @param[in] output  Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
+     *                    - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+     *                    - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+     *                    Data type supported: Same as @p input.
+     * @param[in] fc_info (Optional) Fully connected layer additional info
      *
      * @return a status
      */
@@ -130,20 +123,26 @@ public:
     void prepare() override;
 
 private:
-    MemoryGroup                         _memory_group;
-    NEIm2ColKernel                      _im2col_kernel;
-    NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
-    NEGEMMInterleave4x4Kernel           _interleave4x4_kernel;
-    NEGEMMMatrixMultiplyKernel          _mm_kernel;
-    NEGEMMMatrixAccumulateBiasesKernel  _accumulate_biases_kernel;
-    Tensor                              _im2col_output;
-    Tensor                              _interleave4x4_output;
-    Tensor                              _reshape_weights_output;
-    const ITensor                      *_original_weights;
-    bool                                _is_batched_fc_layer;
-    bool                                _linearize_input;
-    bool                                _accumulate_biases;
-    bool                                _is_prepared;
+    void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output);
+    void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
+    void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+
+    MemoryGroup                                         _memory_group;
+    NEIm2ColKernel                                      _im2col_kernel;
+    NEFullyConnectedLayerReshapeWeights                 _reshape_weights_function;
+    NEGEMM                                              _mm_gemm;
+    NEGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
+    NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
+    NEGEMMMatrixAccumulateBiasesKernel                  _accumulate_biases_kernel;
+    Tensor                                              _im2col_output;
+    Tensor                                              _gemmlowp_output;
+    Tensor                                              _reshape_weights_output;
+    const ITensor                                      *_original_weights;
+    bool                                                _are_weights_reshaped;
+    bool                                                _is_fc_after_conv;
+    bool                                                _accumulate_biases;
+    bool                                                _is_quantized;
+    bool                                                _is_prepared;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
-- 
cgit v1.2.1
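
Usage sketch: nothing changes at the call site relative to the F32 path; configure() picks NEGEMMLowpMatrixMultiplyCore plus the fixed-point quantize-down output stage internally when the tensors are QASYMM8. The snippet below shows how the new quantized path might be driven, assuming the library's usual init/configure/allocate/run Tensor workflow; the sizes (128 inputs, 10 outputs, batch of 1, no bias) and the (scale, offset) quantization pairs are invented placeholders, not values from this patch.

    #include "arm_compute/core/Error.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    #include <iostream>

    using namespace arm_compute;

    int main()
    {
        // Hypothetical sizes: 128 inputs, 10 outputs, batch of 1, no bias.
        Tensor input, weights, output;

        // QASYMM8 tensors carry their (scale, offset) in the TensorInfo; the values here are placeholders.
        input.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::QASYMM8, QuantizationInfo(0.05f, 10)));
        weights.allocator()->init(TensorInfo(TensorShape(128U, 10U), 1, DataType::QASYMM8, QuantizationInfo(0.01f, 128)));
        output.allocator()->init(TensorInfo(TensorShape(10U), 1, DataType::QASYMM8, QuantizationInfo(0.1f, 5)));

        // Check the configuration up front with the static validate() documented above.
        const Status status = NEFullyConnectedLayer::validate(input.info(), weights.info(), nullptr, output.info());
        if(status.error_code() != ErrorCode::OK)
        {
            std::cerr << status.error_description() << std::endl;
            return 1;
        }

        // Because the tensors are QASYMM8, configure() selects the GEMMLowp core
        // and the quantize-down output stage instead of the float NEGEMM path.
        NEFullyConnectedLayer fc;
        fc.configure(&input, &weights, nullptr, &output);

        input.allocator()->allocate();
        weights.allocator()->allocate();
        output.allocator()->allocate();

        // ... fill input and weights with quantized data ...

        fc.run();
        return 0;
    }

If a bias were supplied, the quantized path would fold it in through NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint on the S32 GEMMLowp result, per the kernel list in the class documentation above.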