author    Giorgio Arena <giorgio.arena@arm.com>    2018-07-16 17:20:38 +0100
committer Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:54:54 +0000
commit    a855af10a486c53c2271361cb87f349eca64b749 (patch)
tree      b326b63bdcaf76c9620b1bbf22942d4683503a65 /arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
parent    5a3ee4f708a9e1642b0211955ff905e7b67e831d (diff)
download  ComputeLibrary-a855af10a486c53c2271361cb87f349eca64b749.tar.gz
COMPMID-1401 Implement NEFullyConnectedLayer for QASYMM8
Change-Id: I0404df6d369855e2f458f2db8f26e81c80a1ee87
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/140148
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h | 113
1 file changed, 56 insertions(+), 57 deletions(-)
diff --git a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
index ea0762ea79..92ca17a3a4 100644
--- a/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h
@@ -26,66 +26,47 @@
#include "arm_compute/runtime/IFunction.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NETransposeKernel.h"
#include "arm_compute/runtime/MemoryGroup.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"
namespace arm_compute
{
/** Basic function to reshape the weights of the Fully Connected layer with NEON. This function calls the following kernels:
*
- * -# @ref NETransposeKernel (if @p transpose_weights is set to true)
- * -# @ref NEGEMMTranspose1xWKernel (if @p is_batched_fc_layer is set to true)
+ * -# @ref NETransposeKernel
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
-class NEFullyConnectedLayerReshapeWeights : public IFunction
+class NEFullyConnectedLayerReshapeWeights : public INESimpleFunction
{
public:
- /** Constructor */
- NEFullyConnectedLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
/** Set the input and output tensors.
*
- * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: F32.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
- * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights.
- * @param[in] is_batched_fc_layer True if it is a batched fully connected layer
+ * @param[in] input Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
+ * @param[out] output Destination tensor. Data type supported: Same as @p input.
*/
- void configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer);
+ void configure(const ITensor *input, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayerReshapeWeights
*
- * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: F32.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] transpose_weights True if the weights must be transposed. Data types supported: Same as @p weights.
- * @param[in] is_batched_fc_layer True if it is a batched fully connected layer
+ * @param[in] input Weights tensor info. The weights must be 2 dimensional. Data types supported: QASYMM8/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input.
*
* @return a status
*/
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, bool transpose_weights, bool is_batched_fc_layer);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- MemoryGroup _memory_group;
- NETransposeKernel _transpose_kernel;
- NEGEMMTranspose1xWKernel _transpose1xW_kernel;
- Tensor _transpose_output;
- bool _transpose_weights;
- bool _is_batched_fc_layer;
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output);
};
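A minimal usage sketch of the simplified single-kernel reshape interface above; the 2D shapes and the F32 data type are illustrative assumptions, not taken from this patch:

#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void reshape_weights_sketch()
{
    // Hypothetical 64x128 weights; the function simply transposes them.
    Tensor weights{}, weights_reshaped{};
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32));
    weights_reshaped.allocator()->init(TensorInfo(TensorShape(64U, 128U), 1, DataType::F32));

    NEFullyConnectedLayerReshapeWeights reshape{};
    reshape.configure(&weights, &weights_reshaped);

    weights.allocator()->allocate();
    weights_reshaped.allocator()->allocate();
    // ... fill `weights` here ...
    reshape.run();
}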
/** Basic function to compute a Fully Connected layer on NEON. This function calls the following NEON kernels:
- * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
- * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped flag is set to false) (called once)
- * -# @ref NEGEMMInterleave4x4Kernel (called if we have a multi-batch input)
- * -# @ref NEGEMMMatrixMultiplyKernel
- * -# @ref NEGEMMMatrixAccumulateBiasesKernel (if @p biases is not equal to nullptr)
+ * -# @ref NEIm2ColKernel (called when the input comes from a convolutional layer)
+ * -# @ref NEFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and @p transpose_weights is set to true) (called once)
+ * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ * -# @ref NEGEMMMatrixAccumulateBiasesKernel or @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if quantized asymmetric) (if @p biases is not equal to nullptr)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
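The @p are_weights_reshaped and @p transpose_weights flags referenced above travel in the fc_info argument of configure()/validate(). A hedged sketch, assuming the FullyConnectedLayerInfo field names from arm_compute/core/Types.h of this release (worth double-checking there):

#include "arm_compute/core/Types.h"

arm_compute::FullyConnectedLayerInfo make_fc_info()
{
    arm_compute::FullyConnectedLayerInfo fc_info{};
    fc_info.transpose_weights    = true;  // weights are supplied untransposed, so the reshape runs once
    fc_info.are_weights_reshaped = false; // set to true to skip the reshape entirely
    return fc_info;
}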
@@ -104,21 +85,33 @@ public:
NEFullyConnectedLayer &operator=(NEFullyConnectedLayer &&) = default;
/** Set the input and output tensors.
*
- * @param[in] input Source tensor. Data type supported: F16/F32.
- * @param[in] weights Weights tensor. The weights must be 2 dimensional. Data type supported: Same as @p input.
+ * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. The weights must be 2 dimensional.
+ *                     If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first three dimensions of the input.
+ * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
* @param[in]  biases  Bias tensor. Can be nullptr. Data type supported: Same as @p input.
- * @param[out] output Destination tensor. Data type supported: Same as @p input.
+ * @param[out] output Destination tensor. Its shape should be equal to the output of a matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
* @param[in] fc_info (Optional) Fully connected layer additional info
*/
void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output,
FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo());
- /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayer
+ /** Static function to check if given info will lead to a valid configuration of @ref NEFullyConnectedLayer
*
- * @param[in] input Source tensor info. Data type supported: F16/F32.
- * @param[in] weights Weights tensor info. The weights must be 2 dimensional. Data type supported: Same as @p input
- * @param[in] biases Bias tensor info. It can be nullptr. Data type supported:Same as @p input.
- * @param[in] output Destination tensor info. Data type supported: Same as @p input.
- * @param[in] fc_info (Optional) Fully connected layer additional info
+ * @param[in] input Source tensor info. Data type supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor info. The weights must be 2 dimensional.
+ *                    If this function is called after a Convolution Layer, the (transposed) weights will have as many rows as the product of the first three dimensions of the input.
+ * If it is called after another FullyConnected Layer, the (transposed) weights will have as many rows as the input's first dimension.
+ * Data type supported: Same as @p input.
+ * @param[in]  biases  Bias tensor info. Can be nullptr. Data type supported: Same as @p input.
+ * @param[in]  output  Destination tensor info. Its shape should be equal to the output of a matrix multiplication between:
+ * - The output of im2col on the input and the (transposed) 2D weights, if the function is called after a Convolution Layer
+ * - The input tensor and the (transposed) 2D weights, if the function is called after another FullyConnected Layer.
+ * Data type supported: Same as @p input.
+ * @param[in] fc_info (Optional) Fully connected layer additional info
*
* @return a status
*/
@@ -130,20 +123,26 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEIm2ColKernel _im2col_kernel;
- NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
- NEGEMMInterleave4x4Kernel _interleave4x4_kernel;
- NEGEMMMatrixMultiplyKernel _mm_kernel;
- NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
- Tensor _im2col_output;
- Tensor _interleave4x4_output;
- Tensor _reshape_weights_output;
- const ITensor *_original_weights;
- bool _is_batched_fc_layer;
- bool _linearize_input;
- bool _accumulate_biases;
- bool _is_prepared;
+ void configure_fc_fc(const ITensor *input, const ITensor *weights, ITensor *output);
+ void configure_conv_fc(const ITensor *input, const ITensor *weights, ITensor *output);
+ void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output);
+
+ MemoryGroup _memory_group;
+ NEIm2ColKernel _im2col_kernel;
+ NEFullyConnectedLayerReshapeWeights _reshape_weights_function;
+ NEGEMM _mm_gemm;
+ NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
+ NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
+ NEGEMMMatrixAccumulateBiasesKernel _accumulate_biases_kernel;
+ Tensor _im2col_output;
+ Tensor _gemmlowp_output;
+ Tensor _reshape_weights_output;
+ const ITensor *_original_weights;
+ bool _are_weights_reshaped;
+ bool _is_fc_after_conv;
+ bool _accumulate_biases;
+ bool _is_quantized;
+ bool _is_prepared;
};
} // namespace arm_compute
#endif /* __ARM_COMPUTE_NEFULLYCONNECTEDLAYER_H__ */
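A minimal end-to-end sketch of the new QASYMM8 path this patch enables; the shapes, quantization parameters, and the omitted (nullptr) bias are illustrative assumptions, and validate() is checked before configure():

#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void fully_connected_qasymm8_sketch()
{
    const QuantizationInfo qinfo(0.1f, 128); // assumed scale/offset

    // Assumed sizes: 128 inputs, 64 outputs. Weights are supplied untransposed
    // as TensorShape(num_inputs, num_outputs), matching the default transpose_weights == true.
    Tensor input{}, weights{}, output{};
    input.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::QASYMM8, qinfo));
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::QASYMM8, qinfo));
    output.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::QASYMM8, qinfo));

    // Check the configuration up front; bias is omitted for brevity.
    const Status status = NEFullyConnectedLayer::validate(input.info(), weights.info(), nullptr, output.info());
    if(status.error_code() != ErrorCode::OK)
    {
        return; // configuration rejected
    }

    NEFullyConnectedLayer fc{};
    fc.configure(&input, &weights, nullptr, &output);

    input.allocator()->allocate();
    weights.allocator()->allocate();
    output.allocator()->allocate();
    // ... fill input and weights with quantized data ...
    fc.run(); // QASYMM8 inputs dispatch to NEGEMMLowpMatrixMultiplyCore internally
}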