path: root/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2018-08-01 15:06:06 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:54:54 +0000
commit: 597a85666a84c9a9414264966651551564b79299 (patch)
tree: 6f2fd1bd8648c495b7e3324433ed902266fb2053 /arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
parent: 883f489da93e88d74aa0dfb206c56697ba0e63f0 (diff)
COMPMID-872 - Rework NEGEMMConvolutionLayer to use NEGEMM
Change-Id: I55f0018ac7214775ebbca63f58a3bf5c93732fec
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/142632
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
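
The gist of the rework: instead of driving the interleave/transpose and matrix-multiply kernels by hand, the convolution is expressed as im2col followed by a plain GEMM (NEGEMM for FP16/FP32, NEGEMMLowpMatrixMultiplyCore for QASYMM8) and col2im. Below is a minimal single-channel, stride-1, no-padding C++ sketch of that decomposition; it is illustrative only and uses none of the library's types.

#include <cstddef>
#include <vector>

// im2col: unroll each kh*kw receptive field into one row of a patch matrix,
// so that the convolution becomes a single matrix multiply (illustrative
// sketch: one channel, stride 1, no padding).
std::vector<float> im2col(const std::vector<float> &in, int h, int w, int kh, int kw)
{
    const int out_h = h - kh + 1;
    const int out_w = w - kw + 1;
    std::vector<float> cols(static_cast<std::size_t>(out_h) * out_w * kh * kw);
    for(int y = 0; y < out_h; ++y)
        for(int x = 0; x < out_w; ++x)
            for(int ky = 0; ky < kh; ++ky)
                for(int kx = 0; kx < kw; ++kx)
                    cols[((static_cast<std::size_t>(y) * out_w + x) * kh + ky) * kw + kx] = in[(y + ky) * w + (x + kx)];
    return cols;
}

// Naive GEMM, C = A * B with A: m x k, B: k x n. With A the patch matrix and
// B the reshaped weights (one column per output channel), C holds every
// output pixel for every output channel; col2im is then just a reshape.
std::vector<float> gemm(const std::vector<float> &a, const std::vector<float> &b, int m, int k, int n)
{
    std::vector<float> c(static_cast<std::size_t>(m) * n, 0.0f);
    for(int i = 0; i < m; ++i)
        for(int l = 0; l < k; ++l)
            for(int j = 0; j < n; ++j)
                c[static_cast<std::size_t>(i) * n + j] += a[static_cast<std::size_t>(i) * k + l] * b[static_cast<std::size_t>(l) * n + j];
    return c;
}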
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h')
-rw-r--r-- arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h | 101
1 file changed, 48 insertions(+), 53 deletions(-)
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
index 8f41462b0b..a362a29a82 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h
@@ -28,17 +28,13 @@
#include "arm_compute/core/NEON/kernels/NEArithmeticAdditionKernel.h"
#include "arm_compute/core/NEON/kernels/NECol2ImKernel.h"
-#include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "arm_compute/core/NEON/kernels/NEIm2ColKernel.h"
#include "arm_compute/core/NEON/kernels/NEWeightsReshapeKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h"
-#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
+#include "arm_compute/runtime/NEON/functions/NEGEMM.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMLowpOutputStage.h"
#include "arm_compute/runtime/Tensor.h"
@@ -49,55 +45,47 @@ namespace arm_compute
{
class ITensor;
-/** Function to reshape and perform 1xW transposition on the weights. This function calls the following kernels:
+/** Function to reshape the weights. This function calls the following kernel:
* -# @ref NEWeightsReshapeKernel
- * -# @ref NEGEMMTranspose1xWKernel (executed in case GEMM is required for the operation)
*/
class NEConvolutionLayerReshapeWeights : public IFunction
{
public:
/** Constructor */
- NEConvolutionLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
+ NEConvolutionLayerReshapeWeights();
/** Set the input and output tensors.
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[out] output Destination tensor. Data types supported: Same as @p weights.
- * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
- * Data types supported: Same as @p weights.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[out] output Destination tensor. Data types supported: Same as @p weights.
*/
- void configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW);
+ void configure(const ITensor *weights, const ITensor *biases, ITensor *output);
/** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayerReshapeWeights
*
- * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F16/F32.
- * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
- * @param[in] output Destination tensor. Data types supported: Same as @p weights.
- * @param[in] transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
- * Data types supported: Same as @p weights.
+ * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: QASYMM8/F16/F32.
+ * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+ * @param[in] output Destination tensor. Data types supported: Same as @p weights.
*
* @return an error status
*/
- static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, bool transpose1xW);
+ static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);
// Inherited methods overridden:
void run() override;
private:
- MemoryGroup _memory_group;
- NEWeightsReshapeKernel _weights_reshape_kernel;
- NEGEMMTranspose1xWKernel _weights_transposed_kernel;
- Tensor _weights_reshaped;
- bool _transpose1xW;
+ NEWeightsReshapeKernel _weights_reshape_kernel;
};
-/** Basic function to simulate a convolution layer. This function calls the following NEON kernels:
- * -# @ref NEWeightsReshapeKernel (executed only once for each configuration)
+/** Basic function to compute the convolution layer. This function calls the following NEON kernels/functions:
+ *
* -# @ref NEIm2ColKernel
- * -# @ref NEGEMMInterleave4x4Kernel (executed only in case GEMM is required for the operation)
- * -# @ref NEGEMMMatrixMultiplyKernel or @ref NEGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8Scale (if quantized asymmetric)
+ * -# @ref NEGEMM (if the data type is FP32 or FP16)
+ * -# @ref NEGEMMLowpMatrixMultiplyCore (if the data type is QASYMM8)
+ * -# @ref NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint (if the data type is QASYMM8)
+ * -# @ref NEArithmeticAdditionKernel (if biases != nullptr and we have a 1x1 convolution with the NHWC data layout)
* -# @ref NECol2ImKernel
- * -# @ref NEActivationLayer (executed only if the activation layer is enabled)
+ *
*/
class NEGEMMConvolutionLayer : public IFunction
{
@@ -158,45 +146,52 @@ public:
private:
/** Configures the appropriate matrix multiply routine
*
- * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
- * @param[in] weights Weights tensor. Data type supported: Same as @p input.
- * @param[out] output Output tensor. Data types supported: Same as @p input,
- * except for input of QASYMM8 type where output should be of S32 type.
- * @param[in] is_interleaved (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
- * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
+ * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. Data type supported: Same as @p input.
+ * @param[out] output Output tensor. Data types supported: Same as @p input,
+ * except for input of QASYMM8 type where output should be of S32 type.
+ * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
*/
- void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output, bool is_interleaved, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
+ void configure_mm(const ITensor *input, const ITensor *weights, ITensor *output, int gemm_3d_depth = 1);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer matrix multiply routines
+ *
+ * @param[in] input Input tensor. Data types supported: QASYMM8/F16/F32.
+ * @param[in] weights Weights tensor. Data type supported: Same as @p input.
+ * @param[in] output Output tensor. Data types supported: Same as @p input,
+ * except for input of QASYMM8 type where output should be of S32 type.
+ * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1)
+ * @param[in] skip_im2col (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
+ *
+ * @return a status
+ */
+ static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int gemm_3d_depth = 1, bool skip_im2col = false);
private:
MemoryGroup _memory_group;
- NEGEMMAssemblyDispatch _asm_glue;
- NEIm2ColKernel _input_im2col_kernel;
- NEGEMMInterleave4x4Kernel _input_interleave_kernel;
NEConvolutionLayerReshapeWeights _reshape_weights;
- NEGEMMMatrixMultiplyKernel _mm_kernel;
+ NEIm2ColKernel _im2col_kernel;
+ NEGEMM _mm_gemm;
NEGEMMLowpMatrixMultiplyCore _mm_gemmlowp;
NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
- NECol2ImKernel _output_col2im_kernel;
+ NECol2ImKernel _col2im_kernel;
NEActivationLayer _activationlayer_function;
NEArithmeticAdditionKernel _add_bias_kernel;
const ITensor *_original_weights;
- Tensor _input_im2col_reshaped;
- Tensor _input_interleaved_reshaped;
+ Tensor _im2col_output;
Tensor _weights_reshaped;
Tensor _gemm_output;
Tensor _tmp_output;
DataLayout _data_layout;
- bool _append_bias;
- bool _is_fully_connected_convolution;
- bool _are_weights_reshaped;
- bool _is_quantized;
- bool _is_interleaved;
- bool _is_activationlayer_enabled;
- bool _skip_im2col;
- bool _is_prepared;
+
+ bool _append_bias;
+ bool _skip_im2col;
+ bool _skip_col2im;
+ bool _is_quantized;
+ bool _is_activationlayer_enabled;
+ bool _is_prepared;
};
}
#endif /* __ARM_COMPUTE_NECONVOLUTIONGEMMLAYER_H__ */
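
For context, a minimal usage sketch of the reworked function as it stands after this patch. The shapes, padding and FP32 data type are assumptions chosen for illustration; error handling and tensor filling are omitted.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src, weights, biases, dst;

    // Assumed shapes: 224x224x16 FP32 input, 3x3 kernel, 32 output channels,
    // stride 1, no padding -> 222x222x32 output.
    src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 16U), 1, DataType::F32));
    weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
    biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
    dst.allocator()->init(TensorInfo(TensorShape(222U, 222U, 32U), 1, DataType::F32));

    // After this patch the function routes internally to NEGEMM (FP16/FP32)
    // or NEGEMMLowpMatrixMultiplyCore (QASYMM8).
    NEGEMMConvolutionLayer conv;
    conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 0, 0));

    src.allocator()->allocate();
    weights.allocator()->allocate();
    biases.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src, weights and biases here ...
    conv.run();
    return 0;
}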