From f07d28d9ee8ae73a93fe433f72855b6dcf58ad90 Mon Sep 17 00:00:00 2001
From: Isabella Gottardi
Date: Tue, 6 Feb 2018 14:52:43 +0000
Subject: COMPMID-845: Create a ConvolutionLayer for CL

Change-Id: Ifcc406d2d0a99c911d6b6c875657b0e0028255d5
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/119148
Tested-by: Jenkins
Reviewed-by: Anthony Barbier
Reviewed-by: Georgios Pinitas
---
 .../runtime/CL/functions/CLConvolutionLayer.h | 132 +++++++--------------
 1 file changed, 41 insertions(+), 91 deletions(-)

(limited to 'arm_compute/runtime/CL/functions/CLConvolutionLayer.h')

diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
index f6672cef1d..53d59c3176 100644
--- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h
@@ -26,71 +26,18 @@
 
 #include "arm_compute/runtime/IFunction.h"
 
-#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
-#include "arm_compute/core/CL/kernels/CLFillBorderKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h"
-#include "arm_compute/core/CL/kernels/CLIm2ColKernel.h"
-#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/CLMemoryGroup.h"
-#include "arm_compute/runtime/CL/CLTensor.h"
-#include "arm_compute/runtime/CL/functions/CLGEMM.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
-#include "arm_compute/runtime/CL/functions/CLGEMMLowpOutputStage.h"
+#include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h"
+#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"
 #include "arm_compute/runtime/IMemoryManager.h"
 
 #include <memory>
 
 namespace arm_compute
 {
-class ICLTensor;
-
-/** Function to reshape and transpose the weights. This function calls the following kernels:
- *  -# @ref CLWeightsReshapeKernel
- *  -# @ref CLGEMMTranspose1xWKernel
- */
-class CLConvolutionLayerReshapeWeights : public IFunction
-{
-public:
-    /** Constructor */
-    CLConvolutionLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
-    /** Set the input and output tensors.
-     *
-     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
-     *                          Data type supported: QS8/QASYMM8/QS16/F16/F32.
-     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
-     * @param[out] output       Destination tensor. Data types supported: Same as @p weights.
-     * @param[in]  transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
-     *                          Data types supported: Same as @p weights.
-     */
-    void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW);
-    // Inherited methods overridden:
-    void run() override;
-
-private:
-    CLMemoryGroup            _memory_group;
-    CLWeightsReshapeKernel   _weights_reshape_kernel;
-    CLGEMMTranspose1xWKernel _weights_transposed_kernel;
-    CLTensor                 _weights_reshaped;
-    bool                     _transpose1xW;
-};
-
 /** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions:
  *
- * Note: weights already reshaped for quantized asymmetric is not supported
- *
- * -# @ref CLIm2ColKernel
- * -# @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
- * -# @ref CLGEMMLowpQuantizeDownInt32ToUint8Scale (if quantized asymmetric)
- * -# @ref CLCol2ImKernel
- *
- * if the weights are already reshaped:
- *    -# @ref CLGEMMInterleave4x4Kernel
- *    -# @ref CLGEMMMatrixMultiplyKernel
- * else
- *    -# @ref CLGEMM
+ * -# @ref CLGEMMConvolutionLayer
+ * -# @ref CLDirectConvolutionLayer
  */
 class CLConvolutionLayer : public IFunction
 {
@@ -108,46 +55,49 @@ public:
      * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
      *                          Data types supported: Same as @p input.
      * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
-     * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
-     *                          tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
+     */
+    void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer
+     *
+     * @param[in] input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                         while every optional dimension from 4 and above represents a batch of inputs.
+     *                         Data types supported: QS8/QASYMM8/QS16/F16/F32.
+     * @param[in] weights      Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in] biases       Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[in] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
+     *                         Data types supported: Same as @p input.
+     * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           const WeightsInfo &weights_info = WeightsInfo());
+    /** Static function to determine the convolution method that @ref CLConvolutionLayer would call for the given info
+     *
+     * @param[in] input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                         while every optional dimension from 4 and above represents a batch of inputs.
+     *                         Data types supported: QS8/QASYMM8/QS16/F16/F32.
+     * @param[in] weights      Weights tensor. Weights are a 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in] biases       Biases tensor. Shared biases supported. Biases are a 1D tensor with dimensions [OFM]. Data type supported: Same as @p input.
+     * @param[in] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent a batch of outputs.
+     *                         Data types supported: Same as @p input.
+     * @param[in] conv_info    Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input.
+     * @param[in] gpu_target   Specifies the @p GPUTarget.
+     *
+     * @return the convolution method to use
      */
-    void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
+    static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                                                    const WeightsInfo &weights_info, const GPUTarget gpu_target);
 
     // Inherited methods overridden:
     void run() override;
 
 private:
-    /** Configures the appropriate matrix multiply routine
-     *
-     * @param input                     Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
-     * @param weights                   Weights tensor. Data type supported: Same as @p input.
-     * @param output                    Output tensor. Data types supported: Same as @p input,
-     *                                  except for input of QASYMM8 type where output should be of S32 type.
-     * @param is_interleaved_transposed Flag that signals if matrix is interleaved transposed
-     */
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool is_interleaved_transposed, bool are_weights_reshaped);
-
-private:
-    CLMemoryGroup                                       _memory_group;
-    CLConvolutionLayerReshapeWeights                    _reshape_weights;
-    CLIm2ColKernel                                      _im2col_kernel;
-    CLGEMMInterleave4x4Kernel                           _interleave_kernel;
-    CLGEMMMatrixMultiplyKernel                          _mm_kernel;
-    CLGEMM                                              _mm_gemm;
-    CLGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
-    CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
-    CLCol2ImKernel                                      _col2im_kernel;
-
-    CLTensor _im2col_output;
-    CLTensor _interleave_output;
-    CLTensor _weights_reshaped;
-    CLTensor _weights_transposed;
-    CLTensor _gemm_output;
-    CLTensor _tmp_output;
-
-    bool _are_weights_reshaped;
-    bool _is_quantized;
-    bool _is_interleaved_transposed;
+    std::shared_ptr<IMemoryManager> _memory_manager;
+    std::unique_ptr<IFunction>      _function; /**< Function to run */
 };
 }
 #endif /* __ARM_COMPUTE_CLCONVOLUTIONLAYER_H__ */
--
cgit v1.2.1
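
For orientation: the patch only touches the public header, turning CLConvolutionLayer into a facade that owns a type-erased IFunction. A minimal sketch of how the matching CLConvolutionLayer.cpp could implement the dispatch follows; it is an illustration, not the source changed by this patch. It assumes the ConvolutionMethod enum exposes at least GEMM and DIRECT enumerators and that CLScheduler::get().target() reports the current GPU target.

    #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"

    #include <memory>
    #include <utility>

    using namespace arm_compute;

    void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output,
                                       const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
    {
        // Pick the implementation from tensor info and GPU target (assumed dispatch logic)
        switch(get_convolution_method(input->info(), weights->info(),
                                      (biases != nullptr) ? biases->info() : nullptr,
                                      output->info(), conv_info, weights_info,
                                      CLScheduler::get().target()))
        {
            case ConvolutionMethod::DIRECT:
            {
                // Direct convolution: no im2col/GEMM intermediate buffers needed
                auto f = std::unique_ptr<CLDirectConvolutionLayer>(new CLDirectConvolutionLayer());
                f->configure(input, weights, biases, output, conv_info);
                _function = std::move(f);
                break;
            }
            case ConvolutionMethod::GEMM:
            {
                // GEMM-based convolution: forwards the memory manager for its temporaries
                auto f = std::unique_ptr<CLGEMMConvolutionLayer>(new CLGEMMConvolutionLayer(_memory_manager));
                f->configure(input, weights, biases, output, conv_info, weights_info);
                _function = std::move(f);
                break;
            }
            default:
                ARM_COMPUTE_ERROR("Not supported.");
                break;
        }
    }

    void CLConvolutionLayer::run()
    {
        _function->run(); // Run whichever function configure() selected
    }

Hiding the concrete function behind std::unique_ptr<IFunction> is what lets the header drop all kernel-level includes: run() becomes a one-line virtual dispatch, and adding a new convolution strategy later only changes the .cpp.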
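A minimal usage sketch of the new facade, assuming the standard CLTensor/CLScheduler runtime; the shapes, data type and 3x3/stride-1/pad-1 configuration are made up for the example:

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h"

    using namespace arm_compute;

    int main()
    {
        CLScheduler::get().default_init(); // Create the CL context and queue

        // 3x3 convolution, 16 input feature maps -> 32 output feature maps, 224x224 input
        CLTensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(224U, 224U, 16U), 1, DataType::F32));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32));
        biases.allocator()->init(TensorInfo(TensorShape(32U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(224U, 224U, 32U), 1, DataType::F32));

        // Stride 1, padding 1: keeps the 224x224 spatial dimensions
        CLConvolutionLayer conv;
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

        // Allocate backing CL buffers after configure(), once padding requirements are known
        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src/weights/biases, e.g. via map()/unmap() ...

        conv.run();                // Enqueue the selected convolution function
        CLScheduler::get().sync(); // Wait for completion
        return 0;
    }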