From 6d109965f3641056bb8164dc8450a7327e76e939 Mon Sep 17 00:00:00 2001 From: giuros01 Date: Mon, 7 Jan 2019 17:47:19 +0000 Subject: COMPMID-1691: Optimize CLDepthwiseConvolutionKernel (QASYMM8/NHWC) for 3x3 kernels (stride=1 and stride=2) Change-Id: I7d0d2dc350feeb40d253d17f9ffd5051a8fb42ef Reviewed-on: https://review.mlplatform.org/511 Reviewed-by: Gian Marco Iodice Tested-by: Arm Jenkins --- arm_compute/core/CL/CLKernels.h | 3 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 11 +-- ...seConvolutionLayerReshapeWeightsGenericKernel.h | 79 ++++++++++++++++++++++ ...DepthwiseConvolutionLayerReshapeWeightsKernel.h | 77 +++++++++++++++++++++ .../CL/kernels/CLDepthwiseWeightsReshapeKernel.h | 79 ---------------------- arm_compute/core/Types.h | 6 ++ arm_compute/core/utils/misc/ShapeCalculator.h | 56 +++++++++++++++ .../CL/functions/CLDepthwiseConvolutionLayer.h | 44 ++++++------ 8 files changed, 250 insertions(+), 105 deletions(-) create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h create mode 100644 arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h delete mode 100644 arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h (limited to 'arm_compute') diff --git a/arm_compute/core/CL/CLKernels.h b/arm_compute/core/CL/CLKernels.h index 07e214be3f..cc4888c663 100644 --- a/arm_compute/core/CL/CLKernels.h +++ b/arm_compute/core/CL/CLKernels.h @@ -51,9 +51,10 @@ #include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h" #include "arm_compute/core/CL/kernels/CLDequantizationLayerKernel.h" #include "arm_compute/core/CL/kernels/CLDerivativeKernel.h" #include "arm_compute/core/CL/kernels/CLDilateKernel.h" diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h index 85fbaaee37..2fc9780a2f 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -53,11 +53,11 @@ public: ActivationLayerInfo act_info) override; /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel * - * @param[in] input Source tensor. DataType supported: QASYMM8. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * @param[in] input Source tensor info. DataType supported: QASYMM8. + * @param[in] weights Weights tensor info. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor info. A 1D tensor with dimensions [IFM]. 
Must be nullptr if not needed. * Data type supported: Same as @p input. - * @param[in] output Destination tensor. Data type supported: Same as @p input. + * @param[in] output Destination tensor info. Data type supported: Same as @p input. * @param[in] conv_info Padding and stride information to use for the convolution. * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. @@ -67,6 +67,7 @@ public: static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info = ActivationLayerInfo()); + // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; BorderSize border_size() const override; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h new file mode 100644 index 0000000000..3f969957e1 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2017-2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSGENERICKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSGENERICKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the depthwise weights reshape kernel. + * This kernel reshapes the original weights' lower 2D dimensions into a single row and + * keeps the second dimension as the original depth size.
+ * + **/ +class CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel(const CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &) = delete; + /** Allow instances of this class to be moved */ + CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel(CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &&) = default; + /** Allow instances of this class to be moved */ + CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel &&) = default; + /** Set the input and output of the kernel. + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. + * Data type supported: QASYMM8/F16/F32. + * @param[out] output The output tensor. Data type supported: same as @p input. + * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. + */ + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *biases = nullptr); + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel + * + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. + * Data type supported: QASYMM8/F32. + * @param[in] output The output tensor. Data type supported: same as @p input. + * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. + * + * @return a status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases = nullptr); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + const ICLTensor *_biases; + ICLTensor *_output; +}; +} // arm_compute +#endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSGENERICKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h new file mode 100644 index 0000000000..e75f310c29 --- /dev/null +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H__ +#define __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H__ + +#include "arm_compute/core/CL/ICLKernel.h" + +namespace arm_compute +{ +class ICLTensor; + +/** Interface for the kernel to reshape the weights of depthwise convolution. */ +class CLDepthwiseConvolutionLayerReshapeWeightsKernel : public ICLKernel +{ +public: + /** Default constructor */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(); + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(const CLDepthwiseConvolutionLayerReshapeWeightsKernel &) = delete; + /** Default move constructor */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; + /** Default move assignment operator */ + CLDepthwiseConvolutionLayerReshapeWeightsKernel &operator=(CLDepthwiseConvolutionLayerReshapeWeightsKernel &&) = default; + + /** Initialize the function's source and destination. + * + * @param[in] input The input tensor of dimension [IFM, W, H]. Data types supported: QASYMM8. Data layouts supported: NHWC + * @param[out] output The output tensor of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input. + * @param[in] info Depthwise convolution information to reshape the input tensor. + */ + void configure(const ICLTensor *input, ICLTensor *output, const DepthwiseConvolutionReshapeInfo &info); + + /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel + * + * @param[in] input The input tensor info of dimension [IFM, W, H]. Data types supported: QASYMM8. Data layouts supported: NHWC + * @param[in] output The output tensor info of dimension [W*H*C0, ceil(IFM/C0)]. C0 is the number of channels read by each thread. Data types supported: same as @p input. + * @param[in] info Depthwise convolution information to reshape the input tensor.
+ * + * @return a Status + */ + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const DepthwiseConvolutionReshapeInfo &info); + + // Inherited methods overridden: + void run(const Window &window, cl::CommandQueue &queue) override; + +private: + const ICLTensor *_input; + ICLTensor *_output; + + void configure_dot_product(const DepthwiseConvolutionReshapeInfo &info); + void configure_generic(const DepthwiseConvolutionReshapeInfo &info); +}; +} // namespace arm_compute +#endif /* __ARM_COMPUTE_CLDEPTHWISECONVOLUTIONLAYERRESHAPEWEIGHTSKERNEL_H__ */ diff --git a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h deleted file mode 100644 index 34ffa17c2b..0000000000 --- a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_CLDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ -#define __ARM_COMPUTE_CLDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ - -#include "arm_compute/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** Interface for the depthwise weights reshape kernel. - * This kernel reshape original weights' low 2D dimensions into a single row and - * have the second dimension as the original depth size. - * - **/ -class CLDepthwiseWeightsReshapeKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLDepthwiseWeightsReshapeKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseWeightsReshapeKernel(const CLDepthwiseWeightsReshapeKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLDepthwiseWeightsReshapeKernel &operator=(const CLDepthwiseWeightsReshapeKernel &) = delete; - /** Allow instances of this class to be moved */ - CLDepthwiseWeightsReshapeKernel(CLDepthwiseWeightsReshapeKernel &&) = default; - /** Allow instances of this class to be moved */ - CLDepthwiseWeightsReshapeKernel &operator=(CLDepthwiseWeightsReshapeKernel &&) = default; - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. - * Data type supported: QASYMM8/F16/F32. - * @param[out] output The output tensor. Data type supported: same as @p input. 
- * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. - */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *biases = nullptr); - /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseWeightsReshapeKernel - * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. - * Data type supported: QASYMM8/F32. - * @param[in] output The output tensor. Data type supported: same as @p input. - * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. - * - * @return a status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ITensorInfo *biases = nullptr); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -private: - const ICLTensor *_input; - const ICLTensor *_biases; - ICLTensor *_output; -}; -} // arm_compute -#endif /*__ARM_COMPUTE_CLDEPTHWISEWEIGHTSRESHAPEKERNEL_H__ */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 317c8990fa..9fbd0ef9fb 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1745,6 +1745,12 @@ private: const bool _reinterpret_input_as_3d; }; +struct DepthwiseConvolutionReshapeInfo +{ + unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */ + bool transpose{ false }; /**< True if the block MxC0 (where M is the area of the filter i.e. KwxKh) has to be transposed */ +}; + /** GEMMLowp output stage type */ enum class GEMMLowpOutputStageType { diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 35e21679d2..b256e73146 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -250,6 +250,30 @@ inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_inte return shape_interleaved_a; } +/** Calculate the reshaped shape of the weights to use in depthwise convolution + * + * @param[in] input Input tensor info + * @param[in] info Depthwise convolution information to be used for reshaping. 
+ * + * @return the calculated shape + */ +inline TensorShape compute_reshaped_depthwise_weights_shape(const ITensorInfo &input, const DepthwiseConvolutionReshapeInfo &info) +{ + const auto data_layout = input.data_layout(); + TensorShape weights_shape{}; + + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const size_t num_channels = input.dimension(channel_idx); + const size_t num_rows = input.dimension(height_idx); + const size_t num_cols = input.dimension(width_idx); + + weights_shape.set(0, num_rows * num_cols * info.c0); + weights_shape.set(1, DIV_CEIL(num_channels, info.c0)); + return weights_shape; +} + /** Calculate the transposed 1xW shape * * @param[in] b Input tensor info @@ -405,6 +429,38 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, return output_shape; } +/** Calculate the depthwise convolution output shape of a tensor + * + * @param[in] input Input tensor info + * @param[in] weights_width Weights width + * @param[in] weights_height Weights height + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. + * + * @return the calculated shape + */ +inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, int weights_width, int weights_height, PadStrideInfo conv_info, unsigned int depth_multiplier) +{ + const TensorShape input_shape{ input.tensor_shape() }; + + const DataLayout data_layout = input.data_layout(); + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + unsigned int output_width = 0; + unsigned int output_height = 0; + std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], + weights_width, weights_height, conv_info); + + TensorShape output_shape{ input_shape }; + output_shape.set(width_idx, output_width); + output_shape.set(height_idx, output_height); + output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier); + + return output_shape; +} + /** Calculate the upsampled output shape used for deconvolution * * @param[in] input Input tensor info diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index 60dddbb853..23034c2b7c 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited.
* * SPDX-License-Identifier: MIT * @@ -26,9 +26,10 @@ #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel.h" +#include "arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayerReshapeWeightsKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h" #include "arm_compute/core/CL/kernels/CLDepthwiseVectorToTensorKernel.h" -#include "arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h" #include "arm_compute/core/CL/kernels/CLDirectConvolutionLayerOutputStageKernel.h" #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMMatrixVectorMultiplyKernel.h" @@ -48,6 +49,7 @@ class ICLTensor; * * -# @ref CLDepthwiseConvolutionLayer3x3NCHWKernel (if data_layout == NCHW) * -# @ref CLDepthwiseConvolutionLayer3x3NHWCKernel (if data_layout == NHWC) + * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsKernel (if data_layout == NHWC) * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) * */ @@ -105,11 +107,13 @@ private: CLPermute _permute_input_to_nchw; CLPermute _permute_weights_to_nchw; CLPermute _permute_output_to_nhwc; + CLDepthwiseConvolutionLayerReshapeWeightsKernel _reshape_weights; CLTensor _permuted_input; CLTensor _permuted_weights; CLTensor _permuted_output; const ITensor *_original_weights; bool _needs_permute; + bool _needs_weights_reshape; bool _is_prepared; }; @@ -117,7 +121,7 @@ private: * * -# @ref CLDepthwiseIm2ColKernel * -# @ref CLGEMMMatrixVectorMultiplyKernel - * -# @ref CLDepthwiseWeightsReshapeKernel + * -# @ref CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel * -# @ref CLFillBorderKernel (if pad_x or pad_y > 0) * */ @@ -169,23 +173,23 @@ public: void prepare() override; private: - CLDepthwiseIm2ColKernel _im2col_kernel; - CLDepthwiseWeightsReshapeKernel _weights_reshape_kernel; - CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel; - CLDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; - CLDirectConvolutionLayerOutputStageKernel _output_stage_kernel; - CLActivationLayer _activationlayer_function; - CLFillBorderKernel _v2mm_input_fill_border; - CLFillBorderKernel _v2mm_weights_fill_border; - CLTensor _input_reshaped; - CLTensor _weights_reshaped; - CLTensor _v2mm_output; - CLTensor _output_reshaped; - bool _is_prepared; - bool _is_quantized; - bool _is_activationlayer_enabled; - const ICLTensor *_original_weights; - std::unique_ptr _optimised_function; + CLDepthwiseIm2ColKernel _im2col_kernel; + CLDepthwiseConvolutionLayerReshapeWeightsGenericKernel _weights_reshape_kernel; + CLGEMMMatrixVectorMultiplyKernel _v2mm_kernel; + CLDepthwiseVectorToTensorKernel _vector_to_tensor_kernel; + CLDirectConvolutionLayerOutputStageKernel _output_stage_kernel; + CLActivationLayer _activationlayer_function; + CLFillBorderKernel _v2mm_input_fill_border; + CLFillBorderKernel _v2mm_weights_fill_border; + CLTensor _input_reshaped; + CLTensor _weights_reshaped; + CLTensor _v2mm_output; + CLTensor _output_reshaped; + bool _is_prepared; + bool _is_quantized; + bool _is_activationlayer_enabled; + const ICLTensor *_original_weights; + std::unique_ptr _optimised_function; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_CLDEPTHWISECONVOLUTION_H__ */ -- cgit v1.2.1
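As a companion to the patch above, the following standalone C++ sketch (not part of the change) illustrates the weight layout produced for the optimized NHWC path: a [IFM, 3, 3] weights tensor is repacked into [W*H*C0, ceil(IFM/C0)], where C0 is the number of channels read by each thread, i.e. the c0 field of DepthwiseConvolutionReshapeInfo. The struct below is a local stand-in rather than the arm_compute type, and the tensor sizes and C0 value are illustrative only.

#include <cstddef>
#include <iostream>

// Local stand-in for arm_compute::DepthwiseConvolutionReshapeInfo (illustrative only).
struct ReshapeInfoSketch
{
    unsigned int c0{ 1 };            // number of channels processed together
    bool         transpose{ false }; // whether the MxC0 block (M = Kw*Kh) is transposed
};

// Mirrors the shape arithmetic of compute_reshaped_depthwise_weights_shape:
// dim0 = Kw * Kh * C0, dim1 = ceil(IFM / C0).
static void print_reshaped_weights_shape(std::size_t ifm, std::size_t kw, std::size_t kh, const ReshapeInfoSketch &info)
{
    const std::size_t dim0 = kw * kh * info.c0;
    const std::size_t dim1 = (ifm + info.c0 - 1) / info.c0;
    std::cout << "[" << dim0 << ", " << dim1 << "]" << std::endl;
}

int main()
{
    ReshapeInfoSketch info{};
    info.c0 = 4; // example value only; the real kernel derives C0 from its OpenCL configuration
    print_reshaped_weights_shape(32, 3, 3, info); // 3x3 weights, 32 channels -> prints [36, 8]
    return 0;
}

With C0 = 4 and 32 input channels, a 3x3 depthwise filter is repacked into a [36, 8] tensor, matching the [W*H*C0, ceil(IFM/C0)] shape documented for the output of CLDepthwiseConvolutionLayerReshapeWeightsKernel.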
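The stride=1 and stride=2 cases named in the commit subject only affect the spatial output size; the new compute_depthwise_convolution_shape overload delegates that arithmetic to scaled_dimensions. The sketch below uses a simplified FLOOR-rounding stand-in for scaled_dimensions, not the library routine itself, and the input size, padding and kernel size are illustrative values.

#include <iostream>

// Simplified stand-in for the FLOOR-rounding case of arm_compute::scaled_dimensions:
// out = (in + pad_begin + pad_end - kernel) / stride + 1
static unsigned int scaled_dim(unsigned int in, unsigned int kernel, unsigned int stride, unsigned int pad_begin, unsigned int pad_end)
{
    return (in + pad_begin + pad_end - kernel) / stride + 1;
}

int main()
{
    // 3x3 depthwise kernel on a 56x56 plane with padding 1 on each border:
    // stride 1 keeps the spatial size, stride 2 halves it (the two cases optimized by this patch).
    std::cout << "stride 1: " << scaled_dim(56, 3, 1, 1, 1) << std::endl; // 56
    std::cout << "stride 2: " << scaled_dim(56, 3, 2, 1, 1) << std::endl; // 28
    return 0;
}

The channel dimension is untouched by the stride and is simply multiplied by depth_multiplier, as in the overload added in the ShapeCalculator.h hunk above.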