From 60c3b0e6821a80d78ffca5be30e05d062d071cd2 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 8 Apr 2021 12:02:58 +0100 Subject: Port DepthwiseConvolution to new API Resolves: COMPMID-4185 Change-Id: Ib5f22356356a022d567bb18d44ea272b62d10ebf Signed-off-by: Michalis Spyrou Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5424 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- arm_compute/core/Types.h | 13 +++ arm_compute/core/utils/misc/ShapeCalculator.h | 15 +-- .../NEON/functions/NEDepthwiseConvolutionLayer.h | 64 ++--------- .../NEDepthwiseConvolutionAssemblyDispatch.h | 123 --------------------- 4 files changed, 29 insertions(+), 186 deletions(-) delete mode 100644 arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h (limited to 'arm_compute') diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 53333ff608..b1f340d18e 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1861,6 +1861,19 @@ private: bool _broadcast_bias; }; +struct ConvolutionInfo +{ + ConvolutionInfo() = default; + ConvolutionInfo(const PadStrideInfo &pad_stride_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info, const Size2D &dilation) + : pad_stride_info(pad_stride_info), depth_multiplier(depth_multiplier), act_info(act_info), dilation(dilation) + { + } + PadStrideInfo pad_stride_info{}; /**< Convolution info (Pads, strides,...) */ + unsigned int depth_multiplier{ 1 }; /**< Multiplier to apply to input's depth to retrieve the output depth. Defaults to 1 */ + ActivationLayerInfo act_info{}; /**< Fused activation to apply after convolution. */ + Size2D dilation{ Size2D(1, 1) }; /**< Dilation, in elements, across x and y. Defaults to (1, 1). */ +}; + struct DepthwiseConvolutionReshapeInfo { unsigned int c0{ 1 }; /**< Number of channels processed by the depth-wise convolution */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 56038dd853..ba37f9a61e 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -435,16 +435,13 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input) /** Calculate the depthwise convolution output shape of a tensor * - * @param[in] input Input tensor info - * @param[in] weights Weights tensor info - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier Multiplier to apply to the input's depth in order to retrieve the output's depth. - * @param[in] dilation Dilation, in elements, across x and y. Defaults to (1, 1). + * @param[in] input Input tensor info + * @param[in] weights Weights tensor info + * @param[in] info Convolution info * * @return the calculated shape */ -inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier, const Size2D &dilation = Size2D(1U, - 1U)) +inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, const ConvolutionInfo &info) { const TensorShape input_shape{ input.tensor_shape() }; const TensorShape weights_shape{ weights.tensor_shape() }; @@ -462,12 +459,12 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, unsigned int output_height = 0; std::tie(output_width, output_height) = scaled_dimensions(input_shape[width_idx], input_shape[height_idx], weights_shape[weights_width_idx], weights_shape[weights_height_idx], - conv_info, dilation); + info.pad_stride_info, info.dilation); TensorShape output_shape{ input_shape }; output_shape.set(width_idx, output_width); output_shape.set(height_idx, output_height); - output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier); + output_shape.set(channel_idx, input_shape[channel_idx] * info.depth_multiplier); return output_shape; } diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index 9aa8f04eb8..c74b2a93ee 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -24,9 +24,10 @@ #ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H #define ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" #include "arm_compute/runtime/NEON/functions/NEPermute.h" -#include "arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h" #include namespace arm_compute @@ -91,25 +92,6 @@ public: void prepare() override; private: - /** Static function to choose the best depthwise convolution function for @ref NEDepthwiseConvolutionLayer - * - * @param[in] input Source tensor info. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32 - * @param[in] weights Weights tensor info. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. - * Data type supported: Same as @p input or QASYMM8/QASYMM8_SIGNED/QSYMM8_PER_CHANNEL when @p input is QASYMM8/QASYMM8_SIGNED. - * @param[in] biases Biases tensor info. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8/QASYMM8_SIGNED. - * @param[in] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 quantized are supported. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return a Depthwise Convolution Function - */ - static DepthwiseConvolutionFunction get_depthwiseconvolution_function(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, - ActivationLayerInfo act_info = ActivationLayerInfo(), const Size2D &dilation = Size2D(1U, 1U)); - /** Basic function to execute optimized depthwise convolution routines. This function calls the following kernels: * * @note At the moment 3x3 and 5x5 convolution of stride 1, 2 are supported @@ -173,23 +155,9 @@ private: void prepare() override; private: - MemoryGroup _memory_group; - NEDepthwiseConvolutionAssemblyDispatch _dwc_optimized_func; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _accumulator; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - const ITensor *_original_weights; - bool _has_bias; - bool _is_quantized; - bool _is_nchw; - bool _permute; - bool _is_activationlayer_enabled; - bool _is_prepared; + MemoryGroup _memory_group; + struct Impl; + std::unique_ptr _impl; }; /** Basic function to execute a generic depthwise convolution. This function calls the following kernel: @@ -248,26 +216,14 @@ private: // Inherited methods overriden: void run() override; - void prepare() override; private: - std::unique_ptr _depthwise_conv_kernel; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - NEActivationLayer _activationlayer_function; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_prepared; - bool _is_nchw; - bool _is_activationlayer_enabled; - const ITensor *_original_weights; + struct Impl; + std::unique_ptr _impl; }; - - DepthwiseConvolutionFunction _depth_conv_func; - NEDepthwiseConvolutionLayerOptimizedInternal _func_optimized; - NEDepthwiseConvolutionLayerGeneric _func_generic; + MemoryGroup _memory_group; + struct Impl; + std::unique_ptr _impl; }; } // namespace arm_compute #endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTION_H */ diff --git a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h deleted file mode 100644 index 7f63717b02..0000000000 --- a/arm_compute/runtime/NEON/functions/assembly/NEDepthwiseConvolutionAssemblyDispatch.h +++ /dev/null @@ -1,123 +0,0 @@ -/* - * Copyright (c) 2019 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H -#define ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H - -#include "arm_compute/runtime/IFunction.h" - -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -namespace arm_compute -{ -/** Depthwise convolution assembly kernel glue */ -class NEDepthwiseConvolutionAssemblyDispatch : public IFunction -{ -public: - /** Default constructor - * - * @param[in,out] memory_manager Memory manager to use - */ - NEDepthwiseConvolutionAssemblyDispatch(std::shared_ptr memory_manager = nullptr); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyDispatch(const NEDepthwiseConvolutionAssemblyDispatch &) = delete; - /** Default move constructor */ - NEDepthwiseConvolutionAssemblyDispatch(NEDepthwiseConvolutionAssemblyDispatch &&) = default; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - NEDepthwiseConvolutionAssemblyDispatch &operator=(const NEDepthwiseConvolutionAssemblyDispatch &) = delete; - /** Default move assignment operator */ - NEDepthwiseConvolutionAssemblyDispatch &operator=(NEDepthwiseConvolutionAssemblyDispatch &&) = default; - /** Default destructor */ - ~NEDepthwiseConvolutionAssemblyDispatch(); - /** Initialize the function's source, destination, kernels and border_size. - * - * @note Supports only NHWC format - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - const Size2D &dilation = Size2D(1, 1)); - /** Static function to check if given info will lead to a valid configuration of @ref NEDepthwiseConvolutionAssemblyDispatch - * - * @note Supports only NHWC format - * - * @param[in] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [W, H, IFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return An error status - */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, - const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, const ActivationLayerInfo &act_info = ActivationLayerInfo(), - const Size2D &dilation = Size2D(1, 1)); - /** Check if the optimized kernel can be used for the given kernel sizes and strides - * - * @warning Even if this return true the inputs and outputs might need to get permuted as the only layout supported is NHWC - * - * @param[in] input Input tensor info. - * @param[in] weights Weights tensor info. - * @param[in] conv_info Convolution layer metadata. - * @param[in] depth_multiplier (Optional) Depth multiplier to be used. - * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). - * - * @return True if the assembly kernel could be used else false. Note that transformations of input/output could be needed. - */ - static bool is_optimized_supported(const ITensorInfo *input, const ITensorInfo *weights, PadStrideInfo conv_info, unsigned int depth_multiplier = 1, const Size2D &dilation = Size2D(1, 1)); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - struct LocalImpl; - -private: - MemoryGroup _memory_group; - const ITensor *_input; - const ITensor *_weights; - const ITensor *_bias; - ITensor *_output; - Tensor _packed_weights; - Tensor _workspace; - bool _is_prepared; - std::unique_ptr _pImpl; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_NEDEPTHWISECONVOLUTIONASSEMBLYDISPATCH_H */ -- cgit v1.2.1