From 7657224de2b697a8a92cccf26d98e53ccd7c1a03 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 4 Apr 2018 17:44:26 +0100 Subject: COMPMID-926 Add depth multiplier support to NEON/CL/GLES depthwise convolution Change-Id: I03f32c62350e5ea43e77bb15fc5a832d83719e3b Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/126657 Tested-by: Jenkins Reviewed-by: Michele DiGiorgio Reviewed-by: Georgios Pinitas --- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.h | 17 +-- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.h | 34 +++--- .../core/CL/kernels/CLDepthwiseIm2ColKernel.h | 19 +-- .../ICLDepthwiseConvolutionLayer3x3Kernel.h | 17 +-- .../kernels/GCDepthwiseConvolutionLayer3x3Kernel.h | 15 +-- .../kernels/NEDepthwiseConvolutionLayer3x3Kernel.h | 25 ++-- .../core/NEON/kernels/NEDepthwiseIm2ColKernel.h | 18 +-- arm_compute/core/utils/misc/ShapeCalculator.h | 4 +- .../CL/functions/CLDepthwiseConvolutionLayer.h | 33 ++--- .../functions/GCDepthwiseConvolutionLayer.h | 15 +-- .../NEON/functions/NEDepthwiseConvolutionLayer.h | 30 ++--- src/core/CL/cl_kernels/depthwise_convolution.cl | 30 +++-- .../cl_kernels/depthwise_convolution_quantized.cl | 2 + .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 5 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 13 +- src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp | 9 +- .../cs_shaders/depthwise_convolution3x3.cs | 6 +- .../GCDepthwiseConvolutionLayer3x3Kernel.cpp | 32 ++--- .../NEDepthwiseConvolutionLayer3x3Kernel.cpp | 41 ++++--- src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp | 19 +-- .../CL/functions/CLDepthwiseConvolutionLayer.cpp | 23 ++-- .../functions/GCDepthwiseConvolutionLayer.cpp | 4 +- .../NEON/functions/NEDepthwiseConvolutionLayer.cpp | 27 +++-- .../fixtures/DepthwiseConvolutionLayerFixture.h | 14 ++- tests/datasets/DepthwiseConvolutionLayerDataset.h | 103 ++++++++-------- .../MobileNetDepthwiseConvolutionLayerDataset.h | 18 +-- tests/validation/CL/DepthwiseConvolutionLayer.cpp | 56 +++++---- .../GLES_COMPUTE/DepthwiseConvolutionLayer.cpp | 8 +- .../validation/NEON/DepthwiseConvolutionLayer.cpp | 62 +++++++--- .../fixtures/DepthwiseConvolutionLayerFixture.h | 37 ++++-- .../reference/DepthwiseConvolutionLayer.cpp | 135 ++++++++++----------- .../reference/DepthwiseConvolutionLayer.h | 5 +- .../DepthwiseSeparableConvolutionLayer.cpp | 4 +- 33 files changed, 487 insertions(+), 393 deletions(-) diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h index 0f3f4bfc76..f80985a936 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.h @@ -39,15 +39,16 @@ public: CLDepthwiseConvolutionLayer3x3NCHWKernel(); /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info) override; void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h index 4ecc07af6a..62b9a8682e 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.h @@ -40,29 +40,31 @@ public: /** Default move assignment operator. */ /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] input Source tensor. DataType supported: QASYMM8. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info) override; /** Static function to check if given info will lead to a valid configuration of @ref CLDepthwiseConvolutionLayer3x3NHWCKernel * - * @param[in] input Source tensor. DataType supported: QASYMM8. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[in] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. + * @param[in] input Source tensor. DataType supported: QASYMM8. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [IFM, 3, 3]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[in] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU are supported. * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, ActivationLayerInfo act_info = ActivationLayerInfo()); void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h index 86b8b90e94..b8343da50a 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h @@ -33,7 +33,7 @@ class ICLTensor; /** Interface for the depthwise im2col reshape kernel. * This kernel reshape the input low 3 dimensions to a new 3D shape where the output's first dimension is - * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches in per image and third dimension unchanged . + * the linear patch size (FILTER_WIDTH * FILTER_HEIGHT) and second dimension is number of patches per image and third dimension unchanged . **/ class CLDepthwiseIm2ColKernel : public ICLKernel { @@ -50,15 +50,16 @@ public: CLDepthwiseIm2ColKernel &operator=(CLDepthwiseIm2ColKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32 - * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, - * while every dimension above 3 represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8/F32 + * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, + * while every dimension above 3 represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false); + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h index f02ba331b5..15233c5c32 100644 --- a/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/CL/kernels/ICLDepthwiseConvolutionLayer3x3Kernel.h @@ -50,15 +50,16 @@ public: ICLDepthwiseConvolutionLayer3x3Kernel &operator=(ICLDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. + * @param[in] input Source tensor. DataType supported: QASYMM8/F16/F32. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for QASYMM8 supported. */ - virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + virtual void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, ActivationLayerInfo act_info = ActivationLayerInfo()) = 0; protected: diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h index 46b9e897bb..7d0d80649e 100644 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.h @@ -47,14 +47,15 @@ public: GCDepthwiseConvolutionLayer3x3Kernel &operator=(GCDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: F16. - * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] input Source tensor. DataType supported: F16. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info); + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run(const Window &window) override; diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h index 0c2f30a98c..bd9e7eb781 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.h @@ -53,23 +53,25 @@ public: NEDepthwiseConvolutionLayer3x3Kernel &operator=(NEDepthwiseConvolutionLayer3x3Kernel &&) = default; /** Initialize the function's source, destination, conv and border_size. * - * @param[in] input Source tensor. DataType supported: QASYMM8, F32. - * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] data_layout (Optional) Data layout of the input and weights tensor + * @param[in] input Source tensor. DataType supported: QASYMM8, F32. + * @param[in] weights Weights tensor. This is a 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: Same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] data_layout (Optional) Data layout of the input and weights tensor */ - void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, DataLayout data_layout = DataLayout::NCHW); + void configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, DataLayout data_layout = DataLayout::NCHW); /** Static method that checks if optimized execution is supported for the given parameters * - * @param[in] input_shape Input shape - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] dt Data type of the input and weights - * @param[in] data_layout (Optional) Data layout of the input and weights tensor + * @param[in] input_shape Input shape + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] dt Data type of the input and weights + * @param[in] data_layout (Optional) Data layout of the input and weights tensor + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. * * @return True if the optimized kernels can be executed else false */ - static bool is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, DataLayout data_layout = DataLayout::NCHW); + static bool is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, unsigned int depth_multiplier = 1, DataLayout data_layout = DataLayout::NCHW); /** Generates the convolver object */ void generate_convolver(); @@ -110,6 +112,7 @@ private: std::unique_ptr _convolver; unsigned int _num_elems_written_per_iteration; bool _run_optimized; + unsigned int _depth_multiplier; }; } // namespace arm_compute #endif /* __ARM_COMPUTE_NEDEPTHWISECONVOLUTIONKERNEL3x3_H__ */ diff --git a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h index ca10bfaab2..9c11cfa425 100644 --- a/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthwiseIm2ColKernel.h @@ -54,15 +54,16 @@ public: NEDepthwiseIm2ColKernel &operator=(NEDepthwiseIm2ColKernel &&) = default; /** Set the input and output of the kernel. * - * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], - * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8, F32 - * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, - * while every dimension above 3 represents a batch. Data types supported: Same as @p input - * @param[in] kernel_dims The kernel dimensions (width and height). - * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. - * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], + * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QASYMM8, F32 + * @param[out] output The output tensor. First 3 lower dimensions represent a transform of each 3D input, + * while every dimension above 3 represents a batch. Data types supported: Same as @p input + * @param[in] kernel_dims The kernel dimensions (width and height). + * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false); + void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -87,6 +88,7 @@ private: Size2D _kernel_dims; PadStrideInfo _conv_info; bool _has_bias; + unsigned int _depth_multiplier; }; } // arm_compute #endif /*__ARM_COMPUTE_NEDEPTHWISEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 7d07d4619b..a0bc4eab54 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -125,7 +125,7 @@ inline TensorShape compute_transposed_shape(const ITensorInfo &input) return shape_transposed; } -inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info) +inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const ITensorInfo &weights, PadStrideInfo conv_info, unsigned int depth_multiplier) { const TensorShape input_shape{ input.tensor_shape() }; const TensorShape weights_shape{ weights.tensor_shape() }; @@ -133,6 +133,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, const DataLayout data_layout = input.data_layout(); const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); unsigned int output_width = 0; unsigned int output_height = 0; @@ -143,6 +144,7 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, TensorShape output_shape{ input_shape }; output_shape.set(width_idx, output_width); output_shape.set(height_idx, output_height); + output_shape.set(channel_idx, input_shape[channel_idx] * depth_multiplier); return output_shape; } diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h index 82947bc7e6..6e5ce4cd48 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolutionLayer.h @@ -55,15 +55,17 @@ public: CLDepthwiseConvolutionLayer3x3(); /** Initialize the function's source, destination, conv and border_size. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. - * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F16/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. + * @param[in] act_info (Optional) Activation layer information in case of a fused activation. Only RELU, BOUNDED_RELU and LU_BOUNDED_RELU for 3x3 QASYMM8 supported. */ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, ActivationLayerInfo act_info = ActivationLayerInfo()); + void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1, + ActivationLayerInfo act_info = ActivationLayerInfo()); // Inherited methods overriden: void run() override; @@ -96,14 +98,15 @@ public: CLDepthwiseConvolutionLayer &operator=(CLDepthwiseConvolutionLayer &&) = default; /** Initialize the function's source, destination, weights and convolution information. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info); + void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overriden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h index e523356fca..c99485634c 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.h @@ -47,14 +47,15 @@ public: GCDepthwiseConvolutionLayer3x3(); /** Initialize the function's source, destination, conv and border_size. * - * @param[in, out] input Source tensor. Data type supported: F16. (Written to only for border filling). - * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: F16. (Written to only for border filling). + * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info); + void configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overridden: void run() override final; diff --git a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h index 84d3594426..b80fb7f2c8 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h @@ -55,14 +55,15 @@ public: NEDepthwiseConvolutionLayer3x3(); /** Initialize the function's source, destination, kernels and border_size. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). - * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input. - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overriden: void run() override; @@ -109,14 +110,15 @@ public: NEDepthwiseConvolutionLayer &operator=(NEDepthwiseConvolutionLayer &&) = default; /** Initialize the function's source, destination, weights and convolution information. * - * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). - * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. - * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. - * Data type supported: Same as @p input, S32 when input is QASYMM8. - * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in, out] input Source tensor. Data type supported: QASYMM8/F32. (Written to only for border filling). + * @param[out] output Destination tensor. Data type supported: same as @p input. + * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input, S32 when input is QASYMM8. + * @param[in] conv_info Padding and stride information to use for the convolution. + * @param[in] depth_multiplier (Optional) Multiplier to apply to the input's depth in order to retrieve the output's depth. Defaults to 1. */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info); + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier = 1); // Inherited methods overriden: void run() override; diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index 07e67f4f2c..21c28539ef 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -24,6 +24,7 @@ #include "helpers.h" +#if defined(DEPTH_MULTIPLIER) #if defined(CONV_STRIDE_X) #if CONV_STRIDE_X == 1 @@ -192,6 +193,8 @@ __kernel void depthwise_convolution_3x3( Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); #endif //defined(HAS_BIAS) + src.ptr -= (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z; + uchar3 offset = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y; float3 weights_values0 = vload3(0, (__global float *)(weights.ptr + offset.s0)); float3 weights_values1 = vload3(0, (__global float *)(weights.ptr + offset.s1)); @@ -312,7 +315,7 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32( float2 pixels3 = 0.0f; __global uchar *weights_addr = (__global uchar *)weights.ptr; - __global uchar *src_addr = (__global uchar *)offset(&src, 0, 0); + __global uchar *src_addr = src.ptr - (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z; // Load the weights float3 weights_row0 = vload3(0, (__global float *)(weights_addr + 0 * weights_stride_y)); @@ -407,7 +410,7 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32( float2 pixels1 = 0.0f; __global uchar *weights_addr = (__global uchar *)weights.ptr; - __global uchar *src_addr = (__global uchar *)offset(&src, 0, 0); + __global uchar *src_addr = src.ptr - (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z; // Load the weights float3 weights_row0 = vload3(0, (__global float *)(weights_addr + 0 * weights_stride_y)); @@ -446,6 +449,8 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32( vstore2(pixels1, 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); } +#endif // defined(DEPTH_MULTIPLIER) + #if defined(SRC_WIDTH) && defined(DATA_TYPE) /** This kernel reshapes each of the tensor's low three dimensions to single rows. * @@ -501,11 +506,11 @@ __kernel void depthwise_weights_reshape( } #endif //defined(SRC_WIDTH) && defined(DATA_TYPE) -#if defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE) && defined(PAD_VALUE) +#if defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(SRC_HEIGHT) && defined(DATA_TYPE) && defined(PAD_VALUE) && defined(DEPTH_MULTIPLIER) /** This kernel performs a reshaping of the input tensor to a tensor used to perform depthwise convolution using vector to matrix multiplication. * * @note The data type must be passed at compile time using -DDATA_TYPE: e.g. -DDATA_TYPE=float - * @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_LEFT, -DPAD_TOP, -DPAD_RIGHT, -DPAD_BOTTOM, -DKERNEL_WIDHT, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT + * @note The convolution information must be passed at compile time using -DSTRIDE_X, -DSTRIDE_Y, -DPAD_LEFT, -DPAD_TOP, -DPAD_RIGHT, -DPAD_BOTTOM, -DKERNEL_WIDHT, -DKERNEL_HEIGHT, -DSRC_WIDTH, -DSRC_HEIGHT, -DDEPTH_MULTIPLIER * * @param[in] src_ptr Pointer to the source tensor. Supported data types: QS8/QS16/F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -534,7 +539,7 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d const int src_x = -PAD_LEFT + src_pixel_linear % max_initial_x; const int src_y = -PAD_TOP + src_pixel_linear / max_initial_x * STRIDE_Y; - const int src_z = get_global_id(2); + const int src_z = get_global_id(2) / DEPTH_MULTIPLIER; __global uchar *input_ptr = src_ptr + src_offset_first_element_in_bytes + src_z * src_stride_z; __global DATA_TYPE *output_ptr = ((__global DATA_TYPE *)(dst.ptr)); @@ -558,7 +563,7 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d #endif // defined(HAS_BIAS) } -#endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(DATA_TYPE) && defined(PAD_VALUE) +#endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(DATA_TYPE) && defined(PAD_VALUE) && defined(DEPTH_MULTIPLIER) #if defined(CONV_WIDTH) && defined(CONV_HEIGHT) && defined(DATA_TYPE) @@ -597,7 +602,7 @@ __kernel void depthwise_vector_to_tensor( #endif //defined(CONV_WIDTH) && defined(CONV_HEIGHT) && defined(DATA_TYPE) -#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) +#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) #if defined(CONV_STRIDE_X) #if CONV_STRIDE_X == 1 #define convolution1x3_f16 convolution1x3_stride_1_f16 @@ -716,6 +721,8 @@ inline half4 convolution3x3_f16( return pixels; } +#if defined(DEPTH_MULTIPLIER) + /** This OpenCL kernel computes the depthwise convolution 3x3 * * @param[in] src_ptr Pointer to the source image. Supported data types: F16 @@ -764,6 +771,8 @@ __kernel void depthwise_convolution_3x3_f16( Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); #endif //defined(HAS_BIAS) + src.ptr -= (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z; + uchar3 offset = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y; half3 weights_values0 = vload3(0, (__global half *)(weights.ptr + offset.s0)); half3 weights_values1 = vload3(0, (__global half *)(weights.ptr + offset.s1)); @@ -778,6 +787,7 @@ __kernel void depthwise_convolution_3x3_f16( vstore4(pixels, 0, (__global half *)dst.ptr); } +#endif // defined(DEPTH_MULTIPLIER) #endif // defined(CONV_STRIDE_X) /** This OpenCL kernel is optimized for Bifrost architectures and computes the 16bit floating point depthwise convolution 3x3 @@ -838,7 +848,7 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16( half4 pixels3 = 0.0f; __global uchar *weights_addr = (__global uchar *)weights.ptr; - __global uchar *src_addr = (__global uchar *)offset(&src, 0, 0); + __global uchar *src_addr = (__global uchar *)offset(&src, 0, 0) - (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z; // Load the weights half3 weights_row0 = vload3(0, (__global half *)(weights_addr + 0 * weights_stride_y)); @@ -935,7 +945,7 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16( half4 pixels1 = 0.0f; __global uchar *weights_addr = (__global uchar *)weights.ptr; - __global uchar *src_addr = (__global uchar *)offset(&src, 0, 0); + __global uchar *src_addr = (__global uchar *)offset(&src, 0, 0) - (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z; // Load the weights half3 weights_row0 = vload3(0, (__global half *)(weights_addr + 0 * weights_stride_y)); @@ -969,4 +979,4 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16( vstore4(pixels0, 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); vstore4(pixels1, 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); } -#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) +#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) diff --git a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl index a0c0a8b1fb..ccb3a1ffe2 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution_quantized.cl @@ -126,6 +126,8 @@ __kernel void depthwise_convolution_3x3_quantized_nchw( int bias_value = *((__global int *)(vector_offset(&biases, get_global_id(2)))); #endif //defined(HAS_BIAS) + src.ptr -= (get_global_id(2) - get_global_id(2) / DEPTH_MULTIPLIER) * src_step_z; + uchar3 w0 = vload3(0, weights.ptr + 0 * weights_stride_y); uchar3 w1 = vload3(0, weights.ptr + 1 * weights_stride_y); uchar3 w2 = vload3(0, weights.ptr + 2 * weights_stride_y); diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index de68ceda11..1997a901fe 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -50,6 +50,7 @@ BorderSize CLDepthwiseConvolutionLayer3x3NCHWKernel::border_size() const } void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier, ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); @@ -73,7 +74,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, } // Get convolved dimensions - const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info); + const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier); // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), @@ -84,6 +85,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, input->info()->quantization_info()); ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(2) != weights->info()->dimension(2)); _input = input; _output = output; @@ -98,6 +100,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, // Set build options ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); CLBuildOptions build_opts; + build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(depth_multiplier)); build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index d783b9e159..a02b84fba1 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -41,7 +41,7 @@ using namespace arm_compute::misc::shape_calculator; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8); @@ -50,6 +50,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU), "For QASYMM8 only relu, lower bounded relu and lower-upper bounded relu are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); + ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier > 1); // COMPMID-1071 Add depth multiplier support for NHWC ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(1) != 3 || weights->dimension(2) != 3); if(biases != nullptr) @@ -61,7 +62,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, if(output->total_size() != 0) { - const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info); + const TensorShape output_shape = compute_depthwise_convolution_shape(*input, *weights, conv_info, depth_multiplier); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape); } @@ -105,12 +106,13 @@ BorderSize CLDepthwiseConvolutionLayer3x3NHWCKernel::border_size() const } void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier, ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); // Get convolved dimensions - const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info); + const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier); // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), @@ -120,7 +122,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, input->info()->fixed_point_position(), input->info()->quantization_info()); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, act_info)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), weights->info(), (biases != nullptr) ? biases->info() : nullptr, output->info(), conv_info, depth_multiplier, act_info)); const unsigned int conv_stride_x = conv_info.stride().first; ARM_COMPUTE_ERROR_ON(conv_stride_x < 1 || conv_stride_x > 2); @@ -208,9 +210,10 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, } Status CLDepthwiseConvolutionLayer3x3NHWCKernel::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier, ActivationLayerInfo act_info) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, act_info)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, weights, biases, output, conv_info, depth_multiplier, act_info)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), weights->clone().get(), output->clone().get(), conv_info).first); return Status{}; diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index a0784dcad6..0aef52f791 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -42,13 +42,13 @@ CLDepthwiseIm2ColKernel::CLDepthwiseIm2ColKernel() { } -void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input->info()->data_type()) && has_bias); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != output->info()->dimension(2)); ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (kernel_dims.width * kernel_dims.height + ((has_bias) ? 1 : 0))); _input = input; @@ -68,6 +68,7 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height)); + build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(depth_multiplier)); build_opts.add_option_if(has_bias, "-DHAS_BIAS"); build_opts.add_option_if_else(is_data_type_quantized_asymmetric(input->info()->data_type()), "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), @@ -85,8 +86,8 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu } // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - // The CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped + Window win = calculate_max_window(*output->info(), Steps()); + // CLDepthwiseIm2ColKernel doesn't need padding so update_window_and_padding() can be skipped output->info()->set_valid_region(ValidRegion(Coordinates(), output->info()->tensor_shape())); ICLKernel::configure(win); diff --git a/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs b/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs index adfc126c95..134cc1060f 100644 --- a/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/depthwise_convolution3x3.cs @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -108,6 +108,8 @@ void main() uint z_index = gl_GlobalInvocationID.z; TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_attrs.stride_z); + src_iter.current_offset_in_bytes -= int((z_index - z_index / uint(DEPTH_MULTIPLIER)) * src_attrs.step_z); + vec4 w[3]; w[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(weights_ptr, weights_iter); w[1] = LOAD_UNPACK4_HALF(weights_ptr, TENSOR3D_OFFSET(weights_iter, 0, 1, 0)); @@ -263,6 +265,8 @@ void main() uint z_index = gl_GlobalInvocationID.z; TENSOR_ITERATOR_ADVANCE_IN_BYTES(weights_iter, z_index * weights_attrs.stride_z); + src_iter.current_offset_in_bytes -= int((z_index - z_index / uint(DEPTH_MULTIPLIER)) * src_attrs.step_z); + vec4 w[3]; w[0] = LOAD_UNPACK4_CURRENT_ITEM_HALF(weights_ptr, weights_iter); w[1] = LOAD_UNPACK4_HALF(weights_ptr, TENSOR3D_OFFSET(weights_iter, 0, 1, 0)); diff --git a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp index 9343268d9e..c2374096a2 100644 --- a/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCDepthwiseConvolutionLayer3x3Kernel.cpp @@ -33,31 +33,10 @@ #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/Utils.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" using namespace arm_compute; - -namespace -{ -/** Calculates expected output shape dimension - * - * @param[in] Input shape - * - * @return Expected output shape - */ -TensorShape get_output_shape(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info) -{ - unsigned int output_width = 0; - unsigned int output_height = 0; - - std::tie(output_width, output_height) = scaled_dimensions(input_shape.x(), input_shape.y(), weights_shape.x(), weights_shape.y(), conv_info); - - TensorShape output_shape = input_shape; - output_shape.set(0, output_width); - output_shape.set(1, output_height); - - return output_shape; -} -} // namespace +using namespace arm_compute::misc::shape_calculator; GCDepthwiseConvolutionLayer3x3Kernel::GCDepthwiseConvolutionLayer3x3Kernel() : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_left(0), _conv_pad_top(0), _lws(gles::NDRange(1U, 1U, 1U)) @@ -69,7 +48,8 @@ BorderSize GCDepthwiseConvolutionLayer3x3Kernel::border_size() const return _border_size; } -void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) +void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -83,7 +63,7 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con } // Get convolved dimensions - TensorShape output_shape = get_output_shape(input->info()->tensor_shape(), weights->info()->tensor_shape(), conv_info); + const TensorShape output_shape = compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier); // Output auto inizialitation if not yet initialized auto_init_if_empty(*output->info(), @@ -93,6 +73,7 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con input->info()->fixed_point_position()); ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(2) != weights->info()->dimension(2)); _input = input; _output = output; @@ -108,6 +89,7 @@ void GCDepthwiseConvolutionLayer3x3Kernel::configure(const IGCTensor *input, con ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); std::set options; + options.emplace("#define DEPTH_MULTIPLIER " + support::cpp11::to_string(depth_multiplier)); options.emplace("#define LOCAL_SIZE_X " + support::cpp11::to_string(_lws[0])); options.emplace("#define LOCAL_SIZE_Y " + support::cpp11::to_string(_lws[1])); options.emplace("#define LOCAL_SIZE_Z " + support::cpp11::to_string(_lws[2])); diff --git a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp index 49c67d19bb..8cdf175d8a 100644 --- a/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseConvolutionLayer3x3Kernel.cpp @@ -52,13 +52,14 @@ class convolver_3x3 { public: static void convolve(const Window &window, unsigned int num_elems_written_per_iteration, - const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) + const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { const int input_offset = -input->info()->quantization_info().offset; const int weights_offset = -weights->info()->quantization_info().offset; const int input_stride_x = input->info()->strides_in_bytes().x(); const int input_stride_y = input->info()->strides_in_bytes().y(); + const int input_stride_z = input->info()->strides_in_bytes().z(); const int output_stride_y = output->info()->strides_in_bytes().y(); const int kernel_stride_y = weights->info()->strides_in_bytes().y(); const int kernel_stride_z = weights->info()->strides_in_bytes().z(); @@ -93,7 +94,7 @@ public: int ih = 0; int oh = 0; - const uint8_t *input_ptr = in.ptr() - conv_pad_x * input_stride_x - conv_pad_y * input_stride_y; + const uint8_t *input_ptr = in.ptr() - conv_pad_x * input_stride_x - conv_pad_y * input_stride_y - (id.z() - id.z() / depth_multiplier) * input_stride_z; const uint8_t *ptr_weights_base = weights_ptr + id.z() * kernel_stride_z; const auto ptr_weights_r0 = reinterpret_cast(ptr_weights_base); @@ -125,19 +126,19 @@ public: template inline void convolve_3x3(const Window &window, unsigned int num_elems_written_per_iteration, - const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info) + const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { const unsigned int conv_stride_x = std::get<0>(conv_info.stride()); switch(conv_stride_x) { case 1: - convolver_3x3::convolve(window, num_elems_written_per_iteration, input, weights, output, conv_info); + convolver_3x3::convolve(window, num_elems_written_per_iteration, input, weights, output, conv_info, depth_multiplier); break; case 2: - convolver_3x3::convolve(window, num_elems_written_per_iteration, input, weights, output, conv_info); + convolver_3x3::convolve(window, num_elems_written_per_iteration, input, weights, output, conv_info, depth_multiplier); break; case 3: - convolver_3x3::convolve(window, num_elems_written_per_iteration, input, weights, output, conv_info); + convolver_3x3::convolve(window, num_elems_written_per_iteration, input, weights, output, conv_info, depth_multiplier); break; default: ARM_COMPUTE_ERROR("Not implemented"); @@ -146,7 +147,7 @@ inline void convolve_3x3(const Window &window, unsigned int num_elems_written_pe } // namespace NEDepthwiseConvolutionLayer3x3Kernel::NEDepthwiseConvolutionLayer3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _conv_info(), _convolver(nullptr), _num_elems_written_per_iteration(0), _run_optimized(false) + : _border_size(0), _input(), _output(), _weights(), _conv_info(), _convolver(nullptr), _num_elems_written_per_iteration(0), _run_optimized(false), _depth_multiplier(1) { } @@ -155,20 +156,22 @@ BorderSize NEDepthwiseConvolutionLayer3x3Kernel::border_size() const return _border_size; } -void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, DataLayout data_layout) +void NEDepthwiseConvolutionLayer3x3Kernel::configure(const ITensor *input, const ITensor *weights, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, + DataLayout data_layout) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - _input = input; - _output = output; - _weights = weights; - _conv_info = conv_info; - _convolver = nullptr; + _input = input; + _output = output; + _weights = weights; + _conv_info = conv_info; + _depth_multiplier = depth_multiplier; + _convolver = nullptr; _run_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->info()->tensor_shape(), conv_info, - input->info()->data_type(), + input->info()->data_type(), depth_multiplier, data_layout); (_run_optimized) ? configure_optimized() : configure_generic(); @@ -182,7 +185,7 @@ void NEDepthwiseConvolutionLayer3x3Kernel::run(const Window &window, const Threa (_run_optimized) ? run_optimized(window, info) : run_generic(window, info); } -bool NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, DataLayout data_layout) +bool NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(TensorShape input_shape, PadStrideInfo conv_info, DataType dt, unsigned int depth_multiplier, DataLayout data_layout) { // Reshape input shape if in NHWC format TensorShape in_shape{ input_shape }; @@ -210,7 +213,7 @@ bool NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(Tenso bool is_valid_padding = (pad_top == 0) && (pad_right == 0) && (pad_bottom == 0) && (pad_left == 0); bool supported_padding = is_same_padding || is_valid_padding; - return supported_datatype && supported_strides && supported_padding; + return supported_datatype && supported_strides && supported_padding && (depth_multiplier == 1); } void NEDepthwiseConvolutionLayer3x3Kernel::generate_convolver() @@ -227,7 +230,7 @@ void NEDepthwiseConvolutionLayer3x3Kernel::configure_generic() ARM_COMPUTE_ERROR_ON(_weights->info()->dimension(0) != 3 || _weights->info()->dimension(1) != 3); // Get convolved dimensions - const TensorShape output_shape = compute_depthwise_convolution_shape(*_input->info(), *_weights->info(), _conv_info); + const TensorShape output_shape = compute_depthwise_convolution_shape(*_input->info(), *_weights->info(), _conv_info, _depth_multiplier); const DataType output_dt = (_input->info()->data_type() == DataType::QASYMM8) ? DataType::S32 : _input->info()->data_type(); // Output auto inizialitation if not yet initialized @@ -317,10 +320,10 @@ void NEDepthwiseConvolutionLayer3x3Kernel::run_generic(const Window &window, con switch(_input->info()->data_type()) { case DataType::F32: - convolve_3x3(window, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + convolve_3x3(window, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info, _depth_multiplier); break; case DataType::QASYMM8: - convolve_3x3(window, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info); + convolve_3x3(window, _num_elems_written_per_iteration, _input, _weights, _output, _conv_info, _depth_multiplier); break; default: ARM_COMPUTE_ERROR("Not implemented"); diff --git a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp index b924d9f8bd..cfd8eacfdd 100644 --- a/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEDepthwiseIm2ColKernel.cpp @@ -85,7 +85,7 @@ void NEDepthwiseIm2ColKernel::run_generic(const Window &window) const int src_y = -pad_top + src_pixel_linear / max_initial_x * stride_y; // Get pointers - const uint8_t *const input_ptr = in.ptr() + id.z() * input_stride_z; + const uint8_t *const input_ptr = in.ptr() + id.z() / _depth_multiplier * input_stride_z; auto output_ptr = reinterpret_cast(out.ptr()); const int height = src_y + _kernel_dims.height; const int width = src_x + _kernel_dims.width; @@ -114,24 +114,25 @@ void NEDepthwiseIm2ColKernel::run_generic(const Window &window) } NEDepthwiseIm2ColKernel::NEDepthwiseIm2ColKernel() - : _func(nullptr), _input(nullptr), _output(nullptr), _kernel_dims(), _conv_info(), _has_bias() + : _func(nullptr), _input(nullptr), _output(nullptr), _kernel_dims(), _conv_info(), _has_bias(), _depth_multiplier(1) { } -void NEDepthwiseIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void NEDepthwiseIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, unsigned int depth_multiplier) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); ARM_COMPUTE_ERROR_ON(is_data_type_quantized_asymmetric(input->info()->data_type()) && has_bias); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != output->info()->dimension(2)); ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (kernel_dims.width * kernel_dims.height + ((has_bias) ? 1 : 0))); - _input = input; - _output = output; - _kernel_dims = kernel_dims; - _conv_info = conv_info; - _has_bias = has_bias; + _input = input; + _output = output; + _kernel_dims = kernel_dims; + _conv_info = conv_info; + _has_bias = has_bias; + _depth_multiplier = depth_multiplier; // Configure kernel window Window win = calculate_max_window(*input->info(), Steps()); diff --git a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp index 0276b37e09..ea2f93b85d 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp @@ -39,7 +39,8 @@ CLDepthwiseConvolutionLayer3x3::CLDepthwiseConvolutionLayer3x3() { } -void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, ActivationLayerInfo act_info) +void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier, + ActivationLayerInfo act_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -54,7 +55,7 @@ void CLDepthwiseConvolutionLayer3x3::configure(ICLTensor *input, const ICLTensor } _kernel->set_target(CLScheduler::get().target()); - _kernel->configure(input, weights, biases, output, conv_info, act_info); + _kernel->configure(input, weights, biases, output, conv_info, depth_multiplier, act_info); // Configure border handler PixelValue &&zero_value(0.f); @@ -77,11 +78,11 @@ CLDepthwiseConvolutionLayer::CLDepthwiseConvolutionLayer() { } -void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info) +void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != weights->info()->dimension(2)); const size_t weights_w = weights->info()->dimension(0); const size_t weights_h = weights->info()->dimension(1); @@ -95,11 +96,15 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w const GPUTarget gpu_target = CLScheduler::get().target(); // Calculate output shape - TensorShape dwc_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info); + TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier); + + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); // Output width and height - const unsigned int conv_w = dwc_output_shape.x(); - const unsigned int conv_h = dwc_output_shape.y(); + const unsigned int conv_w = output_shape.x(); + const unsigned int conv_h = output_shape.y(); // Set up intermediate tensors const size_t patch_size = weights_w * weights_h + ((append_bias) ? 1 : 0); @@ -112,7 +117,7 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w shape_im2col.set(2, weights_z); _input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col)); _im2col_kernel.set_target(gpu_target); - _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias); + _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier); // Weights reshape configuration const TensorShape shape_weights_reshape(patch_size, weights_z); @@ -128,7 +133,7 @@ void CLDepthwiseConvolutionLayer::configure(ICLTensor *input, const ICLTensor *w _v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out)); _v2mm_kernel.set_target(gpu_target); _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output); - _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(dwc_output_shape)); + _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape)); _vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output, conv_w, conv_h); // Output staged configuration diff --git a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp index 9cba37110b..7121654a75 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCDepthwiseConvolutionLayer.cpp @@ -35,10 +35,10 @@ GCDepthwiseConvolutionLayer3x3::GCDepthwiseConvolutionLayer3x3() { } -void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info) +void GCDepthwiseConvolutionLayer3x3::configure(IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { auto k = arm_compute::support::cpp14::make_unique(); - k->configure(input, weights, biases, output, conv_info); + k->configure(input, weights, biases, output, conv_info, depth_multiplier); _kernel = std::move(k); // Configure border handler diff --git a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp index 8691fb9f76..0a977ad08d 100644 --- a/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp @@ -41,7 +41,7 @@ NEDepthwiseConvolutionLayer3x3::NEDepthwiseConvolutionLayer3x3() { } -void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) +void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -53,6 +53,7 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we _is_optimized = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input->info()->tensor_shape(), conv_info, input->info()->data_type(), + depth_multiplier, input->info()->data_layout()); _are_weights_reshaped = false; _is_nchw = input->info()->data_layout() == DataLayout::NCHW; @@ -70,7 +71,7 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we _permute_weights.configure(weights, &_weights_hwio, PermutationVector(2U, 0U, 1U)); // Configure optimized depthwise - _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, DataLayout::NHWC); + _dwc_kernel.configure(&_input_nhwc, &_weights_hwio, &_output_nhwc, conv_info, depth_multiplier, DataLayout::NHWC); // Configure the function to transform the convoluted output to ACL's native ordering format NCHW _permute_output.configure(&_output_nhwc, output, PermutationVector(1U, 2U, 0U)); @@ -82,7 +83,7 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we } else { - _dwc_kernel.configure(input, weights, output, conv_info, DataLayout::NHWC); + _dwc_kernel.configure(input, weights, output, conv_info, depth_multiplier, DataLayout::NHWC); } } else @@ -96,7 +97,7 @@ void NEDepthwiseConvolutionLayer3x3::configure(ITensor *input, const ITensor *we } // Configure depthwise convolution kernel - _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info); + _dwc_kernel.configure(input, weights, (_is_quantized) ? &_accumulator : output, conv_info, depth_multiplier); // Configure border handler _border_handler.configure(input, _dwc_kernel.border_size(), BorderMode::CONSTANT, zero_value); @@ -175,11 +176,11 @@ NEDepthwiseConvolutionLayer::NEDepthwiseConvolutionLayer() { } -void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info) +void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, unsigned int depth_multiplier) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != weights->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON((input->info()->dimension(2) * depth_multiplier) != weights->info()->dimension(2)); const size_t weights_w = weights->info()->dimension(0); const size_t weights_h = weights->info()->dimension(1); @@ -193,11 +194,15 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh bool append_bias = (biases != nullptr) && !_is_quantized; // Calculate output shape - TensorShape dwc_output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info); + TensorShape output_shape = shape_calculator::compute_depthwise_convolution_shape(*input->info(), *weights->info(), conv_info, depth_multiplier); + + // Output auto inizialitation if not yet initialized + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape)); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape); // Output width and height - const unsigned int conv_w = dwc_output_shape.x(); - const unsigned int conv_h = dwc_output_shape.y(); + const unsigned int conv_w = output_shape.x(); + const unsigned int conv_h = output_shape.y(); // Set up intermediate tensors const size_t patch_size = weights_w * weights_h + (append_bias ? 1 : 0); @@ -209,7 +214,7 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh shape_im2col.set(1, conv_size); shape_im2col.set(2, weights_z); _input_reshaped.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(shape_im2col)); - _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias); + _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, append_bias, depth_multiplier); // Weights reshape configuration const TensorShape shape_weights_reshape(patch_size, weights_z); @@ -224,7 +229,7 @@ void NEDepthwiseConvolutionLayer::configure(ITensor *input, const ITensor *weigh shape_v2mm_out.set(2, 1); _v2mm_output.allocator()->init(input->info()->clone()->set_is_resizable(true).reset_padding().set_data_type(v2mm_dt).set_tensor_shape(shape_v2mm_out)); _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output); - _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(dwc_output_shape)); + _output_reshaped.allocator()->init(_v2mm_output.info()->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(output_shape)); _vector_to_tensor_kernel.configure(&_v2mm_output, (_is_quantized) ? &_output_reshaped : output, conv_w, conv_h); // Output staged configuration diff --git a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h index 9d29aee76a..9276431de8 100644 --- a/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/benchmark/fixtures/DepthwiseConvolutionLayerFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "tests/Globals.h" #include "tests/Utils.h" #include "tests/framework/Fixture.h" @@ -36,14 +37,25 @@ namespace test { namespace benchmark { +using namespace arm_compute::misc::shape_calculator; + /** Fixture that can be used for NEON and CL */ template class DepthwiseConvolutionLayerFixture : public framework::Fixture { public: template - void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) + void setup(TensorShape src_shape, Size2D kernel_size, PadStrideInfo info, DataType data_type, int batches) { + // Get shapes + TensorShape weights_shape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(src_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + TensorShape dst_shape = compute_depthwise_convolution_shape(in_info, we_info, info, 1); + + weights_shape.set(2, dst_shape.z()); + // Set batched in source and destination shapes const unsigned int fixed_point_position = 4; src_shape.set(3 /* batch */, batches); diff --git a/tests/datasets/DepthwiseConvolutionLayerDataset.h b/tests/datasets/DepthwiseConvolutionLayerDataset.h index 1e77a0c8dd..4b6d85b82d 100644 --- a/tests/datasets/DepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseConvolutionLayerDataset.h @@ -38,17 +38,15 @@ namespace datasets class DepthwiseConvolutionLayerDataset { public: - using type = std::tuple; + using type = std::tuple; struct iterator { iterator(std::vector::const_iterator src_it, - std::vector::const_iterator weights_it, - std::vector::const_iterator dst_it, + std::vector::const_iterator weights_it, std::vector::const_iterator infos_it) : _src_it{ std::move(src_it) }, _weights_it{ std::move(weights_it) }, - _dst_it{ std::move(dst_it) }, _infos_it{ std::move(infos_it) } { } @@ -58,21 +56,19 @@ public: std::stringstream description; description << "In=" << *_src_it << ":"; description << "Weights=" << *_weights_it << ":"; - description << "Out=" << *_dst_it << ":"; description << "Info=" << *_infos_it; return description.str(); } DepthwiseConvolutionLayerDataset::type operator*() const { - return std::make_tuple(*_src_it, *_weights_it, *_dst_it, *_infos_it); + return std::make_tuple(*_src_it, *_weights_it, *_infos_it); } iterator &operator++() { ++_src_it; ++_weights_it; - ++_dst_it; ++_infos_it; return *this; @@ -80,26 +76,24 @@ public: private: std::vector::const_iterator _src_it; - std::vector::const_iterator _weights_it; - std::vector::const_iterator _dst_it; + std::vector::const_iterator _weights_it; std::vector::const_iterator _infos_it; }; iterator begin() const { - return iterator(_src_shapes.begin(), _weight_shapes.begin(), _dst_shapes.begin(), _infos.begin()); + return iterator(_src_shapes.begin(), _weight_shapes.begin(), _infos.begin()); } int size() const { - return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_dst_shapes.size(), _infos.size()))); + return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), _infos.size())); } - void add_config(TensorShape src, TensorShape weights, TensorShape dst, PadStrideInfo info) + void add_config(TensorShape src, Size2D weights, PadStrideInfo info) { _src_shapes.emplace_back(std::move(src)); _weight_shapes.emplace_back(std::move(weights)); - _dst_shapes.emplace_back(std::move(dst)); _infos.emplace_back(std::move(info)); } @@ -109,8 +103,7 @@ protected: private: std::vector _src_shapes{}; - std::vector _weight_shapes{}; - std::vector _dst_shapes{}; + std::vector _weight_shapes{}; std::vector _infos{}; }; @@ -120,20 +113,20 @@ class SmallDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionL public: SmallDepthwiseConvolutionLayerDataset() { - add_config(TensorShape(7U, 7U, 1U), TensorShape(3U, 3U, 1U), TensorShape(5U, 5U, 1U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(7U, 7U, 1U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(23U, 27U, 5U), Size2D(3U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 7U), Size2D(7U, 3U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(17U, 31U, 2U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(23U, 27U, 5U), Size2D(11U, 3U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(17U, 31U, 2U, 3U), Size2D(5U, 9U), PadStrideInfo(1, 2, 1, 1)); // Asymmetric padding - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), Size2D(5U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); } }; @@ -143,12 +136,12 @@ class LargeDepthwiseConvolutionLayerDataset final : public DepthwiseConvolutionL public: LargeDepthwiseConvolutionLayerDataset() { - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); } }; @@ -158,12 +151,12 @@ class SmallDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvoluti public: SmallDepthwiseConvolutionLayerDataset3x3() { - add_config(TensorShape(3U, 3U, 2U), TensorShape(3U, 3U, 2U), TensorShape(1U, 1U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(7U, 7U, 3U, 2U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U, 2U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(21U, 31U, 9U, 4U), TensorShape(3U, 3U, 9U), TensorShape(21U, 15U, 9U, 4U), PadStrideInfo(1, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U, 3U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U, 3U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(3U, 3U, 2U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(7U, 7U, 3U, 2U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(21U, 31U, 9U, 4U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U, 3U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 1)); // Asymmetric padding - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(16U, 13U, 11U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); } }; @@ -172,9 +165,9 @@ class SmallDepthwiseConvolutionLayerDataset3x3NCHW final : public DepthwiseConvo public: SmallDepthwiseConvolutionLayerDataset3x3NCHW() { - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(3, 2, 1, 1)); // Asymmetric padding - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(18U, 14U, 11U), PadStrideInfo(2, 2, 3, 1, 2, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 11U), Size2D(3U, 3U), PadStrideInfo(2, 2, 3, 1, 2, 1, DimensionRoundingType::FLOOR)); } }; @@ -184,11 +177,11 @@ class LargeDepthwiseConvolutionLayerDataset3x3 final : public DepthwiseConvoluti public: LargeDepthwiseConvolutionLayerDataset3x3() { - add_config(TensorShape(233U, 277U, 55U, 3U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U, 3U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U, 5U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U, 5U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + add_config(TensorShape(233U, 277U, 55U, 3U), Size2D(3U, 3U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), Size2D(3U, 3U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U, 5U), Size2D(3U, 3U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), Size2D(3U, 3U), PadStrideInfo(2, 1, 1, 1)); } }; @@ -199,16 +192,16 @@ public: OptimizedDepthwiseConvolutionLayerDataset3x3() { // Stride 1 - add_config(TensorShape(7U, 7U, 16U), TensorShape(3U, 3U, 16U), TensorShape(5U, 5U, 16U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(7U, 7U, 16U), TensorShape(3U, 3U, 16U), TensorShape(7U, 7U, 16U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); - add_config(TensorShape(28U, 28U, 16U), TensorShape(3U, 3U, 16U), TensorShape(26U, 26U, 16U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(28U, 28U, 16U), TensorShape(3U, 3U, 16U), TensorShape(28U, 28U, 16U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(7U, 7U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(28U, 28U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(28U, 28U, 16U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); // Stride 2 - add_config(TensorShape(7U, 7U, 32U), TensorShape(3U, 3U, 32U), TensorShape(3U, 3U, 32U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(7U, 7U, 32U), TensorShape(3U, 3U, 32U), TensorShape(4U, 4U, 32U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL)); - add_config(TensorShape(8U, 8U, 32U), TensorShape(3U, 3U, 32U), TensorShape(3U, 3U, 32U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); - add_config(TensorShape(8U, 8U, 32U), TensorShape(3U, 3U, 32U), TensorShape(4U, 4U, 32U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); - add_config(TensorShape(64U, 64U, 128U), TensorShape(3U, 3U, 128U), TensorShape(32U, 32U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(7U, 7U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL)); + add_config(TensorShape(7U, 7U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 1, 1, 1, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(8U, 8U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(8U, 8U, 32U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); + add_config(TensorShape(64U, 64U, 128U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL)); } }; } // namespace datasets diff --git a/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h index c4c199a233..25ac1c16f8 100644 --- a/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h +++ b/tests/datasets/system_tests/mobilenet/MobileNetDepthwiseConvolutionLayerDataset.h @@ -42,15 +42,15 @@ class MobileNetDepthwiseConvolutionLayerDataset final : public DepthwiseConvolut public: MobileNetDepthwiseConvolutionLayerDataset() { - add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(7U, 7U, 1024U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(14U, 14U, 512U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); - add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(7U, 7U, 1024U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(14U, 14U, 512U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(14U, 14U, 512U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(28U, 28U, 256U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(28U, 28U, 256U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(56U, 56U, 128U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), Size2D(3U, 3U), PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(112U, 112U, 32U), Size2D(3U, 3U), PadStrideInfo(1, 1, 1, 1)); } }; } // namespace datasets diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index 450bb21e77..ad7a5d819b 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -45,6 +45,8 @@ namespace RelativeTolerance tolerance_f16(half_float::half(0.001)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ + +const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); } // namespace TEST_SUITE(CL) @@ -54,14 +56,15 @@ template using CLDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) @@ -77,15 +80,17 @@ TEST_SUITE(Float) TEST_SUITE(F16) TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, - combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), + combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), + depth_multipliers), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", DataLayout::NCHW))) @@ -98,15 +103,17 @@ TEST_SUITE_END() TEST_SUITE(FP32) TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, - combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), + combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) @@ -125,34 +132,41 @@ using CLDepthwiseConvolutionLayerQuantizedFixture3x3 = DepthwiseConvolutionLayer TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", DataLayout::NCHW))) +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", DataLayout::NCHW))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), // COMPMID-1071 Add depth multiplier support for NHWC framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), // COMPMID-1071 Add depth multiplier support for NHWC + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } diff --git a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp index 2baa93e413..22b1e08d5b 100644 --- a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp @@ -44,6 +44,8 @@ namespace { RelativeTolerance tolerance_fp16(half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); } // namespace TEST_SUITE(GC) @@ -55,14 +57,16 @@ using GCDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio TEST_SUITE(Float) TEST_SUITE(FP16) TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataLayout", DataLayout::NCHW))) diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index 236d4bd653..b1cc491ac8 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/functions/NEDepthwiseConvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" @@ -40,20 +41,34 @@ namespace test { namespace validation { +using namespace arm_compute::misc::shape_calculator; + namespace { constexpr RelativeTolerance tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ constexpr AbsoluteTolerance tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for DataType::QASYMM8 */ + +const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); } // namespace TEST_SUITE(NEON) TEST_SUITE(DepthwiseConvLayer) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - datasets::LargeDepthwiseConvolutionLayerDataset3x3()), +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::LargeDepthwiseConvolutionLayerDataset3x3()), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - input_shape, weights_shape, output_shape, info, data_type) + input_shape, kernel_size, info, depth_multiplier, data_type) { + // Get shapes + TensorShape weights_shape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(input_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + const TensorShape output_shape = compute_depthwise_convolution_shape(in_info, we_info, info, depth_multiplier); + + weights_shape.set(2, output_shape.z()); + // Create tensors Tensor src = create_tensor(input_shape, data_type); Tensor dst = create_tensor(output_shape, data_type); @@ -68,7 +83,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da // Create and configure function NEDepthwiseConvolutionLayer3x3 depthwise_layer; - depthwise_layer.configure(&src, &weights, &bias, &dst, info); + depthwise_layer.configure(&src, &weights, &bias, &dst, info, depth_multiplier); // Validate valid region const ValidRegion input_valid_region = shape_to_valid_region(input_shape); @@ -82,7 +97,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da validate(bias.info()->valid_region(), bias_valid_region); // Validate padding - bool is_optimized_run = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input_shape, info, data_type, DataLayout::NCHW); + bool is_optimized_run = NEDepthwiseConvolutionLayer3x3Kernel::is_optimized_execution_possible(input_shape, info, data_type, depth_multiplier, DataLayout::NCHW); const int step_non_opt_dwc = 16 >> info.stride().first; const int step_bias_add = 16 / src.info()->element_size(); const int step = is_optimized_run ? step_bias_add : std::max(step_non_opt_dwc, step_bias_add); @@ -95,14 +110,16 @@ TEST_SUITE(F32) TEST_SUITE(Generic) template using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) @@ -114,21 +131,24 @@ TEST_SUITE_END() TEST_SUITE(W3x3) template using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunOptimized, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::OptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), framework::dataset::make("DataType", DataType::F32)), framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) @@ -148,27 +168,31 @@ using NEDepthwiseConvolutionLayerQuantizedFixture = DepthwiseConvolutionLayerVal TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", DataLayout::NCHW))) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE_END() TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::QASYMM8)), framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_qasymm8); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", DataLayout::NCHW))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", DataLayout::NCHW))) { validate(Accessor(_target), _reference, tolerance_qasymm8); } diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index bb756f806e..b7bca8dbf3 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -26,6 +26,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -44,6 +45,8 @@ namespace test { namespace validation { +using namespace arm_compute::misc::shape_calculator; + template class DepthwiseConvolutionLayerValidationGenericFixture : public framework::Fixture { @@ -52,12 +55,20 @@ public: public: template - void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout) + void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout) { - _quantization_info = quantization_info; - _data_type = data_type; + _quantization_info = quantization_info; + _data_type = data_type; + const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + TensorShape weights_shape(kernel_size.width, kernel_size.height); + + const TensorInfo in_info(in_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + TensorShape out_shape = compute_depthwise_convolution_shape(in_info, we_info, pad_stride_info, depth_multiplier); + + weights_shape.set(2, out_shape.z()); const TensorShape biases_shape(weights_shape[2]); - const DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; if(data_layout == DataLayout::NHWC) { @@ -66,8 +77,8 @@ public: permute(out_shape, PermutationVector(2U, 0U, 1U)); } - _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout); - _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, data_type, bias_data_type, quantization_info, data_layout); + _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout); + _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout); } protected: @@ -101,6 +112,7 @@ protected: } TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info, + unsigned int depth_multiplier, const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout) { // Create tensors @@ -111,7 +123,7 @@ protected: // Create Depthwise Convolution configure function FunctionType dwc; - dwc.configure(&src, &weights, &biases, &dst, pad_stride_info); + dwc.configure(&src, &weights, &biases, &dst, pad_stride_info, depth_multiplier); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -141,6 +153,7 @@ protected: } SimpleTensor compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info, + unsigned int depth_multiplier, const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout) { SimpleTensor src{ in_shape, data_type, 1, 0, quantization_info, data_layout }; @@ -151,7 +164,7 @@ protected: fill(weights, 1); fill(biases, 2); - return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info); + return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info, depth_multiplier); } TensorType _target{}; @@ -165,9 +178,9 @@ class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLa { public: template - void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, DataLayout data_layout) + void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout) { - DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, weights_shape, out_shape, pad_stride_info, + DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, depth_multiplier, data_type, QuantizationInfo(), data_layout); } }; @@ -177,9 +190,9 @@ class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConv { public: template - void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout) + void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, DataLayout data_layout) { - DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, weights_shape, out_shape, pad_stride_info, + DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, depth_multiplier, data_type, quantization_info, data_layout); } }; diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.cpp b/tests/validation/reference/DepthwiseConvolutionLayer.cpp index d05da9140b..207e5fc45c 100644 --- a/tests/validation/reference/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/reference/DepthwiseConvolutionLayer.cpp @@ -51,7 +51,8 @@ namespace reference * */ template -void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info) +void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { // Compute reference const int filter_width = weights.shape().x(); @@ -75,40 +76,47 @@ void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor(x), static_cast(y), static_cast(z), static_cast(r)); - size_t filter_offset = filter_plane * z; + const int out_z = z * depth_multiplier + m; - T val(0); - for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) + for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + { + for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) { - for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) + Coordinates coords(static_cast(x), static_cast(y), static_cast(z), static_cast(r)); + size_t filter_offset = filter_plane * out_z; + + T val(0); + for(int j = y - filter_half_height; j <= static_cast(y + filter_half_height); ++j) { - coords.set(0, i); - coords.set(1, j); - T border_value(0); - val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value); - ++filter_offset; + for(int i = x - filter_half_width; i <= static_cast(x + filter_half_width); ++i) + { + coords.set(0, i); + coords.set(1, j); + + val += *(weights.data() + filter_offset) * tensor_elem_at(src, coords, BorderMode::CONSTANT, border_value); + ++filter_offset; + } } + + dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(out_z)))); } - coords.set(0, x); - coords.set(1, y); - dst[out_pos++] = saturate_cast(val + *static_cast(biases(Coordinates(z)))); } } } } } -void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info) +void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, SimpleTensor &dst, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { // Create reference const int input_offset = -src.quantization_info().offset; @@ -150,89 +158,76 @@ void depthwise_convolution_nchw(const SimpleTensor &src, const SimpleTe { for(int z = 0; z < input_depth; ++z) { - int32_t bias_val = *static_cast(biases(Coordinates(z))); - for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + for(unsigned int m = 0; m < depth_multiplier; ++m) { - for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) - { - Coordinates coords(x, y, z, r); - int filter_offset = filter_plane * z; + const int out_z = z * depth_multiplier + m; + const int32_t bias_val = *static_cast(biases(Coordinates(out_z))); - int32_t val = 0; - for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j) + for(int y = minimum_y; y < minimum_y + maximum_y; y += conv_info.stride().second) + { + for(int x = minimum_x; x < minimum_x + maximum_x; x += conv_info.stride().first) { - for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i) + Coordinates coords(x, y, z, r); + int filter_offset = filter_plane * out_z; + + int32_t val = 0; + for(int j = y - filter_half_height; j <= (y + filter_half_height); ++j) { - coords.set(0, i); - coords.set(1, j); - const auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, -input_offset); - const uint8_t w_val = *(weights.data() + filter_offset); - val += (in_val + input_offset) * (w_val + weights_offset); - ++filter_offset; + for(int i = x - filter_half_width; i <= (x + filter_half_width); ++i) + { + coords.set(0, i); + coords.set(1, j); + const auto in_val = tensor_elem_at(src, coords, BorderMode::CONSTANT, -input_offset); + const uint8_t w_val = *(weights.data() + filter_offset); + val += (in_val + input_offset) * (w_val + weights_offset); + ++filter_offset; + } } + val += bias_val; + val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); + val += output_offset; + val = std::max(val, 0); + val = std::min(val, 255); + + // Store the result + dst[out_pos++] = val; } - val += bias_val; - val = asymm_rounding_divide_by_pow2(asymm_int_mult(val, output_multiplier), output_shift); - val += output_offset; - val = std::max(val, 0); - val = std::min(val, 255); - - // Store the result - dst[out_pos++] = val; } } } } } -template <> -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info) -{ - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; - - if(src.data_layout() == DataLayout::NHWC) - { - SimpleTensor src_nchw = reference::permute(src, PermutationVector(1U, 2U, 0U)); - SimpleTensor weights_nchw = reference::permute(weights, PermutationVector(1U, 2U, 0U)); - SimpleTensor dst_nchw = reference::permute(dst, PermutationVector(1U, 2U, 0U)); - - depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info); - - return reference::permute(dst_nchw, PermutationVector(2U, 0U, 1U)); - } - - depthwise_convolution_nchw(src, weights, biases, dst, conv_info); - - return dst; -} - template -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info, + unsigned int depth_multiplier) { - SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; + SimpleTensor dst{ dst_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; - if(src.data_layout() == DataLayout::NHWC && src.data_type() == DataType::F32) + if(src.data_layout() == DataLayout::NHWC) { SimpleTensor src_nchw = reference::permute(src, PermutationVector(1U, 2U, 0U)); SimpleTensor weights_nchw = reference::permute(weights, PermutationVector(1U, 2U, 0U)); SimpleTensor dst_nchw = reference::permute(dst, PermutationVector(1U, 2U, 0U)); - depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info); + depthwise_convolution_nchw(src_nchw, weights_nchw, biases, dst_nchw, conv_info, depth_multiplier); return reference::permute(dst_nchw, PermutationVector(2U, 0U, 1U)); } - depthwise_convolution_nchw(src, weights, biases, dst, conv_info); + depthwise_convolution_nchw(src, weights, biases, dst, conv_info, depth_multiplier); return dst; } +template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, + const PadStrideInfo &conv_info, unsigned int depth_multiplier); + template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info); + const PadStrideInfo &conv_info, unsigned int depth_multiplier); template SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, - const PadStrideInfo &conv_info); + const PadStrideInfo &conv_info, unsigned int depth_multiplier); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/DepthwiseConvolutionLayer.h b/tests/validation/reference/DepthwiseConvolutionLayer.h index df743a5b8e..bab338723d 100644 --- a/tests/validation/reference/DepthwiseConvolutionLayer.h +++ b/tests/validation/reference/DepthwiseConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -36,7 +36,8 @@ namespace validation namespace reference { template -SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info); +SimpleTensor depthwise_convolution(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info, + unsigned int depth_multiplier); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp index ca6c168114..8bc6ddb696 100644 --- a/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp +++ b/tests/validation/reference/DepthwiseSeparableConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,7 +46,7 @@ SimpleTensor depthwise_separable_convolution_layer(const SimpleTensor &src const SimpleTensor &pointwise_biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info) { // Compute reference - SimpleTensor depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info); + SimpleTensor depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info, 1); SimpleTensor dst = convolution_layer(depthwise_out, pointwise_weights, pointwise_biases, dst_shape, pointwise_conv_info); return dst; -- cgit v1.2.1