From ed5a492ba791d8c8b3334749d4ae946b8f11d13d Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Thu, 13 Sep 2018 16:22:01 +0100 Subject: COMPMID-1586: Add support for NHWC CLDeconvolutionLayer COMPMID-1651: Fix QASYMM8 CLDeconvolutionLayer This patch also extends the range of values used for testing Convolution and Deconvolution to cover quantized [-1.0f, 1.0f]. Change-Id: I8b280669db67bb3ec25bf5d411c8f5954f5b0dab Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/149869 Reviewed-by: Michalis Spyrou Tested-by: bsgcomp --- .../core/CPP/kernels/CPPFlipWeightsKernel.h | 8 ++--- arm_compute/core/Utils.h | 10 ------ arm_compute/core/utils/misc/ShapeCalculator.h | 39 +++++++++++++++++----- .../runtime/CL/functions/CLDeconvolutionLayer.h | 17 ++++++++-- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 17 +++++++--- 5 files changed, 60 insertions(+), 31 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h index 801934159d..04567ed959 100644 --- a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h +++ b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h @@ -53,7 +53,7 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32 + * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32. Data layouts supported: NCHW/NHWC. * @param[out] output The output tensor. Data types supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); @@ -64,17 +64,15 @@ public: /** Function to perform flipping. * * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. 
*/ template <typename T> - void flip_weights(const Window &window_input, const Window &window); + void flip_weights(const Window &window_input); /** Common signature for all the specialised Flip functions * * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. */ - using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input, const Window &window); + using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input); private: const ITensor *_input; diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 7ee24e2736..cfd273618c 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -815,16 +815,6 @@ inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t siz */ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info); -/** Returns expected shape for the deconvolution output tensor. - * - * @param[in] out_dims widht and height of the output tensor, these values can be obtained with the function deconvolution_output_dimensions. - * @param[in] input Shape of the input tensor. - * @param[in] weights Shape of the weights tensor. - * - * @return Deconvolution output tensor shape. - */ -TensorShape deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, TensorShape input, TensorShape weights); - /** Returns expected width and height of the deconvolution's output tensor. 
* * @param[in] in_width Width of input tensor (Number of columns) diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index f68401c1b9..11d20c919f 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -229,26 +229,49 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, return output_shape; } -inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, unsigned int inner_border_right, unsigned int inner_border_top, - std::pair<unsigned int, unsigned int> &out_dims) +inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, unsigned int inner_border_right, + unsigned int inner_border_top, + std::pair<unsigned int, unsigned int> &out_dims, unsigned int &padx, unsigned int &pady) { + const DataLayout data_layout = input.data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + // Find the upsampled dimensions - unsigned int out_x = (input.dimension(0) - 1) * sx + inner_border_right + 1; - unsigned int out_y = (input.dimension(1) - 1) * sy + inner_border_top + 1; + unsigned int out_x = (input.dimension(idx_w) - 1) * sx + inner_border_right + 1; + unsigned int out_y = (input.dimension(idx_h) - 1) * sy + inner_border_top + 1; // Find the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = out_dims.first - (out_x - weights.dimension(0) + 1); - unsigned int pady = out_dims.second - (out_y - weights.dimension(1) + 1); + padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); + pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1); out_x += padx; out_y += pady; TensorShape scale_out_shape(input.tensor_shape()); 
- scale_out_shape.set(0, out_x); - scale_out_shape.set(1, out_y); + scale_out_shape.set(idx_w, out_x); + scale_out_shape.set(idx_h, out_y); return scale_out_shape; } +inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights) +{ + const TensorShape input_shape{ input.tensor_shape() }; + const TensorShape weights_shape{ weights.tensor_shape() }; + + const DataLayout data_layout = input.data_layout(); + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + + TensorShape out_shape{ input_shape }; + out_shape.set(width_idx, out_dims.first); + out_shape.set(height_idx, out_dims.second); + out_shape.set(channel_idx, weights_shape[batch_idx]); + return out_shape; +} + inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups = 1) { diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h index 6716cd6fdd..39cbe0cafa 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h @@ -46,8 +46,12 @@ class ICLTensor; * specified value where a < stride - 1, that increases the padding top and right of the input image. 
* * The relation between input to output is as follows: - * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right ) - * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top ) + * \f[ + * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * \f] + * \f[ + * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * \f] * * where: * width_input is the size of the first input dimension. @@ -55,9 +59,16 @@ class ICLTensor; * width_output is the size of the first output dimension. * height_output is the size of the second output dimension. * kernel_x and kernel_y are the convolution sizes in x and y. - * inner_border_right and inner_border_top the number of zeros added to the right and top edges of the input. * stride_x and stride_y is the input stride of the first and second dimension. * + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel. + * + * This function calls the following OpenCL kernels/functions: + * + * -# @ref CLDeconvolutionLayerUpsample + * -# @ref CLConvolutionLayer + * */ class CLDeconvolutionLayer : public IFunction { diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 0cca555621..73870093b7 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -46,8 +46,12 @@ namespace arm_compute * specified value where a < stride - 1 that increases the padding top and right of the input image. 
* * The relation between input to output is as follows: - * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right ) - * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top ) + * \f[ + * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * \f] + * \f[ + * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * \f] * * where * width is the size of the first input dimension. @@ -55,12 +59,15 @@ namespace arm_compute * width_output is the size of the first output dimension. * height_output is the size of the second output dimension. * kernel_x and kernel_y are the convolution sizes in x and y. - * inner_border_right and inner_border_top the number of zeros added to the top and right edges of the input. * stride_x and stride_y is the input stride of the first and second dimension. * - * This function calls the following NEON kernels: + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel. * - * -# @ref NEDirectConvolutionLayer + * This function calls the following NEON kernels/functions: + * + * -# @ref CPPUpsample + * -# @ref NEConvolutionLayer * */ class NEDeconvolutionLayer : public IFunction -- cgit v1.2.1