aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
author: Michele Di Giorgio <michele.digiorgio@arm.com> 2018-09-13 16:22:01 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:55:45 +0000
commit: ed5a492ba791d8c8b3334749d4ae946b8f11d13d (patch)
tree: 89c8cd6f705dc88a21c61668164aad079800aff7 /arm_compute
parent: 7e9391bb14d219cda310bff355669b5964b1f576 (diff)
download: ComputeLibrary-ed5a492ba791d8c8b3334749d4ae946b8f11d13d.tar.gz
COMPMID-1586: Add support for NHWC CLDeconvolutionLayer
COMPMID-1651: Fix QASYMM8 CLDeconvolutionLayer

This patch also extends the range of values used for testing Convolution and Deconvolution to cover quantized [-1.0f, 1.0f].

Change-Id: I8b280669db67bb3ec25bf5d411c8f5954f5b0dab
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/149869
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h 8
-rw-r--r-- arm_compute/core/Utils.h 10
-rw-r--r-- arm_compute/core/utils/misc/ShapeCalculator.h 39
-rw-r--r-- arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h 17
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h 17
5 files changed, 60 insertions, 31 deletions
diff --git a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h
index 801934159d..04567ed959 100644
--- a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h
+++ b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h
@@ -53,7 +53,7 @@ public:
/** Set the input and output of the kernel.
*
- * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32
+ * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32. Data layouts supported: NCHW/NHWC.
* @param[out] output The output tensor. Data types supported: Same as @p input
*/
void configure(const ITensor *input, ITensor *output);
@@ -64,17 +64,15 @@ public:
/** Function to perform flipping.
*
* @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
*/
template <typename T>
- void flip_weights(const Window &window_input, const Window &window);
+ void flip_weights(const Window &window_input);
/** Common signature for all the specialised Flip functions
*
* @param[in] window_input Input region on which to execute the kernel.
- * @param[in] window Output region on which to execute the kernel.
*/
- using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input, const Window &window);
+ using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input);
private:
const ITensor *_input;
diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h
index 7ee24e2736..cfd273618c 100644
--- a/arm_compute/core/Utils.h
+++ b/arm_compute/core/Utils.h
@@ -815,16 +815,6 @@ inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t siz
*/
PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info);
-/** Returns expected shape for the deconvolution output tensor.
- *
- * @param[in] out_dims widht and height of the output tensor, these values can be obtained with the function deconvolution_output_dimensions.
- * @param[in] input Shape of the input tensor.
- * @param[in] weights Shape of the weights tensor.
- *
- * @return Deconvolution output tensor shape.
- */
-TensorShape deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, TensorShape input, TensorShape weights);
-
/** Returns expected width and height of the deconvolution's output tensor.
*
* @param[in] in_width Width of input tensor (Number of columns)
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index f68401c1b9..11d20c919f 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -229,26 +229,49 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input,
return output_shape;
}
-inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, unsigned int inner_border_right, unsigned int inner_border_top,
- std::pair<unsigned int, unsigned int> &out_dims)
+inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, unsigned int inner_border_right,
+ unsigned int inner_border_top,
+ std::pair<unsigned int, unsigned int> &out_dims, unsigned int &padx, unsigned int &pady)
{
+ const DataLayout data_layout = input.data_layout();
+ const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+
// Find the upsampled dimensions
- unsigned int out_x = (input.dimension(0) - 1) * sx + inner_border_right + 1;
- unsigned int out_y = (input.dimension(1) - 1) * sy + inner_border_top + 1;
+ unsigned int out_x = (input.dimension(idx_w) - 1) * sx + inner_border_right + 1;
+ unsigned int out_y = (input.dimension(idx_h) - 1) * sy + inner_border_top + 1;
// Find the padding needed for the convolution with stride 1 in order to match output shape
- unsigned int padx = out_dims.first - (out_x - weights.dimension(0) + 1);
- unsigned int pady = out_dims.second - (out_y - weights.dimension(1) + 1);
+ padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1);
+ pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1);
out_x += padx;
out_y += pady;
TensorShape scale_out_shape(input.tensor_shape());
- scale_out_shape.set(0, out_x);
- scale_out_shape.set(1, out_y);
+ scale_out_shape.set(idx_w, out_x);
+ scale_out_shape.set(idx_h, out_y);
return scale_out_shape;
}
+inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights)
+{
+ const TensorShape input_shape{ input.tensor_shape() };
+ const TensorShape weights_shape{ weights.tensor_shape() };
+
+ const DataLayout data_layout = input.data_layout();
+ const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
+ const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
+ const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
+ const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES);
+
+ TensorShape out_shape{ input_shape };
+ out_shape.set(width_idx, out_dims.first);
+ out_shape.set(height_idx, out_dims.second);
+ out_shape.set(channel_idx, weights_shape[batch_idx]);
+ return out_shape;
+}
+
inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z,
unsigned int num_groups = 1)
{
diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
index 6716cd6fdd..39cbe0cafa 100644
--- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h
@@ -46,8 +46,12 @@ class ICLTensor;
* specified value where a < stride - 1, that increases the padding top and right of the input image.
*
* The relation between input to output is as follows:
- * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right )
- * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top )
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
*
* where:
* width_input is the size of the first input dimension.
@@ -55,9 +59,16 @@ class ICLTensor;
* width_output is the size of the first output dimension.
* height_output is the size of the second output dimension.
* kernel_x and kernel_y are the convolution sizes in x and y.
- * inner_border_right and inner_border_top the number of zeros added to the right and top edges of the input.
* stride_x and stride_y is the input stride of the first and second dimension.
*
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel.
+ *
+ * This function calls the following OpenCL kernels/functions:
+ *
+ * -# @ref CLDeconvolutionLayerUpsample
+ * -# @ref CLConvolutionLayer
+ *
*/
class CLDeconvolutionLayer : public IFunction
{
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index 0cca555621..73870093b7 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -46,8 +46,12 @@ namespace arm_compute
* specified value where a < stride - 1 that increases the padding top and right of the input image.
*
* The relation between input to output is as follows:
- * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right )
- * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top )
+ * \f[
+ * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x
+ * \f]
+ * \f[
+ * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y
+ * \f]
*
* where
* width is the size of the first input dimension.
@@ -55,12 +59,15 @@ namespace arm_compute
* width_output is the size of the first output dimension.
* height_output is the size of the second output dimension.
* kernel_x and kernel_y are the convolution sizes in x and y.
- * inner_border_right and inner_border_top the number of zeros added to the top and right edges of the input.
* stride_x and stride_y is the input stride of the first and second dimension.
*
- * This function calls the following NEON kernels:
+ * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
+ * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel.
*
- * -# @ref NEDirectConvolutionLayer
+ * This function calls the following NEON kernels/functions:
+ *
+ * -# @ref CPPUpsample
+ * -# @ref NEConvolutionLayer
*
*/
class NEDeconvolutionLayer : public IFunction