diff options
19 files changed, 281 insertions, 164 deletions
diff --git a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h index 801934159d..04567ed959 100644 --- a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h +++ b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h @@ -53,7 +53,7 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32 + * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32. Data layouts supported: NCHW/NHWC. * @param[out] output The output tensor. Data types supported: Same as @p input */ void configure(const ITensor *input, ITensor *output); @@ -64,17 +64,15 @@ public: /** Function to perform flipping. * * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. */ template <typename T> - void flip_weights(const Window &window_input, const Window &window); + void flip_weights(const Window &window_input); /** Common signature for all the specialised Flip functions * * @param[in] window_input Input region on which to execute the kernel. - * @param[in] window Output region on which to execute the kernel. */ - using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input, const Window &window); + using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input); private: const ITensor *_input; diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index 7ee24e2736..cfd273618c 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -815,16 +815,6 @@ inline DataType data_type_for_convolution_matrix(const int16_t *conv, size_t siz */ PadStrideInfo calculate_same_pad(TensorShape input_shape, TensorShape weights_shape, PadStrideInfo conv_info); -/** Returns expected shape for the deconvolution output tensor. - * - * @param[in] out_dims widht and height of the output tensor, these values can be obtained with the function deconvolution_output_dimensions. - * @param[in] input Shape of the input tensor. - * @param[in] weights Shape of the weights tensor. - * - * @return Deconvolution output tensor shape. - */ -TensorShape deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, TensorShape input, TensorShape weights); - /** Returns expected width and height of the deconvolution's output tensor. * * @param[in] in_width Width of input tensor (Number of columns) diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index f68401c1b9..11d20c919f 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -229,26 +229,49 @@ inline TensorShape compute_depthwise_convolution_shape(const ITensorInfo &input, return output_shape; } -inline TensorShape compute_deconvolution_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, unsigned int inner_border_right, unsigned int inner_border_top, - std::pair<unsigned int, unsigned int> &out_dims) +inline TensorShape compute_deconvolution_upsampled_shape(const ITensorInfo &input, const ITensorInfo &weights, unsigned int sx, unsigned int sy, unsigned int inner_border_right, + unsigned int inner_border_top, + std::pair<unsigned int, unsigned int> &out_dims, unsigned int &padx, unsigned int &pady) { + const DataLayout data_layout = input.data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + // Find the upsampled dimensions - unsigned int out_x = (input.dimension(0) - 1) * sx + inner_border_right + 1; - unsigned int out_y = (input.dimension(1) - 1) * sy + inner_border_top + 1; + unsigned int out_x = (input.dimension(idx_w) - 1) * sx + inner_border_right + 1; + unsigned int out_y = (input.dimension(idx_h) - 1) * sy + inner_border_top + 1; // Find the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = out_dims.first - (out_x - weights.dimension(0) + 1); - unsigned int pady = out_dims.second - (out_y - weights.dimension(1) + 1); + padx = out_dims.first - (out_x - weights.dimension(idx_w) + 1); + pady = out_dims.second - (out_y - weights.dimension(idx_h) + 1); out_x += padx; out_y += pady; TensorShape scale_out_shape(input.tensor_shape()); - scale_out_shape.set(0, out_x); - scale_out_shape.set(1, out_y); + scale_out_shape.set(idx_w, out_x); + scale_out_shape.set(idx_h, out_y); return scale_out_shape; } +inline TensorShape compute_deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, const ITensorInfo &input, const ITensorInfo &weights) +{ + const TensorShape input_shape{ input.tensor_shape() }; + const TensorShape weights_shape{ weights.tensor_shape() }; + + const DataLayout data_layout = input.data_layout(); + const int width_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + const int batch_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::BATCHES); + + TensorShape out_shape{ input_shape }; + out_shape.set(width_idx, out_dims.first); + out_shape.set(height_idx, out_dims.second); + out_shape.set(channel_idx, weights_shape[batch_idx]); + return out_shape; +} + inline TensorShape compute_im2col_conv_shape(const ITensorInfo *input, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation, bool batch_size_on_z, unsigned int num_groups = 1) { diff --git a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h index 6716cd6fdd..39cbe0cafa 100644 --- a/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h @@ -46,8 +46,12 @@ class ICLTensor; * specified value where a < stride - 1, that increases the padding top and right of the input image. * * The relation between input to output is as follows: - * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right ) - * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top ) + * \f[ + * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * \f] + * \f[ + * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * \f] * * where: * width_input is the size of the first input dimension. @@ -55,9 +59,16 @@ class ICLTensor; * width_output is the size of the first output dimension. * height_output is the size of the second output dimension. * kernel_x and kernel_y are the convolution sizes in x and y. - * inner_border_right and inner_border_top the number of zeros added to the right and top edges of the input. * stride_x and stride_y is the input stride of the first and second dimension. * + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel. + * + * This function calls the following OpenCL kernels/functions: + * + * -# @ref CLDeconvolutionLayerUpsample + * -# @ref CLConvolutionLayer + * */ class CLDeconvolutionLayer : public IFunction { diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 0cca555621..73870093b7 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -46,8 +46,12 @@ namespace arm_compute * specified value where a < stride - 1 that increases the padding top and right of the input image. * * The relation between input to output is as follows: - * width_output = round((width_input − 1) ∗ (stride_x - 1) − 2 ∗ padding_x + kernel_x + inner_border_right ) - * height_output = round((height_input − 1) ∗ (stride_y - 1) − 2 ∗ padding_y + kernel_y + inner_border_top ) + * \f[ + * width\_output = (width\_input - 1) \cdot stride\_x - 2 \cdot padding\_x + kernel\_x + * \f] + * \f[ + * height\_output = (height\_input - 1) \cdot stride\_y - 2 \cdot padding\_y + kernel\_y + * \f] * * where * width is the size of the first input dimension. @@ -55,12 +59,15 @@ namespace arm_compute * width_output is the size of the first output dimension. * height_output is the size of the second output dimension. * kernel_x and kernel_y are the convolution sizes in x and y. - * inner_border_right and inner_border_top the number of zeros added to the top and right edges of the input. * stride_x and stride_y is the input stride of the first and second dimension. * - * This function calls the following NEON kernels: + * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the + * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel. * - * -# @ref NEDirectConvolutionLayer + * This function calls the following NEON kernels/functions: + * + * -# @ref CPPUpsample + * -# @ref NEConvolutionLayer * */ class NEDeconvolutionLayer : public IFunction diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp index be3a926b96..dd7d79002d 100644 --- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp +++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp @@ -45,11 +45,19 @@ Status CLDeconvolutionLayerUpsampleKernel::validate(const ITensorInfo *input, co ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) == 0); - ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) == 0); + + const DataLayout data_layout = input->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_w) == 0); + ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(idx_h) == 0); ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); - for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) + ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(idx_c) != output->dimension(idx_c)); + for(size_t i = 3; i < Coordinates::num_max_dimensions; ++i) { ARM_COMPUTE_RETURN_ERROR_ON(input->dimension(i) != output->dimension(i)); } @@ -93,28 +101,61 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); + const DataLayout data_layout = _input->info()->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const int out_start_x = _info.pad().first; - const int out_end_x = _output->info()->dimension(0) - _inner_border.right - _info.pad().first + _info.stride().first - 1; + const int out_end_x = _output->info()->dimension(idx_w) - _inner_border.right - _info.pad().first + _info.stride().first - 1; const int out_step_x = _info.stride().first; const int out_start_y = _inner_border.top + _info.pad().second; - const int out_end_y = _output->info()->dimension(1) - _info.pad().second + _info.stride().second - 1; + const int out_end_y = _output->info()->dimension(idx_h) - _info.pad().second + _info.stride().second - 1; const int out_step_y = _info.stride().second; - Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); - - Window slice_out = collapsed.first_slice_window_3D(); - slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x)); - slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y)); - - Window slice_in = collapsed.first_slice_window_3D(); - - do + switch(data_layout) { - unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_in); - add_3D_tensor_argument(idx, _output, slice_out); - enqueue(queue, *this, slice_out); + case DataLayout::NCHW: + { + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + + Window slice_out = collapsed.first_slice_window_3D(); + slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x)); + slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y)); + + Window slice_in = collapsed.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice_in); + add_3D_tensor_argument(idx, _output, slice_out); + enqueue(queue, *this, slice_out); + } + while(collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out)); + break; + } + case DataLayout::NHWC: + { + // NOTE: not collapsing in NHWC + Window slice_out = window.first_slice_window_3D(); + slice_out.set(Window::DimY, Window::Dimension(out_start_x, out_end_x, out_step_x)); + slice_out.set(Window::DimZ, Window::Dimension(out_start_y, out_end_y, out_step_y)); + + Window slice_in = window.first_slice_window_3D(); + + do + { + unsigned int idx = 0; + add_3D_tensor_argument(idx, _input, slice_in); + add_3D_tensor_argument(idx, _output, slice_out); + enqueue(queue, *this, slice_out); + } + while(window.slide_window_slice_3D(slice_in) && window.slide_window_slice_3D(slice_out)); + break; + } + default: + ARM_COMPUTE_ERROR("Unsupported data layout"); } - while(collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out)); } diff --git a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp b/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp index 741218e4f7..2d4c0ce5c8 100644 --- a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp +++ b/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp @@ -42,25 +42,36 @@ CPPFlipWeightsKernel::CPPFlipWeightsKernel() } template <typename T> -void CPPFlipWeightsKernel::flip_weights(const Window &window_input, const Window &window) +void CPPFlipWeightsKernel::flip_weights(const Window &window_input) { // Create iterators Iterator in(_input, window_input); - Iterator out(_output, window); + const DataLayout data_layout = _input->info()->data_layout(); + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - const int kernel_size = _input->info()->dimension(0); + const int kernel_width = _input->info()->dimension(idx_w); + const int kernel_height = _input->info()->dimension(idx_h); execute_window_loop(window_input, [&](const Coordinates & id) { - *((reinterpret_cast<T *>(out.ptr()) + kernel_size * (kernel_size - id.y() - 1) + (kernel_size - id.x() - 1))) = *(reinterpret_cast<const T *>(in.ptr())); + const unsigned int x = kernel_width - id[idx_w] - 1; + const unsigned int y = kernel_height - id[idx_h] - 1; + Coordinates output_coord(id); + output_coord.set(idx_w, x); + output_coord.set(idx_h, y); + *(reinterpret_cast<T *>(_output->ptr_to_element(output_coord))) = *(reinterpret_cast<const T *>(in.ptr())); }, - in, out); + in); } void CPPFlipWeightsKernel::configure(const ITensor *input, ITensor *output) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); _input = input; _output = output; @@ -98,9 +109,5 @@ void CPPFlipWeightsKernel::run(const Window &window, const ThreadInfo &info) ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); ARM_COMPUTE_ERROR_ON(_func == nullptr); - Window out_window{ window }; - out_window.set(Window::DimX, Window::Dimension(0, 0, 0)); - out_window.set(Window::DimY, Window::Dimension(0, 0, 0)); - - (this->*_func)(window, out_window); + (this->*_func)(window); } diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index a6a5771ec1..41fc87e87a 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -323,15 +323,6 @@ PadStrideInfo arm_compute::calculate_same_pad(TensorShape input_shape, TensorSha return PadStrideInfo(strides.first, strides.second, same_pad_left, same_pad_right, same_pad_top, same_pad_bottom, DimensionRoundingType::CEIL); } -TensorShape arm_compute::deconvolution_output_shape(const std::pair<unsigned int, unsigned int> &out_dims, TensorShape input, TensorShape weights) -{ - TensorShape out_shape(input); - out_shape.set(0, out_dims.first); - out_shape.set(1, out_dims.second); - out_shape.set(2, weights[3]); - return out_shape; -} - const std::pair<unsigned int, unsigned int> arm_compute::deconvolution_output_dimensions( unsigned int in_width, unsigned int in_height, unsigned int kernel_width, unsigned int kernel_height, unsigned int padx, unsigned int pady, unsigned int stride_x, unsigned int stride_y) diff --git a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp index 26d44e9c96..951d1ec4f0 100644 --- a/src/runtime/CL/functions/CLDeconvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDeconvolutionLayer.cpp @@ -53,8 +53,16 @@ Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1)); - ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, weights); + + const DataLayout data_layout = input->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + const size_t idx_c = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL); + + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) != weights->dimension(idx_h)); + ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(idx_w) < 1); ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); const unsigned int stride_x = info.stride().first; @@ -63,10 +71,10 @@ Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_right > stride_x - 1, "inner_border_right must be smaller than stride_x"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(inner_border_top > stride_y - 1, "inner_border_top must be smaller than stride_y"); - auto out_dims = deconvolution_output_dimensions(input->dimension(0), input->dimension(1), weights->dimension(0), weights->dimension(1), + auto out_dims = deconvolution_output_dimensions(input->dimension(idx_w), input->dimension(idx_h), weights->dimension(idx_w), weights->dimension(idx_h), info.pad().first, info.pad().second, stride_x, stride_y); - const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape()); + const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input, *weights); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output, weights); @@ -80,15 +88,17 @@ Status CLDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias); } + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, bias); } - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid."); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_w) != output_shape[idx_w], "Output's width is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_h) != output_shape[idx_h], "Output's height is invalid."); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(idx_c) != output_shape[idx_c], "Output's depth is invalid."); - TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_deconvolution_shape(*input, *weights, stride_x, stride_y, inner_border_right, - inner_border_top, - out_dims))); + unsigned int padx = 0; + unsigned int pady = 0; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, inner_border_right, inner_border_top, out_dims, padx, pady); + TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape).set_data_layout(data_layout)); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); ARM_COMPUTE_RETURN_ON_ERROR(CLDeconvolutionLayerUpsample::validate(input, &scale_out_info, BorderSize(inner_border_right, inner_border_top), info)); @@ -105,17 +115,22 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const const unsigned int stride_x = info.stride().first; const unsigned int stride_y = info.stride().second; + const DataLayout data_layout = input->info()->data_layout(); + + const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); + const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); + _weights = weights; - _weights_flipped.allocator()->init(TensorInfo(weights->info()->tensor_shape(), 1, weights->info()->data_type())); + _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); _flip_weights.configure(weights, &_weights_flipped); - auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1), + auto out_dims = deconvolution_output_dimensions(input->info()->dimension(idx_w), input->info()->dimension(idx_h), weights->info()->dimension(idx_w), weights->info()->dimension(idx_h), info.pad().first, info.pad().second, stride_x, stride_y); - const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape()); + const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info()); // Output auto initialization if not yet initialized - auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + auto_init_if_empty(*output->info(), input->info()->clone()->set_tensor_shape(output_shape).set_data_layout(data_layout)); // Perform validation step ARM_COMPUTE_ERROR_THROW_ON(CLDeconvolutionLayer::validate(input->info(), weights->info(), bias == nullptr ? nullptr : bias->info(), output->info(), info, inner_border_right, inner_border_top)); @@ -125,20 +140,13 @@ void CLDeconvolutionLayer::configure(ICLTensor *input, ICLTensor *weights, const _memory_group.manage(&_scaled_output); _memory_group.manage(&_weights_flipped); - // Find the upsampled dimensions - unsigned int out_x = (input->info()->dimension(0) - 1) * stride_x + inner_border_right + 1; - unsigned int out_y = (input->info()->dimension(1) - 1) * stride_y + inner_border_top + 1; - - // Find the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = out_dims.first - (out_x - weights->info()->dimension(0) + 1); - unsigned int pady = out_dims.second - (out_y - weights->info()->dimension(1) + 1); - out_x += padx; - out_y += pady; + // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape + unsigned int padx = 0; + unsigned int pady = 0; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, inner_border_right, inner_border_top, out_dims, padx, pady); - TensorShape scale_out_shape(input->info()->tensor_shape()); - scale_out_shape.set(0, out_x); - scale_out_shape.set(1, out_y); TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + scale_out_info.set_data_layout(data_layout); _scaled_output.allocator()->init(scale_out_info); // configure scale function diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 6ca60c66a4..cbe7c51662 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -76,15 +76,17 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf { ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - const TensorShape output_shape = deconvolution_output_shape(out_dims, input->tensor_shape(), weights->tensor_shape()); + const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input, *weights); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimX) != output_shape.x(), "Output's width is invalid."); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimY) != output_shape.y(), "Output's height is invalid."); ARM_COMPUTE_RETURN_ERROR_ON_MSG(output->dimension(Window::DimZ) != output_shape.z(), "Output's depth is invalid."); } - TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(compute_deconvolution_shape(*input, *weights, stride_x, stride_y, inner_border_right, - inner_border_top, - out_dims))); + unsigned int padx = 0; + unsigned int pady = 0; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input, *weights, stride_x, stride_y, inner_border_right, inner_border_top, out_dims, padx, pady); + TensorInfo scale_out_info(input->clone()->set_is_resizable(true).reset_padding().set_tensor_shape(scale_out_shape)); const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); for(size_t i = 2; i < Coordinates::num_max_dimensions; ++i) @@ -116,7 +118,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con auto out_dims = deconvolution_output_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights->info()->dimension(0), weights->info()->dimension(1), info.pad().first, info.pad().second, stride_x, stride_y); - const TensorShape output_shape = deconvolution_output_shape(out_dims, input->info()->tensor_shape(), weights->info()->tensor_shape()); + const TensorShape output_shape = compute_deconvolution_output_shape(out_dims, *input->info(), *weights->info()); // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); @@ -125,19 +127,11 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con _memory_group.manage(&_scaled_output); - // Find the upsampled dimensions - unsigned int out_x = (input->info()->dimension(0) - 1) * stride_x + inner_border_right + 1; - unsigned int out_y = (input->info()->dimension(1) - 1) * stride_y + inner_border_top + 1; - - // Find the padding needed for the convolution with stride 1 in order to match output shape - unsigned int padx = out_dims.first - (out_x - weights->info()->dimension(0) + 1); - unsigned int pady = out_dims.second - (out_y - weights->info()->dimension(1) + 1); - out_x += padx; - out_y += pady; + // Find the upsampled dimensions and the padding needed for the convolution with stride 1 in order to match output shape + unsigned int padx = 0; + unsigned int pady = 0; + const TensorShape scale_out_shape = compute_deconvolution_upsampled_shape(*input->info(), *weights->info(), stride_x, stride_y, inner_border_right, inner_border_top, out_dims, padx, pady); - TensorShape scale_out_shape(input->info()->tensor_shape()); - scale_out_shape.set(0, out_x); - scale_out_shape.set(1, out_y); TensorInfo scale_out_info(scale_out_shape, 1, input->info()->data_type(), input->info()->quantization_info()); _scaled_output.allocator()->init(scale_out_info); @@ -171,4 +165,4 @@ void NEDeconvolutionLayer::prepare() _conv_f.prepare(); _is_prepared = true; } -}
\ No newline at end of file +} diff --git a/tests/datasets/SmallConvolutionLayerDataset.h b/tests/datasets/SmallConvolutionLayerDataset.h index ca4abd1671..bbfc760bf3 100644 --- a/tests/datasets/SmallConvolutionLayerDataset.h +++ b/tests/datasets/SmallConvolutionLayerDataset.h @@ -164,6 +164,9 @@ public: add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(11U, 12U, 16U, 5U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(10U, 11U, 16U, 5U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); add_config(TensorShape(33U, 27U, 7U, 5U), TensorShape(5U, 7U, 7U, 16U), TensorShape(16U), TensorShape(10U, 11U, 16U, 5U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + // TODO (micgio01) - COMPMID-1604: investigate issue in GLES and re-enable the following dataset + // Single output channel + //add_config(TensorShape(5U, 4U, 3U, 2U), TensorShape(4U, 4U, 3U, 1U), TensorShape(1U), TensorShape(2U, 1U, 1U, 2U), PadStrideInfo(1, 1, 0, 0, 0, 0, DimensionRoundingType::FLOOR)); } }; diff --git a/tests/validation/CL/DeconvolutionLayer.cpp b/tests/validation/CL/DeconvolutionLayer.cpp index 84a2b01797..7727d9029d 100644 --- a/tests/validation/CL/DeconvolutionLayer.cpp +++ b/tests/validation/CL/DeconvolutionLayer.cpp @@ -23,6 +23,7 @@ */ #include "arm_compute/core/CL/kernels/CLFillBorderKernel.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/CL/CLTensor.h" #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLDeconvolutionLayer.h" @@ -45,7 +46,7 @@ namespace { constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's for DataType::F16 */ -constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +constexpr AbsoluteTolerance<float> tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ constexpr float tolerance_num = 0.07f; /**< Tolerance number */ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 5) * framework::dataset::make("StrideY", 1, 5) * framework::dataset::make("PadX", 0, 3) @@ -57,6 +58,7 @@ const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset:: const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 }); +const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }); } // namespace TEST_SUITE(CL) @@ -72,7 +74,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::Sm const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, 1, 1, 1, 1); - TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, TensorInfo(input_shape, 1, data_type), TensorInfo(weights_shape, 1, data_type)); // Create tensors CLTensor src = create_tensor<CLTensor>(input_shape, data_type, 1); @@ -169,7 +171,7 @@ TEST_SUITE(Float) TEST_SUITE(FP32) TEST_SUITE(W4x4) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::ALL, combine(combine(data4x4, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -177,7 +179,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<float>, framework::Da TEST_SUITE_END() TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(data3x3, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -185,7 +187,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<float>, framework::Da TEST_SUITE_END() TEST_SUITE(W1x1) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_fp32); @@ -197,7 +199,7 @@ TEST_SUITE_END() TEST_SUITE(FP16) TEST_SUITE(W4x4) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<half>, framework::DatasetMode::ALL, combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); @@ -205,7 +207,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture4x4<half>, framework::Dat TEST_SUITE_END() TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<half>, framework::DatasetMode::ALL, combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); @@ -213,7 +215,7 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture3x3<half>, framework::Dat TEST_SUITE_END() TEST_SUITE(W1x1) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F16))) +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerFixture1x1<half>, framework::DatasetMode::ALL, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F16)), data_layouts_dataset)) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); @@ -236,7 +238,8 @@ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) TEST_SUITE(W4x4) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<uint8_t>, framework::DatasetMode::ALL, combine(combine(data4x4, framework::dataset::make("DataType", DataType::QASYMM8)), +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(data4x4, framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0)))) { // Validate output @@ -245,7 +248,8 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture4x4<uint8_t>, fr TEST_SUITE_END() TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::ALL, combine(combine(data3x3, framework::dataset::make("DataType", DataType::QASYMM8)), +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(data3x3, framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0)))) { // Validate output @@ -254,7 +258,8 @@ FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture3x3<uint8_t>, fr TEST_SUITE_END() TEST_SUITE(W1x1) -FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::ALL, combine(combine(data1x1, framework::dataset::make("DataType", DataType::QASYMM8)), +FIXTURE_DATA_TEST_CASE(Run, CLDeconvolutionLayerQuantizedFixture1x1<uint8_t>, framework::DatasetMode::ALL, combine(combine(combine(data1x1, framework::dataset::make("DataType", DataType::QASYMM8)), + data_layouts_dataset), framework::dataset::make("QuantizationInfo", QuantizationInfo(2.f / 255.f, 0)))) { // Validate output diff --git a/tests/validation/Helpers.cpp b/tests/validation/Helpers.cpp index fd034b649e..eab6d5629f 100644 --- a/tests/validation/Helpers.cpp +++ b/tests/validation/Helpers.cpp @@ -302,6 +302,15 @@ void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &sh } } +std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, float min, float max) +{ + ARM_COMPUTE_ERROR_ON_MSG(min > max, "min must be lower equal than max"); + + const int min_bound = quant_info.quantize(min, RoundingPolicy::TO_NEAREST_UP); + const int max_bound = quant_info.quantize(max, RoundingPolicy::TO_NEAREST_UP); + return std::pair<int, int>(min_bound, max_bound); +} + template void get_tile(const SimpleTensor<float> &in, SimpleTensor<float> &roi, const Coordinates &coord); template void get_tile(const SimpleTensor<half> &in, SimpleTensor<half> &roi, const Coordinates &coord); template void zeros(SimpleTensor<float> &in, const Coordinates &anchor, const TensorShape &shape); diff --git a/tests/validation/Helpers.h b/tests/validation/Helpers.h index 779ecdca11..4d1d21440d 100644 --- a/tests/validation/Helpers.h +++ b/tests/validation/Helpers.h @@ -231,6 +231,14 @@ void get_tile(const SimpleTensor<T> &in, SimpleTensor<T> &tile, const Coordinate */ template <typename T> void zeros(SimpleTensor<T> &in, const Coordinates &anchor, const TensorShape &shape); + +/** Helper function to compute quantized min and max bounds + * + * @param[in] quant_info Quantization info to be used for conversion + * @param[in] min Floating point minimum value to be quantized + * @param[in] max Floating point maximum value to be quantized + */ +std::pair<int, int> get_quantized_bounds(const QuantizationInfo &quant_info, float min, float max); } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index eb643b8e7c..1b74400676 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h" #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" @@ -53,6 +54,7 @@ const auto data3x3 = datasets::SmallDeconvolutionShapes() * framework::dataset:: const auto data1x1 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 1) * framework::dataset::make("PadY", 0, 1) * framework::dataset::make("ax", 0) * framework::dataset::make("ay", 0) * framework::dataset::make("NumKernels", { 1, 3 }); +const auto data_layouts_dataset = framework::dataset::make("DataLayout", { DataLayout::NCHW }); } // namespace TEST_SUITE(NEON) @@ -68,7 +70,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::Sm const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, 1, 1, 1, 1); - TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, TensorInfo(input_shape, 1, data_type), TensorInfo(weights_shape, 1, data_type)); // Create tensors Tensor src = create_tensor<Tensor>(input_shape, data_type, 1); @@ -172,7 +174,7 @@ TEST_SUITE(Float) TEST_SUITE(FP32) TEST_SUITE(W4x4) -FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::ALL, combine(data4x4, framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4<float>, framework::DatasetMode::ALL, combine(combine(data4x4, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32); @@ -181,7 +183,7 @@ TEST_SUITE_END() TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(data3x3, framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture3x3<float>, framework::DatasetMode::ALL, combine(combine(data3x3, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32); @@ -189,7 +191,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture3x3<float>, framework::Da TEST_SUITE_END() TEST_SUITE(W1x1) -FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(data1x1, framework::dataset::make("DataType", DataType::F32))) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1<float>, framework::DatasetMode::ALL, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { // Validate output validate(Accessor(_target), _reference, tolerance_fp32); diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 3b420eac09..795b9de6cd 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -77,7 +77,8 @@ protected: { case DataType::QASYMM8: { - std::uniform_int_distribution<uint8_t> distribution(0, 3); + std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, i); break; } diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index d3a7be74b0..85c7ed5604 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -23,6 +23,7 @@ */ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "tests/AssetsLibrary.h" #include "tests/Globals.h" #include "tests/IAccessor.h" @@ -39,6 +40,8 @@ namespace test { namespace validation { +using namespace arm_compute::misc::shape_calculator; + template <typename TensorType, typename AccessorType, typename FunctionType, typename T> class DeconvolutionLayerFixtureBase : public framework::Fixture { @@ -48,12 +51,15 @@ public: public: template <typename...> void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, - const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, QuantizationInfo quantization_info) + const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info) { - _data_type = data_type; + _data_type = data_type; + _bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + _data_layout = data_layout; + _quantization_info = quantization_info; - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, quantization_info); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, quantization_info); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, inner_border); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, inner_border); } protected: @@ -64,7 +70,8 @@ protected: { case DataType::QASYMM8: { - std::uniform_int_distribution<uint8_t> distribution(0, 3); + std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<uint8_t> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, i); break; } @@ -86,14 +93,21 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, - const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &inner_border, DataType data_type, QuantizationInfo quantization_info) + TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, const TensorShape bias_shape, TensorShape output_shape, + const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> &inner_border) { + if(_data_layout == DataLayout::NHWC) + { + permute(input_shape, PermutationVector(2U, 0U, 1U)); + permute(weights_shape, PermutationVector(2U, 0U, 1U)); + permute(output_shape, PermutationVector(2U, 0U, 1U)); + } + // Create tensors - TensorType src = create_tensor<TensorType>(input_shape, data_type, 1, quantization_info); - TensorType weights = create_tensor<TensorType>(weights_shape, data_type, 1, quantization_info); - TensorType bias = create_tensor<TensorType>(bias_shape, is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type, 1, quantization_info); - TensorType dst = create_tensor<TensorType>(output_shape, data_type, 1, quantization_info); + TensorType src = create_tensor<TensorType>(input_shape, _data_type, 1, _quantization_info, _data_layout); + TensorType weights = create_tensor<TensorType>(weights_shape, _data_type, 1, _quantization_info, _data_layout); + TensorType bias = create_tensor<TensorType>(bias_shape, _bias_data_type, 1, _quantization_info, _data_layout); + TensorType dst = create_tensor<TensorType>(output_shape, _data_type, 1, _quantization_info, _data_layout); // Create and configure function FunctionType conv; @@ -127,12 +141,12 @@ protected: } SimpleTensor<T> compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, - const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> inner_border, DataType data_type, QuantizationInfo quantization_info) + const PadStrideInfo &info, const std::pair<unsigned int, unsigned int> inner_border) { // Create reference - SimpleTensor<T> src{ input_shape, data_type, 1, quantization_info }; - SimpleTensor<T> weights{ weights_shape, data_type, 1, quantization_info }; - SimpleTensor<TBias> bias{ bias_shape, data_type, 1, quantization_info }; + SimpleTensor<T> src{ input_shape, _data_type, 1, _quantization_info }; + SimpleTensor<T> weights{ weights_shape, _data_type, 1, _quantization_info }; + SimpleTensor<TBias> bias{ bias_shape, _bias_data_type, 1, _quantization_info }; // Fill reference fill(src, 0); @@ -142,9 +156,12 @@ protected: return reference::deconvolution_layer<T>(src, weights, bias, output_shape, info, inner_border); } - TensorType _target{}; - SimpleTensor<T> _reference{}; - DataType _data_type{}; + TensorType _target{}; + SimpleTensor<T> _reference{}; + DataType _data_type{}; + DataType _bias_data_type{}; + DataLayout _data_layout{}; + QuantizationInfo _quantization_info{}; }; template <typename TensorType, typename AccessorType, typename FunctionType, typename T, unsigned int kernel_size_x, unsigned int kernel_size_y> @@ -153,16 +170,18 @@ class DeconvolutionValidationFixture : public DeconvolutionLayerFixtureBase<Tens public: template <typename...> void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, - unsigned int inner_border_right, unsigned int inner_border_top, unsigned int num_kernels, DataType data_type) + unsigned int inner_border_right, unsigned int inner_border_top, unsigned int num_kernels, DataType data_type, DataLayout data_layout) { ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); const std::pair<unsigned int, unsigned int> inner_border(inner_border_right, inner_border_top); - auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, sx, sy); - TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape); - DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, QuantizationInfo()); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, sx, sy); + TensorInfo input_info(input_shape, 1, data_type); + TensorInfo weights_info(weights_shape, 1, data_type); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); + DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, data_layout, QuantizationInfo()); } }; @@ -172,16 +191,18 @@ class DeconvolutionValidationQuantizedFixture : public DeconvolutionLayerFixture public: template <typename...> void setup(TensorShape input_shape, unsigned int sx, unsigned int sy, unsigned int padx, unsigned int pady, - unsigned int inner_border_right, unsigned int inner_border_top, unsigned int num_kernels, DataType data_type, QuantizationInfo quantization_info) + unsigned int inner_border_right, unsigned int inner_border_top, unsigned int num_kernels, DataType data_type, DataLayout data_layout, QuantizationInfo quantization_info) { ARM_COMPUTE_ERROR_ON_MSG(kernel_size_x != kernel_size_y, "Only square kernels supported"); const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); const TensorShape bias_shape(num_kernels); const PadStrideInfo info(sx, sy, padx, pady, DimensionRoundingType::CEIL); const std::pair<unsigned int, unsigned int> inner_border(inner_border_right, inner_border_top); - auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, sx, sy); - TensorShape output_shape = deconvolution_output_shape(out_dim, input_shape, weights_shape); - DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, quantization_info); + auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, padx, pady, sx, sy); + TensorInfo input_info(input_shape, 1, data_type, quantization_info); + TensorInfo weights_info(weights_shape, 1, data_type, quantization_info); + TensorShape output_shape = compute_deconvolution_output_shape(out_dim, input_info, weights_info); + DeconvolutionLayerFixtureBase<TensorType, AccessorType, FunctionType, T>::setup(input_shape, weights_shape, bias_shape, output_shape, info, inner_border, data_type, data_layout, quantization_info); } }; diff --git a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h index 3bb935e49f..93e4e64830 100644 --- a/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h +++ b/tests/validation/fixtures/NormalizePlanarYUVLayerFixture.h @@ -68,11 +68,10 @@ protected: } else if(is_data_type_quantized_asymmetric(_data_type)) { - const QuantizationInfo quant_info = src_tensor.quantization_info(); - const int min_bound = quant_info.quantize(-1.f, RoundingPolicy::TO_NEAREST_UP); - const int max_bound = quant_info.quantize(1.f, RoundingPolicy::TO_NEAREST_UP); - std::uniform_int_distribution<> distribution(min_bound, max_bound); - std::uniform_int_distribution<> distribution_std(quant_info.quantize(0.1f, RoundingPolicy::TO_NEAREST_UP), max_bound); + const QuantizationInfo quant_info = src_tensor.quantization_info(); + std::pair<int, int> bounds = get_quantized_bounds(quant_info, -1.f, 1.0f); + std::uniform_int_distribution<> distribution(bounds.first, bounds.second); + std::uniform_int_distribution<> distribution_std(quant_info.quantize(0.1f, RoundingPolicy::TO_NEAREST_UP), bounds.second); library->fill(src_tensor, distribution, 0); library->fill(mean_tensor, distribution, 1); library->fill(std_tensor, distribution_std, 2); diff --git a/tests/validation/fixtures/ReduceMeanFixture.h b/tests/validation/fixtures/ReduceMeanFixture.h index 6debd4a038..8692213641 100644 --- a/tests/validation/fixtures/ReduceMeanFixture.h +++ b/tests/validation/fixtures/ReduceMeanFixture.h @@ -32,6 +32,7 @@ #include "tests/IAccessor.h" #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" +#include "tests/validation/Helpers.h" #include "tests/validation/reference/ReductionOperation.h" #include "tests/validation/reference/ReshapeLayer.h" @@ -63,10 +64,8 @@ protected: } else { - const QuantizationInfo quant_info = tensor.quantization_info(); - const int min_bound = quant_info.quantize(-1.f, RoundingPolicy::TO_NEAREST_UP); - const int max_bound = quant_info.quantize(1.f, RoundingPolicy::TO_NEAREST_UP); - std::uniform_int_distribution<> distribution(min_bound, max_bound); + std::pair<int, int> bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution<> distribution(bounds.first, bounds.second); library->fill(tensor, distribution, 0); } |