From fedefc3a8d76b9dea5945414324427ef5a01835d Mon Sep 17 00:00:00 2001 From: Luca Foschiani Date: Mon, 17 Feb 2020 17:02:49 +0000 Subject: COMPMID-2765 Add support for QASYMM8_SIGNED in NEDeconvolutionLayer Signed-off-by: Luca Foschiani Change-Id: I8295fadee15311a9ab846aa24c031b82c0b799eb Signed-off-by: Michalis Spyrou Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2952 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Sheri Zhang --- Android.bp | 1 - arm_compute/core/CPP/CPPKernels.h | 3 +- .../core/CPP/kernels/CPPFlipWeightsKernel.h | 83 --------------- .../CL/functions/CLDirectDeconvolutionLayer.h | 4 +- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 51 +++++----- src/core/CPP/kernels/CPPFlipWeightsKernel.cpp | 113 --------------------- src/core/NEON/kernels/NEReverseKernel.cpp | 69 +++++-------- .../NEON/functions/NEDeconvolutionLayer.cpp | 24 +++-- tests/validation/NEON/DeconvolutionLayer.cpp | 109 +++++++++++--------- .../fixtures/DeconvolutionLayerFixture.h | 2 +- 10 files changed, 131 insertions(+), 328 deletions(-) delete mode 100644 arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h delete mode 100644 src/core/CPP/kernels/CPPFlipWeightsKernel.cpp diff --git a/Android.bp b/Android.bp index 528467a44e..6e34cb5cca 100644 --- a/Android.bp +++ b/Android.bp @@ -212,7 +212,6 @@ cc_library_static { "src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp", "src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp", "src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp", - "src/core/CPP/kernels/CPPFlipWeightsKernel.cpp", "src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp", "src/core/CPP/kernels/CPPPermuteKernel.cpp", "src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp", diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h index d12cb2857a..c7b40baf22 100644 --- a/arm_compute/core/CPP/CPPKernels.h +++ b/arm_compute/core/CPP/CPPKernels.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -28,7 +28,6 @@ #include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h" #include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h" #include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h" -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" #include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h" #include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h" #include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h" diff --git a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h deleted file mode 100644 index 285aa585be..0000000000 --- a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h +++ /dev/null @@ -1,83 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CPP_FLIP_WEIGHTS_KERNEL_H -#define ARM_COMPUTE_CPP_FLIP_WEIGHTS_KERNEL_H - -#include "arm_compute/core/CPP/ICPPKernel.h" - -namespace arm_compute -{ -class ITensor; - -/** CPP kernel to perform 180 degrees flipping on deconvolution weights. */ -class CPPFlipWeightsKernel : public ICPPKernel -{ -public: - const char *name() const override - { - return "CPPFlipWeightsKernel"; - } - /** Default constructor */ - CPPFlipWeightsKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPFlipWeightsKernel(const CPPFlipWeightsKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CPPFlipWeightsKernel &operator=(const CPPFlipWeightsKernel &) = delete; - /** Allow instances of this class to be moved */ - CPPFlipWeightsKernel(CPPFlipWeightsKernel &&) = default; - /** Allow instances of this class to be moved */ - CPPFlipWeightsKernel &operator=(CPPFlipWeightsKernel &&) = default; - /** Default destructor */ - ~CPPFlipWeightsKernel() = default; - - /** Set the input and output of the kernel. - * - * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32. Data layouts supported: NCHW/NHWC. - * @param[out] output The output tensor. Data types supported: Same as @p input - */ - void configure(const ITensor *input, ITensor *output); - - // Inherited methods overridden: - void run(const Window &window, const ThreadInfo &info) override; - - /** Function to perform flipping. - * - * @param[in] window_input Input region on which to execute the kernel. - */ - template - void flip_weights(const Window &window_input); - - /** Common signature for all the specialised Flip functions - * - * @param[in] window_input Input region on which to execute the kernel. - */ - using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input); - -private: - const ITensor *_input; - ITensor *_output; - FlipWeightsFunction _func; -}; -} // namespace arm_compute -#endif /*ARM_COMPUTE_CPP_FLIP_WEIGHTS_KERNEL_H */ diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h index 76cd0fb0c2..6632bfce80 100644 --- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -61,7 +61,7 @@ class ICLTensor; * stride_x and stride_y is the input stride of the first and second dimension. * * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel. + * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse. * * This function calls the following OpenCL kernels/functions: * diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index d031076ee7..c4c1664f20 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -27,8 +27,8 @@ #include "arm_compute/runtime/CPP/functions/CPPUpsample.h" #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" #include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEReverse.h" -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" @@ -62,12 +62,14 @@ namespace arm_compute * stride_x and stride_y is the input stride of the first and second dimension. * * The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the - * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel. + * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse. * * This function calls the following NEON kernels/functions: * * -# @ref CPPUpsample * -# @ref NEConvolutionLayer + * -# @ref NEPermute + * -# @ref NEReverse * */ class NEDeconvolutionLayer : public IFunction @@ -89,9 +91,9 @@ public: /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. * @@ -99,9 +101,9 @@ public: void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED. * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input. * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. * @@ -114,23 +116,24 @@ public: void prepare() override; private: - MemoryGroup _memory_group; - NEConvolutionLayer _conv_f; - CPPUpsample _upsample_f; - CPPFlipWeightsKernel _flip_weights; - NEPermute _permute_input; - NEPermute _permute_weights; - NEPermute _permute_output; - Tensor _scaled_output; - Tensor _weights_flipped; - Tensor _permuted_input; - Tensor _permuted_weights; - Tensor _permuted_output; - bool _is_nchw; - const ITensor *_original_weights; - ITensor *_input; - PadStrideInfo _info; - bool _is_prepared; + MemoryGroup _memory_group; + NEConvolutionLayer _conv_f; + CPPUpsample _upsample_f; + NEReverse _flip_weights; + NEPermute _permute_input; + NEPermute _permute_weights; + NEPermute _permute_output; + Tensor _scaled_output; + Tensor _weights_flipped; + Tensor _permuted_input; + Tensor _permuted_weights; + Tensor _permuted_output; + Tensor _flip_axis; + bool _is_nchw; + const ITensor *_original_weights; + ITensor *_input; + PadStrideInfo _info; + bool _is_prepared; }; } // arm_compute #endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */ diff --git a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp b/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp deleted file mode 100644 index 2d4c0ce5c8..0000000000 --- a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* - * Copyright (c) 2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h" - -#include "arm_compute/core/Error.h" -#include "arm_compute/core/Helpers.h" -#include "arm_compute/core/ITensor.h" -#include "arm_compute/core/TensorInfo.h" -#include "arm_compute/core/Types.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" - -#include -#include - -using namespace arm_compute; - -CPPFlipWeightsKernel::CPPFlipWeightsKernel() - : _input(nullptr), _output(nullptr), _func(nullptr) -{ -} - -template -void CPPFlipWeightsKernel::flip_weights(const Window &window_input) -{ - // Create iterators - Iterator in(_input, window_input); - - const DataLayout data_layout = _input->info()->data_layout(); - const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH); - const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - const int kernel_width = _input->info()->dimension(idx_w); - const int kernel_height = _input->info()->dimension(idx_h); - - execute_window_loop(window_input, [&](const Coordinates & id) - { - const unsigned int x = kernel_width - id[idx_w] - 1; - const unsigned int y = kernel_height - id[idx_h] - 1; - Coordinates output_coord(id); - output_coord.set(idx_w, x); - output_coord.set(idx_h, y); - *(reinterpret_cast(_output->ptr_to_element(output_coord))) = *(reinterpret_cast(in.ptr())); - }, - in); -} - -void CPPFlipWeightsKernel::configure(const ITensor *input, ITensor *output) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output); - ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); - - _input = input; - _output = output; - - // Configure kernel window - Window win = calculate_max_window(*input->info(), Steps()); - - // The CPPFlipWeightsKernel doesn't need padding so update_window_and_padding() can be skipped - Coordinates coord; - coord.set_num_dimensions(output->info()->num_dimensions()); - output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape())); - - ICPPKernel::configure(win); - - switch(input->info()->data_type()) - { - case DataType::F32: - _func = &CPPFlipWeightsKernel::flip_weights; - break; - case DataType::F16: - _func = &CPPFlipWeightsKernel::flip_weights; - break; - case DataType::QASYMM8: - _func = &CPPFlipWeightsKernel::flip_weights; - break; - default: - ARM_COMPUTE_ERROR("Not supported"); - } -} - -void CPPFlipWeightsKernel::run(const Window &window, const ThreadInfo &info) -{ - ARM_COMPUTE_UNUSED(info); - ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); - ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window); - ARM_COMPUTE_ERROR_ON(_func == nullptr); - - (this->*_func)(window); -} diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp index 2f584164dc..5a8c446ddd 100644 --- a/src/core/NEON/kernels/NEReverseKernel.cpp +++ b/src/core/NEON/kernels/NEReverseKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -106,33 +106,20 @@ void run_reverse(const Window &window, const ITensor *input, const ITensor *axis } // Check if we need a left-over loop for the y dimension - const int window_step_x = 16 / input->info()->element_size(); - const int window_start_x = window.x().start(); - const int window_end_x = std::min(window.x().end(), static_cast(input->info()->dimension(0))); - const int window_end_x_multiple_of = ((window_end_x - window_start_x) / window_step_x) * window_step_x; - bool left_over_loop_x = (((window_end_x - window_start_x) % window_step_x) != 0); + const int window_step_x = 16 / input->info()->element_size(); + const int window_start_x = window.x().start(); + const int window_end_x = window.x().end(); - Window slice = window.first_slice_window_4D(); + Window win(window); + win.set(Window::DimX, Window::Dimension(0, 1, 1)); - if(left_over_loop_x) + Iterator input_it(input, win); + execute_window_loop(win, [&](const Coordinates & id) { - // Check if window_end_y_multiple_of is greater than window_start_y - if(window_end_x_multiple_of > window_start_x) + int x = window_start_x; + for(; x <= (window_end_x - window_step_x); x += window_step_x) { - slice.set(Window::DimX, Window::Dimension(window_start_x, window_end_x_multiple_of, window_step_x)); - } - else - { - slice.set(Window::DimX, Window::Dimension(0, 0, 1)); - } - } - - do - { - Iterator input_it(input, slice); - execute_window_loop(slice, [&](const Coordinates & id) - { - auto in = wrapper::vloadq(reinterpret_cast(input_it.ptr())); + auto in = wrapper::vloadq(reinterpret_cast(input_it.ptr()) + x); // Reverse 0 axis if(axis_bit & 0x1) @@ -141,39 +128,29 @@ void run_reverse(const Window &window, const ITensor *input, const ITensor *axis in = wrapper::vcombine(wrapper::vgethigh(in), wrapper::vgetlow(in)); } - const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - id.x() - window_step_x : id.x(); + const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - x - window_step_x : x; const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y(); const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z(); const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3]; auto out_ptr = reinterpret_cast(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w))); wrapper::vstore(out_ptr, in); - }, - input_it); + } - if(left_over_loop_x) + // Compute left-over elements + for(; x < window_end_x; ++x) { - slice.set(Window::DimX, Window::Dimension(window_end_x_multiple_of, window_end_x, 1)); + const auto in = *(reinterpret_cast(input_it.ptr()) + x); - Iterator input_it(input, slice); - - // Compute left-over elements along the y dimension (1x1) - execute_window_loop(slice, [&](const Coordinates & id) - { - const auto in = *reinterpret_cast(input_it.ptr()); - - const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - id.x() - 1 : id.x(); - const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y(); - const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z(); - const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3]; + const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - x - 1 : x; + const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y(); + const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z(); + const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3]; - *reinterpret_cast(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w))) = in; - }, - input_it); + *reinterpret_cast(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w))) = in; } - - } - while(window.slide_window_slice_4D(slice)); + }, + input_it); } void NEReverseKernel::run(const Window &window, const ThreadInfo &info) diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index 0411b41220..06885d59e5 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -46,6 +46,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr memor _permuted_input(), _permuted_weights(), _permuted_output(), + _flip_axis(), _is_nchw(false), _original_weights(nullptr), _input(nullptr), @@ -57,7 +58,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr memor Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input); const unsigned int width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH); @@ -122,6 +123,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con _info = info; _is_prepared = false; _is_nchw = data_layout == DataLayout::NCHW; + _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); const unsigned int pad_left = info.pad_left(); const unsigned int pad_right = info.pad_right(); @@ -139,6 +141,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con // Output auto initialization if not yet initialized auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info()); + _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32)); _memory_group.manage(&_scaled_output); if(!_is_nchw) @@ -185,7 +188,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con _weights_flipped.allocator()->init(*_permuted_weights.info()->clone()); _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info()); - _flip_weights.configure(&_permuted_weights, &_weights_flipped); + _flip_weights.configure(&_permuted_weights, &_weights_flipped, &_flip_axis); // setup the function to convolve the upscaled output const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); @@ -230,13 +233,19 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con _upsample_f.configure(input, &_scaled_output, upsample_info); _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout)); - _flip_weights.configure(weights, &_weights_flipped); + _flip_weights.configure(weights, &_weights_flipped, &_flip_axis); // setup the function to convolve the upscaled output const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL); _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info); } _scaled_output.allocator()->allocate(); + + // Setup flip axis data + _flip_axis.allocator()->allocate(); + auto axis_data = reinterpret_cast(_flip_axis.buffer()); + axis_data[0] = 0; + axis_data[1] = 1; } void NEDeconvolutionLayer::run() @@ -276,16 +285,13 @@ void NEDeconvolutionLayer::prepare() // Run weights flipping and mark original weights tensor as unused _weights_flipped.allocator()->allocate(); - NEScheduler::get().schedule(&_flip_weights, Window::DimZ); + _flip_weights.run(); _original_weights->mark_as_unused(); // Prepare convolution _conv_f.prepare(); - if(!_weights_flipped.is_used()) - { - _weights_flipped.allocator()->free(); - } + // Unused weights are already released in _conv_f if(!_is_nchw) { diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index 89f9d98ed5..38256eb2ad 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,8 +43,8 @@ namespace validation { namespace { -constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ -constexpr AbsoluteTolerance tolerance_qasymm8(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ +constexpr AbsoluteTolerance tolerance_quantized(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const RelativeTolerance tolerance_fp16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ @@ -85,46 +85,6 @@ const auto output_qinfo_dataset = framework::dataset::make("OutputQInfo", TEST_SUITE(NEON) TEST_SUITE(DeconvolutionLayer) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::SmallDeconvolutionShapes(), framework::dataset::make("DataType", DataType::F32))), - input_shape, data_type) -{ - // Create shapes - const unsigned int kernel_size_x = 3; - const unsigned int kernel_size_y = 3; - const unsigned int num_kernels = 1; - const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels); - const TensorShape bias_shape(num_kernels); - const PadStrideInfo info(1, 1, 1, 1); - auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info); - TensorShape output_shape = compute_deconvolution_output_shape(out_dim, TensorInfo(input_shape, 1, data_type), TensorInfo(weights_shape, 1, data_type)); - - // Create tensors - Tensor src = create_tensor(input_shape, data_type, 1); - Tensor weights = create_tensor(weights_shape, data_type, 1); - Tensor bias = create_tensor(bias_shape, data_type, 1); - Tensor dst = create_tensor(output_shape, data_type, 1); - - ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); - - // Create and configure function - NEDeconvolutionLayer deconv; - deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL)); - - // Validate valid region - const ValidRegion src_valid_region = shape_to_valid_region(input_shape); - const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); - const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); - const ValidRegion dst_valid_region = shape_to_valid_region(output_shape); - - validate(src.info()->valid_region(), src_valid_region); - validate(weights.info()->valid_region(), weights_valid_region); - validate(bias.info()->valid_region(), bias_valid_region); - validate(dst.info()->valid_region(), dst_valid_region); -} - // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( @@ -294,7 +254,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4, fr add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); } TEST_SUITE_END() // W4x4 @@ -308,7 +268,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3, framework::dataset::make("DataType", @@ -319,7 +279,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3, fr add_bias_dataset)) { // Validate output - validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num); + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); } TEST_SUITE_END() // W1x1 TEST_SUITE_END() // QASYMM8 + +TEST_SUITE(QASYMM8_SIGNED) + +TEST_SUITE(W4x4) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data4x4, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +TEST_SUITE_END() // W4x4 + +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(data3x3_precommit, + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3, + framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +TEST_SUITE_END() // W3x3 + +TEST_SUITE(W1x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType", + DataType::QASYMM8_SIGNED)), + data_layouts_dataset), + input_qinfo_dataset), + output_qinfo_dataset), + add_bias_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num); +} +TEST_SUITE_END() // W1x1 + +TEST_SUITE_END() // QASYMM8_SIGNED TEST_SUITE_END() // Quantized TEST_SUITE_END() // DeconvolutionLayer diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h index b9a478b1a9..57951c0f36 100644 --- a/tests/validation/fixtures/DeconvolutionLayerFixture.h +++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h @@ -46,7 +46,7 @@ template ::value || std::is_same::value, int32_t, T >::type; + using TBias = typename std::conditional < std::is_same::type, uint8_t>::value || std::is_same::type, int8_t>::value, int32_t, T >::type; public: template -- cgit v1.2.1