about | summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Luca Foschiani <luca.foschiani@arm.com> 2020-02-17 17:02:49 +0000
committer Luca Foschiani <luca.foschiani@arm.com> 2020-04-07 09:04:19 +0000
commit fedefc3a8d76b9dea5945414324427ef5a01835d (patch)
tree b2a2f6ab45d8a16ab26b5a99c832a18e207899aa
parent 0d008f77b0085619c446d0ab5dc1228a80776706 (diff)
download ComputeLibrary-fedefc3a8d76b9dea5945414324427ef5a01835d.tar.gz
COMPMID-2765 Add support for QASYMM8_SIGNED in NEDeconvolutionLayer
Signed-off-by: Luca Foschiani <luca.foschiani@arm.com>
Change-Id: I8295fadee15311a9ab846aa24c031b82c0b799eb
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2952
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
-rw-r--r-- Android.bp | 1
-rw-r--r-- arm_compute/core/CPP/CPPKernels.h | 3
-rw-r--r-- arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h | 83
-rw-r--r-- arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h | 4
-rw-r--r-- arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h | 51
-rw-r--r-- src/core/CPP/kernels/CPPFlipWeightsKernel.cpp | 113
-rw-r--r-- src/core/NEON/kernels/NEReverseKernel.cpp | 69
-rw-r--r-- src/runtime/NEON/functions/NEDeconvolutionLayer.cpp | 24
-rw-r--r-- tests/validation/NEON/DeconvolutionLayer.cpp | 109
-rw-r--r-- tests/validation/fixtures/DeconvolutionLayerFixture.h | 2
10 files changed, 131 insertions, 328 deletions
diff --git a/Android.bp b/Android.bp
index 528467a44e..6e34cb5cca 100644
--- a/Android.bp
+++ b/Android.bp
@@ -212,7 +212,6 @@ cc_library_static {
"src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp",
"src/core/CPP/kernels/CPPCornerCandidatesKernel.cpp",
"src/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.cpp",
- "src/core/CPP/kernels/CPPFlipWeightsKernel.cpp",
"src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp",
"src/core/CPP/kernels/CPPPermuteKernel.cpp",
"src/core/CPP/kernels/CPPSortEuclideanDistanceKernel.cpp",
diff --git a/arm_compute/core/CPP/CPPKernels.h b/arm_compute/core/CPP/CPPKernels.h
index d12cb2857a..c7b40baf22 100644
--- a/arm_compute/core/CPP/CPPKernels.h
+++ b/arm_compute/core/CPP/CPPKernels.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -28,7 +28,6 @@
#include "arm_compute/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.h"
#include "arm_compute/core/CPP/kernels/CPPCornerCandidatesKernel.h"
#include "arm_compute/core/CPP/kernels/CPPDetectionWindowNonMaximaSuppressionKernel.h"
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
#include "arm_compute/core/CPP/kernels/CPPNonMaximumSuppressionKernel.h"
#include "arm_compute/core/CPP/kernels/CPPPermuteKernel.h"
#include "arm_compute/core/CPP/kernels/CPPSortEuclideanDistanceKernel.h"
diff --git a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h b/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h
deleted file mode 100644
index 285aa585be..0000000000
--- a/arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Copyright (c) 2018-2019 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CPP_FLIP_WEIGHTS_KERNEL_H
-#define ARM_COMPUTE_CPP_FLIP_WEIGHTS_KERNEL_H
-
-#include "arm_compute/core/CPP/ICPPKernel.h"
-
-namespace arm_compute
-{
-class ITensor;
-
-/** CPP kernel to perform 180 degrees flipping on deconvolution weights. */
-class CPPFlipWeightsKernel : public ICPPKernel
-{
-public:
- const char *name() const override
- {
- return "CPPFlipWeightsKernel";
- }
- /** Default constructor */
- CPPFlipWeightsKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPFlipWeightsKernel(const CPPFlipWeightsKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CPPFlipWeightsKernel &operator=(const CPPFlipWeightsKernel &) = delete;
- /** Allow instances of this class to be moved */
- CPPFlipWeightsKernel(CPPFlipWeightsKernel &&) = default;
- /** Allow instances of this class to be moved */
- CPPFlipWeightsKernel &operator=(CPPFlipWeightsKernel &&) = default;
- /** Default destructor */
- ~CPPFlipWeightsKernel() = default;
-
- /** Set the input and output of the kernel.
- *
- * @param[in] input The input tensor to flip. Data types supported: QASYMM8/F16/F32. Data layouts supported: NCHW/NHWC.
- * @param[out] output The output tensor. Data types supported: Same as @p input
- */
- void configure(const ITensor *input, ITensor *output);
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
- /** Function to perform flipping.
- *
- * @param[in] window_input Input region on which to execute the kernel.
- */
- template <typename T>
- void flip_weights(const Window &window_input);
-
- /** Common signature for all the specialised Flip functions
- *
- * @param[in] window_input Input region on which to execute the kernel.
- */
- using FlipWeightsFunction = void (CPPFlipWeightsKernel::*)(const Window &window_input);
-
-private:
- const ITensor *_input;
- ITensor *_output;
- FlipWeightsFunction _func;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CPP_FLIP_WEIGHTS_KERNEL_H */
diff --git a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
index 76cd0fb0c2..6632bfce80 100644
--- a/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLDirectDeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,7 +61,7 @@ class ICLTensor;
* stride_x and stride_y is the input stride of the first and second dimension.
*
* The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel.
+ * reverse order to perform an actual convolution. This is achieved by using @ref CLReverse.
*
* This function calls the following OpenCL kernels/functions:
*
diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
index d031076ee7..c4c1664f20 100644
--- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,8 +27,8 @@
#include "arm_compute/runtime/CPP/functions/CPPUpsample.h"
#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
#include "arm_compute/runtime/NEON/functions/NEDirectConvolutionLayer.h"
+#include "arm_compute/runtime/NEON/functions/NEReverse.h"
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
@@ -62,12 +62,14 @@ namespace arm_compute
* stride_x and stride_y is the input stride of the first and second dimension.
*
* The weights used by Deconvolution are supposed to be the same as the ones used for Convolution. Therefore, it will be necessary to use the weights in the
- * reverse order to perform an actual convolution. This is achieved by using the @ref CPPFlipWeightsKernel.
+ * reverse order to perform an actual convolution. This is achieved by using @ref NEReverse.
*
* This function calls the following NEON kernels/functions:
*
* -# @ref CPPUpsample
* -# @ref NEConvolutionLayer
+ * -# @ref NEPermute
+ * -# @ref NEReverse
*
*/
class NEDeconvolutionLayer : public IFunction
@@ -89,9 +91,9 @@ public:
/** Set the input, weights, biases and output tensors.
*
- * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
* @param[out] output Output tensor. The output has the same number of dimensions as the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
*
@@ -99,9 +101,9 @@ public:
void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info);
/** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer
*
- * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8.
+ * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8/QASYMM8_SIGNED.
* @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input.
- * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input.
+ * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 and QASYMM8_SIGNED input, F32 for F32 input, F16 for F16 input.
* @param[in] output Output tensor info. The output has the same number of dimensions as the @p input.
* @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo.
*
@@ -114,23 +116,24 @@ public:
void prepare() override;
private:
- MemoryGroup _memory_group;
- NEConvolutionLayer _conv_f;
- CPPUpsample _upsample_f;
- CPPFlipWeightsKernel _flip_weights;
- NEPermute _permute_input;
- NEPermute _permute_weights;
- NEPermute _permute_output;
- Tensor _scaled_output;
- Tensor _weights_flipped;
- Tensor _permuted_input;
- Tensor _permuted_weights;
- Tensor _permuted_output;
- bool _is_nchw;
- const ITensor *_original_weights;
- ITensor *_input;
- PadStrideInfo _info;
- bool _is_prepared;
+ MemoryGroup _memory_group;
+ NEConvolutionLayer _conv_f;
+ CPPUpsample _upsample_f;
+ NEReverse _flip_weights;
+ NEPermute _permute_input;
+ NEPermute _permute_weights;
+ NEPermute _permute_output;
+ Tensor _scaled_output;
+ Tensor _weights_flipped;
+ Tensor _permuted_input;
+ Tensor _permuted_weights;
+ Tensor _permuted_output;
+ Tensor _flip_axis;
+ bool _is_nchw;
+ const ITensor *_original_weights;
+ ITensor *_input;
+ PadStrideInfo _info;
+ bool _is_prepared;
};
} // arm_compute
#endif /* ARM_COMPUTE_NEDECONVOLUTIONLAYER_H */
diff --git a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp b/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp
deleted file mode 100644
index 2d4c0ce5c8..0000000000
--- a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-#include <cstddef>
-#include <cstdint>
-
-using namespace arm_compute;
-
-CPPFlipWeightsKernel::CPPFlipWeightsKernel()
- : _input(nullptr), _output(nullptr), _func(nullptr)
-{
-}
-
-template <typename T>
-void CPPFlipWeightsKernel::flip_weights(const Window &window_input)
-{
- // Create iterators
- Iterator in(_input, window_input);
-
- const DataLayout data_layout = _input->info()->data_layout();
- const size_t idx_w = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
- const size_t idx_h = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
- const int kernel_width = _input->info()->dimension(idx_w);
- const int kernel_height = _input->info()->dimension(idx_h);
-
- execute_window_loop(window_input, [&](const Coordinates & id)
- {
- const unsigned int x = kernel_width - id[idx_w] - 1;
- const unsigned int y = kernel_height - id[idx_h] - 1;
- Coordinates output_coord(id);
- output_coord.set(idx_w, x);
- output_coord.set(idx_h, y);
- *(reinterpret_cast<T *>(_output->ptr_to_element(output_coord))) = *(reinterpret_cast<const T *>(in.ptr()));
- },
- in);
-}
-
-void CPPFlipWeightsKernel::configure(const ITensor *input, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- _input = input;
- _output = output;
-
- // Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps());
-
- // The CPPFlipWeightsKernel doesn't need padding so update_window_and_padding() can be skipped
- Coordinates coord;
- coord.set_num_dimensions(output->info()->num_dimensions());
- output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
- ICPPKernel::configure(win);
-
- switch(input->info()->data_type())
- {
- case DataType::F32:
- _func = &CPPFlipWeightsKernel::flip_weights<float>;
- break;
- case DataType::F16:
- _func = &CPPFlipWeightsKernel::flip_weights<half>;
- break;
- case DataType::QASYMM8:
- _func = &CPPFlipWeightsKernel::flip_weights<uint8_t>;
- break;
- default:
- ARM_COMPUTE_ERROR("Not supported");
- }
-}
-
-void CPPFlipWeightsKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(_func == nullptr);
-
- (this->*_func)(window);
-}
diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp
index 2f584164dc..5a8c446ddd 100644
--- a/src/core/NEON/kernels/NEReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEReverseKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -106,33 +106,20 @@ void run_reverse(const Window &window, const ITensor *input, const ITensor *axis
}
// Check if we need a left-over loop for the y dimension
- const int window_step_x = 16 / input->info()->element_size();
- const int window_start_x = window.x().start();
- const int window_end_x = std::min(window.x().end(), static_cast<int>(input->info()->dimension(0)));
- const int window_end_x_multiple_of = ((window_end_x - window_start_x) / window_step_x) * window_step_x;
- bool left_over_loop_x = (((window_end_x - window_start_x) % window_step_x) != 0);
+ const int window_step_x = 16 / input->info()->element_size();
+ const int window_start_x = window.x().start();
+ const int window_end_x = window.x().end();
- Window slice = window.first_slice_window_4D();
+ Window win(window);
+ win.set(Window::DimX, Window::Dimension(0, 1, 1));
- if(left_over_loop_x)
+ Iterator input_it(input, win);
+ execute_window_loop(win, [&](const Coordinates & id)
{
- // Check if window_end_y_multiple_of is greater than window_start_y
- if(window_end_x_multiple_of > window_start_x)
+ int x = window_start_x;
+ for(; x <= (window_end_x - window_step_x); x += window_step_x)
{
- slice.set(Window::DimX, Window::Dimension(window_start_x, window_end_x_multiple_of, window_step_x));
- }
- else
- {
- slice.set(Window::DimX, Window::Dimension(0, 0, 1));
- }
- }
-
- do
- {
- Iterator input_it(input, slice);
- execute_window_loop(slice, [&](const Coordinates & id)
- {
- auto in = wrapper::vloadq(reinterpret_cast<T *>(input_it.ptr()));
+ auto in = wrapper::vloadq(reinterpret_cast<T *>(input_it.ptr()) + x);
// Reverse 0 axis
if(axis_bit & 0x1)
@@ -141,39 +128,29 @@ void run_reverse(const Window &window, const ITensor *input, const ITensor *axis
in = wrapper::vcombine(wrapper::vgethigh(in), wrapper::vgetlow(in));
}
- const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - id.x() - window_step_x : id.x();
+ const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - x - window_step_x : x;
const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y();
const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z();
const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3];
auto out_ptr = reinterpret_cast<T *>(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w)));
wrapper::vstore(out_ptr, in);
- },
- input_it);
+ }
- if(left_over_loop_x)
+ // Compute left-over elements
+ for(; x < window_end_x; ++x)
{
- slice.set(Window::DimX, Window::Dimension(window_end_x_multiple_of, window_end_x, 1));
+ const auto in = *(reinterpret_cast<T *>(input_it.ptr()) + x);
- Iterator input_it(input, slice);
-
- // Compute left-over elements along the y dimension (1x1)
- execute_window_loop(slice, [&](const Coordinates & id)
- {
- const auto in = *reinterpret_cast<T *>(input_it.ptr());
-
- const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - id.x() - 1 : id.x();
- const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y();
- const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z();
- const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3];
+ const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - x - 1 : x;
+ const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y();
+ const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z();
+ const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3];
- *reinterpret_cast<T *>(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w))) = in;
- },
- input_it);
+ *reinterpret_cast<T *>(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w))) = in;
}
-
- }
- while(window.slide_window_slice_4D(slice));
+ },
+ input_it);
}
void NEReverseKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 0411b41220..06885d59e5 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -46,6 +46,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
_permuted_input(),
_permuted_weights(),
_permuted_output(),
+ _flip_axis(),
_is_nchw(false),
_original_weights(nullptr),
_input(nullptr),
@@ -57,7 +58,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
const unsigned int width_idx = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
@@ -122,6 +123,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
_info = info;
_is_prepared = false;
_is_nchw = data_layout == DataLayout::NCHW;
+ _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
const unsigned int pad_left = info.pad_left();
const unsigned int pad_right = info.pad_right();
@@ -139,6 +141,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
// Output auto initialization if not yet initialized
auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
+ _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
_memory_group.manage(&_scaled_output);
if(!_is_nchw)
@@ -185,7 +188,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
_weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
_weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
- _flip_weights.configure(&_permuted_weights, &_weights_flipped);
+ _flip_weights.configure(&_permuted_weights, &_weights_flipped, &_flip_axis);
// setup the function to convolve the upscaled output
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
@@ -230,13 +233,19 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
_upsample_f.configure(input, &_scaled_output, upsample_info);
_weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
- _flip_weights.configure(weights, &_weights_flipped);
+ _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
// setup the function to convolve the upscaled output
const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
_conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
}
_scaled_output.allocator()->allocate();
+
+ // Setup flip axis data
+ _flip_axis.allocator()->allocate();
+ auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
+ axis_data[0] = 0;
+ axis_data[1] = 1;
}
void NEDeconvolutionLayer::run()
@@ -276,16 +285,13 @@ void NEDeconvolutionLayer::prepare()
// Run weights flipping and mark original weights tensor as unused
_weights_flipped.allocator()->allocate();
- NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
+ _flip_weights.run();
_original_weights->mark_as_unused();
// Prepare convolution
_conv_f.prepare();
- if(!_weights_flipped.is_used())
- {
- _weights_flipped.allocator()->free();
- }
+ // Unused weights are already released in _conv_f
if(!_is_nchw)
{
diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp
index 89f9d98ed5..38256eb2ad 100644
--- a/tests/validation/NEON/DeconvolutionLayer.cpp
+++ b/tests/validation/NEON/DeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,8 +43,8 @@ namespace validation
{
namespace
{
-constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
-constexpr AbsoluteTolerance<float> tolerance_qasymm8(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+constexpr AbsoluteTolerance<float> tolerance_fp32(0.001f); /**< Tolerance for floating point tests */
+constexpr AbsoluteTolerance<float> tolerance_quantized(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
const RelativeTolerance<half_float::half> tolerance_fp16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */
#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/
@@ -85,46 +85,6 @@ const auto output_qinfo_dataset = framework::dataset::make("OutputQInfo",
TEST_SUITE(NEON)
TEST_SUITE(DeconvolutionLayer)
-DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, (combine(datasets::SmallDeconvolutionShapes(), framework::dataset::make("DataType", DataType::F32))),
- input_shape, data_type)
-{
- // Create shapes
- const unsigned int kernel_size_x = 3;
- const unsigned int kernel_size_y = 3;
- const unsigned int num_kernels = 1;
- const TensorShape weights_shape(kernel_size_x, kernel_size_y, input_shape.z(), num_kernels);
- const TensorShape bias_shape(num_kernels);
- const PadStrideInfo info(1, 1, 1, 1);
- auto out_dim = deconvolution_output_dimensions(input_shape.x(), input_shape.y(), kernel_size_x, kernel_size_y, info);
- TensorShape output_shape = compute_deconvolution_output_shape(out_dim, TensorInfo(input_shape, 1, data_type), TensorInfo(weights_shape, 1, data_type));
-
- // Create tensors
- Tensor src = create_tensor<Tensor>(input_shape, data_type, 1);
- Tensor weights = create_tensor<Tensor>(weights_shape, data_type, 1);
- Tensor bias = create_tensor<Tensor>(bias_shape, data_type, 1);
- Tensor dst = create_tensor<Tensor>(output_shape, data_type, 1);
-
- ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS);
- ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS);
-
- // Create and configure function
- NEDeconvolutionLayer deconv;
- deconv.configure(&src, &weights, &bias, &dst, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL));
-
- // Validate valid region
- const ValidRegion src_valid_region = shape_to_valid_region(input_shape);
- const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape);
- const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape);
- const ValidRegion dst_valid_region = shape_to_valid_region(output_shape);
-
- validate(src.info()->valid_region(), src_valid_region);
- validate(weights.info()->valid_region(), weights_valid_region);
- validate(bias.info()->valid_region(), bias_valid_region);
- validate(dst.info()->valid_region(), dst_valid_region);
-}
-
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(
@@ -294,7 +254,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<uint8_t>, fr
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
}
TEST_SUITE_END() // W4x4
@@ -308,7 +268,7 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
}
FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
framework::dataset::make("DataType",
@@ -319,7 +279,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<uint8_t
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
}
TEST_SUITE_END() // W3x3
@@ -332,11 +292,66 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<uint8_t>, fr
add_bias_dataset))
{
// Validate output
- validate(Accessor(_target), _reference, tolerance_qasymm8, tolerance_num);
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
}
TEST_SUITE_END() // W1x1
TEST_SUITE_END() // QASYMM8
+
+TEST_SUITE(QASYMM8_SIGNED)
+
+TEST_SUITE(W4x4)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture4x4<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data4x4, framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+}
+TEST_SUITE_END() // W4x4
+
+TEST_SUITE(W3x3)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(combine(data3x3_precommit,
+ framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerQuantizedFixture3x3<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data3x3,
+ framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+}
+TEST_SUITE_END() // W3x3
+
+TEST_SUITE(W1x1)
+FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerQuantizedFixture1x1<int8_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(combine(data1x1, framework::dataset::make("DataType",
+ DataType::QASYMM8_SIGNED)),
+ data_layouts_dataset),
+ input_qinfo_dataset),
+ output_qinfo_dataset),
+ add_bias_dataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_quantized, tolerance_num);
+}
+TEST_SUITE_END() // W1x1
+
+TEST_SUITE_END() // QASYMM8_SIGNED
TEST_SUITE_END() // Quantized
TEST_SUITE_END() // DeconvolutionLayer
diff --git a/tests/validation/fixtures/DeconvolutionLayerFixture.h b/tests/validation/fixtures/DeconvolutionLayerFixture.h
index b9a478b1a9..57951c0f36 100644
--- a/tests/validation/fixtures/DeconvolutionLayerFixture.h
+++ b/tests/validation/fixtures/DeconvolutionLayerFixture.h
@@ -46,7 +46,7 @@ template <typename TensorType, typename AccessorType, typename FunctionType, typ
class DeconvolutionLayerFixtureBase : public framework::Fixture
{
public:
- using TBias = typename std::conditional < std::is_same<T, uint8_t>::value || std::is_same<T, int8_t>::value, int32_t, T >::type;
+ using TBias = typename std::conditional < std::is_same<typename std::decay<T>::type, uint8_t>::value || std::is_same<typename std::decay<T>::type, int8_t>::value, int32_t, T >::type;
public:
template <typename...>