COMPMID-2765 Add support for QASYMM8_SIGNED in NEDeconvolutionLayer

Signed-off-by: Luca Foschiani <luca.foschiani@arm.com>
Change-Id: I8295fadee15311a9ab846aa24c031b82c0b799eb
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2952
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Sheri Zhang <sheri.zhang@arm.com>
author: Luca Foschiani <luca.foschiani@arm.com> 2020-02-17 17:02:49 +0000
committer: Luca Foschiani <luca.foschiani@arm.com> 2020-04-07 09:04:19 +0000
commit: fedefc3a8d76b9dea5945414324427ef5a01835d (patch)
tree: b2a2f6ab45d8a16ab26b5a99c832a18e207899aa /src
parent: 0d008f77b0085619c446d0ab5dc1228a80776706 (diff)
download: ComputeLibrary-fedefc3a8d76b9dea5945414324427ef5a01835d.tar.gz
3 files changed, 38 insertions, 168 deletions
diff --git a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp b/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp
deleted file mode 100644
index 2d4c0ce5c8..0000000000
--- a/src/core/CPP/kernels/CPPFlipWeightsKernel.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Copyright (c) 2018 ARM Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/core/CPP/kernels/CPPFlipWeightsKernel.h"
-
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-
-#include <cstddef>
-#include <cstdint>
-
-using namespace arm_compute;
-
-CPPFlipWeightsKernel::CPPFlipWeightsKernel()
-    : _input(nullptr), _output(nullptr), _func(nullptr)
-{
-}
-
-template <typename T>
-void CPPFlipWeightsKernel::flip_weights(const Window &window_input)
-{
-    // Create iterators
-    Iterator in(_input, window_input);
-
-    const DataLayout data_layout = _input->info()->data_layout();
-    const size_t     idx_w       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::WIDTH);
-    const size_t     idx_h       = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT);
-
-    const int kernel_width  = _input->info()->dimension(idx_w);
-    const int kernel_height = _input->info()->dimension(idx_h);
-
-    execute_window_loop(window_input, [&](const Coordinates & id)
-    {
-        const unsigned int x = kernel_width - id[idx_w] - 1;
-        const unsigned int y = kernel_height - id[idx_h] - 1;
-        Coordinates        output_coord(id);
-        output_coord.set(idx_w, x);
-        output_coord.set(idx_h, y);
-        *(reinterpret_cast<T *>(_output->ptr_to_element(output_coord))) = *(reinterpret_cast<const T *>(in.ptr()));
-    },
-    in);
-}
-
-void CPPFlipWeightsKernel::configure(const ITensor *input, ITensor *output)
-{
-    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_LAYOUT(input, output);
-    ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
-    _input  = input;
-    _output = output;
-
-    // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps());
-
-    // The CPPFlipWeightsKernel doesn't need padding so update_window_and_padding() can be skipped
-    Coordinates coord;
-    coord.set_num_dimensions(output->info()->num_dimensions());
-    output->info()->set_valid_region(ValidRegion(coord, output->info()->tensor_shape()));
-
-    ICPPKernel::configure(win);
-
-    switch(input->info()->data_type())
-    {
-        case DataType::F32:
-            _func = &CPPFlipWeightsKernel::flip_weights<float>;
-            break;
-        case DataType::F16:
-            _func = &CPPFlipWeightsKernel::flip_weights<half>;
-            break;
-        case DataType::QASYMM8:
-            _func = &CPPFlipWeightsKernel::flip_weights<uint8_t>;
-            break;
-        default:
-            ARM_COMPUTE_ERROR("Not supported");
-    }
-}
-
-void CPPFlipWeightsKernel::run(const Window &window, const ThreadInfo &info)
-{
-    ARM_COMPUTE_UNUSED(info);
-    ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-    ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICPPKernel::window(), window);
-    ARM_COMPUTE_ERROR_ON(_func == nullptr);
-
-    (this->*_func)(window);
-}
diff --git a/src/core/NEON/kernels/NEReverseKernel.cpp b/src/core/NEON/kernels/NEReverseKernel.cpp
index 2f584164dc..5a8c446ddd 100644
--- a/src/core/NEON/kernels/NEReverseKernel.cpp
+++ b/src/core/NEON/kernels/NEReverseKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2018-2019 ARM Limited.
+ * Copyright (c) 2018-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -106,33 +106,20 @@ void run_reverse(const Window &window, const ITensor *input, const ITensor *axis
     }
 
     // Check if we need a left-over loop for the y dimension
-    const int window_step_x            = 16 / input->info()->element_size();
-    const int window_start_x           = window.x().start();
-    const int window_end_x             = std::min(window.x().end(), static_cast<int>(input->info()->dimension(0)));
-    const int window_end_x_multiple_of = ((window_end_x - window_start_x) / window_step_x) * window_step_x;
-    bool      left_over_loop_x         = (((window_end_x - window_start_x) % window_step_x) != 0);
+    const int window_step_x  = 16 / input->info()->element_size();
+    const int window_start_x = window.x().start();
+    const int window_end_x   = window.x().end();
 
-    Window slice = window.first_slice_window_4D();
+    Window win(window);
+    win.set(Window::DimX, Window::Dimension(0, 1, 1));
 
-    if(left_over_loop_x)
+    Iterator input_it(input, win);
+    execute_window_loop(win, [&](const Coordinates & id)
     {
-        // Check if window_end_y_multiple_of is greater than window_start_y
-        if(window_end_x_multiple_of > window_start_x)
+        int x = window_start_x;
+        for(; x <= (window_end_x - window_step_x); x += window_step_x)
         {
-            slice.set(Window::DimX, Window::Dimension(window_start_x, window_end_x_multiple_of, window_step_x));
-        }
-        else
-        {
-            slice.set(Window::DimX, Window::Dimension(0, 0, 1));
-        }
-    }
-
-    do
-    {
-        Iterator input_it(input, slice);
-        execute_window_loop(slice, [&](const Coordinates & id)
-        {
-            auto in = wrapper::vloadq(reinterpret_cast<T *>(input_it.ptr()));
+            auto in = wrapper::vloadq(reinterpret_cast<T *>(input_it.ptr()) + x);
 
             // Reverse 0 axis
             if(axis_bit & 0x1)
@@ -141,39 +128,29 @@ void run_reverse(const Window &window, const ITensor *input, const ITensor *axis
                 in = wrapper::vcombine(wrapper::vgethigh(in), wrapper::vgetlow(in));
             }
 
-            const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - id.x() - window_step_x : id.x();
+            const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - x - window_step_x : x;
             const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y();
             const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z();
             const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3];
 
             auto out_ptr = reinterpret_cast<T *>(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w)));
             wrapper::vstore(out_ptr, in);
-        },
-        input_it);
+        }
 
-        if(left_over_loop_x)
+        // Compute left-over elements
+        for(; x < window_end_x; ++x)
         {
-            slice.set(Window::DimX, Window::Dimension(window_end_x_multiple_of, window_end_x, 1));
+            const auto in = *(reinterpret_cast<T *>(input_it.ptr()) + x);
 
-            Iterator input_it(input, slice);
-
-            // Compute left-over elements along the y dimension (1x1)
-            execute_window_loop(slice, [&](const Coordinates & id)
-            {
-                const auto in = *reinterpret_cast<T *>(input_it.ptr());
-
-                const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - id.x() - 1 : id.x();
-                const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y();
-                const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z();
-                const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3];
+            const int offset_x = (axis_bit & 0x1) ? output->info()->dimension(0) - x - 1 : x;
+            const int offset_y = (axis_bit & 0x2) ? output->info()->dimension(1) - id.y() - 1 : id.y();
+            const int offset_z = (axis_bit & 0x4) ? output->info()->dimension(2) - id.z() - 1 : id.z();
+            const int offset_w = (axis_bit & 0x8) ? output->info()->dimension(3) - id[3] - 1 : id[3];
 
-                *reinterpret_cast<T *>(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w))) = in;
-            },
-            input_it);
+            *reinterpret_cast<T *>(output->ptr_to_element(Coordinates(offset_x, offset_y, offset_z, offset_w))) = in;
         }
-
-    }
-    while(window.slide_window_slice_4D(slice));
+    },
+    input_it);
 }
 
 void NEReverseKernel::run(const Window &window, const ThreadInfo &info)
diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
index 0411b41220..06885d59e5 100644
--- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -46,6 +46,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
       _permuted_input(),
       _permuted_weights(),
       _permuted_output(),
+      _flip_axis(),
       _is_nchw(false),
       _original_weights(nullptr),
       _input(nullptr),
@@ -57,7 +58,7 @@ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr<IMemoryManager> memor
 Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *bias, const ITensorInfo *output, const PadStrideInfo &info)
 {
     ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8, DataType::QASYMM8_SIGNED);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input);
     ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(weights, input);
     const unsigned int width_idx  = get_data_layout_dimension_index(weights->data_layout(), DataLayoutDimension::WIDTH);
@@ -122,6 +123,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     _info             = info;
     _is_prepared      = false;
     _is_nchw          = data_layout == DataLayout::NCHW;
+    _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
 
     const unsigned int pad_left   = info.pad_left();
     const unsigned int pad_right  = info.pad_right();
@@ -139,6 +141,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
     // Output auto initialization if not yet initialized
     auto_init_if_empty(*output->info(), output_shape, 1, input->info()->data_type(), input->info()->quantization_info());
 
+    _flip_axis.allocator()->init(TensorInfo(TensorShape(2U), 1, DataType::U32));
     _memory_group.manage(&_scaled_output);
 
     if(!_is_nchw)
@@ -185,7 +188,7 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
 
         _weights_flipped.allocator()->init(*_permuted_weights.info()->clone());
         _weights_flipped.info()->set_quantization_info(weights->info()->quantization_info());
-        _flip_weights.configure(&_permuted_weights, &_weights_flipped);
+        _flip_weights.configure(&_permuted_weights, &_weights_flipped, &_flip_axis);
 
         // setup the function to convolve the upscaled output
         const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
@@ -230,13 +233,19 @@ void NEDeconvolutionLayer::configure(ITensor *input, const ITensor *weights, con
         _upsample_f.configure(input, &_scaled_output, upsample_info);
 
         _weights_flipped.allocator()->init(weights->info()->clone()->set_data_layout(data_layout));
-        _flip_weights.configure(weights, &_weights_flipped);
+        _flip_weights.configure(weights, &_weights_flipped, &_flip_axis);
 
         // setup the function to convolve the upscaled output
         const PadStrideInfo conv_info(1, 1, 0, 0, 0, 0, DimensionRoundingType::CEIL);
         _conv_f.configure(&_scaled_output, &_weights_flipped, bias, output, conv_info);
     }
     _scaled_output.allocator()->allocate();
+
+    // Setup flip axis data
+    _flip_axis.allocator()->allocate();
+    auto axis_data = reinterpret_cast<uint32_t *>(_flip_axis.buffer());
+    axis_data[0]   = 0;
+    axis_data[1]   = 1;
 }
 
 void NEDeconvolutionLayer::run()
@@ -276,16 +285,13 @@ void NEDeconvolutionLayer::prepare()
 
         // Run weights flipping and mark original weights tensor as unused
         _weights_flipped.allocator()->allocate();
-        NEScheduler::get().schedule(&_flip_weights, Window::DimZ);
+        _flip_weights.run();
         _original_weights->mark_as_unused();
 
         // Prepare convolution
         _conv_f.prepare();
 
-        if(!_weights_flipped.is_used())
-        {
-            _weights_flipped.allocator()->free();
-        }
+        // Unused weights are already released in _conv_f
 
         if(!_is_nchw)
         {
author	Luca Foschiani <luca.foschiani@arm.com>	2020-02-17 17:02:49 +0000
committer	Luca Foschiani <luca.foschiani@arm.com>	2020-04-07 09:04:19 +0000
commit	fedefc3a8d76b9dea5945414324427ef5a01835d (patch)
tree	b2a2f6ab45d8a16ab26b5a99c832a18e207899aa /src
parent	0d008f77b0085619c446d0ab5dc1228a80776706 (diff)
download	ComputeLibrary-fedefc3a8d76b9dea5945414324427ef5a01835d.tar.gz