COMPMID-784: Winograd refactoring

Removed the code in the function's run() method that created a subtensor and imported memory from the workspace. The subtensor is no longer needed because the tensors are now reordered with NEPermute. The call to winograd::Winograd2x2_3x3GEMM<TOut, TIn>::reshape_output() transforms the results from the Winograd domain into the spatial domain and stores them in the member _output_nhwc.

Change-Id: Iae09d26c7587cd2eed98968c3ce214e20031038e
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/115483
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Jenkins <bsgcomp@arm.com>
author: Pablo Tello <pablo.tello@arm.com> 2018-01-09 10:30:27 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:42:33 +0000
commit: 4e2c139dae6afeda11da165bfff8fba60d2e84f3 (patch)
tree: f3fecbeaa19a2e8fe2ac37f66d948ea96055f683
parent: e671d6a4d0ba5cfa86ae1856433eab597bd996d2 (diff)
download: ComputeLibrary-4e2c139dae6afeda11da165bfff8fba60d2e84f3.tar.gz
3 files changed, 18 insertions, 40 deletions
diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
index 9ab2e4de11..73b7e8d2b7 100644
--- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -32,18 +32,15 @@ namespace arm_compute
 class ITensor;
 class NEWinogradLayerKernel;
 
-class Winograd3x3F32
+class Winograd3x3F32 final
 {
 public:
     friend class NEWinogradLayerKernel;
     Winograd3x3F32(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage);
     ~Winograd3x3F32();
-    std::pair<void *, void *> get_nhwc_ptrs(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space);
     void transform_weights(const void *const kernel, void *transform_working_space);
     void reshape_input(const Tensor4DShape &input_shape, const PaddingType padding_type, const void *const input, void *working_space);
     void reshape_output(const Tensor4DShape &input_shape, const PaddingType padding_type, void *const output);
-    void nchw2nhwc(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, const void *const input);
-    void nhwc2nchw(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, void *const output);
 
 private:
     class Private;
diff --git a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
index eaf77e6253..d17630a92e 100644
--- a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -29,9 +29,12 @@
 #include "arm_compute/core/TensorInfo.h"
 #include "support/ToolchainSupport.h"
 
-#include "src/core/NEON/kernels/winograd/winograd_shim_nchw.hpp"
+#include "src/core/NEON/kernels/winograd/winograd_gemm.hpp"
 
-using T = winograd_shim_nchw::Winograd2x2_3x3GEMM<float, float>;
+namespace
+{
+using T = winograd::Winograd2x2_3x3GEMM<float, float>;
+} // namespace
 
 namespace arm_compute
 {
@@ -50,16 +53,6 @@ Winograd3x3F32::~Winograd3x3F32()
 {
 }
 
-void Winograd3x3F32::nchw2nhwc(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, const void *const input)
-{
-    _pimpl->convolver.nchw2nhwc(input_shape, padding_type, working_space, reinterpret_cast<const float *>(input));
-}
-
-void Winograd3x3F32::nhwc2nchw(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, void *const output)
-{
-    _pimpl->convolver.nhwc2nchw(input_shape, padding_type, working_space, reinterpret_cast<float *const>(output));
-}
-
 void Winograd3x3F32::transform_weights(const void *const kernel, void *transform_working_space)
 {
     _pimpl->convolver.transform_weights(reinterpret_cast<const float *>(kernel), transform_working_space);
@@ -82,11 +75,6 @@ void Winograd3x3F32::reshape_output(const Tensor4DShape &input_shape, const Padd
 #endif /* __aarch64__ */
 }
 
-std::pair<void *, void *> Winograd3x3F32::get_nhwc_ptrs(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space)
-{
-    return _pimpl->convolver.get_nhwc_ptrs(input_shape, padding_type, working_space);
-}
-
 Winograd3x3F32::Winograd3x3F32(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage)
     : _pimpl(support::cpp14::make_unique<Private>(kernel_shape, input_shape, padding_type, kernel_storage))
 {
diff --git a/src/runtime/NEON/functions/NEWinogradLayer.cpp b/src/runtime/NEON/functions/NEWinogradLayer.cpp
index 800153e8b1..21f298ca25 100644
--- a/src/runtime/NEON/functions/NEWinogradLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -125,6 +125,13 @@ void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, co
     // configure the kernel to transform the input tensor from NCHW -> NHWC
     _permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
 
+    // configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling to reshape_output()
+    TensorInfo info(TensorShape(_output->info()->dimension(2), _output->info()->dimension(0),
+                                _output->info()->dimension(1), _output->info()->dimension(3)),
+                    1, _output->info()->data_type());
+    _output_nhwc.allocator()->init(info);
+
+    _output_nhwc.allocator()->allocate();
     _weights_hwio.allocator()->allocate();
     _input_nhwc.allocator()->allocate();
 }
@@ -145,9 +152,6 @@ void NEWinogradLayer::run()
     //Bring channels to the front as Winograd code expects the tensor to be in the format NHWC
     _permute_input.run();
 
-    //Get ptrs into the workspace
-    std::pair<void *, void *> nhwc_ptrs = _conv->get_nhwc_ptrs(in_shape, padding, _workspace.buffer());
-
     //Setup matrices ptrs and transfor the input tensor to the appropriate form before running GEMM.
     _conv->reshape_input(in_shape, padding, reinterpret_cast<float *>(_input_nhwc.buffer()), _workspace.buffer());
 
@@ -155,21 +159,10 @@ void NEWinogradLayer::run()
     NEScheduler::get().schedule(&_winograd_kernel, Window::DimX);
 
     //Transform the output to the appropriate form
-    _conv->reshape_output(in_shape, padding, nhwc_ptrs.first);
-
-    const unsigned int out_width    = _output->info()->dimension(0);
-    const unsigned int out_height   = _output->info()->dimension(1);
-    const unsigned int out_channels = _output->info()->dimension(2);
-    const unsigned int out_batches  = _output->info()->dimension(3);
-
-    // We create a temporary tensor with the results in the workspace so that the we can run a function to reorder from NHWC -> NCHW
-    Tensor     output_nhwc;
-    TensorInfo info(TensorShape(out_channels, out_width, out_height, out_batches), 1, _output->info()->data_type());
-    output_nhwc.allocator()->init(info);
-    output_nhwc.allocator()->import_memory(Memory(static_cast<uint8_t *>(nhwc_ptrs.first)));
+    _conv->reshape_output(in_shape, padding, reinterpret_cast<float *>(_output_nhwc.buffer()));
 
     // Reorder the convoluted output to ACL's ordering NCHW
-    _permute_output.configure(&output_nhwc, _output, PermutationVector(1U, 2U, 0U));
+    _permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
     _permute_output.run();
 
     _memory_group.release();
author	Pablo Tello <pablo.tello@arm.com>	2018-01-09 10:30:27 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:42:33 +0000
commit	4e2c139dae6afeda11da165bfff8fba60d2e84f3 (patch)
tree	f3fecbeaa19a2e8fe2ac37f66d948ea96055f683
parent	e671d6a4d0ba5cfa86ae1856433eab597bd996d2 (diff)
download	ComputeLibrary-4e2c139dae6afeda11da165bfff8fba60d2e84f3.tar.gz