-rw-r--r--  arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h |  7
-rw-r--r--  src/core/NEON/kernels/NEWinogradLayerKernel.cpp       | 24
-rw-r--r--  src/runtime/NEON/functions/NEWinogradLayer.cpp        | 27
3 files changed, 18 insertions, 40 deletions
diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
index 9ab2e4de11..73b7e8d2b7 100644
--- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -32,18 +32,15 @@ namespace arm_compute
class ITensor;
class NEWinogradLayerKernel;
-class Winograd3x3F32
+class Winograd3x3F32 final
{
public:
friend class NEWinogradLayerKernel;
Winograd3x3F32(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage);
~Winograd3x3F32();
- std::pair<void *, void *> get_nhwc_ptrs(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space);
void transform_weights(const void *const kernel, void *transform_working_space);
void reshape_input(const Tensor4DShape &input_shape, const PaddingType padding_type, const void *const input, void *working_space);
void reshape_output(const Tensor4DShape &input_shape, const PaddingType padding_type, void *const output);
- void nchw2nhwc(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, const void *const input);
- void nhwc2nchw(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, void *const output);
private:
class Private;
diff --git a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
index eaf77e6253..d17630a92e 100644
--- a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -29,9 +29,12 @@
#include "arm_compute/core/TensorInfo.h"
#include "support/ToolchainSupport.h"
-#include "src/core/NEON/kernels/winograd/winograd_shim_nchw.hpp"
+#include "src/core/NEON/kernels/winograd/winograd_gemm.hpp"
-using T = winograd_shim_nchw::Winograd2x2_3x3GEMM<float, float>;
+namespace
+{
+using T = winograd::Winograd2x2_3x3GEMM<float, float>;
+} // namespace
namespace arm_compute
{
@@ -50,16 +53,6 @@ Winograd3x3F32::~Winograd3x3F32()
{
}
-void Winograd3x3F32::nchw2nhwc(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, const void *const input)
-{
- _pimpl->convolver.nchw2nhwc(input_shape, padding_type, working_space, reinterpret_cast<const float *>(input));
-}
-
-void Winograd3x3F32::nhwc2nchw(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, void *const output)
-{
- _pimpl->convolver.nhwc2nchw(input_shape, padding_type, working_space, reinterpret_cast<float *const>(output));
-}
-
void Winograd3x3F32::transform_weights(const void *const kernel, void *transform_working_space)
{
_pimpl->convolver.transform_weights(reinterpret_cast<const float *>(kernel), transform_working_space);
@@ -82,11 +75,6 @@ void Winograd3x3F32::reshape_output(const Tensor4DShape &input_shape, const Padd
#endif /* __aarch64__ */
}
-std::pair<void *, void *> Winograd3x3F32::get_nhwc_ptrs(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space)
-{
- return _pimpl->convolver.get_nhwc_ptrs(input_shape, padding_type, working_space);
-}
-
Winograd3x3F32::Winograd3x3F32(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage)
: _pimpl(support::cpp14::make_unique<Private>(kernel_shape, input_shape, padding_type, kernel_storage))
{
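
The helpers removed above (nchw2nhwc(), nhwc2nchw() and get_nhwc_ptrs()) only shuffled data between memory layouts inside the old NCHW shim; with the move to winograd::Winograd2x2_3x3GEMM that job is handled by NEPermute in NEWinogradLayer instead. For reference, the shuffle itself is a plain index permutation. The sketch below is illustrative only, not the removed implementation; the function name, signature and loop order are made up for this note.

    #include <cstddef>

    // Copy a dense float tensor from NCHW to NHWC order.
    // n = batches, c = channels, h = rows, w = columns.
    static void nchw_to_nhwc(const float *src, float *dst,
                             std::size_t n, std::size_t c,
                             std::size_t h, std::size_t w)
    {
        for (std::size_t in = 0; in < n; ++in)
        {
            for (std::size_t ih = 0; ih < h; ++ih)
            {
                for (std::size_t iw = 0; iw < w; ++iw)
                {
                    for (std::size_t ic = 0; ic < c; ++ic)
                    {
                        const std::size_t src_idx = ((in * c + ic) * h + ih) * w + iw; // NCHW offset
                        const std::size_t dst_idx = ((in * h + ih) * w + iw) * c + ic; // NHWC offset
                        dst[dst_idx] = src[src_idx];
                    }
                }
            }
        }
    }

The inverse direction (NHWC back to NCHW) swaps the two offset expressions, which is what the permute-based output path in NEWinogradLayer.cpp below now covers.
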
diff --git a/src/runtime/NEON/functions/NEWinogradLayer.cpp b/src/runtime/NEON/functions/NEWinogradLayer.cpp
index 800153e8b1..21f298ca25 100644
--- a/src/runtime/NEON/functions/NEWinogradLayer.cpp
+++ b/src/runtime/NEON/functions/NEWinogradLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017, 2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -125,6 +125,13 @@ void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, co
// configure the kernel to transform the input tensor from NCHW -> NHWC
_permute_input.configure(input, &_input_nhwc, PermutationVector(2U, 0U, 1U));
+ // configure and allocate dst tensor to be used to convert from winograd domain to spatial domain when calling reshape_output()
+ TensorInfo info(TensorShape(_output->info()->dimension(2), _output->info()->dimension(0),
+ _output->info()->dimension(1), _output->info()->dimension(3)),
+ 1, _output->info()->data_type());
+ _output_nhwc.allocator()->init(info);
+
+ _output_nhwc.allocator()->allocate();
_weights_hwio.allocator()->allocate();
_input_nhwc.allocator()->allocate();
}
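
A note on the shape built in this hunk: ACL indexes tensor dimensions fastest-first, so for the NCHW output dimension(0) is the width, dimension(1) the height, dimension(2) the channels and dimension(3) the batches (the removed code further down reads them exactly that way). TensorShape(dimension(2), dimension(0), dimension(1), dimension(3)) therefore describes the same data with channels innermost, i.e. the NHWC staging layout that reshape_output() writes into. A minimal sketch of that dimension gather, kept free of the ACL types; nhwc_shape_from_nchw() is a hypothetical helper, not library API.

    #include <array>
    #include <cstddef>

    // dims holds an NCHW tensor in ACL's fastest-first order:
    // dims[0] = W, dims[1] = H, dims[2] = C, dims[3] = N.
    // The result mirrors TensorShape(dim(2), dim(0), dim(1), dim(3)):
    // channels first, i.e. NHWC.
    static std::array<std::size_t, 4>
    nhwc_shape_from_nchw(const std::array<std::size_t, 4> &dims)
    {
        const std::array<std::size_t, 4> nhwc = { dims[2], dims[0], dims[1], dims[3] };
        return nhwc;
    }
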
@@ -145,9 +152,6 @@ void NEWinogradLayer::run()
//Bring channels to the front as Winograd code expects the tensor to be in the format NHWC
_permute_input.run();
- //Get ptrs into the workspace
- std::pair<void *, void *> nhwc_ptrs = _conv->get_nhwc_ptrs(in_shape, padding, _workspace.buffer());
-
//Setup matrices ptrs and transform the input tensor to the appropriate form before running GEMM.
_conv->reshape_input(in_shape, padding, reinterpret_cast<float *>(_input_nhwc.buffer()), _workspace.buffer());
@@ -155,21 +159,10 @@ void NEWinogradLayer::run()
NEScheduler::get().schedule(&_winograd_kernel, Window::DimX);
//Transform the output to the appropriate form
- _conv->reshape_output(in_shape, padding, nhwc_ptrs.first);
-
- const unsigned int out_width = _output->info()->dimension(0);
- const unsigned int out_height = _output->info()->dimension(1);
- const unsigned int out_channels = _output->info()->dimension(2);
- const unsigned int out_batches = _output->info()->dimension(3);
-
- // We create a temporary tensor with the results in the workspace so that the we can run a function to reorder from NHWC -> NCHW
- Tensor output_nhwc;
- TensorInfo info(TensorShape(out_channels, out_width, out_height, out_batches), 1, _output->info()->data_type());
- output_nhwc.allocator()->init(info);
- output_nhwc.allocator()->import_memory(Memory(static_cast<uint8_t *>(nhwc_ptrs.first)));
+ _conv->reshape_output(in_shape, padding, reinterpret_cast<float *>(_output_nhwc.buffer()));
// Reorder the convolved output to ACL's ordering NCHW
- _permute_output.configure(&output_nhwc, _output, PermutationVector(1U, 2U, 0U));
+ _permute_output.configure(&_output_nhwc, _output, PermutationVector(1U, 2U, 0U));
_permute_output.run();
_memory_group.release();
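
Taken together, run() now stages the Winograd result in the preallocated _output_nhwc tensor and lets NEPermute restore ACL's NCHW ordering, instead of aliasing workspace memory through get_nhwc_ptrs(). Reading a PermutationVector as "destination dimension i takes source dimension perm[i]" (the convention consistent with the shapes used in this patch, though worth confirming against the library's permute helper), the output-side PermutationVector(1U, 2U, 0U) is just the inverse of the input-side PermutationVector(2U, 0U, 1U). A small self-contained check under that assumption; apply() and the example sizes are made up for this note.

    #include <array>
    #include <cassert>
    #include <cstddef>

    // Gather-style permutation: out[i] = in[perm[i]].
    static std::array<std::size_t, 3> apply(const std::array<std::size_t, 3> &in,
                                            const std::array<std::size_t, 3> &perm)
    {
        const std::array<std::size_t, 3> out = { in[perm[0]], in[perm[1]], in[perm[2]] };
        return out;
    }

    int main()
    {
        const std::array<std::size_t, 3> nchw    = { 7, 5, 16 }; // {W, H, C}, sizes chosen arbitrarily
        const std::array<std::size_t, 3> to_nhwc = { 2, 0, 1 };  // as used by _permute_input
        const std::array<std::size_t, 3> to_nchw = { 1, 2, 0 };  // as used by _permute_output

        const std::array<std::size_t, 3> nhwc = apply(nchw, to_nhwc); // {C, W, H} = {16, 7, 5}
        assert(apply(nhwc, to_nchw) == nchw);                         // round trip restores NCHW
        return 0;
    }
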