diff options
-rw-r--r-- | arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h | 38 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEWinogradLayer.h | 2 | ||||
-rwxr-xr-x | scripts/clang_tidy_rules.py | 1 | ||||
-rw-r--r-- | src/core/NEON/kernels/NEWinogradLayerKernel.cpp | 79 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/gemm.hpp (renamed from arm_compute/core/NEON/kernels/winograd/gemm.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/gemm/a64_sgemm.hpp (renamed from arm_compute/core/NEON/kernels/winograd/gemm/a64_sgemm.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/gemm/a64_sgemm_4x16.hpp (renamed from arm_compute/core/NEON/kernels/winograd/gemm/a64_sgemm_4x16.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/perf.h (renamed from arm_compute/core/NEON/kernels/winograd/perf.h) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/profiler.hpp (renamed from arm_compute/core/NEON/kernels/winograd/profiler.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/shims.hpp (renamed from arm_compute/core/NEON/kernels/winograd/shims.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/input_2x2_3x3.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3.hpp) | 3 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float_channelwise.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float_channelwise.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3/a64_float.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3/a64_float.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/output_2x2_3x3.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float_two_stage.hpp (renamed from arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float_two_stage.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/utils.hpp (renamed from arm_compute/core/NEON/kernels/winograd/utils.hpp) | 0 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/winograd_gemm.hpp (renamed from arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp) | 1 | ||||
-rw-r--r-- | src/core/NEON/kernels/winograd/winograd_shim_nchw.hpp (renamed from arm_compute/core/NEON/kernels/winograd/winograd_shim_nchw.hpp) | 1 | ||||
-rw-r--r-- | src/runtime/NEON/functions/NEWinogradLayer.cpp | 8 |
23 files changed, 119 insertions, 14 deletions
diff --git a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h index 1e7ca64b8c..3ab3aa792b 100644 --- a/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEWinogradLayerKernel.h @@ -25,17 +25,34 @@ #define __ARM_COMPUTE_NEGEMMWINOGRADLAYERKERNEL_H__ #include "arm_compute/core/NEON/INEKernel.h" - -#include "arm_compute/core/NEON/kernels/winograd/winograd_shim_nchw.hpp" +#include "arm_compute/core/NEON/kernels/winograd/tensor.hpp" namespace arm_compute { class ITensor; +class NEWinogradLayerKernel; +class Winograd3x3F32 +{ +public: + friend class NEWinogradLayerKernel; + Winograd3x3F32(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage); + ~Winograd3x3F32(); + std::pair<void *, void *> get_nhwc_ptrs(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space); + void transform_weights(const void *const kernel, void *transform_working_space); + void reshape_input(const Tensor4DShape &input_shape, const PaddingType padding_type, const void *const input, void *working_space); + void reshape_output(const Tensor4DShape &input_shape, const PaddingType padding_type, void *const output); + void nchw2nhwc(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, const void *const input); + void nhwc2nchw(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, void *const output); + +private: + class Private; + std::unique_ptr<Private> _pimpl; +}; class NEWinogradLayerKernel : public INEKernel { public: - using Winograd3x3F32 = winograd_shim_nchw::Winograd2x2_3x3GEMM<float, float>; + // using Winograd3x3F32 = winograd_shim_nchw::Winograd2x2_3x3GEMM<float, float>; /** Constructor */ NEWinogradLayerKernel(); @@ -61,9 +78,22 @@ public: // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; + /* Get the memory required to instantiate a new Winograd operator. + */ + static size_t get_kernel_storage_size(const KernelShape &shape); + + /* Get the memory required to apply a Winograd operator to some input. + */ + static size_t get_working_space_size(const Tensor4DShape &input_shape, const KernelShape &k_shape, const PaddingType padding); + + /* Get the memory required to transform the kernel. + */ + static size_t get_kernel_transform_working_size(const KernelShape &shape); + protected: Winograd3x3F32 *_convolver; - ITensor *_output; + // std::unique_ptr<Winograd3x3F32> _conv; + ITensor *_output; }; } // namespace arm_compute diff --git a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h index 7dca4570e5..77707060ec 100644 --- a/arm_compute/runtime/NEON/functions/NEWinogradLayer.h +++ b/arm_compute/runtime/NEON/functions/NEWinogradLayer.h @@ -67,8 +67,6 @@ public: NEWinogradLayer &operator=(const NEWinogradLayer &) = delete; private: - using Winograd3x3F32 = NEWinogradLayerKernel::Winograd3x3F32; - MemoryGroup _memory_group; NEWinogradLayerKernel _winograd_kernel; Tensor _weights_workspace; diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 5b27dd5be5..7a13d045e7 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -91,6 +91,7 @@ def filter_clang_tidy_lines( lines ): ("parameter 'memory_manager' is unused" in line) or ("parameter 'memory_manager' is copied for each invocation but only used as a const reference" in line) or ("DeconvolutionLayer.cpp" in line and "casting (double + 0.5) to integer leads to incorrect rounding; consider using lround" in line) or + ("NEWinogradLayerKernel.cpp" in line and "use '= default' to define a trivial destructor" in line) or "3rdparty" in line): print_context=False continue diff --git a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp index b9109dcff2..fe633368c0 100644 --- a/src/core/NEON/kernels/NEWinogradLayerKernel.cpp +++ b/src/core/NEON/kernels/NEWinogradLayerKernel.cpp @@ -27,9 +27,86 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/TensorInfo.h" +#include "support/ToolchainSupport.h" + +#include "src/core/NEON/kernels/winograd/winograd_shim_nchw.hpp" + +using T = winograd_shim_nchw::Winograd2x2_3x3GEMM<float, float>; namespace arm_compute { +class Winograd3x3F32::Private +{ +public: + Private(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage) + : convolver(kernel_shape, input_shape, padding_type, kernel_storage) + { + } + + T convolver; +}; + +Winograd3x3F32::~Winograd3x3F32() +{ +} + +void Winograd3x3F32::nchw2nhwc(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, const void *const input) +{ + _pimpl->convolver.nchw2nhwc(input_shape, padding_type, working_space, reinterpret_cast<const float *>(input)); +} + +void Winograd3x3F32::nhwc2nchw(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space, void *const output) +{ + _pimpl->convolver.nhwc2nchw(input_shape, padding_type, working_space, reinterpret_cast<float *const>(output)); +} + +void Winograd3x3F32::transform_weights(const void *const kernel, void *transform_working_space) +{ + _pimpl->convolver.transform_weights(reinterpret_cast<const float *>(kernel), transform_working_space); +} + +void Winograd3x3F32::reshape_input(const Tensor4DShape &input_shape, const PaddingType padding_type, const void *const input, void *working_space) +{ + _pimpl->convolver.reshape_input(input_shape, padding_type, reinterpret_cast<const float *>(input), working_space); +} + +void Winograd3x3F32::reshape_output(const Tensor4DShape &input_shape, const PaddingType padding_type, void *const output) +{ +#if defined(__aarch64__) + _pimpl->convolver.reshape_output(input_shape, padding_type, reinterpret_cast<float *const>(output)); +#else /* __aarch64__ */ + ARM_COMPUTE_UNUSED(input_shape); + ARM_COMPUTE_UNUSED(padding_type); + ARM_COMPUTE_UNUSED(output); + ARM_COMPUTE_ERROR("Not implemented"); +#endif /* __aarch64__ */ +} + +std::pair<void *, void *> Winograd3x3F32::get_nhwc_ptrs(const Tensor4DShape &input_shape, const PaddingType padding_type, void *working_space) +{ + return _pimpl->convolver.get_nhwc_ptrs(input_shape, padding_type, working_space); +} + +Winograd3x3F32::Winograd3x3F32(const KernelShape &kernel_shape, const Tensor4DShape input_shape, const PaddingType padding_type, void *kernel_storage) + : _pimpl(support::cpp14::make_unique<Private>(kernel_shape, input_shape, padding_type, kernel_storage)) +{ +} + +size_t NEWinogradLayerKernel::get_kernel_storage_size(const KernelShape &shape) +{ + return T::get_kernel_storage_size(shape); +} + +size_t NEWinogradLayerKernel::get_working_space_size(const Tensor4DShape &input_shape, const KernelShape &k_shape, const PaddingType padding) +{ + return T::get_working_space_size(input_shape, k_shape, padding); +} + +size_t NEWinogradLayerKernel::get_kernel_transform_working_size(const KernelShape &shape) +{ + return T::get_kernel_transform_working_size(shape); +} + NEWinogradLayerKernel::NEWinogradLayerKernel() : _convolver(nullptr), _output(nullptr) { @@ -55,6 +132,6 @@ void NEWinogradLayerKernel::run(const Window &window, const ThreadInfo &info) const size_t num_gemms_per_thread = 16 / num_threads; const size_t first_gemm = tid * num_gemms_per_thread; const size_t last_gemm = (tid == (num_threads - 1)) ? 15 : first_gemm + num_gemms_per_thread - 1; - _convolver->execute(first_gemm, last_gemm); + _convolver->_pimpl->convolver.execute(first_gemm, last_gemm); } } // namespace arm_compute diff --git a/arm_compute/core/NEON/kernels/winograd/gemm.hpp b/src/core/NEON/kernels/winograd/gemm.hpp index 564016a646..564016a646 100644 --- a/arm_compute/core/NEON/kernels/winograd/gemm.hpp +++ b/src/core/NEON/kernels/winograd/gemm.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/gemm/a64_sgemm.hpp b/src/core/NEON/kernels/winograd/gemm/a64_sgemm.hpp index e1b7488c31..e1b7488c31 100644 --- a/arm_compute/core/NEON/kernels/winograd/gemm/a64_sgemm.hpp +++ b/src/core/NEON/kernels/winograd/gemm/a64_sgemm.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/gemm/a64_sgemm_4x16.hpp b/src/core/NEON/kernels/winograd/gemm/a64_sgemm_4x16.hpp index e74610ef27..e74610ef27 100644 --- a/arm_compute/core/NEON/kernels/winograd/gemm/a64_sgemm_4x16.hpp +++ b/src/core/NEON/kernels/winograd/gemm/a64_sgemm_4x16.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/perf.h b/src/core/NEON/kernels/winograd/perf.h index 11fb0c452f..11fb0c452f 100644 --- a/arm_compute/core/NEON/kernels/winograd/perf.h +++ b/src/core/NEON/kernels/winograd/perf.h diff --git a/arm_compute/core/NEON/kernels/winograd/profiler.hpp b/src/core/NEON/kernels/winograd/profiler.hpp index 143192b589..143192b589 100644 --- a/arm_compute/core/NEON/kernels/winograd/profiler.hpp +++ b/src/core/NEON/kernels/winograd/profiler.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/shims.hpp b/src/core/NEON/kernels/winograd/shims.hpp index 249e5757f0..249e5757f0 100644 --- a/arm_compute/core/NEON/kernels/winograd/shims.hpp +++ b/src/core/NEON/kernels/winograd/shims.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms.hpp b/src/core/NEON/kernels/winograd/transforms.hpp index 8546ee9e2e..8546ee9e2e 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms.hpp +++ b/src/core/NEON/kernels/winograd/transforms.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3.hpp b/src/core/NEON/kernels/winograd/transforms/input_2x2_3x3.hpp index 7013c66ac0..ca8d012e5e 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3.hpp +++ b/src/core/NEON/kernels/winograd/transforms/input_2x2_3x3.hpp @@ -22,7 +22,8 @@ * SOFTWARE. */ #pragma once -#include "../tensor.hpp" +#include "arm_compute/core/NEON/kernels/winograd/tensor.hpp" + namespace winograd { /* Transform an input tensor into the Winograd domain. diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float.hpp b/src/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float.hpp index a99cbe325b..a99cbe325b 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float.hpp +++ b/src/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float_channelwise.hpp b/src/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float_channelwise.hpp index ad1ad55291..ad1ad55291 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float_channelwise.hpp +++ b/src/core/NEON/kernels/winograd/transforms/input_2x2_3x3/a64_float_channelwise.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3.hpp b/src/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3.hpp index 033442aa14..033442aa14 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3.hpp +++ b/src/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3/a64_float.hpp b/src/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3/a64_float.hpp index 3dd62d1ac1..3dd62d1ac1 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3/a64_float.hpp +++ b/src/core/NEON/kernels/winograd/transforms/kernel_2x2_3x3/a64_float.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3.hpp b/src/core/NEON/kernels/winograd/transforms/output_2x2_3x3.hpp index 0992c0bb44..0992c0bb44 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3.hpp +++ b/src/core/NEON/kernels/winograd/transforms/output_2x2_3x3.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float.hpp b/src/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float.hpp index 5925f9d569..5925f9d569 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float.hpp +++ b/src/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float_two_stage.hpp b/src/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float_two_stage.hpp index f551b12b52..f551b12b52 100644 --- a/arm_compute/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float_two_stage.hpp +++ b/src/core/NEON/kernels/winograd/transforms/output_2x2_3x3/a64_float_two_stage.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/utils.hpp b/src/core/NEON/kernels/winograd/utils.hpp index 14e709f028..14e709f028 100644 --- a/arm_compute/core/NEON/kernels/winograd/utils.hpp +++ b/src/core/NEON/kernels/winograd/utils.hpp diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp b/src/core/NEON/kernels/winograd/winograd_gemm.hpp index c990cd0252..59afa2f5ab 100644 --- a/arm_compute/core/NEON/kernels/winograd/winograd_gemm.hpp +++ b/src/core/NEON/kernels/winograd/winograd_gemm.hpp @@ -26,7 +26,6 @@ #include <cstdlib> #include <cassert> -#include "alloc.hpp" #include "gemm.hpp" #include "profiler.hpp" #include "utils.hpp" diff --git a/arm_compute/core/NEON/kernels/winograd/winograd_shim_nchw.hpp b/src/core/NEON/kernels/winograd/winograd_shim_nchw.hpp index 4c7e291c58..c5bcffbaef 100644 --- a/arm_compute/core/NEON/kernels/winograd/winograd_shim_nchw.hpp +++ b/src/core/NEON/kernels/winograd/winograd_shim_nchw.hpp @@ -25,7 +25,6 @@ #include <cstdint> #include <cstdlib> -#include "alloc.hpp" #include "gemm.hpp" #include "profiler.hpp" #include "utils.hpp" diff --git a/src/runtime/NEON/functions/NEWinogradLayer.cpp b/src/runtime/NEON/functions/NEWinogradLayer.cpp index a9dec4ea0d..3251de4ae4 100644 --- a/src/runtime/NEON/functions/NEWinogradLayer.cpp +++ b/src/runtime/NEON/functions/NEWinogradLayer.cpp @@ -83,18 +83,18 @@ void NEWinogradLayer::configure(const ITensor *input, const ITensor *weights, co // Get the memory required to instantiate a new Winograd operator. constexpr size_t kstore_alignment = 64; - const size_t kernel_storage_per_thread = Winograd3x3F32::get_kernel_storage_size(kernel_shape); + const size_t kernel_storage_per_thread = NEWinogradLayerKernel::get_kernel_storage_size(kernel_shape); _kernel_storage.allocator()->init(TensorInfo(TensorShape{ (kernel_storage_per_thread + kstore_alignment - 1) }, 1, DataType::U8)); _memory_group.manage(&_kernel_storage); // Get workbench size and allocate memory constexpr size_t wspace_alignment = 64; - const size_t ws_size = Winograd3x3F32::get_working_space_size(in_shape, kernel_shape, padding); + const size_t ws_size = NEWinogradLayerKernel::get_working_space_size(in_shape, kernel_shape, padding); _workspace.allocator()->init(TensorInfo(TensorShape{ (ws_size + wspace_alignment - 1) }, 1, DataType::U8)); _memory_group.manage(&_workspace); // Workspace for weights transform - const size_t weights_transform_size = Winograd3x3F32::get_kernel_transform_working_size(kernel_shape); + const size_t weights_transform_size = NEWinogradLayerKernel::get_kernel_transform_working_size(kernel_shape); _weights_workspace.allocator()->init(TensorInfo(TensorShape{ (weights_transform_size + wspace_alignment - 1) }, 1, DataType::U8)); _memory_group.manage(&_weights_workspace); @@ -125,7 +125,7 @@ void NEWinogradLayer::run() _conv->nchw2nhwc(in_shape, padding, _workspace.buffer(), reinterpret_cast<const float *>(_input->buffer())); //Get ptrs into the workspace - std::pair<float *, float *> nhwc_ptrs = _conv->get_nhwc_ptrs(in_shape, padding, _workspace.buffer()); + std::pair<void *, void *> nhwc_ptrs = _conv->get_nhwc_ptrs(in_shape, padding, _workspace.buffer()); //Setup matrices ptrs and transfor the input tensor to the appropriate form before running GEMM. _conv->reshape_input(in_shape, padding, nhwc_ptrs.second, _workspace.buffer()); |