From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001
From: Michalis Spyrou
Date: Wed, 21 Oct 2020 00:04:14 +0100
Subject: COMPMID-3638: Move NEON kernels

Signed-off-by: Michalis Spyrou
Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285
Comments-Addressed: Arm Jenkins
Reviewed-by: Georgios Pinitas
Tested-by: Arm Jenkins
---
 src/runtime/NEON/functions/NESoftmaxLayer.cpp | 30 +++++++++++++++++++--------
 1 file changed, 21 insertions(+), 9 deletions(-)

(limited to 'src/runtime/NEON/functions/NESoftmaxLayer.cpp')

diff --git a/src/runtime/NEON/functions/NESoftmaxLayer.cpp b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
index 4f773861d2..e79ab0ee2d 100644
--- a/src/runtime/NEON/functions/NESoftmaxLayer.cpp
+++ b/src/runtime/NEON/functions/NESoftmaxLayer.cpp
@@ -24,13 +24,19 @@
 #include "arm_compute/runtime/NEON/functions/NESoftmaxLayer.h"
 
 #include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/NEON/kernels/NESoftmaxLayerKernel.h"
 #include "arm_compute/core/utils/misc/ShapeCalculator.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
+#include "src/core/NEON/kernels/NESoftmaxLayerKernel.h"
 #include "src/core/helpers/SoftmaxHelpers.h"
+#include "support/MemorySupport.h"
 
 namespace arm_compute
 {
+template <bool IS_LOG>
+NESoftmaxLayerGeneric<IS_LOG>::~NESoftmaxLayerGeneric() = default;
+
 template <bool IS_LOG>
 NESoftmaxLayerGeneric<IS_LOG>::NESoftmaxLayerGeneric(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _permute_input(), _permute_output(), _max_kernel(), _softmax_kernel(), _fill_border_kernel(), _max(), _tmp(), _input_permuted(), _output_permuted(),
@@ -76,15 +82,17 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f
     _memory_group.manage(&_max);
     _memory_group.manage(&_tmp);
 
-    // Configure Kernels
-    _max_kernel.configure(tmp_input, &_max);
+    // Configure kernels
+    _max_kernel = arm_compute::support::cpp14::make_unique<NELogits1DMaxKernel>();
+    _softmax_kernel = arm_compute::support::cpp14::make_unique<NELogits1DSoftmaxKernel<IS_LOG>>();
+    _max_kernel->configure(tmp_input, &_max);
     if(_needs_permute)
     {
         // Add to the memory manager _output_permuted
         _memory_group.manage(&_output_permuted);
 
         // The normalization kernel stores the result in a permuted output tensor
-        _softmax_kernel.configure(tmp_input, &_max, &_output_permuted, beta, &_tmp);
+        _softmax_kernel->configure(tmp_input, &_max, &_output_permuted, beta, &_tmp);
         _input_permuted.allocator()->allocate();
 
         // Re-permute the permuted output into the requested (4D) output
@@ -96,8 +104,9 @@ void NESoftmaxLayerGeneric<IS_LOG>::configure(ITensor *input, ITensor *output, f
     else
     {
         // Softmax 2D case
-        _fill_border_kernel.configure(tmp_input, _max_kernel.border_size(), BorderMode::REPLICATE);
-        _softmax_kernel.configure(tmp_input, &_max, output, beta, &_tmp);
+        _fill_border_kernel = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+        _fill_border_kernel->configure(tmp_input, _max_kernel->border_size(), BorderMode::REPLICATE);
+        _softmax_kernel->configure(tmp_input, &_max, output, beta, &_tmp);
     }
 
     // Allocate intermediate buffers
@@ -152,10 +161,13 @@ void NESoftmaxLayerGeneric<IS_LOG>::run()
     {
         _permute_input.run();
     }
+    else
+    {
+        NEScheduler::get().schedule(_fill_border_kernel.get(), Window::DimY);
+    }
 
-    NEScheduler::get().schedule(&_fill_border_kernel, Window::DimY);
-    NEScheduler::get().schedule(&_max_kernel, Window::DimY);
-    NEScheduler::get().schedule(&_softmax_kernel, Window::DimY);
+    NEScheduler::get().schedule(_max_kernel.get(), Window::DimY);
+    NEScheduler::get().schedule(_softmax_kernel.get(), Window::DimY);
 
     if(_needs_permute)
     {
-- 
cgit v1.2.1