From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Oct 2020 00:04:14 +0100 Subject: COMPMID-3638: Move NEON kernels Signed-off-by: Michalis Spyrou Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- src/runtime/NEON/functions/NEConvolution.cpp | 48 ++++++++++++++++++++-------- 1 file changed, 35 insertions(+), 13 deletions(-) (limited to 'src/runtime/NEON/functions/NEConvolution.cpp') diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp index 8200a08ca8..07ac8bd42b 100644 --- a/src/runtime/NEON/functions/NEConvolution.cpp +++ b/src/runtime/NEON/functions/NEConvolution.cpp @@ -25,28 +25,38 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/ITensor.h" -#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h" #include "arm_compute/core/PixelValue.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Utils.h" #include "arm_compute/core/Validate.h" #include "arm_compute/runtime/NEON/NEScheduler.h" #include "arm_compute/runtime/TensorAllocator.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NEConvolutionKernel.h" +#include "src/core/NEON/kernels/NEFillBorderKernel.h" #include "support/MemorySupport.h" #include #include -using namespace arm_compute; +namespace arm_compute +{ +NEConvolution3x3::~NEConvolution3x3() = default; void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +template +NEConvolutionSquare::~NEConvolutionSquare() = default; + template NEConvolutionSquare::NEConvolutionSquare(std::shared_ptr memory_manager) : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler() @@ -66,6 +76,7 @@ void NEConvolutionSquare::configure(ITensor *input, ITensor *output _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size); + auto b = arm_compute::support::cpp14::make_unique(); if(_is_separable) { DataType intermediate_type = DataType::UNKNOWN; @@ -82,35 +93,40 @@ void NEConvolutionSquare::configure(ITensor *input, ITensor *output scale = calculate_matrix_scale(conv, matrix_size); } - _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); - _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); + _kernel_hor = arm_compute::support::cpp14::make_unique>(); + _kernel_vert = arm_compute::support::cpp14::make_unique>(); + + _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED); + _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED); _tmp.allocator()->allocate(); - _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value)); + b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value)); } else { - _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); - _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value)); + _kernel = arm_compute::support::cpp14::make_unique>(); + _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); } + _border_handler = std::move(b); } template void NEConvolutionSquare::run() { - NEScheduler::get().schedule(&_border_handler, Window::DimZ); + NEScheduler::get().schedule(_border_handler.get(), Window::DimZ); if(_is_separable) { MemoryGroupResourceScope scope_mg(_memory_group); - NEScheduler::get().schedule(&_kernel_hor, Window::DimY); - NEScheduler::get().schedule(&_kernel_vert, Window::DimY); + NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY); + NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY); } else { - NEScheduler::get().schedule(&_kernel, Window::DimY); + NEScheduler::get().schedule(_kernel.get(), Window::DimY); } } @@ -118,10 +134,16 @@ template class arm_compute::NEConvolutionSquare<5>; template class arm_compute::NEConvolutionSquare<7>; template class arm_compute::NEConvolutionSquare<9>; +NEConvolutionRectangle::~NEConvolutionRectangle() = default; + void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value) { auto k = arm_compute::support::cpp14::make_unique(); k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED); _kernel = std::move(k); - _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + + auto b = arm_compute::support::cpp14::make_unique(); + b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value)); + _border_handler = std::move(b); } +} // namespace arm_compute -- cgit v1.2.1