diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-10-21 00:04:14 +0100 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-11-03 15:10:47 +0000 |
commit | ebcebf1dee7f8314976b1e0cabd62b4cf893d765 (patch) | |
tree | 95d3e691a0e88a3e213a1d30446a9224497f2055 /src/runtime/NEON/functions/NEConvolution.cpp | |
parent | da4b1b2055d96aaf73704eb9b0b82d74dc2d699c (diff) | |
download | ComputeLibrary-ebcebf1dee7f8314976b1e0cabd62b4cf893d765.tar.gz |
COMPMID-3638: Move NEON kernels
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEConvolution.cpp')
-rw-r--r-- | src/runtime/NEON/functions/NEConvolution.cpp | 48 |
1 file changed, 35 insertions, 13 deletions
diff --git a/src/runtime/NEON/functions/NEConvolution.cpp b/src/runtime/NEON/functions/NEConvolution.cpp
index 8200a08ca8..07ac8bd42b 100644
--- a/src/runtime/NEON/functions/NEConvolution.cpp
+++ b/src/runtime/NEON/functions/NEConvolution.cpp
@@ -25,29 +25,39 @@
 
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/NEON/kernels/NEConvolutionKernel.h"
 #include "arm_compute/core/PixelValue.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/Validate.h"
 #include "arm_compute/runtime/NEON/NEScheduler.h"
 #include "arm_compute/runtime/TensorAllocator.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NEConvolutionKernel.h"
+#include "src/core/NEON/kernels/NEFillBorderKernel.h"
 #include "support/MemorySupport.h"
 
 #include <array>
 #include <utility>
 
-using namespace arm_compute;
+namespace arm_compute
+{
+NEConvolution3x3::~NEConvolution3x3() = default;
 
 void NEConvolution3x3::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEConvolution3x3Kernel>();
     k->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+    auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+    b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler = std::move(b);
 }
 
 template <unsigned int matrix_size>
+NEConvolutionSquare<matrix_size>::~NEConvolutionSquare() = default;
+
+template <unsigned int matrix_size>
 NEConvolutionSquare<matrix_size>::NEConvolutionSquare(std::shared_ptr<IMemoryManager> memory_manager)
     : _memory_group(std::move(memory_manager)), _tmp(), _is_separable(false), _kernel_hor(), _kernel_vert(), _kernel(), _border_handler()
 {
@@ -66,6 +76,7 @@ void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output
 
     _is_separable = separate_matrix(conv, conv_col.data(), conv_row.data(), matrix_size);
 
+    auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
     if(_is_separable)
     {
         DataType intermediate_type = DataType::UNKNOWN;
@@ -82,35 +93,40 @@ void NEConvolutionSquare<matrix_size>::configure(ITensor *input, ITensor *output
             scale = calculate_matrix_scale(conv, matrix_size);
         }
 
-        _kernel_hor.configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
-        _kernel_vert.configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
+        _kernel_hor  = arm_compute::support::cpp14::make_unique<NESeparableConvolutionHorKernel<matrix_size>>();
+        _kernel_vert = arm_compute::support::cpp14::make_unique<NESeparableConvolutionVertKernel<matrix_size>>();
+
+        _kernel_hor->configure(input, &_tmp, conv_row.data(), border_mode == BorderMode::UNDEFINED);
+        _kernel_vert->configure(&_tmp, output, conv_col.data(), scale, border_mode == BorderMode::UNDEFINED);
 
         _tmp.allocator()->allocate();
 
-        _border_handler.configure(input, _kernel_hor.border_size(), border_mode, PixelValue(constant_border_value));
+        b->configure(input, _kernel_hor->border_size(), border_mode, PixelValue(constant_border_value));
     }
     else
     {
-        _kernel.configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
-        _border_handler.configure(input, _kernel.border_size(), border_mode, PixelValue(constant_border_value));
+        _kernel = arm_compute::support::cpp14::make_unique<NEConvolutionKernel<matrix_size>>();
+        _kernel->configure(input, output, conv, scale, border_mode == BorderMode::UNDEFINED);
+        b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
     }
+    _border_handler = std::move(b);
 }
 
 template <unsigned int matrix_size>
 void NEConvolutionSquare<matrix_size>::run()
 {
-    NEScheduler::get().schedule(&_border_handler, Window::DimZ);
+    NEScheduler::get().schedule(_border_handler.get(), Window::DimZ);
 
     if(_is_separable)
     {
         MemoryGroupResourceScope scope_mg(_memory_group);
 
-        NEScheduler::get().schedule(&_kernel_hor, Window::DimY);
-        NEScheduler::get().schedule(&_kernel_vert, Window::DimY);
+        NEScheduler::get().schedule(_kernel_hor.get(), Window::DimY);
+        NEScheduler::get().schedule(_kernel_vert.get(), Window::DimY);
     }
     else
     {
-        NEScheduler::get().schedule(&_kernel, Window::DimY);
+        NEScheduler::get().schedule(_kernel.get(), Window::DimY);
     }
 }
 
@@ -118,10 +134,16 @@ template class arm_compute::NEConvolutionSquare<5>;
 template class arm_compute::NEConvolutionSquare<7>;
 template class arm_compute::NEConvolutionSquare<9>;
 
+NEConvolutionRectangle::~NEConvolutionRectangle() = default;
+
 void NEConvolutionRectangle::configure(ITensor *input, ITensor *output, const int16_t *conv, uint32_t rows, uint32_t cols, uint32_t scale, BorderMode border_mode, uint8_t constant_border_value)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEConvolutionRectangleKernel>();
     k->configure(input, output, conv, rows, cols, scale, border_mode == BorderMode::UNDEFINED);
     _kernel = std::move(k);
-    _border_handler.configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+
+    auto b = arm_compute::support::cpp14::make_unique<NEFillBorderKernel>();
+    b->configure(input, _kernel->border_size(), border_mode, PixelValue(constant_border_value));
+    _border_handler = std::move(b);
 }
+} // namespace arm_compute