From ebcebf1dee7f8314976b1e0cabd62b4cf893d765 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Wed, 21 Oct 2020 00:04:14 +0100 Subject: COMPMID-3638: Move NEON kernels Signed-off-by: Michalis Spyrou Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins --- src/runtime/NEON/functions/NEGEMM.cpp | 32 ++++++++++++++++++++++---------- 1 file changed, 22 insertions(+), 10 deletions(-) (limited to 'src/runtime/NEON/functions/NEGEMM.cpp') diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 4166cff97a..0215098792 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -34,7 +34,12 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" #include @@ -42,6 +47,8 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { +NEGEMM::~NEGEMM() = default; + NEGEMM::NEGEMM(std::shared_ptr memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), @@ -88,11 +95,13 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _memory_group.manage(&_tmp_d); } + _mm_kernel = arm_compute::support::cpp14::make_unique(); + // Select between GEMV and GEMM if(_run_vector_matrix_multiplication) { // Configure the matrix multiply kernel - _mm_kernel.configure(a, b, gemm_output_to_use, alpha, false); + _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false); } else { @@ -124,13 +133,15 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe int k = a->info()->dimension(0); // Configure interleave kernel - _interleave_kernel.configure(a, &_tmp_a); + _interleave_kernel = arm_compute::support::cpp14::make_unique(); + _interleave_kernel->configure(a, &_tmp_a); // Configure transpose kernel - _transpose_kernel.configure(b, &_tmp_b); + _transpose_kernel = arm_compute::support::cpp14::make_unique(); + _transpose_kernel->configure(b, &_tmp_b); // Configure matrix multiplication kernel - _mm_kernel.configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); + _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); // Allocate once the all configure methods have been called _tmp_a.allocator()->allocate(); @@ -150,7 +161,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe // Configure matrix addition kernel if(_run_addition) { - _ma_kernel.configure(c, d, beta); + _ma_kernel = arm_compute::support::cpp14::make_unique(); + _ma_kernel->configure(c, d, beta); } // Configure activation @@ -298,16 +310,16 @@ void NEGEMM::run() if(!_run_vector_matrix_multiplication) { // Run interleave kernel - NEScheduler::get().schedule(&_interleave_kernel, Window::DimY); + NEScheduler::get().schedule(_interleave_kernel.get(), Window::DimY); if(!_reshape_b_only_on_first_run) { // Run transpose kernel - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); } } - NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); + NEScheduler::get().schedule(_mm_kernel.get(), _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); // Run bias addition kernel if(_run_bias_addition) @@ -319,7 +331,7 @@ void NEGEMM::run() // Run matrix addition kernel if(_run_addition) { - NEScheduler::get().schedule(&_ma_kernel, Window::DimY); + NEScheduler::get().schedule(_ma_kernel.get(), Window::DimY); } // Run activation function @@ -355,7 +367,7 @@ void NEGEMM::prepare() } _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); -- cgit v1.2.1