diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-10-21 00:04:14 +0100 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2020-11-03 15:10:47 +0000 |
commit | ebcebf1dee7f8314976b1e0cabd62b4cf893d765 (patch) | |
tree | 95d3e691a0e88a3e213a1d30446a9224497f2055 /src/runtime/NEON/functions/NEGEMM.cpp | |
parent | da4b1b2055d96aaf73704eb9b0b82d74dc2d699c (diff) | |
download | ComputeLibrary-ebcebf1dee7f8314976b1e0cabd62b4cf893d765.tar.gz |
COMPMID-3638: Move NEON kernels
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMM.cpp')
-rw-r--r-- | src/runtime/NEON/functions/NEGEMM.cpp | 32 |
1 files changed, 22 insertions, 10 deletions
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 4166cff97a..0215098792 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -34,7 +34,12 @@ #include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" #include "arm_compute/runtime/TensorAllocator.h" #include "src/core/CPP/Validate.h" +#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" +#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" +#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "support/MemorySupport.h" #include <cmath> @@ -42,6 +47,8 @@ using namespace arm_compute::misc::shape_calculator; namespace arm_compute { +NEGEMM::~NEGEMM() = default; + NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager) : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(), _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), @@ -88,11 +95,13 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _memory_group.manage(&_tmp_d); } + _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixMultiplyKernel>(); + // Select between GEMV and GEMM if(_run_vector_matrix_multiplication) { // Configure the matrix multiply kernel - _mm_kernel.configure(a, b, gemm_output_to_use, alpha, false); + _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false); } else { @@ -124,13 +133,15 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe int k = a->info()->dimension(0); // Configure interleave kernel - _interleave_kernel.configure(a, &_tmp_a); + _interleave_kernel = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>(); + _interleave_kernel->configure(a, &_tmp_a); // Configure transpose kernel - _transpose_kernel.configure(b, &_tmp_b); + _transpose_kernel = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>(); + _transpose_kernel->configure(b, &_tmp_b); // Configure matrix multiplication kernel - _mm_kernel.configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); + _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k)); // Allocate once the all configure methods have been called _tmp_a.allocator()->allocate(); @@ -150,7 +161,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe // Configure matrix addition kernel if(_run_addition) { - _ma_kernel.configure(c, d, beta); + _ma_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixAdditionKernel>(); + _ma_kernel->configure(c, d, beta); } // Configure activation @@ -298,16 +310,16 @@ void NEGEMM::run() if(!_run_vector_matrix_multiplication) { // Run interleave kernel - NEScheduler::get().schedule(&_interleave_kernel, Window::DimY); + NEScheduler::get().schedule(_interleave_kernel.get(), Window::DimY); if(!_reshape_b_only_on_first_run) { // Run transpose kernel - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); } } - NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); + NEScheduler::get().schedule(_mm_kernel.get(), _run_vector_matrix_multiplication ? Window::DimX : Window::DimY); // Run bias addition kernel if(_run_bias_addition) @@ -319,7 +331,7 @@ void NEGEMM::run() // Run matrix addition kernel if(_run_addition) { - NEScheduler::get().schedule(&_ma_kernel, Window::DimY); + NEScheduler::get().schedule(_ma_kernel.get(), Window::DimY); } // Run activation function @@ -355,7 +367,7 @@ void NEGEMM::prepare() } _tmp_b.allocator()->allocate(); - NEScheduler::get().schedule(&_transpose_kernel, Window::DimY); + NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY); if(!original_b_managed_by_weights_manager) { _original_b->mark_as_unused(); |