aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEGEMM.cpp
diff options
context:
space:
mode:
authorMichalis Spyrou <michalis.spyrou@arm.com>2020-10-21 00:04:14 +0100
committerMichalis Spyrou <michalis.spyrou@arm.com>2020-11-03 15:10:47 +0000
commitebcebf1dee7f8314976b1e0cabd62b4cf893d765 (patch)
tree95d3e691a0e88a3e213a1d30446a9224497f2055 /src/runtime/NEON/functions/NEGEMM.cpp
parentda4b1b2055d96aaf73704eb9b0b82d74dc2d699c (diff)
downloadComputeLibrary-ebcebf1dee7f8314976b1e0cabd62b4cf893d765.tar.gz
COMPMID-3638: Move NEON kernels
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com> Change-Id: Ieed3e4bc8be7fef80c90c5094599b477a56fc473 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4285 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMM.cpp')
-rw-r--r--src/runtime/NEON/functions/NEGEMM.cpp32
1 files changed, 22 insertions, 10 deletions
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 4166cff97a..0215098792 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -34,7 +34,12 @@
#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h"
#include "arm_compute/runtime/TensorAllocator.h"
#include "src/core/CPP/Validate.h"
+#include "src/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h"
+#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
+#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "support/MemorySupport.h"
#include <cmath>
@@ -42,6 +47,8 @@ using namespace arm_compute::misc::shape_calculator;
namespace arm_compute
{
+NEGEMM::~NEGEMM() = default;
+
NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(memory_manager, weights_manager), _ma_kernel(),
_alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
@@ -88,11 +95,13 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
_memory_group.manage(&_tmp_d);
}
+ _mm_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixMultiplyKernel>();
+
// Select between GEMV and GEMM
if(_run_vector_matrix_multiplication)
{
// Configure the matrix multiply kernel
- _mm_kernel.configure(a, b, gemm_output_to_use, alpha, false);
+ _mm_kernel->configure(a, b, gemm_output_to_use, alpha, false);
}
else
{
@@ -124,13 +133,15 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
int k = a->info()->dimension(0);
// Configure interleave kernel
- _interleave_kernel.configure(a, &_tmp_a);
+ _interleave_kernel = arm_compute::support::cpp14::make_unique<NEGEMMInterleave4x4Kernel>();
+ _interleave_kernel->configure(a, &_tmp_a);
// Configure transpose kernel
- _transpose_kernel.configure(b, &_tmp_b);
+ _transpose_kernel = arm_compute::support::cpp14::make_unique<NEGEMMTranspose1xWKernel>();
+ _transpose_kernel->configure(b, &_tmp_b);
// Configure matrix multiplication kernel
- _mm_kernel.configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
+ _mm_kernel->configure(&_tmp_a, &_tmp_b, gemm_output_to_use, alpha, true, GEMMReshapeInfo(m, n, k));
// Allocate once the all configure methods have been called
_tmp_a.allocator()->allocate();
@@ -150,7 +161,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
// Configure matrix addition kernel
if(_run_addition)
{
- _ma_kernel.configure(c, d, beta);
+ _ma_kernel = arm_compute::support::cpp14::make_unique<NEGEMMMatrixAdditionKernel>();
+ _ma_kernel->configure(c, d, beta);
}
// Configure activation
@@ -298,16 +310,16 @@ void NEGEMM::run()
if(!_run_vector_matrix_multiplication)
{
// Run interleave kernel
- NEScheduler::get().schedule(&_interleave_kernel, Window::DimY);
+ NEScheduler::get().schedule(_interleave_kernel.get(), Window::DimY);
if(!_reshape_b_only_on_first_run)
{
// Run transpose kernel
- NEScheduler::get().schedule(&_transpose_kernel, Window::DimY);
+ NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY);
}
}
- NEScheduler::get().schedule(&_mm_kernel, _run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
+ NEScheduler::get().schedule(_mm_kernel.get(), _run_vector_matrix_multiplication ? Window::DimX : Window::DimY);
// Run bias addition kernel
if(_run_bias_addition)
@@ -319,7 +331,7 @@ void NEGEMM::run()
// Run matrix addition kernel
if(_run_addition)
{
- NEScheduler::get().schedule(&_ma_kernel, Window::DimY);
+ NEScheduler::get().schedule(_ma_kernel.get(), Window::DimY);
}
// Run activation function
@@ -355,7 +367,7 @@ void NEGEMM::prepare()
}
_tmp_b.allocator()->allocate();
- NEScheduler::get().schedule(&_transpose_kernel, Window::DimY);
+ NEScheduler::get().schedule(_transpose_kernel.get(), Window::DimY);
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();