aboutsummaryrefslogtreecommitdiff
path: root/src/runtime/NEON/functions/NEGEMM.cpp
diff options
context:
space:
mode:
author: Sang-Hoon Park <sang-hoon.park@arm.com> 2021-05-18 10:46:00 +0100
committer: Pablo Marquez Tello <pablo.tello@arm.com> 2021-05-27 16:33:44 +0000
commit: b3be45759bdd0749ae3a16fe470820f0d9830ea9 (patch)
tree: 10bb8c1c0a049a23c00781c64e993f1b197c0d05 /src/runtime/NEON/functions/NEGEMM.cpp
parent: bc91297c865808ed2c321febc405179f63195ff8 (diff)
download: ComputeLibrary-b3be45759bdd0749ae3a16fe470820f0d9830ea9.tar.gz
Implement memory injection in CpuDirectGemmConv2d
The following operators are now stateless by implementing memory injection: CpuDirectGemmConv2d and CpuGemmAssemblyDispatch. A test case is added to check that CpuDirectGemmConv2d can run on different groups of tensors with a single configure. Resolves: COMPMID-4506 Change-Id: I48f44ed41236ca7e18da2de07bdbacc9007a3c5e Signed-off-by: Sang-Hoon Park <sang-hoon.park@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5718 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez Tello <pablo.tello@arm.com>
Diffstat (limited to 'src/runtime/NEON/functions/NEGEMM.cpp')
-rw-r--r-- src/runtime/NEON/functions/NEGEMM.cpp 19
1 files changed, 15 insertions, 4 deletions
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 7318c3e492..b526874790 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -38,6 +38,7 @@
#include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
#include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
#include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/MemoryHelpers.h"
#include "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.h"
#include <cmath>
@@ -46,6 +47,14 @@ using namespace arm_compute::misc::shape_calculator;
namespace arm_compute
{
+using WorkspaceDataType = WorkspaceData<Tensor>;
+
+struct NEGEMM::AsmGlueTensors
+{
+ ITensorPack tensors{};
+ WorkspaceDataType ws{};
+};
+
namespace
{
cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info)
@@ -63,7 +72,7 @@ cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info)
NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
: _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique<cpu::CpuGemmAssemblyDispatch>()), _ma_kernel(),
_alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
- _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
+ _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false), _asm_glue_tensors(std::make_unique<AsmGlueTensors>())
{
}
@@ -94,7 +103,7 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
_asm_glue->configure(a->info(), b->info(), c_info_to_use, d->info(), asm_info);
ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured());
- _asm_glue_tensors =
+ _asm_glue_tensors->tensors =
{
{ ACL_SRC_0, a },
{ ACL_SRC_1, b },
@@ -102,6 +111,8 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
{ ACL_DST, d },
};
+ _asm_glue_tensors->ws = manage_workspace<Tensor>(_asm_glue->workspace(), _memory_group, _asm_glue_tensors->tensors);
+
// Scale product by alpha
if(_run_alpha_scale)
{
@@ -323,7 +334,7 @@ void NEGEMM::run()
if(_asm_glue->is_configured())
{
- _asm_glue->run(_asm_glue_tensors);
+ _asm_glue->run(_asm_glue_tensors->tensors);
if(_run_alpha_scale)
{
_alpha_scale_func.run();
@@ -377,7 +388,7 @@ void NEGEMM::prepare()
ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
}
- _asm_glue->prepare(_asm_glue_tensors);
+ _asm_glue->prepare(_asm_glue_tensors->tensors);
if(!original_b_managed_by_weights_manager)
{
_original_b->mark_as_unused();