From d7316eb877cc4ff8573219374335e917b19a0203 Mon Sep 17 00:00:00 2001 From: Michele Di Giorgio Date: Wed, 16 Jun 2021 11:14:41 +0100 Subject: Port NEGEMMConv2d to memory injecting interface Resolves: COMPMID-4506, COMPMID-4570 Change-Id: I6d37a06da141f1fcfcaa8525322a319cb0234791 Signed-off-by: Michele Di Giorgio Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5824 Reviewed-by: Georgios Pinitas Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/runtime/NEON/functions/NEGEMM.cpp | 53 +++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 11 deletions(-) (limited to 'src/runtime/NEON/functions/NEGEMM.cpp') diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp index 7318c3e492..9b14052c75 100644 --- a/src/runtime/NEON/functions/NEGEMM.cpp +++ b/src/runtime/NEON/functions/NEGEMM.cpp @@ -38,10 +38,12 @@ #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" #include "src/core/helpers/AutoConfiguration.h" +#include "src/core/helpers/MemoryHelpers.h" #include "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.h" #include +using namespace arm_compute::experimental; using namespace arm_compute::misc::shape_calculator; namespace arm_compute @@ -61,9 +63,31 @@ cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info) } // namespace NEGEMM::NEGEMM(std::shared_ptr memory_manager, IWeightsManager *weights_manager) - : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique()), _ma_kernel(), - _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false), - _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false) + : _memory_group(memory_manager), + _weights_manager(weights_manager), + _interleave_kernel(), + _transpose_kernel(), + _mm_kernel(), + _asm_glue(std::make_unique()), + _ma_kernel(), + _alpha_scale_func(nullptr), + _add_bias(), + _activation_func(), + _tmp_a(), + _tmp_b(), + _tmp_d(), + _original_b(nullptr), + _run_vector_matrix_multiplication(false), + _run_alpha_scale(false), + _run_addition(false), + _run_bias_addition(false), + _run_activation(false), + _reshape_b_only_on_first_run(false), + _is_prepared(false), + _asm_glue_run_pack(), + _asm_glue_prep_pack(), + _asm_glue_workspace(), + _aux_mem_req() { } @@ -94,13 +118,16 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe _asm_glue->configure(a->info(), b->info(), c_info_to_use, d->info(), asm_info); ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured()); - _asm_glue_tensors = + _aux_mem_req = _asm_glue->workspace(); + _asm_glue_run_pack = { { ACL_SRC_0, a }, { ACL_SRC_1, b }, { ACL_SRC_2, c_to_use }, { ACL_DST, d }, }; + _asm_glue_prep_pack = { { ACL_SRC_1, b }, { ACL_SRC_2, c_to_use } }; + _asm_glue_workspace = manage_workspace(_aux_mem_req, _memory_group, _asm_glue_run_pack, _asm_glue_prep_pack); // Scale product by alpha if(_run_alpha_scale) @@ -323,7 +350,7 @@ void NEGEMM::run() if(_asm_glue->is_configured()) { - _asm_glue->run(_asm_glue_tensors); + _asm_glue->run(_asm_glue_run_pack); if(_run_alpha_scale) { _alpha_scale_func.run(); @@ -372,16 +399,20 @@ void NEGEMM::prepare() const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b); if(_asm_glue->is_configured()) { - if(!original_b_managed_by_weights_manager) - { - ARM_COMPUTE_ERROR_ON(!_original_b->is_used()); - } + _asm_glue->prepare(_asm_glue_prep_pack); - _asm_glue->prepare(_asm_glue_tensors); - if(!original_b_managed_by_weights_manager) + auto has_reshape = std::find_if(_aux_mem_req.begin(), + _aux_mem_req.end(), + [](const MemoryInfo & m) -> bool { return m.lifetime == MemoryLifetime::Persistent; }); + + if(has_reshape != std::end(_aux_mem_req)) { _original_b->mark_as_unused(); } + else + { + _asm_glue_run_pack.add_const_tensor(ACL_SRC_1, _original_b); + } } else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured()) { -- cgit v1.2.1