From d7316eb877cc4ff8573219374335e917b19a0203 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Wed, 16 Jun 2021 11:14:41 +0100
Subject: Port NEGEMMConv2d to memory injecting interface

Resolves: COMPMID-4506, COMPMID-4570

Change-Id: I6d37a06da141f1fcfcaa8525322a319cb0234791
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5824
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 src/runtime/NEON/functions/NEGEMM.cpp | 53 +++++++++++++++++++++++++++--------
 1 file changed, 42 insertions(+), 11 deletions(-)

(limited to 'src/runtime/NEON/functions/NEGEMM.cpp')
diff --git a/src/runtime/NEON/functions/NEGEMM.cpp b/src/runtime/NEON/functions/NEGEMM.cpp
index 7318c3e492..9b14052c75 100644
--- a/src/runtime/NEON/functions/NEGEMM.cpp
+++ b/src/runtime/NEON/functions/NEGEMM.cpp
@@ -38,10 +38,12 @@
 #include "src/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h"
 #include "src/core/NEON/kernels/NEGEMMTranspose1xWKernel.h"
 #include "src/core/helpers/AutoConfiguration.h"
+#include "src/core/helpers/MemoryHelpers.h"
 #include "src/runtime/cpu/operators/internal/CpuGemmAssemblyDispatch.h"
 
 #include <cmath>
 
+using namespace arm_compute::experimental;
 using namespace arm_compute::misc::shape_calculator;
 
 namespace arm_compute
@@ -61,9 +63,31 @@ cpu::AsmGemmInfo init_assembly_metadata(const GEMMInfo &info)
 } // namespace
 
 NEGEMM::NEGEMM(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
-    : _memory_group(memory_manager), _weights_manager(weights_manager), _interleave_kernel(), _transpose_kernel(), _mm_kernel(), _asm_glue(std::make_unique<cpu::CpuGemmAssemblyDispatch>()), _ma_kernel(),
-      _alpha_scale_func(nullptr), _add_bias(), _activation_func(), _tmp_a(), _tmp_b(), _tmp_d(), _original_b(nullptr), _run_vector_matrix_multiplication(false), _run_alpha_scale(false),
-      _run_addition(false), _run_bias_addition(false), _run_activation(false), _reshape_b_only_on_first_run(false), _is_prepared(false)
+    : _memory_group(memory_manager),
+      _weights_manager(weights_manager),
+      _interleave_kernel(),
+      _transpose_kernel(),
+      _mm_kernel(),
+      _asm_glue(std::make_unique<cpu::CpuGemmAssemblyDispatch>()),
+      _ma_kernel(),
+      _alpha_scale_func(nullptr),
+      _add_bias(),
+      _activation_func(),
+      _tmp_a(),
+      _tmp_b(),
+      _tmp_d(),
+      _original_b(nullptr),
+      _run_vector_matrix_multiplication(false),
+      _run_alpha_scale(false),
+      _run_addition(false),
+      _run_bias_addition(false),
+      _run_activation(false),
+      _reshape_b_only_on_first_run(false),
+      _is_prepared(false),
+      _asm_glue_run_pack(),
+      _asm_glue_prep_pack(),
+      _asm_glue_workspace(),
+      _aux_mem_req()
 {
 }
 
@@ -94,13 +118,16 @@ void NEGEMM::configure(const ITensor *a, const ITensor *b, const ITensor *c, ITe
         _asm_glue->configure(a->info(), b->info(), c_info_to_use, d->info(), asm_info);
         ARM_COMPUTE_ERROR_ON(!_asm_glue->is_configured());
 
-        _asm_glue_tensors =
+        _aux_mem_req = _asm_glue->workspace();
+        _asm_glue_run_pack =
         {
             { ACL_SRC_0, a },
             { ACL_SRC_1, b },
             { ACL_SRC_2, c_to_use },
             { ACL_DST, d },
         };
+        _asm_glue_prep_pack = { { ACL_SRC_1, b }, { ACL_SRC_2, c_to_use } };
+        _asm_glue_workspace = manage_workspace<Tensor>(_aux_mem_req, _memory_group, _asm_glue_run_pack, _asm_glue_prep_pack);
 
         // Scale product by alpha
         if(_run_alpha_scale)
@@ -323,7 +350,7 @@ void NEGEMM::run()
 
     if(_asm_glue->is_configured())
     {
-        _asm_glue->run(_asm_glue_tensors);
+        _asm_glue->run(_asm_glue_run_pack);
         if(_run_alpha_scale)
         {
             _alpha_scale_func.run();
@@ -372,16 +399,20 @@ void NEGEMM::prepare()
         const bool original_b_managed_by_weights_manager = _weights_manager && _weights_manager->are_weights_managed(_original_b);
         if(_asm_glue->is_configured())
         {
-            if(!original_b_managed_by_weights_manager)
-            {
-                ARM_COMPUTE_ERROR_ON(!_original_b->is_used());
-            }
+            _asm_glue->prepare(_asm_glue_prep_pack);
 
-            _asm_glue->prepare(_asm_glue_tensors);
-            if(!original_b_managed_by_weights_manager)
+            auto has_reshape = std::find_if(_aux_mem_req.begin(),
+                                            _aux_mem_req.end(),
+                                            [](const MemoryInfo & m) -> bool { return m.lifetime == MemoryLifetime::Persistent; });
+
+            if(has_reshape != std::end(_aux_mem_req))
             {
                 _original_b->mark_as_unused();
             }
+            else
+            {
+                _asm_glue_run_pack.add_const_tensor(ACL_SRC_1, _original_b);
+            }
         }
         else if(_reshape_b_only_on_first_run && !_run_vector_matrix_multiplication && !_asm_glue->is_configured())
         {
-- 
cgit v1.2.1