From 63e0beb9fb9646407d123e830165546e9129e95d Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Fri, 24 Sep 2021 14:04:27 +0100
Subject: Add support for non-constant weights and biases in CpuFullyConnected

Changing the approach for specifying that weights and biases tensors are
non-constant by making it a member of TensorInfo rather than an option
of the functions.

Resolves: COMPMID-4222, COMPMID-4811

Signed-off-by: Giorgio Arena
Change-Id: I9b0081ccbcf8271ce029ba6755563d64c59e1d32
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6313
Tested-by: Arm Jenkins
Reviewed-by: Pablo Marquez Tello
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 .../operators/internal/CpuGemmAssemblyDispatch.cpp | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'src/cpu/operators/internal')

diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
index 97893b0672..23095d8b84 100644
--- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
+++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
@@ -206,6 +206,9 @@ private:
     std::vector<TypeInput>           _indirect_pad{};
     arm_gemm::ConvolutionParameters  _cp{};
     experimental::MemoryRequirements _aux_mem{ Count };
+    bool                             _B_pretranspose_required{ false };
+    bool                             _is_b_constant{ true };
+    bool                             _is_c_constant{ true };
 };
 
 template <typename TypeInput, typename TypeOutput, class OutputStage>
@@ -348,6 +351,10 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensorInfo *
                                                              const OutputStage &os)
 {
     ARM_COMPUTE_UNUSED(c);
+
+    _is_b_constant = b->are_values_constant();
+    _is_c_constant = c ? c->are_values_constant() : true;
+
     arm_gemm::GemmConfig gemm_cfg;
     _kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput, OutputStage>(args, os);
     if(_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED)
@@ -391,6 +398,7 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensorInfo *
         const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size();
         _pretranspose_info                = TensorInfo(TensorShape(B_pretranspose_size), 1, DataType::U8);
         _aux_mem[Pretranspose]            = MemoryInfo(offset_int_vec(Pretranspose), MemoryLifetime::Persistent, B_pretranspose_size, alignment);
+        _B_pretranspose_required          = true;
     }
 
     // Handle indirect GEMM convolution
@@ -485,6 +493,35 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors)
         in1_ptr = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes());
     }
 
+    // If necessary, run pretranspose every time if either weights or biases are non-constant
+    if((b && !_is_b_constant) || (c && !_is_c_constant && c->info()->data_type() == DataType::S32))
+    {
+        if(c && c->info()->data_type() == DataType::S32)
+        {
+            _gemm_kernel_asm->set_quantized_bias(reinterpret_cast<const int32_t *>(c->buffer() + c->info()->offset_first_element_in_bytes()), 0);
+        }
+
+        // Pretranspose B if required
+        if(_B_pretranspose_required)
+        {
+            const int  ldb            = b->info()->strides_in_bytes().y() / sizeof(TypeInput);
+            const auto b_ptr          = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes());
+            const int  multi_stride_b = b->info()->strides_in_bytes().z() / sizeof(TypeInput);
+
+            CpuAuxTensorHandler pretranspose(offset_int_vec(Pretranspose), _pretranspose_info, tensors, true);
+            ARM_COMPUTE_ERROR_ON(pretranspose.get()->buffer() == nullptr);
+
+            if(_is_b_constant)
+            {
+                _gemm_kernel_asm->requantize_bias(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b);
+            }
+            else
+            {
+                _gemm_kernel_asm->pretranspose_B_array(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b);
+            }
+        }
+    }
+
     const auto scheduling_hint = scheduling_hint_heuristic(_kernel_info.method, d->info()->data_type());
 
     // Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads
-- 
cgit v1.2.1
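
For context on how this is meant to be used, below is a minimal sketch (not part of the patch) of the new TensorInfo-based approach described in the commit message: the caller flags the weights/bias tensor metadata as non-constant before configure(), and the assembly dispatch then redoes the B pretranspose on every run() via the _is_b_constant/_is_c_constant members added above. It assumes the ITensorInfo::set_are_values_constant() setter introduced alongside this change; the shapes, data types, function name, and the commented-out CpuFullyConnected call are illustrative only.

// Minimal sketch (not part of this patch): flagging weights/bias as
// non-constant via TensorInfo so the operator re-processes them on every
// run. Assumes ITensorInfo::set_are_values_constant(), introduced
// alongside this change; shapes/types are illustrative.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

void mark_dynamic_weights_example()
{
    TensorInfo src_info(TensorShape(128U), 1, DataType::F32);
    TensorInfo wei_info(TensorShape(64U, 128U), 1, DataType::F32);
    TensorInfo bia_info(TensorShape(64U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(64U), 1, DataType::F32);

    // With this change, "dynamic" weights/biases are declared on the
    // tensor metadata itself rather than through a function option:
    wei_info.set_are_values_constant(false);
    bia_info.set_are_values_constant(false);

    // configure() records are_values_constant() into _is_b_constant /
    // _is_c_constant, and run() then redoes the B pretranspose
    // (pretranspose_B_array) on each execution, e.g.:
    //
    //   cpu::CpuFullyConnected fc;
    //   fc.configure(&src_info, &wei_info, &bia_info, &dst_info,
    //                FullyConnectedLayerInfo());
}

Since the flag lives on the tensor metadata itself, every operator that consumes the tensor can query are_values_constant() uniformly, rather than each function growing its own "weights are dynamic" option.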