From 63e0beb9fb9646407d123e830165546e9129e95d Mon Sep 17 00:00:00 2001
From: Giorgio Arena
Date: Fri, 24 Sep 2021 14:04:27 +0100
Subject: Add support for non-constant weights and biases in CpuFullyConnected

Changing the approach for specifying that weights and biases tensors are
non-constant by making it a member of TensorInfo rather than an option
of the functions.

Resolves: COMPMID-4222, COMPMID-4811

Signed-off-by: Giorgio Arena
Change-Id: I9b0081ccbcf8271ce029ba6755563d64c59e1d32
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6313
Tested-by: Arm Jenkins
Reviewed-by: Pablo Marquez Tello
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
---
 .../operators/internal/CpuGemmAssemblyDispatch.cpp | 37 ++++++++++++++++++++++
 1 file changed, 37 insertions(+)

(limited to 'src/cpu/operators/internal')

diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
index 97893b0672..23095d8b84 100644
--- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
+++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
@@ -206,6 +206,9 @@ private:
     std::vector<TypeInput>           _indirect_pad{};
     arm_gemm::ConvolutionParameters  _cp{};
     experimental::MemoryRequirements _aux_mem{ Count };
+    bool                             _B_pretranspose_required{ false };
+    bool                             _is_b_constant{ true };
+    bool                             _is_c_constant{ true };
 };
 
 template <typename TypeInput, typename TypeOutput, class OutputStage>
@@ -348,6 +351,10 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensorInfo *
                                                              const OutputStage &os)
 {
     ARM_COMPUTE_UNUSED(c);
+
+    _is_b_constant = b->are_values_constant();
+    _is_c_constant = c ? c->are_values_constant() : true;
+
     arm_gemm::GemmConfig gemm_cfg;
     _kernel_info = arm_gemm::get_gemm_method<TypeInput, TypeOutput, OutputStage>(args, os);
     if(_kernel_info.method != arm_gemm::GemmMethod::GEMV_BATCHED)
@@ -391,6 +398,7 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensorInfo *
         const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size();
         _pretranspose_info                = TensorInfo(TensorShape(B_pretranspose_size), 1, DataType::U8);
         _aux_mem[Pretranspose]            = MemoryInfo(offset_int_vec(Pretranspose), MemoryLifetime::Persistent, B_pretranspose_size, alignment);
+        _B_pretranspose_required          = true;
     }
 
     // Handle indirect GEMM convolution
@@ -485,6 +493,35 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors)
         in1_ptr = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes());
     }
 
+    // If necessary, run pretranspose every time if either weights or biases are non-constant
+    if((b && !_is_b_constant) || (c && !_is_c_constant && c->info()->data_type() == DataType::S32))
+    {
+        if(c && c->info()->data_type() == DataType::S32)
+        {
+            _gemm_kernel_asm->set_quantized_bias(reinterpret_cast<const int32_t *>(c->buffer() + c->info()->offset_first_element_in_bytes()), 0);
+        }
+
+        // Pretranspose B if required
+        if(_B_pretranspose_required)
+        {
+            const int  ldb            = b->info()->strides_in_bytes().y() / sizeof(TypeInput);
+            const auto b_ptr          = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes());
+            const int  multi_stride_b = b->info()->strides_in_bytes().z() / sizeof(TypeInput);
+
+            CpuAuxTensorHandler pretranspose(offset_int_vec(Pretranspose), _pretranspose_info, tensors, true);
+            ARM_COMPUTE_ERROR_ON(pretranspose.get()->buffer() == nullptr);
+
+            if(_is_b_constant)
+            {
+                _gemm_kernel_asm->requantize_bias(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b);
+            }
+            else
+            {
+                _gemm_kernel_asm->pretranspose_B_array(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b);
+            }
+        }
+    }
+
     const auto scheduling_hint = scheduling_hint_heuristic(_kernel_info.method, d->info()->data_type());
 
     // Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads
-- 
cgit v1.2.1
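
For context on how this is meant to be used, below is a minimal sketch (not part of the patch) of the new TensorInfo-based approach described in the commit message: the caller flags the weights/bias tensor metadata as non-constant before configure(), and the assembly dispatch then redoes the B pretranspose on every run() via the _is_b_constant/_is_c_constant members added above. It assumes the ITensorInfo::set_are_values_constant() setter introduced alongside this change; the shapes, data types, function name, and the commented-out CpuFullyConnected call are illustrative only.

// Minimal sketch (not part of this patch): flagging weights/bias as
// non-constant via TensorInfo so the operator re-processes them on every
// run. Assumes ITensorInfo::set_are_values_constant(), introduced
// alongside this change; shapes/types are illustrative.
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

void mark_dynamic_weights_example()
{
    TensorInfo src_info(TensorShape(128U), 1, DataType::F32);
    TensorInfo wei_info(TensorShape(64U, 128U), 1, DataType::F32);
    TensorInfo bia_info(TensorShape(64U), 1, DataType::F32);
    TensorInfo dst_info(TensorShape(64U), 1, DataType::F32);

    // With this change, "dynamic" weights/biases are declared on the
    // tensor metadata itself rather than through a function option:
    wei_info.set_are_values_constant(false);
    bia_info.set_are_values_constant(false);

    // configure() records are_values_constant() into _is_b_constant /
    // _is_c_constant, and run() then redoes the B pretranspose
    // (pretranspose_B_array) on each execution, e.g.:
    //
    //   cpu::CpuFullyConnected fc;
    //   fc.configure(&src_info, &wei_info, &bia_info, &dst_info,
    //                FullyConnectedLayerInfo());
}

Since the flag lives on the tensor metadata itself, every operator that consumes the tensor can query are_values_constant() uniformly, rather than each function growing its own "weights are dynamic" option.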