aboutsummaryrefslogtreecommitdiff
path: root/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
diff options
context:
space:
mode:
author Michele Di Giorgio <michele.digiorgio@arm.com> 2021-07-26 13:18:50 +0100
committer Georgios Pinitas <georgios.pinitas@arm.com> 2021-09-07 13:44:08 +0000
commit aed63ee175e0d64c934389e9d1b2edd0cb1a5cdd (patch)
tree 8f025f849e863b9cdec1d6b889bc463e6c4f78d1 /src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
parent 58d3c5a7df769def499806e4d26cea518add161a (diff)
download ComputeLibrary-aed63ee175e0d64c934389e9d1b2edd0cb1a5cdd.tar.gz
Add support for non-constant weights and biases in CpuFullyConnected
Changing the approach for specifying that weights and biases tensors are non-constant by making it a member of TensorInfo rather than an option of the functions. Resolves: COMPMID-4222 Change-Id: I96e6f3868f51785c9700a3ef6a1fe7b05747862c Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6162 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp')
-rw-r--r-- src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp 31
1 files changed, 31 insertions, 0 deletions
diff --git a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
index 97893b0672..1dd6286dbf 100644
--- a/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
+++ b/src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp
@@ -206,6 +206,7 @@ private:
std::vector<TypeInput> _indirect_pad{};
arm_gemm::ConvolutionParameters _cp{};
experimental::MemoryRequirements _aux_mem{ Count };
+ bool _B_pretranspose_required{ false };
};
template <typename TypeInput, typename TypeOutput, class OutputStage>
@@ -391,6 +392,7 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::configure(const ITensorInfo *
const size_t B_pretranspose_size = _gemm_kernel_asm->get_B_pretransposed_array_size();
_pretranspose_info = TensorInfo(TensorShape(B_pretranspose_size), 1, DataType::U8);
_aux_mem[Pretranspose] = MemoryInfo(offset_int_vec(Pretranspose), MemoryLifetime::Persistent, B_pretranspose_size, alignment);
+ _B_pretranspose_required = true;
}
// Handle indirect GEMM convolution
@@ -485,6 +487,35 @@ void Fallback<TypeInput, TypeOutput, OutputStage>::run(ITensorPack &tensors)
in1_ptr = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes());
}
+ // If necessary, run pretranspose every time if either weights or biases are non-constant
+ if((b && !b->info()->are_values_constant()) || (c && !c->info()->are_values_constant() && c->info()->data_type() == DataType::S32))
+ {
+ if(c && c->info()->data_type() == DataType::S32)
+ {
+ _gemm_kernel_asm->set_quantized_bias(reinterpret_cast<const int32_t *>(c->buffer() + c->info()->offset_first_element_in_bytes()), 0);
+ }
+
+ // Pretranspose B if required
+ if(_B_pretranspose_required)
+ {
+ const int ldb = b->info()->strides_in_bytes().y() / sizeof(TypeInput);
+ const auto b_ptr = reinterpret_cast<const TypeInput *>(b->buffer() + b->info()->offset_first_element_in_bytes());
+ const int multi_stride_b = b->info()->strides_in_bytes().z() / sizeof(TypeInput);
+
+ CpuAuxTensorHandler pretranspose(offset_int_vec(Pretranspose), _pretranspose_info, tensors, true);
+ ARM_COMPUTE_ERROR_ON(pretranspose.get()->buffer() == nullptr);
+
+ if(b->info()->are_values_constant())
+ {
+ _gemm_kernel_asm->requantize_bias(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b);
+ }
+ else
+ {
+ _gemm_kernel_asm->pretranspose_B_array(pretranspose.get()->buffer(), b_ptr, ldb, multi_stride_b);
+ }
+ }
+ }
+
const auto scheduling_hint = scheduling_hint_heuristic(_kernel_info.method, d->info()->data_type());
// Set workspace if needed and reset number of threads as buffer manager gets re-created with max_threads