aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2021-07-26 13:18:50 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-09-07 13:44:08 +0000
commitaed63ee175e0d64c934389e9d1b2edd0cb1a5cdd (patch)
tree8f025f849e863b9cdec1d6b889bc463e6c4f78d1 /src/core/NEON/kernels
parent58d3c5a7df769def499806e4d26cea518add161a (diff)
downloadComputeLibrary-aed63ee175e0d64c934389e9d1b2edd0cb1a5cdd.tar.gz
Add support for non-constant weights and biases in CpuFullyConnected
Changing the approach for specifying that weights and biases tensors are non-constant by making it a member of TensorInfo rather than an option of the functions. Resolves: COMPMID-4222 Change-Id: I96e6f3868f51785c9700a3ef6a1fe7b05747862c Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6162 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels')
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp6
-rw-r--r--src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp8
-rw-r--r--src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp9
6 files changed, 32 insertions, 9 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp
index 5cbdf20798..20c8230148 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_indirect.hpp
@@ -523,7 +523,7 @@ public:
return size;
}
- void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
if (std::is_same<OutputStage, Requantize32>::value) {
_col_bias = reinterpret_cast<int32_t *>(in_buffer);
@@ -534,6 +534,10 @@ public:
compute_col_sums(*qp_ptr, _args._Nsize, _args._Ksize * _args._Ksections, B + (i * B_multi_stride), ldb, _col_bias + (i * _args._Nsize), _args._Ksize * _args._Ksections, i, 0);
}
}
+ }
+
+ void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ requantize_bias(in_buffer, B, ldb, B_multi_stride);
// Put the transposed data after the column sums - in non-transposing cases get_col_sum_size() == 0
uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer);
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp
index c72dca2e96..efb5bd1bb4 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized.hpp
@@ -269,12 +269,16 @@ public:
return get_col_sum_size() + (roundup(_Nsize, strategy::out_width()) * roundup(_Ksize, strategy::k_unroll()) * _nmulti * sizeof(Toi));
}
- void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
col_bias = reinterpret_cast<int32_t *>(in_buffer);
for (unsigned int i=0; i<_nmulti; i++) {
compute_col_sums(_qp, _Nsize, _Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize, i, 0);
}
+ }
+
+ void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ requantize_bias(in_buffer, B, ldb, B_multi_stride);
uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer);
Toi *buffer = reinterpret_cast<Toi *>(buffer_int + get_col_sum_size());
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp
index 7376b5ffe3..e84b58dd0f 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_hybrid_quantized_inline.hpp
@@ -219,12 +219,16 @@ public:
return get_col_sum_size() + (roundup(_Nsize, strategy::out_width()) * roundup(_Ksize, strategy::k_unroll()) * _nmulti * sizeof(Toi));
}
- void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
col_bias = reinterpret_cast<int32_t *>(in_buffer);
for (unsigned int i=0; i<_nmulti; i++) {
compute_col_sums(_qp, _Nsize, _Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize, i, 0);
}
+ }
+
+ void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ requantize_bias(in_buffer, B, ldb, B_multi_stride);
uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer);
Toi *buffer = reinterpret_cast<Toi *>(buffer_int + get_col_sum_size());
diff --git a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp
index 5639cb4182..c75c320a6b 100644
--- a/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemm_interleaved.hpp
@@ -923,7 +923,7 @@ public:
return (x_size * _Ktotal * _nmulti * sizeof(Toi)) + get_col_sum_size();
}
- void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
if (std::is_same<OutputStage, Requantize32>::value) {
col_bias = reinterpret_cast<int32_t *>(in_buffer);
@@ -934,6 +934,10 @@ public:
compute_col_sums(*qp_ptr, _Nsize, _Ksize * _Ksections, B + (i * B_multi_stride), ldb, col_bias + (i * _Nsize), _Ksize * _Ksections, i, 0);
}
}
+ }
+
+ void pretranspose_B_array(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ requantize_bias(in_buffer, B, ldb, B_multi_stride);
// Put the transposed data after the column sums - in non-transposing cases get_col_sum_size() == 0
uintptr_t buffer_int = reinterpret_cast<uintptr_t>(in_buffer);
diff --git a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
index d4348beabf..f0b4e5db9e 100644
--- a/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
+++ b/src/core/NEON/kernels/arm_gemm/gemv_pretransposed.hpp
@@ -201,11 +201,11 @@ public:
return _buffer_per_multi * _args._nmulti * sizeof(To) + get_col_sum_size();
}
- void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
// Column sums go on the front of the pretransposed buffer in requantized cases.
// We could optimize here in case we don't actually need to sum the columns, but this code is only run on setup.
if (std::is_same<OutputStage, Requantize32>::value) {
- col_bias = reinterpret_cast<int32_t *>(buffer);
+ col_bias = reinterpret_cast<int32_t *>(in_buffer);
Requantize32 *qp_ptr = reinterpret_cast<Requantize32 *>(&_os);
@@ -213,6 +213,10 @@ public:
compute_col_sums(*qp_ptr, _args._Nsize, _args._Ksize, B + (i * B_multi_stride), ldb, col_bias + (i * _args._Nsize), _args._Ksize, i, 0);
}
}
+ }
+
+ void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ requantize_bias(buffer, B, ldb, B_multi_stride);
// The actual transposed buffer goes after the column sums (if any)
uintptr_t buffer_int = reinterpret_cast<uintptr_t>(buffer);
diff --git a/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp b/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp
index 1e2a9acc1d..ce727032e6 100644
--- a/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp
+++ b/src/core/NEON/kernels/arm_gemm/quantize_wrapper.hpp
@@ -179,13 +179,16 @@ public:
return _subgemm->get_B_pretransposed_array_size() + col_sum_size();
}
+ void requantize_bias(void *in_buffer, const To *B, const int ldb, const int B_multi_stride) override {
+ _col_sums = reinterpret_cast<int32_t *>(in_buffer);
+ col_sums_pretransposed(B, ldb, B_multi_stride);
+ }
+
void pretranspose_B_array(void *buffer, const To *B, const int ldb, const int B_multi_stride) override {
uintptr_t buffer_int = reinterpret_cast<uintptr_t>(buffer);
_subgemm->pretranspose_B_array(reinterpret_cast<void *>(buffer_int + col_sum_size()), B, ldb, B_multi_stride);
- _col_sums = reinterpret_cast<int32_t *>(buffer);
-
- col_sums_pretransposed(B, ldb, B_multi_stride);
+ requantize_bias(buffer, B, ldb, B_multi_stride);
}
void set_pretransposed_B_data(void *buffer) override {