diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_gemm/quantized.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_gemm/quantized.cpp | 20 |
1 files changed, 10 insertions, 10 deletions
diff --git a/src/core/NEON/kernels/arm_gemm/quantized.cpp b/src/core/NEON/kernels/arm_gemm/quantized.cpp
index 00b42cf422..53e5527a8d 100644
--- a/src/core/NEON/kernels/arm_gemm/quantized.cpp
+++ b/src/core/NEON/kernels/arm_gemm/quantized.cpp
@@ -57,7 +57,7 @@ namespace {
 template<bool do_shift_correction, bool per_channel>
 void requantize_block_32_int(const Requantize32 &qp, unsigned int width, unsigned int height,
                              const int32_t *input, unsigned int in_stride, int8_t *output, unsigned int out_stride,
-                             const int32_t *row_bias, const int32_t *col_bias) {
+                             const int32_t *row_bias, const int32_t *col_bias, const unsigned int start_col) {
     const int32x4_t v_mul = vdupq_n_s32(qp.per_layer_mul);
     const int32x4_t v_shift = vdupq_n_s32(qp.per_layer_shift);
     const int32x4_t v_minval = vdupq_n_s32(qp.minval);
@@ -76,8 +76,8 @@ void requantize_block_32_int(const Requantize32 &qp, unsigned int width, unsigne
         unsigned int odds=(width % 4);
 
         const int32_t *colptr = col_bias;
-        const int32_t *perch_mul_ptr = qp.per_channel_muls;
-        const int32_t *perch_shift_ptr = qp.per_channel_shifts;
+        const int32_t *perch_mul_ptr = qp.per_channel_muls + start_col;
+        const int32_t *perch_shift_ptr = qp.per_channel_shifts + start_col;
 
         const int32_t *in_ptr = input + (row * in_stride);
         int8_t *out_ptr = output + (row * out_stride);
@@ -461,33 +461,33 @@ void requantize_block_32_int(const Requantize32 &qp, unsigned int width, unsigne
 template<typename Tin, typename Tout>
 void requantize_block_32(const Requantize32 &qp, unsigned int width, unsigned int height,
                          const Tin *input, unsigned int in_stride, Tout *output, unsigned int out_stride,
-                         const int32_t *row_bias, const int32_t *col_bias) {
+                         const int32_t *row_bias, const int32_t *col_bias, unsigned int start_col) {
     if (qp.per_channel_requant) {
         if (qp.minval >= qp.c_offset) {
             requantize_block_32_int<false, true>(qp, width, height, reinterpret_cast<const int32_t *>(input), in_stride,
-                                                 reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias);
+                                                 reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias, start_col);
         } else {
             requantize_block_32_int<true, true>(qp, width, height, reinterpret_cast<const int32_t *>(input), in_stride,
-                                                reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias);
+                                                reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias, start_col);
         }
     } else {
         if (qp.minval >= qp.c_offset) {
             requantize_block_32_int<false, false>(qp, width, height, reinterpret_cast<const int32_t *>(input), in_stride,
-                                                  reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias);
+                                                  reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias, start_col);
         } else {
             requantize_block_32_int<true, false>(qp, width, height, reinterpret_cast<const int32_t *>(input), in_stride,
-                                                 reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias);
+                                                 reinterpret_cast<int8_t *>(output), out_stride, row_bias, col_bias, start_col);
         }
     }
 }
 
 template void requantize_block_32(const Requantize32 &qp, unsigned int width, unsigned int height,
                          const int32_t *input, unsigned int in_stride, int8_t *output, unsigned int out_stride,
-                         const int32_t *row_bias, const int32_t *col_bias);
+                         const int32_t *row_bias, const int32_t *col_bias, unsigned int start_col);
 
 template void requantize_block_32(const Requantize32 &qp, unsigned int width, unsigned int height,
                          const uint32_t *input, unsigned int in_stride, uint8_t *output, unsigned int out_stride,
-                         const int32_t *row_bias, const int32_t *col_bias);
+                         const int32_t *row_bias, const int32_t *col_bias, unsigned int start_col);
 
 /*
  * Routine (and helpers) to compute row sums needed for offset correction.