From 58c5794b917dae10ff115dd85ec69e2ca41136c1 Mon Sep 17 00:00:00 2001
From: Gian Marco
Date: Tue, 28 Nov 2017 09:10:03 +0000
Subject: COMPMID-706 - Add GEMMLowp output stage for scaling by a fixed point number

DoD:
- Implement NEON kernel for quantizing down the gemmlowp result. The result should be scaled by a fixed point number
- Implement OpenCL kernel for quantizing down the gemmlowp result. The result should be scaled by a fixed point number
- Add test for validating the result

Required for:
- Integration of GEMMLowp in Android NN
- Convolution quantized
- Fully connected quantized

Change-Id: Ia963d25d695471e963961fb49a5600e78374ac4f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110981
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com
Reviewed-by: Georgios Pinitas
Reviewed-by: Anthony Barbier
---
 .../NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp')

diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 670b11fe67..edd6a9fd80 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -78,7 +78,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITens
         window_changed = window_changed || update_window_and_padding(win, bias_access);
     }
 
-    output_result_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->tensor_shape()));
+    output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
 
     Error err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Error{};
     return std::make_pair(err, win);
@@ -186,15 +186,15 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window)
             }
         };
 
-        // Add the offset terms to GEMM's result and multiply by result_mult_int
-        scale_input(in_s32, result_offset_s32, _result_mult_int);
-
         // Add the bias to GEMM's result
         in_s32.val[0] = vaddq_s32(in_s32.val[0], bias_s32.val[0]);
         in_s32.val[1] = vaddq_s32(in_s32.val[1], bias_s32.val[1]);
         in_s32.val[2] = vaddq_s32(in_s32.val[2], bias_s32.val[2]);
         in_s32.val[3] = vaddq_s32(in_s32.val[3], bias_s32.val[3]);
 
+        // Add the offset terms to GEMM's result and multiply by result_mult_int
+        scale_input(in_s32, result_offset_s32, _result_mult_int);
+
         vst1q_u8(out.ptr(), finalize_quantization(in_s32, result_shift_s32, min_u8, max_u8));
     },
     in, bias, out);
@@ -231,6 +231,10 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ITensor *inp
 {
     // Perform validate step
     ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    // Output auto initialization if not yet initialized
+    auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(DataType::QASYMM8));
+
     ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), (bias != nullptr) ? bias->info() : nullptr, output->info(),
-- 
cgit v1.2.1
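
Note on the reordering in run(): the bias has to be folded into the accumulator before the offset term is added and the multiplier applied, so that the bias is scaled together with the rest of the result. A scalar sketch of the per-element pipeline after this patch; the function and its names are illustrative, not the library's API, and the real kernel does the same work 16 values at a time with NEON intrinsics (vaddq_s32, vst1q_u8, ...):

    #include <algorithm>
    #include <cstdint>

    inline uint8_t quantize_down_scale(int32_t acc,             // one int32 GEMM accumulator
                                       int32_t bias,            // 0 when no bias tensor is given
                                       int32_t result_offset,
                                       int32_t result_mult_int,
                                       int32_t result_shift,
                                       int32_t min_u8,
                                       int32_t max_u8)
    {
        // Bias first, so it is scaled together with the GEMM result (the fix above)
        acc += bias;
        // Add the offset term and apply the integer multiplier
        acc = (acc + result_offset) * result_mult_int;
        // Shift down and clamp to the representable uint8 range
        acc >>= result_shift;
        acc = std::max(std::min(acc, max_u8), min_u8);
        return static_cast<uint8_t>(acc);
    }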
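The commit subject describes the new output stage itself: scaling the int32 accumulator by a fixed point multiplier rather than a plain integer one. The kernels implementing that are not part of the diff shown here (the view is limited to the existing scale kernel), so the following is only a hedged sketch of the underlying fixed-point arithmetic in the widely used gemmlowp scheme, with illustrative names rather than the library's API: the accumulator is multiplied by a normalized Q0.31 multiplier via a saturating rounding doubling high multiply, then divided by a power of two with rounding.

    #include <cstdint>
    #include <limits>

    // (a * b * 2) >> 32 with rounding, saturating the single overflow
    // case (INT32_MIN * INT32_MIN)
    inline int32_t saturating_rounding_doubling_high_mul(int32_t a, int32_t b)
    {
        if (a == std::numeric_limits<int32_t>::min() && b == std::numeric_limits<int32_t>::min())
        {
            return std::numeric_limits<int32_t>::max();
        }
        const int64_t ab    = static_cast<int64_t>(a) * static_cast<int64_t>(b);
        const int64_t nudge = ab >= 0 ? (1ll << 30) : (1 - (1ll << 30));
        return static_cast<int32_t>((ab + nudge) / (1ll << 31));
    }

    // Divide by 2^exponent, rounding to nearest (ties away from zero)
    inline int32_t rounding_divide_by_pow2(int32_t x, int exponent)
    {
        const int32_t mask      = (int32_t(1) << exponent) - 1;
        const int32_t remainder = x & mask;
        const int32_t threshold = (mask >> 1) + (x < 0 ? 1 : 0);
        return (x >> exponent) + (remainder > threshold ? 1 : 0);
    }

    // Scale an int32 accumulator by a Q0.31 fixed-point multiplier, then shift
    inline int32_t scale_by_fixedpoint(int32_t acc, int32_t multiplier, int shift)
    {
        return rounding_divide_by_pow2(saturating_rounding_doubling_high_mul(acc, multiplier), shift);
    }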