author    Gian Marco <gianmarco.iodice@arm.com>    2017-11-28 09:10:03 +0000
committer Anthony Barbier <anthony.barbier@arm.com>    2018-11-02 16:41:58 +0000
commit    58c5794b917dae10ff115dd85ec69e2ca41136c1 (patch)
tree      f2cea2d94e6566be720256dc6105056798723699 /src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
parent    754e9526a7caf50876c2db9563dc72f096093b34 (diff)
download  ComputeLibrary-58c5794b917dae10ff115dd85ec69e2ca41136c1.tar.gz
COMPMID-706 - Add GEMMLowp output stage for scaling by a fixed point number
DoD:
- Implement NEON kernel for quantizing down the gemmlowp result. The result should be scaled by a fixed point number
- Implement OpenCL kernel for quantizing down the gemmlowp result. The result should be scaled by a fixed point number
- Add test for validating the result

Required for:
- Integration of GEMMLowp in Android NN
- Convolution quantized
- Fully connected quantized

Change-Id: Ia963d25d695471e963961fb49a5600e78374ac4f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/110981
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
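For context, the output stage described above scales the int32 GEMMLowp accumulator by a fixed point multiplier before clamping to uint8. Below is a minimal scalar sketch of that idea, assuming the usual gemmlowp-style requantization (a Q0.31 multiplier followed by a rounding right shift and an output offset); all names are illustrative, and the real NEON and OpenCL kernels vectorize this and may differ in rounding details.

// Illustrative scalar version of "quantize down int32 -> uint8, scaled by a fixed point number".
#include <algorithm>
#include <cstdint>

inline uint8_t quantize_down_fixedpoint(int32_t acc,        // int32 GEMMLowp accumulator
                                        int32_t multiplier, // fixed point multiplier in Q0.31
                                        int     shift,      // additional rounding right shift
                                        int32_t offset,     // output zero point
                                        int32_t min_u8 = 0,
                                        int32_t max_u8 = 255)
{
    // Rounding multiply by the Q0.31 multiplier: round(acc * multiplier / 2^31).
    int64_t v = (static_cast<int64_t>(acc) * multiplier + (1LL << 30)) >> 31;
    // Rounding right shift by 'shift'.
    if (shift > 0)
    {
        v = (v + (1LL << (shift - 1))) >> shift;
    }
    // Add the output offset and saturate to the uint8 (or bounded ReLU) range.
    v += offset;
    v = std::min<int64_t>(std::max<int64_t>(v, min_u8), max_u8);
    return static_cast<uint8_t>(v);
}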
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp')
-rw-r--r--  src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp  |  12
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
index 670b11fe67..edd6a9fd80 100644
--- a/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel.cpp
@@ -78,7 +78,7 @@ std::pair<Error, Window> validate_and_configure_window(ITensorInfo *input, ITens
window_changed = window_changed || update_window_and_padding(win, bias_access);
}
- output_result_access.set_valid_region(win, ValidRegion(Coordinates(0, 0), output->tensor_shape()));
+ output_result_access.set_valid_region(win, ValidRegion(Coordinates(), output->tensor_shape()));
Error err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Error{};
return std::make_pair(err, win);
@@ -186,15 +186,15 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::run(const Window &window)
}
};
- // Add the offset terms to GEMM's result and multiply by result_mult_int
- scale_input(in_s32, result_offset_s32, _result_mult_int);
-
// Add the bias to GEMM's result
in_s32.val[0] = vaddq_s32(in_s32.val[0], bias_s32.val[0]);
in_s32.val[1] = vaddq_s32(in_s32.val[1], bias_s32.val[1]);
in_s32.val[2] = vaddq_s32(in_s32.val[2], bias_s32.val[2]);
in_s32.val[3] = vaddq_s32(in_s32.val[3], bias_s32.val[3]);
+ // Add the offset terms to GEMM's result and multiply by result_mult_int
+ scale_input(in_s32, result_offset_s32, _result_mult_int);
+
vst1q_u8(out.ptr(), finalize_quantization<is_bounded_relu>(in_s32, result_shift_s32, min_u8, max_u8));
},
in, bias, out);
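With the reorder above, the bias is added to the int32 accumulator before the offset is added and the integer scale is applied, so the bias is also scaled and shifted along with the GEMM result. A scalar sketch of the per-element path this hunk produces, using hypothetical helper and parameter names (the kernel itself processes 16 values at a time with NEON intrinsics and applies the clamp range only when is_bounded_relu is set):

#include <algorithm>
#include <cstdint>

inline uint8_t quantize_down_scale(int32_t acc, int32_t bias, int32_t result_offset,
                                   int32_t result_mult_int, int32_t result_shift,
                                   int32_t min_u8 = 0, int32_t max_u8 = 255)
{
    int32_t v = acc + bias;                             // bias is applied first now
    v         = (v + result_offset) * result_mult_int;  // then offset and integer scale
    v         = v >> result_shift;                      // quantize down
    v         = std::max(min_u8, std::min(max_u8, v));  // saturate to the output range
    return static_cast<uint8_t>(v);
}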
@@ -231,6 +231,10 @@ void NEGEMMLowpQuantizeDownInt32ToUint8ScaleKernel::configure(const ITensor *inp
{
// Perform validate step
ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Output auto initialization if not yet initialized
+ auto_init_if_empty(*output->info(), input->info()->clone()->set_data_type(DataType::QASYMM8));
+
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(),
(bias != nullptr) ? bias->info() : nullptr,
output->info(),
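The auto-initialization added in this hunk allows configure() to be called with an output whose tensor info has not been set up yet: in that case the output inherits the input's metadata and is forced to the QASYMM8 data type this kernel produces. A self-contained sketch of that behaviour, using a hypothetical TensorDesc type instead of the arm_compute classes:

#include <vector>

enum class DataTypeSketch { S32, QASYMM8 };

struct TensorDesc
{
    std::vector<size_t> shape;                       // empty shape == not yet initialized
    DataTypeSketch      type = DataTypeSketch::S32;
};

// Mimics the spirit of auto_init_if_empty: only touch the output if it is still empty.
void auto_init_output_sketch(TensorDesc &output, const TensorDesc &input)
{
    if (output.shape.empty())
    {
        output      = input;                   // inherit shape and other metadata
        output.type = DataTypeSketch::QASYMM8; // the kernel writes quantized uint8 values
    }
}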