From 433ea4981675b64c44c8f47f2f4aac6bfcbfc911 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 26 May 2021 15:32:50 +0100 Subject: Optimize int8 arithmetic addition on CPU Avoid accessing quantization info from TensorInfo in leftover loop. Use the already available UniformQuantizationInfo instead Create another version of the quantize utility function which assumes RoundingPolicy::TO_NEAREST_UP. This allows us to call std::lround() and avoid some overhead Resolve COMPMID-4546 Signed-off-by: Giorgio Arena Change-Id: Ib481a586f879b7e937e3d54ba11100d0a37ef277 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5722 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- src/core/cpu/kernels/add/neon/qasymm8.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/core/cpu/kernels/add/neon/qasymm8.cpp b/src/core/cpu/kernels/add/neon/qasymm8.cpp index cc97f0067c..e357a7ef7f 100644 --- a/src/core/cpu/kernels/add/neon/qasymm8.cpp +++ b/src/core/cpu/kernels/add/neon/qasymm8.cpp @@ -199,7 +199,7 @@ void add_qasymm8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, co { const float afs = static_cast<float>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale; const float bfs = static_cast<float>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale; - *(output_ptr + x) = quantize_qasymm8((afs + bfs), dst->info()->quantization_info()); + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, input1, input2, output); -- cgit v1.2.1