From 433ea4981675b64c44c8f47f2f4aac6bfcbfc911 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 26 May 2021 15:32:50 +0100 Subject: Optimize int8 arithmetic addition on CPU Avoid accessing quantization info from TensorInfo in leftover loop. Use the already available UniformQuantizationInfo instead Create another version of the quantize utility function which assumes RoundingPolicy::TO_NEAREST_UP. This allows us to call std::lround() and avoid some overhead Resolve COMPMID-4546 Signed-off-by: Giorgio Arena Change-Id: Ib481a586f879b7e937e3d54ba11100d0a37ef277 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5722 Comments-Addressed: Arm Jenkins Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- src/core/cpu/kernels/add/neon/qasymm8.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'src') diff --git a/src/core/cpu/kernels/add/neon/qasymm8.cpp b/src/core/cpu/kernels/add/neon/qasymm8.cpp index cc97f0067c..e357a7ef7f 100644 --- a/src/core/cpu/kernels/add/neon/qasymm8.cpp +++ b/src/core/cpu/kernels/add/neon/qasymm8.cpp @@ -199,7 +199,7 @@ void add_qasymm8_neon(const ITensor *src0, const ITensor *src1, ITensor *dst, co { const float afs = static_cast<float>((*(input1_ptr + x)) - iq1_info.offset) * iq1_info.scale; const float bfs = static_cast<float>((*(input2_ptr + x)) - iq2_info.offset) * iq2_info.scale; - *(output_ptr + x) = quantize_qasymm8((afs + bfs), dst->info()->quantization_info()); + *(output_ptr + x) = quantize_qasymm8((afs + bfs), oq_info); } }, input1, input2, output); -- cgit v1.2.1