aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Michalis Spyrou <michalis.spyrou@arm.com> 2020-07-16 11:47:12 +0100
committer Michalis Spyrou <michalis.spyrou@arm.com> 2020-07-16 13:08:33 +0000
commit 85b7599eb1dd5c9f5a61ac70a7b18e908b4ac536 (patch)
tree a94d262ca2ff5a01ff51490f338486e17cb81c16
parent 6977b378174af882750fc58020258077c4875056 (diff)
download ComputeLibrary-85b7599eb1dd5c9f5a61ac70a7b18e908b4ac536.tar.gz
COMPMID-3324: ADD CTS test failing with data type QUANT8
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Change-Id: I744b1916801c6d299be24e48da2e82548c3bf514
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3582
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp 14
1 files changed, 10 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
index fc211f7b42..0ad4b3f12e 100644
--- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp
@@ -156,11 +156,7 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor
const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform();
- const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale);
- const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale);
const float32x4_t invvscaleo = vdupq_n_f32(1.f / oq_info.scale);
- const int32x4_t voffset1 = vdupq_n_s32(iq1_info.offset);
- const int32x4_t voffset2 = vdupq_n_s32(iq2_info.offset);
const float32x4_t voffseto = vdupq_n_f32(oq_info.offset);
if(is_broadcast_across_x)
@@ -173,6 +169,11 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor
const UniformQuantizationInfo broadcast_qinfo = broadcast_tensor->info()->quantization_info().uniform();
const UniformQuantizationInfo non_broadcast_qinfo = non_broadcast_tensor->info()->quantization_info().uniform();
+ const float32x4_t vscale1 = is_broadcast_input_2 ? vdupq_n_f32(iq1_info.scale) : vdupq_n_f32(iq2_info.scale);
+ const float32x4_t vscale2 = is_broadcast_input_2 ? vdupq_n_f32(iq2_info.scale) : vdupq_n_f32(iq1_info.scale);
+ const int32x4_t voffset1 = is_broadcast_input_2 ? vdupq_n_s32(iq1_info.offset) : vdupq_n_s32(iq2_info.offset);
+ const int32x4_t voffset2 = is_broadcast_input_2 ? vdupq_n_s32(iq2_info.offset) : vdupq_n_s32(iq1_info.offset);
+
// Clear X Dimension on execution window as we handle manually
non_broadcast_win.set(Window::DimX, Window::Dimension(0, 1, 1));
@@ -255,6 +256,11 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor
Iterator input2(in2, input2_win);
Iterator output(out, win);
+ const float32x4_t vscale1 = vdupq_n_f32(iq1_info.scale);
+ const float32x4_t vscale2 = vdupq_n_f32(iq2_info.scale);
+ const int32x4_t voffset1 = vdupq_n_s32(iq1_info.offset);
+ const int32x4_t voffset2 = vdupq_n_s32(iq2_info.offset);
+
execute_window_loop(win, [&](const Coordinates &)
{
const auto input1_ptr = reinterpret_cast<const uint8_t *>(input1.ptr());