From d7341fb9e3b24b904edf7ac9d83e1e063bc77765 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 12 Nov 2020 15:05:01 +0000 Subject: COMPMID-3960: Mismatch on NEArithmeticSubtraction Corner-case failure when both input shapes had unit shape on the X axis. Broadcasting was enabled leading to invalid window execution. Check is updated to cross-validate the presence of broadcasting by checking the X dimension in both input shapes. Signed-off-by: Georgios Pinitas Change-Id: I0b79542279e8d155d2661fddff9691d94a1f6855 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4391 Tested-by: Arm Jenkins Reviewed-by: Gian Marco Iodice Comments-Addressed: Arm Jenkins --- src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp | 8 ++++---- src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp | 6 +++--- src/core/NEON/kernels/NEElementwiseOperationKernel.cpp | 8 ++++---- src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp | 8 ++++---- 4 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp index fa26b903f1..aa7af54e9c 100644 --- a/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticAdditionKernel.cpp @@ -56,7 +56,7 @@ void add_same(const ITensor *in1, const ITensor *in2, ITensor *out, const Conver constexpr int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); if(is_broadcast_across_x) { @@ -152,7 +152,7 @@ void add_QASYMM8_QASYMM8_QASYMM8(const ITensor *in1, const ITensor *in2, ITensor const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); @@ -345,7 +345,7 @@ void add_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED(const ITensor *in1, const const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); @@ -537,7 +537,7 @@ void add_QSYMM16_QSYMM16_QSYMM16(const ITensor *in1, const ITensor *in2, ITensor const int window_step_x = 8; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index bdd356ad7f..187e97dd49 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -67,7 +67,7 @@ void sub_same(const ITensor *in1, const ITensor *in2, ITensor *out, const Window constexpr int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape())); Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape())); @@ -178,7 +178,7 @@ void sub_quantized(const ITensor *in1, const ITensor *in2, ITensor *out, const W const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); @@ -372,7 +372,7 @@ void sub_QSYMM16_QSYMM16_QSYMM16(const ITensor *in1, const ITensor *in2, ITensor const int window_step_x = 8; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); diff --git a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp index bb4e9a67b6..412ae247cb 100644 --- a/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp +++ b/src/core/NEON/kernels/NEElementwiseOperationKernel.cpp @@ -656,7 +656,7 @@ void elementwise_op(const ITensor *in1, const ITensor *in2, ITensor *out, const const int window_step_x = std::min(16 / static_cast(sizeof(OutputScalarType)), 8); const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); if(is_broadcast_across_x) { @@ -735,7 +735,7 @@ void elementwise_op_quantized(const ITensor *in1, const ITensor *in2, ITensor *o const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); @@ -843,7 +843,7 @@ void elementwise_comp_quantized_signed(const ITensor *in1, const ITensor *in2, I const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); @@ -950,7 +950,7 @@ void elementwise_op_quantized_signed(const ITensor *in1, const ITensor *in2, ITe const int window_step_x = 16; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo output_qinfo = out->info()->quantization_info().uniform(); diff --git a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp index f646ea5db7..39517f6ff6 100644 --- a/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp +++ b/src/core/NEON/kernels/NEPixelWiseMultiplicationKernel.cpp @@ -156,7 +156,7 @@ void mul_saturate_quantized_8(const ITensor *in1, const ITensor *in2, ITensor *o const int window_step_x = 16 / sizeof(T); const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); const UniformQuantizationInfo output_qua_info = out->info()->quantization_info().uniform(); const UniformQuantizationInfo tmp_qua_info = { output_qua_info.scale / scale, output_qua_info.offset }; @@ -785,7 +785,7 @@ void mul_S32_S32_S32(const ITensor *in1, const ITensor *in2, ITensor *out, const const int window_step_x = 8; const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); if(is_broadcast_across_x) { @@ -935,7 +935,7 @@ void mul_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const constexpr int window_step_x = 16 / sizeof(float); const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); using ExactTagType = typename wrapper::traits::neon_vector::tag_type; @@ -1033,7 +1033,7 @@ void c_mul_F32_F32_F32_n(const ITensor *in1, const ITensor *in2, ITensor *out, c constexpr int window_step_x = 8 / sizeof(float); const auto window_start_x = static_cast(window.x().start()); const auto window_end_x = static_cast(window.x().end()); - const bool is_broadcast_across_x = (input1_win.x().step() == 0) || (input2_win.x().step() == 0); + const bool is_broadcast_across_x = in1->info()->tensor_shape().x() != in2->info()->tensor_shape().x(); using ExactTagType = typename wrapper::traits::neon_vector::tag_type; -- cgit v1.2.1