diff options
author | Michalis Spyrou <michalis.spyrou@arm.com> | 2019-12-04 12:00:36 +0000 |
---|---|---|
committer | Michalis Spyrou <michalis.spyrou@arm.com> | 2019-12-05 16:39:18 +0000 |
commit | 6f58b37a18cfade5dbec38638926f7bd368756d9 (patch) | |
tree | 65ab884fb1a5bce325db554cbcb73768907043ae /src/core/NEON | |
parent | 8d4d1b85bc57d5f76f3939bb422e44df68dc2342 (diff) | |
download | ComputeLibrary-6f58b37a18cfade5dbec38638926f7bd368756d9.tar.gz |
COMPMID-2798 Add support for QASYMM8_SIGNED in NEArithmeticSubtraction
Change-Id: Ib90e0ce46f8dc006827d9ee9d95cf14e8b7832ad
Signed-off-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2415
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/NEON')
-rw-r--r-- | src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp | 46 |
1 files changed, 41 insertions, 5 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp index 8874b52e19..7a2601be26 100644 --- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp +++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp @@ -113,6 +113,38 @@ void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2 input1, input2, output); } +void sub_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) +{ + Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape())); + Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape())); + Iterator output(out, window); + + const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform(); + const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform(); + const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform(); + + execute_window_loop(window, [&](const Coordinates &) + { + const float32x4x4_t ta1 = vdequantize(vld1q_s8(reinterpret_cast<const qasymm8_signed_t *>(input1.ptr())), iq1_info); + const float32x4x4_t ta2 = vdequantize(vld1q_s8(reinterpret_cast<const qasymm8_signed_t *>(input2.ptr())), iq2_info); + + const float32x4x4_t ta3 = + { + { + vsubq_f32(ta1.val[0], ta2.val[0]), + vsubq_f32(ta1.val[1], ta2.val[1]), + vsubq_f32(ta1.val[2], ta2.val[2]), + vsubq_f32(ta1.val[3], ta2.val[3]), + } + }; + + const int8x16_t result = vquantize_signed(ta3, oq_info); + + vst1q_s8(reinterpret_cast<qasymm8_signed_t *>(output.ptr()), result); + }, + input1, input2, output); +} + void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window) { Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape())); @@ -357,9 +389,9 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i { ARM_COMPUTE_UNUSED(policy); ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32); const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); @@ -367,6 +399,7 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i ARM_COMPUTE_RETURN_ERROR_ON_MSG( !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8) && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8) + && !(input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED) && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8) && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16) && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8) @@ -376,8 +409,9 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i "You called subtract with the wrong image formats"); ARM_COMPUTE_RETURN_ERROR_ON_MSG( - input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP, - "Convert policy cannot be WRAP if datatype is QASYMM8"); + input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED && policy == ConvertPolicy::WRAP + && input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP, + "Convert policy cannot be WRAP if datatype is QASYMM8 or QASYMM8_SIGNED"); // Validate in case of configured output if(output.total_size() > 0) @@ -385,6 +419,7 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i ARM_COMPUTE_RETURN_ERROR_ON_MSG( !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8) && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8) + && !(input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED && output.data_type() == DataType::QASYMM8_SIGNED) && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16) && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16) && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16) @@ -463,6 +498,7 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens { "sub_saturate_U8_U8_U8", &sub_saturate_U8_U8_U8 }, { "sub_saturate_U8_U8_S16", &sub_saturate_U8_U8_S16 }, { "sub_saturate_QASYMM8_QASYMM8_QASYMM8", &sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8 }, + { "sub_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &sub_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED }, { "sub_wrap_U8_S16_S16", &sub_wrap_U8_S16_S16 }, { "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 }, { "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 }, |