author | Manuel Bottini <manuel.bottini@arm.com> | 2019-02-25 13:50:11 +0000
committer | Manuel Bottini <manuel.bottini@arm.com> | 2019-03-05 09:46:16 +0000
commit | 6a2b6e835459ee91dbdf86be8dfdec0bc2421a84 (patch)
tree | f9324f478f293a5d76a9c8bcc23d10814f406809 /src/core
parent | fc1da1391679c51209c611e95d60569ce4da15cb (diff)
download | ComputeLibrary-6a2b6e835459ee91dbdf86be8dfdec0bc2421a84.tar.gz
COMPMID-2010: Add support for QASYMM8 in NEArithmeticSubtractionKernel
Change-Id: Ica65d5a13f5670d525bbb961a870b23a21d093d9
Signed-off-by: Manuel Bottini <manuel.bottini@arm.com>
Reviewed-on: https://review.mlplatform.org/c/807
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
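
For context, a minimal usage sketch (not part of this patch) of how the new QASYMM8 path could be exercised through the runtime wrapper NEArithmeticSubtraction, which delegates to this kernel. Tensor shapes and quantization parameters below are illustrative assumptions, not values taken from the change:

```cpp
// Illustrative sketch only: shapes and QuantizationInfo values are made up.
#include "arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor a, b, out;

    // All three tensors use DataType::QASYMM8, each with its own quantization info.
    const TensorShape shape(16U, 16U);
    a.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    b.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.25f, 5)));
    out.allocator()->init(TensorInfo(shape, 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));

    // The validation added in this patch rejects ConvertPolicy::WRAP for QASYMM8,
    // so SATURATE is the only accepted policy for this data type.
    NEArithmeticSubtraction sub;
    sub.configure(&a, &b, &out, ConvertPolicy::SATURATE);

    a.allocator()->allocate();
    b.allocator()->allocate();
    out.allocator()->allocate();

    // ... fill a and b with quantized data ...

    sub.run();
    return 0;
}
```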
Diffstat (limited to 'src/core')
-rw-r--r-- | src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp | 54 |
1 file changed, 50 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index ff8fb84958..ff5893de96 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -27,6 +27,7 @@
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
 #include "arm_compute/core/NEON/NEFixedPoint.h"
 #include "arm_compute/core/TensorInfo.h"
 #include "arm_compute/core/Validate.h"
@@ -80,6 +81,34 @@ void sub_saturate_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out,
     input1, input2, output);
 }
 
+void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+    Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
+    Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
+    Iterator output(out, window);
+
+    execute_window_loop(window, [&](const Coordinates & id)
+    {
+        const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), in1->info()->quantization_info());
+        const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), in2->info()->quantization_info());
+
+        const float32x4x4_t ta3 =
+        {
+            {
+                vsubq_f32(ta1.val[0], ta2.val[0]),
+                vsubq_f32(ta1.val[1], ta2.val[1]),
+                vsubq_f32(ta1.val[2], ta2.val[2]),
+                vsubq_f32(ta1.val[3], ta2.val[3]),
+            }
+        };
+
+        const uint8x16_t result = vquantize(ta3, out->info()->quantization_info());
+
+        vst1q_u8(reinterpret_cast<qasymm8_t *>(output.ptr()), result);
+    },
+    input1, input2, output);
+}
+
 void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
 {
     Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
@@ -324,18 +353,34 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i
 {
     ARM_COMPUTE_UNUSED(policy);
     ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
-    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
 
     const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
     ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
 
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+        !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8)
+        && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8)
+        && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8)
+        && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16)
+        && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8)
+        && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16)
+        && !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32)
+        && !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16),
+        "You called subtract with the wrong image formats");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+        input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP,
+        "Convert policy cannot be WRAP if datatype is QASYMM8");
+
     // Validate in case of configured output
     if(output.total_size() > 0)
     {
         ARM_COMPUTE_RETURN_ERROR_ON_MSG(
             !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8)
+            && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8)
             && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
             && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
             && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
@@ -413,6 +458,7 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
         { "sub_wrap_U8_U8_S16", &sub_wrap_U8_U8_S16 },
         { "sub_saturate_U8_U8_U8", &sub_saturate_U8_U8_U8 },
         { "sub_saturate_U8_U8_S16", &sub_saturate_U8_U8_S16 },
+        { "sub_saturate_QASYMM8_QASYMM8_QASYMM8", &sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8 },
         { "sub_wrap_U8_S16_S16", &sub_wrap_U8_S16_S16 },
         { "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 },
         { "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 },
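
The new QASYMM8 kernel dequantizes 16 elements of each input to float32x4x4_t, subtracts in floating point, and requantizes with the output tensor's QuantizationInfo. Ignoring vectorization, the per-element arithmetic is roughly the following scalar sketch (an illustration, not code from the library; rounding details may differ slightly from the NEON vquantize path):

```cpp
#include <algorithm>
#include <cmath>
#include <cstdint>

// Scalar sketch of the dequantize -> subtract -> requantize sequence that the
// NEON path above performs 16 elements at a time with vdequantize/vsubq_f32/vquantize.
// scale/offset correspond to each tensor's QuantizationInfo.
uint8_t qasymm8_sub_saturate(uint8_t a, float a_scale, int a_offset,
                             uint8_t b, float b_scale, int b_offset,
                             float out_scale, int out_offset)
{
    const float fa   = (static_cast<int>(a) - a_offset) * a_scale; // dequantize input 1
    const float fb   = (static_cast<int>(b) - b_offset) * b_scale; // dequantize input 2
    const float diff = fa - fb;                                    // subtract in float

    // Requantize and saturate to the [0, 255] range of QASYMM8.
    const int q = static_cast<int>(std::lround(diff / out_scale)) + out_offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}
```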