aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorManuel Bottini <manuel.bottini@arm.com>2019-02-25 13:50:11 +0000
committerManuel Bottini <manuel.bottini@arm.com>2019-03-05 09:46:16 +0000
commit6a2b6e835459ee91dbdf86be8dfdec0bc2421a84 (patch)
treef9324f478f293a5d76a9c8bcc23d10814f406809 /src
parentfc1da1391679c51209c611e95d60569ce4da15cb (diff)
downloadComputeLibrary-6a2b6e835459ee91dbdf86be8dfdec0bc2421a84.tar.gz
COMPMID-2010: Add support for QASYMM8 in NEArithmeticSubtractionKernel
Change-Id: Ica65d5a13f5670d525bbb961a870b23a21d093d9 Signed-off-by: Manuel Bottini <manuel.bottini@arm.com> Reviewed-on: https://review.mlplatform.org/c/807 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Pablo Marquez <pablo.tello@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp54
1 files changed, 50 insertions, 4 deletions
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index ff8fb84958..ff5893de96 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -27,6 +27,7 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/NEON/NEAsymm.h"
#include "arm_compute/core/NEON/NEFixedPoint.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
@@ -80,6 +81,34 @@ void sub_saturate_U8_U8_U8(const ITensor *in1, const ITensor *in2, ITensor *out,
input1, input2, output);
}
+void sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+ Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
+ Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
+ Iterator output(out, window);
+
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const float32x4x4_t ta1 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input1.ptr())), in1->info()->quantization_info());
+ const float32x4x4_t ta2 = vdequantize(vld1q_u8(reinterpret_cast<const qasymm8_t *>(input2.ptr())), in2->info()->quantization_info());
+
+ const float32x4x4_t ta3 =
+ {
+ {
+ vsubq_f32(ta1.val[0], ta2.val[0]),
+ vsubq_f32(ta1.val[1], ta2.val[1]),
+ vsubq_f32(ta1.val[2], ta2.val[2]),
+ vsubq_f32(ta1.val[3], ta2.val[3]),
+ }
+ };
+
+ const uint8x16_t result = vquantize(ta3, out->info()->quantization_info());
+
+ vst1q_u8(reinterpret_cast<qasymm8_t *>(output.ptr()), result);
+ },
+ input1, input2, output);
+}
+
void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
@@ -324,18 +353,34 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i
{
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::S16, DataType::F16, DataType::F32);
const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8)
+ && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8)
+ && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8)
+ && !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16)
+ && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8)
+ && !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::S16)
+ && !(input1.data_type() == DataType::F32 && input2.data_type() == DataType::F32)
+ && !(input1.data_type() == DataType::F16 && input2.data_type() == DataType::F16),
+ "You called subtract with the wrong image formats");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP,
+ "Convert policy cannot be WRAP if datatype is QASYMM8");
+
// Validate in case of configured output
if(output.total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
!(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8)
+ && !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
@@ -413,6 +458,7 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
{ "sub_wrap_U8_U8_S16", &sub_wrap_U8_U8_S16 },
{ "sub_saturate_U8_U8_U8", &sub_saturate_U8_U8_U8 },
{ "sub_saturate_U8_U8_S16", &sub_saturate_U8_U8_S16 },
+ { "sub_saturate_QASYMM8_QASYMM8_QASYMM8", &sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8 },
{ "sub_wrap_U8_S16_S16", &sub_wrap_U8_S16_S16 },
{ "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 },
{ "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 },