aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorMichele Di Giorgio <michele.digiorgio@arm.com>2020-03-27 10:23:44 +0000
committerMichele Di Giorgio <michele.digiorgio@arm.com>2020-03-27 14:28:44 +0000
commit9f2403ff462c3725a03df68484dc43df6f011ab0 (patch)
tree605a46d3ba0da961b3fecca94f072c741e771ccc
parent35fcc4345b6468139a7199a48f75f70d19ea0d31 (diff)
downloadComputeLibrary-9f2403ff462c3725a03df68484dc43df6f011ab0.tar.gz
COMPMID-3237: Add support for QSYMM16 ArithmeticSubtraction on NEON
Change-Id: Ib38796e52665233351b181bf3417eb5650ad7ca7 Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/2939 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h24
-rw-r--r--arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h12
-rw-r--r--src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp188
-rw-r--r--tests/validation/NEON/ArithmeticSubtraction.cpp51
-rw-r--r--tests/validation/fixtures/ArithmeticOperationsFixture.h30
5 files changed, 194 insertions, 111 deletions
diff --git a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
index e90c8b5fa2..919c685886 100644
--- a/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
+++ b/arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -66,20 +66,20 @@ public:
* - (F16,F16) -> F16
* - (F32,F32) -> F32
*
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32.
- * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is QASYMM8/QASYMM8_SIGNED
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
+ * @param[in] policy Overflow policy. Convert policy cannot be WRAP if datatype is quantized.
*/
void configure(const ITensor *input1, const ITensor *input2, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtractionKernel
*
* @note Convert policy cannot be WRAP if datatype is QASYMM8
*
- * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/S16/F16/F32
- * @param[in] output Output tensor. Data types supported: U8/QASYMM8/S16/F16/F32
- * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is QASYMM8
+ * @param[in] input1 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] input2 An input tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] output The output tensor info. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
+ * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
*
* @return a status
*/
@@ -92,9 +92,9 @@ public:
private:
/** Common signature for all the specialised sub functions
*
- * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32.
+ * @param[in] input1 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] input2 An input tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[out] output The output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32.
* @param[in] window Region on which to execute the kernel.
*/
using SubFunction = void(const ITensor *input1, const ITensor *input2, ITensor *output, const Window &window);
diff --git a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
index e2c6496416..c8c3fd3d2f 100644
--- a/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
+++ b/arm_compute/runtime/NEON/functions/NEArithmeticSubtraction.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -45,10 +45,10 @@ class NEArithmeticSubtraction : public INESimpleFunction
public:
/** Initialise the kernel's inputs, output and conversion policy.
*
- * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is QASYMM8.
+ * @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[out] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/QSYMM16/S16/F16/F32
+ * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
*/
void configure(ITensor *input1, ITensor *input2, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NEArithmeticSubtraction
@@ -56,7 +56,7 @@ public:
* @param[in] input1 First tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
* @param[in] input2 Second tensor input. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
* @param[in] output Output tensor. Data types supported: U8/QASYMM8/QASYMM8_SIGNED/S16/F16/F32
- * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is QASYMM8
+ * @param[in] policy Policy to use to handle overflow. Convert policy cannot be WRAP if datatype is quantized.
*
* @return a status
*/
diff --git a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
index 0695c94927..9b7b235c9f 100644
--- a/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
+++ b/src/core/NEON/kernels/NEArithmeticSubtractionKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -24,27 +24,13 @@
#include "arm_compute/core/NEON/kernels/NEArithmeticSubtractionKernel.h"
#include "arm_compute/core/CPP/Validate.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/ITensor.h"
#include "arm_compute/core/NEON/NEAsymm.h"
-#include "arm_compute/core/NEON/NEFixedPoint.h"
+#include "arm_compute/core/NEON/NESymm.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Validate.h"
-#include <algorithm>
-#include <arm_neon.h>
-#include <cstdint>
-#include <map>
-#include <string>
-
-using namespace arm_compute;
-
namespace arm_compute
{
-class Coordinates;
-} // namespace arm_compute
-
namespace
{
constexpr unsigned int num_elems_processed_per_iteration = 16;
@@ -145,6 +131,53 @@ void sub_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED(const ITensor *in
input1, input2, output);
}
+void sub_saturate_QSYMM16_QSYMM16_QSYMM16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
+{
+ Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
+ Iterator input2(in2, window.broadcast_if_dimension_le_one(in2->info()->tensor_shape()));
+ Iterator output(out, window);
+
+ const UniformQuantizationInfo iq1_info = in1->info()->quantization_info().uniform();
+ const UniformQuantizationInfo iq2_info = in2->info()->quantization_info().uniform();
+ const UniformQuantizationInfo oq_info = out->info()->quantization_info().uniform();
+
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int16x8x2_t in1_s16 =
+ {
+ {
+ vld1q_s16(reinterpret_cast<const qsymm16_t *>(input1.ptr())),
+ vld1q_s16(reinterpret_cast<const qsymm16_t *>(input1.ptr()) + 8),
+ }
+ };
+ const int16x8x2_t in2_s16 =
+ {
+ {
+ vld1q_s16(reinterpret_cast<const qsymm16_t *>(input2.ptr())),
+ vld1q_s16(reinterpret_cast<const qsymm16_t *>(input2.ptr()) + 8),
+ }
+ };
+ const float32x4x4_t ta1 = vdequantize(in1_s16, iq1_info);
+ const float32x4x4_t ta2 = vdequantize(in2_s16, iq2_info);
+
+ const float32x4x4_t ta3 =
+ {
+ {
+ vsubq_f32(ta1.val[0], ta2.val[0]),
+ vsubq_f32(ta1.val[1], ta2.val[1]),
+ vsubq_f32(ta1.val[2], ta2.val[2]),
+ vsubq_f32(ta1.val[3], ta2.val[3]),
+ }
+ };
+
+ const int16x8x2_t result = vquantize_qsymm16(ta3, oq_info);
+
+ vst1q_s16(reinterpret_cast<qsymm16_t *>(output.ptr()), result.val[0]);
+ vst1q_s16(reinterpret_cast<qsymm16_t *>(output.ptr()) + 8, result.val[1]);
+ },
+ input1, input2, output);
+}
+
void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
Iterator input1(in1, window.broadcast_if_dimension_le_one(in1->info()->tensor_shape()));
@@ -153,8 +186,20 @@ void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out,
execute_window_loop(window, [&](const Coordinates &)
{
- const int16x8x2_t ta1 = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
- const int16x8x2_t ta2 = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
+ const int16x8x2_t ta1 =
+ {
+ {
+ vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr())),
+ vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8),
+ }
+ };
+ const int16x8x2_t ta2 =
+ {
+ {
+ vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr())),
+ vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8),
+ }
+ };
const int16x8x2_t ta3 =
{
@@ -164,7 +209,8 @@ void sub_wrap_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *out,
}
};
- vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3);
+ vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3.val[0]);
+ vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, ta3.val[1]);
},
input1, input2, output);
}
@@ -177,8 +223,20 @@ void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *o
execute_window_loop(window, [&](const Coordinates &)
{
- const int16x8x2_t ta1 = vld2q_s16(reinterpret_cast<const int16_t *>(input1.ptr()));
- const int16x8x2_t ta2 = vld2q_s16(reinterpret_cast<const int16_t *>(input2.ptr()));
+ const int16x8x2_t ta1 =
+ {
+ {
+ vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr())),
+ vld1q_s16(reinterpret_cast<const int16_t *>(input1.ptr()) + 8),
+ }
+ };
+ const int16x8x2_t ta2 =
+ {
+ {
+ vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr())),
+ vld1q_s16(reinterpret_cast<const int16_t *>(input2.ptr()) + 8),
+ }
+ };
const int16x8x2_t ta3 =
{
@@ -188,26 +246,12 @@ void sub_saturate_S16_S16_S16(const ITensor *in1, const ITensor *in2, ITensor *o
}
};
- vst2q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3);
+ vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), ta3.val[0]);
+ vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, ta3.val[1]);
},
input1, input2, output);
}
-#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
-inline float16x8x2_t vsub2q_f16(const float16x8x2_t &a, const float16x8x2_t &b)
-{
- const float16x8x2_t res =
- {
- {
- vsubq_f16(a.val[0], b.val[0]),
- vsubq_f16(a.val[1], b.val[1])
- }
- };
-
- return res;
-}
-#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
-
void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const Window &window)
{
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
@@ -217,10 +261,30 @@ void sub_F16_F16_F16(const ITensor *in1, const ITensor *in2, ITensor *out, const
execute_window_loop(window, [&](const Coordinates &)
{
- const float16x8x2_t a = vld2q_f16(reinterpret_cast<const float16_t *>(input1.ptr()));
- const float16x8x2_t b = vld2q_f16(reinterpret_cast<const float16_t *>(input2.ptr()));
+ const float16x8x2_t a =
+ {
+ {
+ vld1q_f16(reinterpret_cast<const float16_t *>(input1.ptr())),
+ vld1q_f16(reinterpret_cast<const float16_t *>(input1.ptr()) + 8),
+ }
+ };
+ const float16x8x2_t b =
+ {
+ {
+ vld1q_f16(reinterpret_cast<const float16_t *>(input2.ptr())),
+ vld1q_f16(reinterpret_cast<const float16_t *>(input2.ptr()) + 8),
+ }
+ };
+ const float16x8x2_t res =
+ {
+ {
+ vsubq_f16(a.val[0], b.val[0]),
+ vsubq_f16(a.val[1], b.val[1]),
+ }
+ };
- vst2q_f16(reinterpret_cast<float16_t *>(output.ptr()), vsub2q_f16(a, b));
+ vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), res.val[0]);
+ vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()) + 8, res.val[1]);
},
input1, input2, output);
#else /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
@@ -240,8 +304,24 @@ void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const
execute_window_loop(window, [&](const Coordinates &)
{
- const float32x4x4_t ta1 = vld4q_f32(reinterpret_cast<const float *>(input1.ptr()));
- const float32x4x4_t ta2 = vld4q_f32(reinterpret_cast<const float *>(input2.ptr()));
+ const float32x4x4_t ta1 =
+ {
+ {
+ vld1q_f32(reinterpret_cast<const float *>(input1.ptr())),
+ vld1q_f32(reinterpret_cast<const float *>(input1.ptr()) + 4),
+ vld1q_f32(reinterpret_cast<const float *>(input1.ptr()) + 8),
+ vld1q_f32(reinterpret_cast<const float *>(input1.ptr()) + 12),
+ }
+ };
+ const float32x4x4_t ta2 =
+ {
+ {
+ vld1q_f32(reinterpret_cast<const float *>(input2.ptr())),
+ vld1q_f32(reinterpret_cast<const float *>(input2.ptr()) + 4),
+ vld1q_f32(reinterpret_cast<const float *>(input2.ptr()) + 8),
+ vld1q_f32(reinterpret_cast<const float *>(input2.ptr()) + 12),
+ }
+ };
const float32x4x4_t ta3 =
{
@@ -253,7 +333,10 @@ void sub_F32_F32_F32(const ITensor *in1, const ITensor *in2, ITensor *out, const
}
};
- vst4q_f32(reinterpret_cast<float *>(output.ptr()), ta3);
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()), ta3.val[0]);
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, ta3.val[1]);
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, ta3.val[2]);
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, ta3.val[3]);
},
input1, input2, output);
}
@@ -389,9 +472,9 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i
{
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(&input1);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input2, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM16, DataType::S16, DataType::F16, DataType::F32);
const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
@@ -400,6 +483,7 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i
!(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8)
&& !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8)
&& !(input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED)
+ && !(input1.data_type() == DataType::QSYMM16 && input2.data_type() == DataType::QSYMM16)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8)
@@ -410,7 +494,8 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i
ARM_COMPUTE_RETURN_ERROR_ON_MSG(
input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED && policy == ConvertPolicy::WRAP
- && input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP,
+        || (input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && policy == ConvertPolicy::WRAP)
+        || (input1.data_type() == DataType::QSYMM16 && input2.data_type() == DataType::QSYMM16 && policy == ConvertPolicy::WRAP),
"Convert policy cannot be WRAP if datatype is QASYMM8 or QASYMM8_SIGNED");
// Validate in case of configured output
@@ -420,6 +505,7 @@ inline Status validate_arguments(const ITensorInfo &input1, const ITensorInfo &i
!(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::U8)
&& !(input1.data_type() == DataType::QASYMM8 && input2.data_type() == DataType::QASYMM8 && output.data_type() == DataType::QASYMM8)
&& !(input1.data_type() == DataType::QASYMM8_SIGNED && input2.data_type() == DataType::QASYMM8_SIGNED && output.data_type() == DataType::QASYMM8_SIGNED)
+ && !(input1.data_type() == DataType::QSYMM16 && input2.data_type() == DataType::QSYMM16 && output.data_type() == DataType::QSYMM16)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::U8 && input2.data_type() == DataType::S16 && output.data_type() == DataType::S16)
&& !(input1.data_type() == DataType::S16 && input2.data_type() == DataType::U8 && output.data_type() == DataType::S16)
@@ -464,6 +550,10 @@ inline std::pair<Status, Window> validate_and_configure_window(ITensorInfo &inpu
{
set_data_type_if_unknown(output, DataType::QASYMM8_SIGNED);
}
+ else if(input1.data_type() == DataType::QSYMM16 || input2.data_type() == DataType::QSYMM16)
+ {
+ set_data_type_if_unknown(output, DataType::QSYMM16);
+ }
}
Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration));
@@ -507,6 +597,7 @@ void NEArithmeticSubtractionKernel::configure(const ITensor *input1, const ITens
{ "sub_saturate_U8_U8_S16", &sub_saturate_U8_U8_S16 },
{ "sub_saturate_QASYMM8_QASYMM8_QASYMM8", &sub_saturate_QAYSMM8_QAYSMM8_QAYSMM8 },
{ "sub_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED", &sub_saturate_QASYMM8_SIGNED_QASYMM8_SIGNED_QASYMM8_SIGNED },
+ { "sub_saturate_QSYMM16_QSYMM16_QSYMM16", &sub_saturate_QSYMM16_QSYMM16_QSYMM16 },
{ "sub_wrap_U8_S16_S16", &sub_wrap_U8_S16_S16 },
{ "sub_wrap_S16_U8_S16", &sub_wrap_S16_U8_S16 },
{ "sub_saturate_U8_S16_S16", &sub_saturate_U8_S16_S16 },
@@ -564,4 +655,5 @@ BorderSize NEArithmeticSubtractionKernel::border_size() const
const unsigned int replicateSize = _output->info()->dimension(0) - std::min(_input1->info()->dimension(0), _input2->info()->dimension(0));
const unsigned int border = std::min<unsigned int>(num_elems_processed_per_iteration - 1U, replicateSize);
return BorderSize{ 0, border, 0, 0 };
-} \ No newline at end of file
+}
+} // namespace arm_compute \ No newline at end of file
diff --git a/tests/validation/NEON/ArithmeticSubtraction.cpp b/tests/validation/NEON/ArithmeticSubtraction.cpp
index a57b113082..e5c2c5fd83 100644
--- a/tests/validation/NEON/ArithmeticSubtraction.cpp
+++ b/tests/validation/NEON/ArithmeticSubtraction.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,10 +44,11 @@ namespace validation
namespace
{
#ifdef __aarch64__
-constexpr AbsoluteTolerance<float> tolerance_qasymm8(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
-#else //__aarch64__
+constexpr AbsoluteTolerance<float> tolerance_qasymm8(0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
+#else //__aarch64__
constexpr AbsoluteTolerance<float> tolerance_qasymm8(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
-#endif //__aarch64__
+#endif //__aarch64__
+constexpr AbsoluteTolerance<int16_t> tolerance_qsymm16(1); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */
/** Input data sets **/
const auto ArithmeticSubtractionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8),
@@ -58,6 +59,10 @@ const auto ArithmeticSubtractionQASYMM8SIGNEDDataset = combine(combine(framework
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)),
framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
+const auto ArithmeticSubtractionQSYMM16Dataset = combine(combine(framework::dataset::make("DataType", DataType::QSYMM16),
+ framework::dataset::make("DataType", DataType::QSYMM16)),
+ framework::dataset::make("DataType", DataType::QSYMM16));
+
const auto ArithmeticSubtractionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8),
framework::dataset::make("DataType", DataType::U8)),
framework::dataset::make("DataType", DataType::U8));
@@ -80,6 +85,9 @@ const auto ArithmeticSubtractionQuantizationInfoDataset = combine(combine(framew
const auto ArithmeticSubtractionQuantizationInfoSignedDataset = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.5f, 10) }),
framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.5f, 20) })),
framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.5f, 50) }));
+const auto ArithmeticSubtractionQuantizationInfoSymmetric = combine(combine(framework::dataset::make("QuantizationInfoIn1", { QuantizationInfo(0.3f, 0) }),
+ framework::dataset::make("QuantizationInfoIn2", { QuantizationInfo(0.7f, 0) })),
+ framework::dataset::make("QuantizationInfoOut", { QuantizationInfo(0.2f, 0) }));
} // namespace
TEST_SUITE(NEON)
@@ -138,16 +146,15 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionFixture<uint8_t>, framew
}
TEST_SUITE_END() // U8
-using NEArithmeticSubtractionQuantFixture = ArithmeticSubtractionQuantValidationFixture<Tensor, Accessor, NEArithmeticSubtraction>;
-using NEArithmeticSubtractionQuantSignedFixture = ArithmeticSubtractionQuantSignedValidationFixture<Tensor, Accessor, NEArithmeticSubtraction>;
+using NEArithmeticSubtractionQASYMM8Fixture = ArithmeticSubtractionValidationQuantizedFixture<Tensor, Accessor, NEArithmeticSubtraction, uint8_t>;
+using NEArithmeticSubtractionQASYMM8SignedFixture = ArithmeticSubtractionValidationQuantizedFixture<Tensor, Accessor, NEArithmeticSubtraction, int8_t>;
+using NEArithmeticSubtractionQSYMM16Fixture = ArithmeticSubtractionValidationQuantizedFixture<Tensor, Accessor, NEArithmeticSubtraction, int16_t>;
TEST_SUITE(Quantized)
TEST_SUITE(QASYMM8)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQuantFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
- datasets::SmallShapes(),
- ArithmeticSubtractionQASYMM8Dataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- ArithmeticSubtractionQuantizationInfoDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8Fixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallShapes(), ArithmeticSubtractionQASYMM8Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ ArithmeticSubtractionQuantizationInfoDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
@@ -155,16 +162,28 @@ FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQuantFixture, framework:
TEST_SUITE_END() // QASYMM8
TEST_SUITE(QASYMM8_SIGNED)
-FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQuantSignedFixture, framework::DatasetMode::ALL, combine(combine(combine(
- datasets::SmallShapes(),
- ArithmeticSubtractionQASYMM8SIGNEDDataset),
- framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
- ArithmeticSubtractionQuantizationInfoSignedDataset))
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQASYMM8SignedFixture, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ ArithmeticSubtractionQASYMM8SIGNEDDataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ ArithmeticSubtractionQuantizationInfoSignedDataset))
{
// Validate output
validate(Accessor(_target), _reference, tolerance_qasymm8);
}
TEST_SUITE_END() // QASYMM8_SIGNED
+
+TEST_SUITE(QSYMM16)
+FIXTURE_DATA_TEST_CASE(RunSmall, NEArithmeticSubtractionQSYMM16Fixture, framework::DatasetMode::ALL, combine(combine(combine(
+ datasets::SmallShapes(),
+ ArithmeticSubtractionQSYMM16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+ ArithmeticSubtractionQuantizationInfoSymmetric))
+{
+ // Validate output
+ validate(Accessor(_target), _reference, tolerance_qsymm16);
+}
+TEST_SUITE_END() // QSYMM16
TEST_SUITE_END() // Quantized
TEST_SUITE(S16)
diff --git a/tests/validation/fixtures/ArithmeticOperationsFixture.h b/tests/validation/fixtures/ArithmeticOperationsFixture.h
index 086b52bc31..d495ab1049 100644
--- a/tests/validation/fixtures/ArithmeticOperationsFixture.h
+++ b/tests/validation/fixtures/ArithmeticOperationsFixture.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 ARM Limited.
+ * Copyright (c) 2017-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -168,34 +168,6 @@ public:
}
};
-template <typename TensorType, typename AccessorType, typename FunctionType>
-class ArithmeticSubtractionQuantValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, qasymm8_t>
-{
-public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
- QuantizationInfo in1_qua_info, QuantizationInfo in2_qua_info, QuantizationInfo out_qua_info)
- {
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, qasymm8_t>::setup(reference::ArithmeticOperation::SUB, shape, shape,
- data_type0, data_type1, output_data_type, convert_policy,
- in1_qua_info, in2_qua_info, out_qua_info);
- }
-};
-
-template <typename TensorType, typename AccessorType, typename FunctionType>
-class ArithmeticSubtractionQuantSignedValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, qasymm8_signed_t>
-{
-public:
- template <typename...>
- void setup(const TensorShape &shape, DataType data_type0, DataType data_type1, DataType output_data_type, ConvertPolicy convert_policy,
- QuantizationInfo in1_qua_info, QuantizationInfo in2_qua_info, QuantizationInfo out_qua_info)
- {
- ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, qasymm8_signed_t>::setup(reference::ArithmeticOperation::SUB, shape, shape,
- data_type0, data_type1, output_data_type, convert_policy,
- in1_qua_info, in2_qua_info, out_qua_info);
- }
-};
-
template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
class ArithmeticSubtractionValidationFixture : public ArithmeticOperationGenericFixture<TensorType, AccessorType, FunctionType, T>
{