author     Luca Foschiani <luca.foschiani@arm.com>    2020-01-08 15:55:08 +0000
committer  Luca Foschiani <luca.foschiani@arm.com>    2020-01-10 09:53:34 +0000
commit     daa3aba64d01fd02fc74d0707d50d2571e5f0ce8 (patch)
tree       054aa85892af83aab05524a52b62662b2abc3d80
parent     dceacdc7e0b0d1246140940ae67d4f8f80886966 (diff)
download   ComputeLibrary-daa3aba64d01fd02fc74d0707d50d2571e5f0ce8.tar.gz
COMPMID-2799 Add support for QASYMM8_SIGNED in NECast
Change-Id: I671d645cb458bfd5820192156c86cc8d6182fb5a
Signed-off-by: Luca Foschiani <luca.foschiani@arm.com>
Reviewed-on: https://review.mlplatform.org/c/2553
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h    17
-rw-r--r--  arm_compute/runtime/NEON/functions/NECast.h                   19
-rw-r--r--  src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp          255
-rw-r--r--  tests/validation/NEON/Cast.cpp                                 60
4 files changed, 306 insertions(+), 45 deletions(-)
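
For reference, a minimal sketch of driving the new QASYMM8_SIGNED path through NECast. The tensor shape, quantization parameters and the main() harness are illustrative assumptions, not part of this patch:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NECast.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    Tensor src{}, dst{};
    // Quantized signed 8-bit input, float output; scale/offset chosen only for illustration.
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8_SIGNED, QuantizationInfo(0.5f, 10)));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    NECast cast{};
    cast.configure(&src, &dst, ConvertPolicy::SATURATE); // QASYMM8_SIGNED -> F32 is now a valid pair
    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src with int8 data, then:
    cast.run();
    return 0;
}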
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
index be4a1b7c82..df4102cb86 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -54,24 +54,25 @@ public:
*
* Valid conversions Input -> Output :
*
+ * - QASYMM8_SIGNED -> S16, S32, F32, F16
* - QASYMM8 -> U16, S16, S32, F32, F16
* - U8 -> U16, S16, S32, F32, F16
* - U16 -> U8, U32
- * - S16 -> U8, S32
- * - F16 -> QASYMM8, F32, S32, U8
- * - S32 -> QASYMM8, F16, F32, U8
- * - F32 -> QASYMM8, F16, S32, U8
+ * - S16 -> QASYMM8_SIGNED, U8, S32
+ * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
+ * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
+ * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8
*
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/F16/F32.
- * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/F32.
+ * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy.
* @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
*/
void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
/** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8/U8/U16/S16/F16/F32.
- * @param[in] output Destination tensor info. Data type supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/F32.
+ * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy
* @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
*
diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h
index 705cddc662..55c21a01ec 100644
--- a/arm_compute/runtime/NEON/functions/NECast.h
+++ b/arm_compute/runtime/NEON/functions/NECast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -43,23 +43,24 @@ public:
*
* Valid conversions Input -> Output :
*
+ * - QASYMM8_SIGNED -> S16, S32, F32, F16
* - QASYMM8 -> U16, S16, S32, F32, F16
* - U8 -> U16, S16, S32, F32, F16
* - U16 -> U8, U32
- * - S16 -> U8, S32
- * - F16 -> QASYMM8, F32, S32, U8
- * - S32 -> QASYMM8, F16, F32, U8
- * - F32 -> QASYMM8, F16, S32, U8
+ * - S16 -> QASYMM8_SIGNED, U8, S32
+ * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8
+ * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8
+ * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8
*
- * @param[in] input The input tensor to convert. Data types supported: QASYMM8, U8, U16, S16, F16, S32, F32.
- * @param[out] output The output tensor. Data types supported: S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32.
+ * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy.
*/
void configure(ITensor *input, ITensor *output, ConvertPolicy policy);
/** Static function to check if given info will lead to a valid configuration of @ref NECast
*
- * @param[in] input Source tensor info. Data types supported: QASYMM8, U8, U16, S16, F16, S32, F32.
- * @param[in] output Destination tensor info. Data type supported: S8/U16/S16/U32/S32/F16/F32.
+ * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32.
+ * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32.
* @param[in] policy Conversion policy.
*
* @return a status
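
As documented above, QASYMM8_SIGNED inputs can only be cast to S16, S32, F16 or F32; other targets are rejected at validation time. A sketch, with illustrative shapes, of checking an unsupported pair through NECast::validate():

TensorInfo in_info(TensorShape(8U, 8U), 1, DataType::QASYMM8_SIGNED);
TensorInfo bad_out(TensorShape(8U, 8U), 1, DataType::U8);
// Expected to return an error status: QASYMM8_SIGNED -> U8 is not in the supported list.
Status status = NECast::validate(&in_info, &bad_out, ConvertPolicy::SATURATE);
if(status.error_code() != ErrorCode::OK)
{
    // handle / report the unsupported conversion
}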
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index d00c5009d2..f5fb9c09aa 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 ARM Limited.
+ * Copyright (c) 2016-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -44,10 +44,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C
ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
ARM_COMPUTE_UNUSED(policy);
ARM_COMPUTE_RETURN_ERROR_ON(input == output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32, DataType::S32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16,
+ DataType::F32);
ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8_SIGNED && (output->data_type() != DataType::S16 && output->data_type() != DataType::S32
+ && output->data_type() != DataType::F16 && output->data_type() != DataType::F32),
+ "Only data_types supported [in] QASYMM8 -> [out] U16, S16, S32, F16, F32");
+
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16
&& output->data_type() != DataType::S32 && output->data_type() != DataType::F16 && output->data_type() != DataType::F32),
"Only data_types supported [in] QASYMM8 -> [out] U16, S16, S32, F16, F32");
@@ -59,19 +64,22 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32),
"Only data_types supported [in] U16 -> [out] U8, U32");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::S32),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::U8 && output->data_type() != DataType::S32),
"Only data_types supported [in] S16 -> [out] U8, S32");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::U8 && output->data_type() != DataType::F32
- && output->data_type() != DataType::S32),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && (output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::QASYMM8
+ && output->data_type() != DataType::U8
+ && output->data_type() != DataType::F32 && output->data_type() != DataType::S32),
"Only data_types supported [in] F16 -> [out] QASYMM8, F32, S32, U8");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16 && output->data_type() != DataType::S32
- && output->data_type() != DataType::U8),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && (output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::QASYMM8
+ && output->data_type() != DataType::F16
+ && output->data_type() != DataType::S32 && output->data_type() != DataType::U8),
"Only data_types supported [in] F32 -> [out] QASYMM8, F16, S32, U8");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16 && output->data_type() != DataType::F32
- && output->data_type() != DataType::U8),
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::QASYMM8
+ && output->data_type() != DataType::F16
+ && output->data_type() != DataType::F32 && output->data_type() != DataType::U8),
"Only data_types supported [in] S32 -> [out] QASYMM8, F16, F32, U8");
// Validate in case of configured output
@@ -145,6 +153,107 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
switch(_input->info()->data_type())
{
+ case DataType::QASYMM8_SIGNED:
+ {
+ const int16x8_t b = vdupq_n_s16(_shift);
+
+ switch(_output->info()->data_type())
+ {
+ case DataType::S16:
+ {
+ /* Up-conversion QASYMM8_SIGNED -> S16 */
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int8x16_t texels_s8 = vld1q_s8(reinterpret_cast<int8_t *>(input.ptr()));
+
+ const int16x8x2_t texels =
+ {
+ {
+ vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b),
+ vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b)
+ }
+ };
+
+ vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()), texels.val[0]);
+ vst1q_s16(reinterpret_cast<int16_t *>(output.ptr()) + 8, texels.val[1]);
+ },
+ input, output);
+ break;
+ }
+ case DataType::S32:
+ {
+ /* Up-conversion QASYMM8_SIGNED -> S32 */
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int8x16_t texels_s8 = vld1q_s8(reinterpret_cast<int8_t *>(input.ptr()));
+
+ const int16x8x2_t texels =
+ {
+ {
+ vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b),
+ vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b)
+ }
+ };
+
+ vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0])));
+ vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0])));
+ vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1])));
+ vst1q_s32(reinterpret_cast<int32_t *>(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1])));
+ },
+ input, output);
+ break;
+ }
+ case DataType::F32:
+ {
+ /* Up-conversion QASYMM8_SIGNED -> F32 */
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int8x16_t texels_s8 = vld1q_s8(reinterpret_cast<int8_t *>(input.ptr()));
+
+ const int16x8x2_t texels =
+ {
+ {
+ vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b),
+ vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b)
+ }
+ };
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()), vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0]))));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0]))));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1]))));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1]))));
+ },
+ input, output);
+ break;
+ }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ {
+ /* Up-conversion QASYMM8_SIGNED -> F16 */
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int8x16_t texels_s8 = vld1q_s8(reinterpret_cast<int8_t *>(input.ptr()));
+
+ const int16x8x2_t texels =
+ {
+ {
+ vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b),
+ vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b)
+ }
+ };
+ vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vcvtq_f16_s16(texels.val[0]));
+ vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()) + 8, vcvtq_f16_s16(texels.val[1]));
+ },
+ input, output);
+ break;
+ }
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+ default:
+ ARM_COMPUTE_ERROR("Output data type not supported");
+ }
+ break;
+ }
+
case DataType::QASYMM8:
case DataType::U8:
{
@@ -271,6 +380,45 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
{
switch(_output->info()->data_type())
{
+ case DataType::QASYMM8_SIGNED:
+ {
+ const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
+
+ /* Down-conversion S16 -> QASYMM8_SIGNED */
+ if(ConvertPolicy::SATURATE == _policy)
+ {
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int16x8x2_t texels =
+ {
+ {
+ vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
+ vqshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
+ }
+ };
+
+ vst1q_s8(reinterpret_cast<int8_t *>(output.ptr()), vcombine_s8(vqmovn_s16(texels.val[0]), vqmovn_s16(texels.val[1])));
+ },
+ input, output);
+ }
+ else
+ {
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int16x8x2_t texels =
+ {
+ {
+ vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr())), b),
+ vshlq_s16(vld1q_s16(reinterpret_cast<int16_t *>(input.ptr()) + 8), b)
+ }
+ };
+
+ vst1q_s8(reinterpret_cast<int8_t *>(output.ptr()), vcombine_s8(vmovn_s16(texels.val[0]), vmovn_s16(texels.val[1])));
+ },
+ input, output);
+ }
+ break;
+ }
case DataType::U8:
{
const int16x8_t b = vdupq_n_s16(-static_cast<int16_t>(_shift));
@@ -424,6 +572,26 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
case DataType::F16:
switch(_output->info()->data_type())
{
+ case DataType::QASYMM8_SIGNED:
+ {
+ const float16x8_t scale = vdupq_n_f16(1 << _shift);
+
+ /* Down-conversion F16 -> QASYMM8_SIGNED */
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const float16x8x2_t texels =
+ {
+ {
+ vmulq_f16(vld1q_f16(reinterpret_cast<float16_t *>(input.ptr())), scale),
+ vmulq_f16(vld1q_f16(reinterpret_cast<float16_t *>(input.ptr()) + 8), scale),
+ }
+ };
+
+ vst1q_s8(reinterpret_cast<int8_t *>(output.ptr()), vcombine_s8(vqmovn_s16(vcvtq_s16_f16(texels.val[0])), vqmovn_s16(vcvtq_s16_f16(texels.val[1]))));
+ },
+ input, output);
+ break;
+ }
case DataType::QASYMM8:
case DataType::U8:
{
@@ -573,6 +741,29 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
input, output);
break;
}
+ case DataType::QASYMM8_SIGNED:
+ {
+ const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift));
+
+ /* Down-conversion F32 -> QASYMM8_SIGNED */
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const float32x4x4_t texels =
+ {
+ {
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr())), scale),
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 4), scale),
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 8), scale),
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 12), scale),
+ }
+ };
+
+ vst1_s8(reinterpret_cast<int8_t *>(output.ptr()), vqmovn_s16(vcombine_s16(vqmovn_s32(vcvtq_s32_f32(texels.val[0])), vqmovn_s32(vcvtq_s32_f32(texels.val[1])))));
+ vst1_s8(reinterpret_cast<int8_t *>(output.ptr()) + 8, vqmovn_s16(vcombine_s16(vqmovn_s32(vcvtq_s32_f32(texels.val[2])), vqmovn_s32(vcvtq_s32_f32(texels.val[3])))));
+ },
+ input, output);
+ break;
+ }
default:
ARM_COMPUTE_ERROR("Output data type not supported");
@@ -632,6 +823,50 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
input, output);
break;
}
+ case DataType::QASYMM8_SIGNED:
+ {
+ const int32x4_t b = vdupq_n_s32(-static_cast<int32_t>(_shift));
+
+ /* Down-conversion S32 -> QASYMM8_SIGNED */
+ if(ConvertPolicy::SATURATE == _policy)
+ {
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int32x4x4_t texels =
+ {
+ {
+ vqshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr())), b),
+ vqshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr()) + 4), b),
+ vqshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr()) + 8), b),
+ vqshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr()) + 12), b)
+ }
+ };
+ vst1_s8(reinterpret_cast<int8_t *>(output.ptr()), vqmovn_s16(vcombine_s16(vqmovn_s32(texels.val[0]), vqmovn_s32(texels.val[1]))));
+ vst1_s8(reinterpret_cast<int8_t *>(output.ptr()) + 8, vqmovn_s16(vcombine_s16(vqmovn_s32(texels.val[2]), vqmovn_s32(texels.val[3]))));
+ },
+ input, output);
+ }
+ else
+ {
+ execute_window_loop(window, [&](const Coordinates &)
+ {
+ const int32x4x4_t texels =
+ {
+ {
+ vshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr())), b),
+ vshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr()) + 4), b),
+ vshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr()) + 8), b),
+ vshlq_s32(vld1q_s32(reinterpret_cast<int32_t *>(input.ptr()) + 12), b)
+ }
+ };
+
+ vst1_s8(reinterpret_cast<int8_t *>(output.ptr()), vmovn_s16(vcombine_s16(vmovn_s32(texels.val[0]), vmovn_s32(texels.val[1]))));
+ vst1_s8(reinterpret_cast<int8_t *>(output.ptr()) + 8, vmovn_s16(vcombine_s16(vmovn_s32(texels.val[2]), vmovn_s32(texels.val[3]))));
+ },
+ input, output);
+ }
+ break;
+ }
case DataType::QASYMM8:
case DataType::U8:
{
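
All of the new QASYMM8_SIGNED up-conversion paths above follow the same widen-then-shift pattern (vmovl_s8 to widen each half of the register, then vshlq_s16 to apply the shift). A scalar reference of that pattern, given purely as an illustrative cross-check and not as code from this patch:

#include <cstddef>
#include <cstdint>

// Illustrative scalar equivalent of the QASYMM8_SIGNED -> S16 up-conversion with shift.
void cast_s8_to_s16_reference(const int8_t *src, int16_t *dst, size_t n, uint32_t shift)
{
    for(size_t i = 0; i < n; ++i)
    {
        // Widen to 16 bits first, then shift left, matching vmovl_s8 + vshlq_s16.
        dst[i] = static_cast<int16_t>(static_cast<int16_t>(src[i]) << shift);
    }
}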
diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp
index 9300ad4163..2fe4e368d0 100644
--- a/tests/validation/NEON/Cast.cpp
+++ b/tests/validation/NEON/Cast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019 ARM Limited.
+ * Copyright (c) 2019-2020 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -52,10 +52,17 @@ constexpr AbsoluteTolerance<float> zero_tolerance(0);
*/
/** Input data sets **/
+
+// QASYMM8_SIGNED
+const auto CastQASYMM8_SIGNEDtoS16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::S16));
+const auto CastQASYMM8_SIGNEDtoS32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::S32));
+const auto CastQASYMM8_SIGNEDtoF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::F32));
+const auto CastQASYMM8_SIGNEDtoF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::F16));
+
// QASYMM8
-const auto CastQASYMM8toF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F16));
-const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32));
-const auto CastQASYMM8toS32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::S32));
+const auto CastQASYMM8toF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F16));
+const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32));
+const auto CastQASYMM8toS32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::S32));
// U8
const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
@@ -68,26 +75,29 @@ const auto CastU16toU8Dataset = combine(framework::dataset::make("DataType", Da
const auto CastU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
// S16
-const auto CastS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
-const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto CastS16toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
+const auto CastS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
+const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
//S32
-const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16));
-const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8));
-const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32));
-const auto CastS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8));
+const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16));
+const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8));
+const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32));
+const auto CastS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8));
+const auto CastS32toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
// F16
-const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
-const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32));
-const auto CastF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8));
-
+const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32));
+const auto CastF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8));
+const auto CastF16toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
// F32
-const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8));
-const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
-const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32));
-const auto CastF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8));
+const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8));
+const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
+const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32));
+const auto CastF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8));
+const auto CastF32toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED));
} // namespace
@@ -109,6 +119,8 @@ template <typename T>
using NECastToF32Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, float>;
template <typename T>
using NECastToQASYMM8Fixture = CastValidationFixture<Tensor, Accessor, NECast, T, uint8_t>;
+template <typename T>
+using NECastToQASYMM8_SIGNEDFixture = CastValidationFixture<Tensor, Accessor, NECast, T, int8_t>;
#define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance) \
TEST_SUITE(NAME) \
@@ -135,6 +147,14 @@ using NECastToQASYMM8Fixture = CastValidationFixture<Tensor, Accessor, NECast, T
} \
TEST_SUITE_END()
+//QASYMM8_SIGNED
+CAST_SUITE(QASYMM8_SIGNED_to_S16, DataType::QASYMM8_SIGNED, DataType::S16, NECastToS16Fixture<int8_t>, CastQASYMM8_SIGNEDtoS16Dataset, one_tolerance)
+CAST_SUITE(QASYMM8_SIGNED_to_S32, DataType::QASYMM8_SIGNED, DataType::S32, NECastToS32Fixture<int8_t>, CastQASYMM8_SIGNEDtoS32Dataset, one_tolerance)
+CAST_SUITE(QASYMM8_SIGNED_to_F32, DataType::QASYMM8_SIGNED, DataType::F32, NECastToF32Fixture<int8_t>, CastQASYMM8_SIGNEDtoF32Dataset, one_tolerance)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+CAST_SUITE(QASYMM8_SIGNED_to_F16, DataType::QASYMM8_SIGNED, DataType::F16, NECastToF16Fixture<int8_t>, CastQASYMM8_SIGNEDtoF16Dataset, one_tolerance)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
//QASYMM8
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
CAST_SUITE(QASYMM8_to_F16, DataType::QASYMM8, DataType::F16, NECastToF16Fixture<uint8_t>, CastQASYMM8toF16Dataset, one_tolerance)
@@ -153,10 +173,12 @@ CAST_SUITE(U16_to_U8, DataType::U16, DataType::U8, NECastToU8Fixture<uint16_t>,
CAST_SUITE(U16_to_U32, DataType::U16, DataType::U32, NECastToU32Fixture<uint16_t>, CastU16toU32Dataset, zero_tolerance)
// S16
+CAST_SUITE(S16_to_QASYMM8_SIGNED, DataType::S16, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture<int16_t>, CastS16toQASYMM8_SIGNEDDataset, zero_tolerance)
CAST_SUITE(S16_to_U8, DataType::S16, DataType::U8, NECastToU8Fixture<int16_t>, CastS16toU8Dataset, zero_tolerance)
CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, NECastToS32Fixture<int16_t>, CastS16toS32Dataset, zero_tolerance)
// S32
+CAST_SUITE(S32_to_QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture<int32_t>, CastS32toQASYMM8_SIGNEDDataset, one_tolerance)
CAST_SUITE(S32_to_QASYMM8, DataType::S32, DataType::QASYMM8, NECastToQASYMM8Fixture<int32_t>, CastS32toQASYMM8Dataset, one_tolerance)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, NECastToF16Fixture<int32_t>, CastS32toF16Dataset, zero_tolerance)
@@ -166,12 +188,14 @@ CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, NECastToU8Fixture<int32_t>, C
// F16
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+CAST_SUITE(F16_to_QASYMM8_SIGNED, DataType::F16, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture<half>, CastF16toQASYMM8_SIGNEDDataset, one_tolerance)
CAST_SUITE(F16_to_QASYMM8, DataType::F16, DataType::QASYMM8, NECastToQASYMM8Fixture<half>, CastF16toQASYMM8Dataset, one_tolerance)
CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, NECastToF32Fixture<half>, CastF16toF32Dataset, zero_tolerance)
CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, NECastToS32Fixture<half>, CastF16toS32Dataset, one_tolerance)
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
// F32
+CAST_SUITE(F32_to_QASYMM8_SIGNED, DataType::F32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture<float>, CastF32toQASYMM8_SIGNEDDataset, one_tolerance)
CAST_SUITE(F32_to_QASYMM8, DataType::F32, DataType::QASYMM8, NECastToQASYMM8Fixture<float>, CastF32toQASYMM8Dataset, one_tolerance)
#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, NECastToF16Fixture<float>, CastF32toF16Dataset, zero_tolerance)