From daa3aba64d01fd02fc74d0707d50d2571e5f0ce8 Mon Sep 17 00:00:00 2001 From: Luca Foschiani Date: Wed, 8 Jan 2020 15:55:08 +0000 Subject: COMPMID-2799 Add support for QASYMM8_SIGNED in NECast Change-Id: I671d645cb458bfd5820192156c86cc8d6182fb5a Signed-off-by: Luca Foschiani Reviewed-on: https://review.mlplatform.org/c/2553 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Reviewed-by: Michele Di Giorgio --- .../core/NEON/kernels/NEDepthConvertLayerKernel.h | 17 +- arm_compute/runtime/NEON/functions/NECast.h | 19 +- .../NEON/kernels/NEDepthConvertLayerKernel.cpp | 255 ++++++++++++++++++++- tests/validation/NEON/Cast.cpp | 60 +++-- 4 files changed, 306 insertions(+), 45 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h index be4a1b7c82..df4102cb86 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h @@ -54,24 +54,25 @@ public: * * Valid conversions Input -> Output : * + * - QASYMM8_SIGNED -> S16, S32, F32, F16 * - QASYMM8 -> U16, S16, S32, F32, F16 * - U8 -> U16, S16, S32, F32, F16 * - U16 -> U8, U32 - * - S16 -> U8, S32 - * - F16 -> QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8, F16, S32, U8 + * - S16 -> QASYMM8_SIGNED, U8, S32 + * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 + * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 + * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8 * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/F16/F32. - * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/F32. + * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/F16/F32. 
* @param[in] policy Conversion policy. * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. */ void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel * - * @param[in] input Source tensor info. Data types supported: QASYMM8/U8/U16/S16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. * diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h index 705cddc662..55c21a01ec 100644 --- a/arm_compute/runtime/NEON/functions/NECast.h +++ b/arm_compute/runtime/NEON/functions/NECast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -43,23 +43,24 @@ public: * * Valid conversions Input -> Output : * + * - QASYMM8_SIGNED -> S16, S32, F32, F16 * - QASYMM8 -> U16, S16, S32, F32, F16 * - U8 -> U16, S16, S32, F32, F16 * - U16 -> U8, U32 - * - S16 -> U8, S32 - * - F16 -> QASYMM8, F32, S32, U8 - * - S32 -> QASYMM8, F16, F32, U8 - * - F32 -> QASYMM8, F16, S32, U8 + * - S16 -> QASYMM8_SIGNED, U8, S32 + * - F16 -> QASYMM8_SIGNED, QASYMM8, F32, S32, U8 + * - S32 -> QASYMM8_SIGNED, QASYMM8, F16, F32, U8 + * - F32 -> QASYMM8_SIGNED, QASYMM8, F16, S32, U8 * - * @param[in] input The input tensor to convert. Data types supported: QASYMM8, U8, U16, S16, F16, S32, F32. - * @param[out] output The output tensor. Data types supported: S8/U16/S16/U32/S32/F16/F32. 
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. + * @param[out] output The output tensor. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. */ void configure(ITensor *input, ITensor *output, ConvertPolicy policy); /** Static function to check if given info will lead to a valid configuration of @ref NECast * - * @param[in] input Source tensor info. Data types supported: QASYMM8, U8, U16, S16, F16, S32, F32. - * @param[in] output Destination tensor info. Data type supported: S8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8_SIGNED/QASYMM8/U8/U16/S16/F16/S32/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8_SIGNED/QASYMM8/U8/S8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. * * @return a status diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index d00c5009d2..f5fb9c09aa 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2019 ARM Limited. + * Copyright (c) 2016-2020 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -44,10 +44,15 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output); ARM_COMPUTE_UNUSED(policy); ARM_COMPUTE_RETURN_ERROR_ON(input == output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32, DataType::S32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8_SIGNED, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, + DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8_SIGNED && (output->data_type() != DataType::S16 && output->data_type() != DataType::S32 + && output->data_type() != DataType::F16 && output->data_type() != DataType::F32), + "Only data_types supported [in] QASYMM8_SIGNED -> [out] S16, S32, F16, F32"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16 && output->data_type() != DataType::S32 && output->data_type() != DataType::F16 && output->data_type() != DataType::F32), "Only data_types supported [in] QASYMM8 -> [out] U16, S16, S32, F16, F32"); @@ -59,19 +64,22 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() 
!= DataType::U32), "Only data_types supported [in] U16 -> [out] U8, U32"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::S32), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::U8 && output->data_type() != DataType::S32), "Only data_types supported [in] S16 -> [out] U8, S32"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::U8 && output->data_type() != DataType::F32 - && output->data_type() != DataType::S32), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && (output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::QASYMM8 + && output->data_type() != DataType::U8 + && output->data_type() != DataType::F32 && output->data_type() != DataType::S32), "Only data_types supported [in] F16 -> [out] QASYMM8, F32, S32, U8"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16 && output->data_type() != DataType::S32 - && output->data_type() != DataType::U8), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && (output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::QASYMM8 + && output->data_type() != DataType::F16 + && output->data_type() != DataType::S32 && output->data_type() != DataType::U8), "Only data_types supported [in] F32 -> [out] QASYMM8, F16, S32, U8"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16 && output->data_type() != DataType::F32 - && output->data_type() != DataType::U8), + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && 
(output->data_type() != DataType::QASYMM8_SIGNED && output->data_type() != DataType::QASYMM8 + && output->data_type() != DataType::F16 + && output->data_type() != DataType::F32 && output->data_type() != DataType::U8), "Only data_types supported [in] S32 -> [out] QASYMM8, F16, F32, U8"); // Validate in case of configured output @@ -145,6 +153,107 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info switch(_input->info()->data_type()) { + case DataType::QASYMM8_SIGNED: + { + const int16x8_t b = vdupq_n_s16(_shift); + + switch(_output->info()->data_type()) + { + case DataType::S16: + { + /* Up-conversion QASYMM8_SIGNED -> S16 */ + execute_window_loop(window, [&](const Coordinates &) + { + const int8x16_t texels_s8 = vld1q_s8(reinterpret_cast(input.ptr())); + + const int16x8x2_t texels = + { + { + vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b), + vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b) + } + }; + + vst1q_s16(reinterpret_cast(output.ptr()), texels.val[0]); + vst1q_s16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); + }, + input, output); + break; + } + case DataType::S32: + { + /* Up-conversion QASYMM8_SIGNED -> S32 */ + execute_window_loop(window, [&](const Coordinates &) + { + const int8x16_t texels_s8 = vld1q_s8(reinterpret_cast(input.ptr())); + + const int16x8x2_t texels = + { + { + vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b), + vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b) + } + }; + + vst1q_s32(reinterpret_cast(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1]))); + }, + input, output); + break; + } + case DataType::F32: + { + /* Up-conversion QASYMM8_SIGNED -> F32 */ + execute_window_loop(window, [&](const Coordinates &) + { + const int8x16_t 
texels_s8 = vld1q_s8(reinterpret_cast(input.ptr())); + + const int16x8x2_t texels = + { + { + vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b), + vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b) + } + }; + vst1q_f32(reinterpret_cast(output.ptr()), vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0])))); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0])))); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1])))); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1])))); + }, + input, output); + break; + } +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: + { + /* Up-conversion QASYMM8_SIGNED -> F16 */ + execute_window_loop(window, [&](const Coordinates &) + { + const int8x16_t texels_s8 = vld1q_s8(reinterpret_cast(input.ptr())); + + const int16x8x2_t texels = + { + { + vshlq_s16(vmovl_s8(vget_low_s8(texels_s8)), b), + vshlq_s16(vmovl_s8(vget_high_s8(texels_s8)), b) + } + }; + vst1q_f16(reinterpret_cast(output.ptr()), vcvtq_f16_s16(texels.val[0])); + vst1q_f16(reinterpret_cast(output.ptr()) + 8, vcvtq_f16_s16(texels.val[1])); + }, + input, output); + break; + } +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } + case DataType::QASYMM8: case DataType::U8: { @@ -271,6 +380,45 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info { switch(_output->info()->data_type()) { + case DataType::QASYMM8_SIGNED: + { + const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); + + /* Down-conversion S16 -> QASYMM8_SIGNED */ + if(ConvertPolicy::SATURATE == _policy) + { + execute_window_loop(window, [&](const Coordinates &) + { + const int16x8x2_t texels = + { + { + vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), + vqshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + 
vst1q_s8(reinterpret_cast(output.ptr()), vcombine_s8(vqmovn_s16(texels.val[0]), vqmovn_s16(texels.val[1]))); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const int16x8x2_t texels = + { + { + vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr())), b), + vshlq_s16(vld1q_s16(reinterpret_cast(input.ptr()) + 8), b) + } + }; + + vst1q_s8(reinterpret_cast(output.ptr()), vcombine_s8(vmovn_s16(texels.val[0]), vmovn_s16(texels.val[1]))); + }, + input, output); + } + break; + } case DataType::U8: { const int16x8_t b = vdupq_n_s16(-static_cast(_shift)); @@ -424,6 +572,26 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info case DataType::F16: switch(_output->info()->data_type()) { + case DataType::QASYMM8_SIGNED: + { + const float16x8_t scale = vdupq_n_f16(1 << _shift); + + /* Up-conversion F16 -> QASYMM8_SIGNED */ + execute_window_loop(window, [&](const Coordinates &) + { + const float16x8x2_t texels = + { + { + vmulq_f16(vld1q_f16(reinterpret_cast(input.ptr())), scale), + vmulq_f16(vld1q_f16(reinterpret_cast(input.ptr()) + 8), scale), + } + }; + + vst1q_s8(reinterpret_cast(output.ptr()), vcombine_s8(vqmovn_s16(vcvtq_s16_f16(texels.val[0])), vqmovn_s16(vcvtq_s16_f16(texels.val[1])))); + }, + input, output); + break; + } case DataType::QASYMM8: case DataType::U8: { @@ -573,6 +741,29 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info input, output); break; } + case DataType::QASYMM8_SIGNED: + { + const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift)); + + /* Down-conversion F32 -> QASYMM8_SIGNED */ + execute_window_loop(window, [&](const Coordinates &) + { + const float32x4x4_t texels = + { + { + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr())), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 4), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 8), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 12), scale), + } + }; + 
+ vst1_s8(reinterpret_cast(output.ptr()), vqmovn_s16(vcombine_s16(vqmovn_s32(vcvtq_s32_f32(texels.val[0])), vqmovn_s32(vcvtq_s32_f32(texels.val[1]))))); + vst1_s8(reinterpret_cast(output.ptr()) + 8, vqmovn_s16(vcombine_s16(vqmovn_s32(vcvtq_s32_f32(texels.val[2])), vqmovn_s32(vcvtq_s32_f32(texels.val[3]))))); + }, + input, output); + break; + } default: ARM_COMPUTE_ERROR("Output data type not supported"); @@ -632,6 +823,50 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info input, output); break; } + case DataType::QASYMM8_SIGNED: + { + const int32x4_t b = vdupq_n_s32(-static_cast(_shift)); + + /* Down-conversion S32 -> QASYMM8_SIGNED */ + if(ConvertPolicy::SATURATE == _policy) + { + execute_window_loop(window, [&](const Coordinates &) + { + const int32x4x4_t texels = + { + { + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr())), b), + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 4), b), + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 8), b), + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 12), b) + } + }; + vst1_s8(reinterpret_cast(output.ptr()), vqmovn_s16(vcombine_s16(vqmovn_s32(texels.val[0]), vqmovn_s32(texels.val[1])))); + vst1_s8(reinterpret_cast(output.ptr()) + 8, vqmovn_s16(vcombine_s16(vqmovn_s32(texels.val[2]), vqmovn_s32(texels.val[3])))); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const int32x4x4_t texels = + { + { + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr())), b), + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 4), b), + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 8), b), + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 12), b) + } + }; + + vst1_s8(reinterpret_cast(output.ptr()), vmovn_s16(vcombine_s16(vmovn_s32(texels.val[0]), vmovn_s32(texels.val[1])))); + vst1_s8(reinterpret_cast(output.ptr()) + 8, vmovn_s16(vcombine_s16(vmovn_s32(texels.val[2]), vmovn_s32(texels.val[3])))); + }, + input, output); + } + break; + } 
case DataType::QASYMM8: case DataType::U8: { diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp index 9300ad4163..2fe4e368d0 100644 --- a/tests/validation/NEON/Cast.cpp +++ b/tests/validation/NEON/Cast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -52,10 +52,17 @@ constexpr AbsoluteTolerance zero_tolerance(0); */ /** Input data sets **/ + +// QASYMM8_SIGNED +const auto CastQASYMM8_SIGNEDtoS16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::S16)); +const auto CastQASYMM8_SIGNEDtoS32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::S32)); +const auto CastQASYMM8_SIGNEDtoF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::F32)); +const auto CastQASYMM8_SIGNEDtoF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8_SIGNED), framework::dataset::make("DataType", DataType::F16)); + // QASYMM8 -const auto CastQASYMM8toF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F16)); -const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32)); -const auto CastQASYMM8toS32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::S32)); +const auto CastQASYMM8toF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F16)); +const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32)); +const auto CastQASYMM8toS32Dataset = 
combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::S32)); // U8 const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); @@ -68,26 +75,29 @@ const auto CastU16toU8Dataset = combine(framework::dataset::make("DataType", Da const auto CastU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); // S16 -const auto CastS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); -const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); +const auto CastS16toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); +const auto CastS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); +const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); //S32 -const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16)); -const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8)); -const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32)); -const auto CastS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), 
framework::dataset::make("DataType", DataType::F16)); +const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8)); +const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32)); +const auto CastS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto CastS32toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); // F16 -const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); -const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32)); -const auto CastF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8)); - +const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); +const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32)); +const auto CastF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto CastF16toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); // F32 -const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8)); -const auto CastF32toF16Dataset = 
combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); -const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); -const auto CastF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8)); +const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); +const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); +const auto CastF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto CastF32toQASYMM8_SIGNEDDataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8_SIGNED)); } // namespace @@ -109,6 +119,8 @@ template using NECastToF32Fixture = CastValidationFixture; template using NECastToQASYMM8Fixture = CastValidationFixture; +template +using NECastToQASYMM8_SIGNEDFixture = CastValidationFixture; #define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance) \ TEST_SUITE(NAME) \ @@ -135,6 +147,14 @@ using NECastToQASYMM8Fixture = CastValidationFixture, CastQASYMM8_SIGNEDtoS16Dataset, one_tolerance) +CAST_SUITE(QASYMM8_SIGNED_to_S32, DataType::QASYMM8_SIGNED, DataType::S32, NECastToS32Fixture, CastQASYMM8_SIGNEDtoS32Dataset, one_tolerance) +CAST_SUITE(QASYMM8_SIGNED_to_F32, DataType::QASYMM8_SIGNED, DataType::F32, NECastToF32Fixture, CastQASYMM8_SIGNEDtoF32Dataset, one_tolerance) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +CAST_SUITE(QASYMM8_SIGNED_to_F16, 
DataType::QASYMM8_SIGNED, DataType::F16, NECastToF16Fixture, CastQASYMM8_SIGNEDtoF16Dataset, one_tolerance) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + //QASYMM8 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC CAST_SUITE(QASYMM8_to_F16, DataType::QASYMM8, DataType::F16, NECastToF16Fixture, CastQASYMM8toF16Dataset, one_tolerance) @@ -153,10 +173,12 @@ CAST_SUITE(U16_to_U8, DataType::U16, DataType::U8, NECastToU8Fixture, CAST_SUITE(U16_to_U32, DataType::U16, DataType::U32, NECastToU32Fixture, CastU16toU32Dataset, zero_tolerance) // S16 +CAST_SUITE(S16_to_QASYMM8_SIGNED, DataType::S16, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture, CastS16toQASYMM8_SIGNEDDataset, zero_tolerance) CAST_SUITE(S16_to_U8, DataType::S16, DataType::U8, NECastToU8Fixture, CastS16toU8Dataset, zero_tolerance) CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, NECastToS32Fixture, CastS16toS32Dataset, zero_tolerance) // S32 +CAST_SUITE(S32_to_QASYMM8_SIGNED, DataType::S32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture, CastS32toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(S32_to_QASYMM8, DataType::S32, DataType::QASYMM8, NECastToQASYMM8Fixture, CastS32toQASYMM8Dataset, one_tolerance) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, NECastToF16Fixture, CastS32toF16Dataset, zero_tolerance) @@ -166,12 +188,14 @@ CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, NECastToU8Fixture, C // F16 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +CAST_SUITE(F16_to_QASYMM8_SIGNED, DataType::F16, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture, CastF16toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(F16_to_QASYMM8, DataType::F16, DataType::QASYMM8, NECastToQASYMM8Fixture, CastF16toQASYMM8Dataset, one_tolerance) CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, NECastToF32Fixture, CastF16toF32Dataset, zero_tolerance) CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, NECastToS32Fixture, CastF16toS32Dataset, one_tolerance) 
#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC // F32 +CAST_SUITE(F32_to_QASYMM8_SIGNED, DataType::F32, DataType::QASYMM8_SIGNED, NECastToQASYMM8_SIGNEDFixture, CastF32toQASYMM8_SIGNEDDataset, one_tolerance) CAST_SUITE(F32_to_QASYMM8, DataType::F32, DataType::QASYMM8, NECastToQASYMM8Fixture, CastF32toQASYMM8Dataset, one_tolerance) #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, NECastToF16Fixture, CastF32toF16Dataset, zero_tolerance) -- cgit v1.2.1