From 9e631c204444e7b095510c54819e944f9be8d342 Mon Sep 17 00:00:00 2001
From: Usama Arif
Date: Tue, 14 May 2019 17:10:40 +0100
Subject: COMPMID-2252 NECast.

Change-Id: I7532aea6827a325eb8457132d4787ac527e93cd4
Signed-off-by: Usama Arif
Reviewed-on: https://review.mlplatform.org/c/1149
Reviewed-by: Pablo Marquez
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
---
 .../core/NEON/kernels/NEDepthConvertLayerKernel.h | 15 +-
 arm_compute/runtime/NEON/NEFunctions.h | 1 +
 arm_compute/runtime/NEON/functions/NECast.h | 70 +++
 .../NEON/kernels/NEDepthConvertLayerKernel.cpp | 306 +++++++++----
 src/runtime/NEON/functions/NECast.cpp | 46 ++
 tests/validation/NEON/Cast.cpp | 186 ++++++++
 tests/validation/NEON/DepthConvertLayer.cpp | 486 ++++++++++++++++++++-
 .../validation/fixtures/DepthConvertLayerFixture.h | 18 +-
 tests/validation/reference/DepthConvertLayer.cpp | 23 +-
 9 files changed, 1019 insertions(+), 132 deletions(-)
 create mode 100644 arm_compute/runtime/NEON/functions/NECast.h
 create mode 100644 src/runtime/NEON/functions/NECast.cpp
 create mode 100644 tests/validation/NEON/Cast.cpp

diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
index 16b8e4276f..c900e08424 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -30,7 +30,9 @@ namespace arm_compute
 {
 class ITensor;

-/** Depth conversion kernel */
+/** Depth conversion kernel
+ * This function ignores the scale and zeroPoint of quantized tensors, i.e. QASYMM8 input is treated as uint8 values.
+ */
 class NEDepthConvertLayerKernel : public INEKernel
 {
 public:
@@ -52,12 +54,13 @@ public:
 *
 * Valid conversions Input -> Output :
 *
- * - QASYMM8 -> F16, F32
- * - U8 -> U16, S16, S32
+ * - QASYMM8 -> U16, S16, S32, F32, F16
+ * - U8 -> U16, S16, S32, F32, F16
 * - U16 -> U8, U32
 * - S16 -> U8, S32
- * - F16 -> QASYMM8, F32
- * - F32 -> QASYMM8, F16
+ * - F16 -> QASYMM8, F32, S32, U8
+ * - S32 -> QASYMM8, F16, F32, U8
+ * - F32 -> QASYMM8, F16, S32, U8
 *
 * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/S32/F16/F32.
 * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32.
diff --git a/arm_compute/runtime/NEON/NEFunctions.h b/arm_compute/runtime/NEON/NEFunctions.h
index 0d94ea78fc..9094ee4bdf 100644
--- a/arm_compute/runtime/NEON/NEFunctions.h
+++ b/arm_compute/runtime/NEON/NEFunctions.h
@@ -39,6 +39,7 @@
 #include "arm_compute/runtime/NEON/functions/NEBitwiseXor.h"
 #include "arm_compute/runtime/NEON/functions/NEBox3x3.h"
 #include "arm_compute/runtime/NEON/functions/NECannyEdge.h"
+#include "arm_compute/runtime/NEON/functions/NECast.h"
 #include "arm_compute/runtime/NEON/functions/NEChannelCombine.h"
 #include "arm_compute/runtime/NEON/functions/NEChannelExtract.h"
 #include "arm_compute/runtime/NEON/functions/NEChannelShuffleLayer.h"
diff --git a/arm_compute/runtime/NEON/functions/NECast.h b/arm_compute/runtime/NEON/functions/NECast.h
new file mode 100644
index 0000000000..5d09bf757f
--- /dev/null
+++ b/arm_compute/runtime/NEON/functions/NECast.h
@@ -0,0 +1,70 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef __ARM_COMPUTE_NECAST_H__
+#define __ARM_COMPUTE_NECAST_H__
+
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/INESimpleFunction.h"
+
+namespace arm_compute
+{
+class ITensor;
+
+/** Basic function to run @ref NEDepthConvertLayerKernel.
+ * This function ignores the scale and zeroPoint of quantized tensors, so QASYMM8 input is treated as uint8 values.
+ */
+class NECast : public INESimpleFunction
+{
+public:
+ /** Initialize the function's source and destination.
+ *
+ * Input data type must be different from output data type.
+ *
+ * Valid conversions Input -> Output :
+ *
+ * - QASYMM8 -> U16, S16, S32, F32, F16
+ * - U8 -> U16, S16, S32, F32, F16
+ * - U16 -> U8, U32
+ * - S16 -> U8, S32
+ * - F16 -> QASYMM8, F32, S32, U8
+ * - S32 -> QASYMM8, F16, F32, U8
+ * - F32 -> QASYMM8, F16, S32, U8
+ *
+ * @param[in] input The input tensor to convert. Data types supported: QASYMM8, U8, U16, S16, F16, S32, F32.
+ * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ */
+ void configure(ITensor *input, ITensor *output, ConvertPolicy policy);
+ /** Static function to check if given info will lead to a valid configuration of @ref NECast
+ *
+ * @param[in] input Source tensor info. Data types supported: QASYMM8, U8, U16, S16, F16, S32, F32.
+ * @param[in] output Destination tensor info. Data types supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
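+ *
+ * Illustrative check (a minimal sketch, assuming src and dst are already-initialised tensors):
+ *
+ *   Status s = NECast::validate(src.info(), dst.info(), ConvertPolicy::SATURATE);
+ *   // bool(s) is true when the conversion is one of the valid pairs listed above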
+ * + * @return a status + */ + static Status validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy); +}; +} // namespace arm_compute +#endif /*__ARM_COMPUTE_NECAST_H__*/ diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index cbc90a058f..531873e49e 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -44,16 +44,17 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output); ARM_COMPUTE_UNUSED(policy); ARM_COMPUTE_RETURN_ERROR_ON(input == output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32, DataType::S32); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8 && (output->data_type() != DataType::F16 && output->data_type() != DataType::F32), - "Only data_types supported [in] QASYMM8 -> [out] F16, F32"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16 + && output->data_type() != DataType::S32 && output->data_type() != DataType::F16 && output->data_type() != DataType::F32), + "Only data_types supported [in] QASYMM8 -> [out] U16, S16, S32, F16, F32"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16 - && output->data_type() != DataType::S32), - "Only data_types supported [in] U8 -> [out] U16, S16, S32"); + && output->data_type() != DataType::S32 && output->data_type() != DataType::F16 && output->data_type() != DataType::F32), + "Only data_types supported [in] U8 -> [out] U16, S16, S32, F16, F32"); ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32), "Only data_types supported [in] U16 -> [out] U8, U32"); @@ -61,11 +62,14 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::S32), "Only data_types supported [in] S16 -> [out] U8, S32"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F32), - "Only data_types supported [in] F16 -> [out] QASYMM8, F32"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::U8 && output->data_type() != DataType::F32 && output->data_type() != DataType::S32), + "Only data_types supported [in] F16 -> [out] QASYMM8, F32, S32, U8"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16), - "Only data_types supported [in] F32 -> [out] QASYMM8, F16"); + 
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16 && output->data_type() != DataType::S32 && output->data_type() != DataType::U8), + "Only data_types supported [in] F32 -> [out] QASYMM8, F16, S32, U8"); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16 && output->data_type() != DataType::F32 && output->data_type() != DataType::U8), + "Only data_types supported [in] S32 -> [out] QASYMM8, F16, F32, U8"); // Validate in case of configured output if(output->total_size() > 0) @@ -139,83 +143,59 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info switch(_input->info()->data_type()) { case DataType::QASYMM8: + case DataType::U8: { + const int16x8_t b = vdupq_n_s16(_shift); + switch(_output->info()->data_type()) { - /* Up-conversion QASYMM8 -> F32 */ - case DataType::F32: + case DataType::S16: { - const float32x4_t scale = vdupq_n_f32(_input->info()->quantization_info().scale); - const int32x4_t offset = vdupq_n_s32(_input->info()->quantization_info().offset); - + /* Up-conversion U8 -> S16 */ execute_window_loop(window, [&](const Coordinates &) { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - const uint16x8x2_t texels_u16 = - { - { - vmovl_u8(vget_low_u8(texels_u8)), - vmovl_u8(vget_high_u8(texels_u8)) - } - }; + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - const int32x4x4_t texels_s32 = + const int16x8x2_t texels = { { - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(texels_u16.val[0]))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(texels_u16.val[0]))), - vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(texels_u16.val[1]))), - vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(texels_u16.val[1]))) + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) } }; - vst1q_f32(reinterpret_cast(output.ptr()), vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[0], offset)), scale)); - vst1q_f32(reinterpret_cast(output.ptr()) + 4, vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[1], offset)), scale)); - vst1q_f32(reinterpret_cast(output.ptr()) + 8, vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[2], offset)), scale)); - vst1q_f32(reinterpret_cast(output.ptr()) + 12, vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[3], offset)), scale)); + vst1q_s16(reinterpret_cast(output.ptr()), texels.val[0]); + vst1q_s16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); }, input, output); break; } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - /* Up-conversion QASYMM8 -> F16 */ - case DataType::F16: + case DataType::S32: { - const float16x8_t scale = vdupq_n_f16(static_cast(_input->info()->quantization_info().scale)); - const int16x8_t offset = vdupq_n_s16(static_cast(_input->info()->quantization_info().offset)); - + /* Up-conversion U8 -> S32 */ execute_window_loop(window, [&](const Coordinates &) { - const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); - const int16x8x2_t texels_s16 = + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + + const int16x8x2_t texels = { { - vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), - vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))) + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), b), + vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) } }; - vst1q_f16(reinterpret_cast(output.ptr()), 
vmulq_f16(vcvtq_f16_s16(vsubq_s16(texels_s16.val[0], offset)), scale)); - vst1q_f16(reinterpret_cast(output.ptr()) + 8, vmulq_f16(vcvtq_f16_s16(vsubq_s16(texels_s16.val[1], offset)), scale)); + vst1q_s32(reinterpret_cast(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1]))); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1]))); }, input, output); break; } -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ - default: - ARM_COMPUTE_ERROR("Output data type not supported"); - } - break; - } - case DataType::U8: - { - const int16x8_t b = vdupq_n_s16(_shift); - - switch(_output->info()->data_type()) - { - case DataType::S16: + case DataType::F32: { - /* Up-conversion U8 -> S16 */ + /* Up-conversion U8 -> F32 */ execute_window_loop(window, [&](const Coordinates &) { const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); @@ -227,16 +207,18 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) } }; - - vst1q_s16(reinterpret_cast(output.ptr()), texels.val[0]); - vst1q_s16(reinterpret_cast(output.ptr()) + 8, texels.val[1]); + vst1q_f32(reinterpret_cast(output.ptr()), vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[0])))); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[0])))); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, vcvtq_f32_s32(vmovl_s16(vget_low_s16(texels.val[1])))); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, vcvtq_f32_s32(vmovl_s16(vget_high_s16(texels.val[1])))); }, input, output); break; } - case DataType::S32: +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: { - /* Up-conversion U8 -> S32 */ + /* Up-conversion U8 -> F16 */ execute_window_loop(window, [&](const Coordinates &) { const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); @@ -248,15 +230,14 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info vshlq_s16(vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))), b) } }; - - vst1q_s32(reinterpret_cast(output.ptr()), vmovl_s16(vget_low_s16(texels.val[0]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 4, vmovl_s16(vget_high_s16(texels.val[0]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 8, vmovl_s16(vget_low_s16(texels.val[1]))); - vst1q_s32(reinterpret_cast(output.ptr()) + 12, vmovl_s16(vget_high_s16(texels.val[1]))); + vst1q_f16(reinterpret_cast(output.ptr()), vcvtq_f16_s16(texels.val[0])); + vst1q_f16(reinterpret_cast(output.ptr()) + 8, vcvtq_f16_s16(texels.val[1])); }, input, output); break; } +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::U16: { /* Up-conversion U8 -> U16 */ @@ -441,13 +422,11 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info switch(_output->info()->data_type()) { case DataType::QASYMM8: + case DataType::U8: { - const float16x8_t scale = vinvq_f16(vdupq_n_f16(static_cast(_output->info()->quantization_info().scale))); - const int16x8_t offset = vdupq_n_s16(static_cast(_output->info()->quantization_info().offset)); - const int16x8_t max_val_vec = vdupq_n_s16(255); - const int16x8_t zero_val_vec = vdupq_n_s16(0); + const float16x8_t scale = vdupq_n_f16(1 << _shift); - /* Down-conversion F16 -> QASYMM8 */ + /* Up-conversion F16 -> U8 */ execute_window_loop(window, [&](const Coordinates & id) { const 
float16x8x2_t texels = @@ -458,9 +437,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info } }; - const auto texel_quantized_0 = vmaxq_s16(vminq_s16(vaddq_s16(vcvtq_s16_f16(texels.val[0]), offset), max_val_vec), zero_val_vec); - const auto texel_quantized_1 = vmaxq_s16(vminq_s16(vaddq_s16(vcvtq_s16_f16(texels.val[1]), offset), max_val_vec), zero_val_vec); - vst1q_u8(reinterpret_cast(output.ptr()), vcombine_u8(vqmovun_s16(texel_quantized_0), vqmovun_s16(texel_quantized_1))); + vst1q_u8(reinterpret_cast(output.ptr()), vcombine_u8(vqmovun_s16(vcvtq_s16_f16(texels.val[0])), vqmovun_s16(vcvtq_s16_f16(texels.val[1])))); }, input, output); break; @@ -488,6 +465,29 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info input, output); break; } + case DataType::S32: + { + const float32x4_t scale = vdupq_n_f32(1 << _shift); + + /* Up-conversion F16 -> S32 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const float16x8x2_t texels = + { + { + vld1q_f16(reinterpret_cast(input.ptr())), + vld1q_f16(reinterpret_cast(input.ptr()) + 8) + } + }; + + vst1q_s32(reinterpret_cast(output.ptr()), vcvtq_s32_f32(vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[0])), scale))); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, vcvtq_s32_f32(vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[0])), scale))); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, vcvtq_s32_f32(vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[1])), scale))); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, vcvtq_s32_f32(vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[1])), scale))); + }, + input, output); + break; + } default: ARM_COMPUTE_ERROR("Output data type not supported"); } @@ -496,14 +496,12 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info case DataType::F32: switch(_output->info()->data_type()) { - case DataType::QASYMM8: +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: { - const float32x4_t scale = vinvq_f32(vdupq_n_f32(_output->info()->quantization_info().scale)); - const int32x4_t offset = vdupq_n_s32(_output->info()->quantization_info().offset); - const int32x4_t max_val_vec = vdupq_n_s32(255); - const int32x4_t zero_val_vec = vdupq_n_s32(0); + const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift)); - /* Down-conversion F32 -> QASYMM8 */ + /* Down-conversion F32 -> F16 */ execute_window_loop(window, [&](const Coordinates &) { const float32x4x4_t texels = @@ -516,25 +514,44 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info } }; - const auto texel_quantized_0 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[0]), offset), max_val_vec), zero_val_vec); - const auto texel_quantized_1 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[1]), offset), max_val_vec), zero_val_vec); - const auto texel_quantized_2 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[2]), offset), max_val_vec), zero_val_vec); - const auto texel_quantized_3 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[3]), offset), max_val_vec), zero_val_vec); + vst1q_f16(reinterpret_cast(output.ptr()), vcombine_f16(vcvt_f16_f32(texels.val[0]), vcvt_f16_f32(texels.val[1]))); + vst1q_f16(reinterpret_cast(output.ptr()) + 8, vcombine_f16(vcvt_f16_f32(texels.val[2]), vcvt_f16_f32(texels.val[3]))); + }, + input, output); + break; + } +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + case DataType::S32: + { + const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift)); - const auto converted_0 = 
vqmovn_u16(vcombine_u16(vqmovun_s32(texel_quantized_0), vqmovun_s32(texel_quantized_1))); - const auto converted_1 = vqmovn_u16(vcombine_u16(vqmovun_s32(texel_quantized_2), vqmovun_s32(texel_quantized_3))); + /* Conversion F32 -> S32 */ + execute_window_loop(window, [&](const Coordinates &) + { + const float32x4x4_t texels = + { + { + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr())), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 4), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 8), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 12), scale), + } + }; - vst1q_u8(reinterpret_cast(output.ptr()), vcombine_u8(converted_0, converted_1)); + vst1q_s32(reinterpret_cast(output.ptr()), vcvtq_s32_f32(texels.val[0])); + vst1q_s32(reinterpret_cast(output.ptr()) + 4, vcvtq_s32_f32(texels.val[1])); + vst1q_s32(reinterpret_cast(output.ptr()) + 8, vcvtq_s32_f32(texels.val[2])); + vst1q_s32(reinterpret_cast(output.ptr()) + 12, vcvtq_s32_f32(texels.val[3])); }, input, output); break; } -#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC - case DataType::F16: + case DataType::QASYMM8: + case DataType::U8: { const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift)); - /* Down-conversion F32 -> F16 */ + /* Down-conversion F32 -> U8 */ execute_window_loop(window, [&](const Coordinates &) { const float32x4x4_t texels = @@ -543,7 +560,41 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr())), scale), vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 4), scale), vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 8), scale), - vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 12), scale) + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 12), scale), + } + }; + + vst1_u8(reinterpret_cast(output.ptr()), vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(texels.val[0])), vqmovun_s32(vcvtq_s32_f32(texels.val[1]))))); + vst1_u8(reinterpret_cast(output.ptr())+8, vqmovn_u16(vcombine_u16(vqmovun_s32(vcvtq_s32_f32(texels.val[2])), vqmovun_s32(vcvtq_s32_f32(texels.val[3]))))); + }, + input, output); + break; + } + + + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + + case DataType::S32: + switch(_output->info()->data_type()) + { +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + case DataType::F16: + { + const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift)); + + /* Down-conversion S32 -> F16 */ + execute_window_loop(window, [&](const Coordinates &) + { + const float32x4x4_t texels = + { + { + vmulq_f32(vcvtq_f32_s32(vld1q_s32(reinterpret_cast(input.ptr()))), scale), + vmulq_f32(vcvtq_f32_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 4)), scale), + vmulq_f32(vcvtq_f32_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 8)), scale), + vmulq_f32(vcvtq_f32_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 12)), scale) } }; @@ -554,6 +605,77 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info break; } #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + case DataType::F32: + { + const int32x4_t scale = vdupq_n_s32(1.f / (1 << _shift)); + + /* Conversion S32 -> F32 */ + execute_window_loop(window, [&](const Coordinates &) + { + const int32x4x4_t texels = + { + { + vmulq_s32(vld1q_s32(reinterpret_cast(input.ptr())), scale), + vmulq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 4), scale), + vmulq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 8), scale), + vmulq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 12), scale), + } + }; + + 
vst1q_f32(reinterpret_cast(output.ptr()), vcvtq_f32_s32(texels.val[0])); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, vcvtq_f32_s32(texels.val[1])); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, vcvtq_f32_s32(texels.val[2])); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, vcvtq_f32_s32(texels.val[3])); + }, + input, output); + break; + } + case DataType::QASYMM8: + case DataType::U8: + { + const int32x4_t b = vdupq_n_s32(-static_cast(_shift)); + + /* Down-conversion S32 -> U8 */ + if(ConvertPolicy::SATURATE == _policy) + { + execute_window_loop(window, [&](const Coordinates &) + { + const int32x4x4_t texels = + { + { + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr())), b), + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 4), b), + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 8), b), + vqshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 12), b) + } + }; + + vst1_u8(reinterpret_cast(output.ptr()), vmovn_u16(vcombine_u16(vqmovun_s32(texels.val[0]), vqmovun_s32(texels.val[1])))); + vst1_u8(reinterpret_cast(output.ptr())+8, vmovn_u16(vcombine_u16(vqmovun_s32(texels.val[2]), vqmovun_s32(texels.val[3])))); + }, + input, output); + } + else + { + execute_window_loop(window, [&](const Coordinates &) + { + const int32x4x4_t texels = + { + { + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr())), b), + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 4), b), + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 8), b), + vshlq_s32(vld1q_s32(reinterpret_cast(input.ptr()) + 12), b) + } + }; + + vst1_u8(reinterpret_cast(output.ptr()), vmovn_u16(vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(texels.val[0])),vmovn_u32(vreinterpretq_u32_s32(texels.val[1]))))); + vst1_u8(reinterpret_cast(output.ptr())+8, vmovn_u16(vcombine_u16(vmovn_u32(vreinterpretq_u32_s32(texels.val[2])),vmovn_u32(vreinterpretq_u32_s32(texels.val[3]))))); + }, + input, output); + } + break; + } default: ARM_COMPUTE_ERROR("Output data type not supported"); } diff --git a/src/runtime/NEON/functions/NECast.cpp b/src/runtime/NEON/functions/NECast.cpp new file mode 100644 index 0000000000..fe1e486ab1 --- /dev/null +++ b/src/runtime/NEON/functions/NECast.cpp @@ -0,0 +1,46 @@ +/* + * Copyright (c) 2019 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */
+#include "arm_compute/runtime/NEON/functions/NECast.h"
+
+#include "arm_compute/core/ITensor.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "support/ToolchainSupport.h"
+
+#include <utility>
+
+namespace arm_compute
+{
+void NECast::configure(ITensor *input, ITensor *output, ConvertPolicy policy)
+{
+ auto k = arm_compute::support::cpp14::make_unique<NEDepthConvertLayerKernel>();
+ k->configure(input, output, policy, 0);
+ _kernel = std::move(k);
+}
+
+Status NECast::validate(ITensorInfo *input, ITensorInfo *output, ConvertPolicy policy)
+{
+ return NEDepthConvertLayerKernel::validate(input, output, policy, 0);
+}
+} // namespace arm_compute
diff --git a/tests/validation/NEON/Cast.cpp b/tests/validation/NEON/Cast.cpp
new file mode 100644
index 0000000000..9300ad4163
--- /dev/null
+++ b/tests/validation/NEON/Cast.cpp
@@ -0,0 +1,186 @@
+/*
+ * Copyright (c) 2019 ARM Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/runtime/NEON/functions/NECast.h"
+#include "arm_compute/runtime/Tensor.h"
+#include "arm_compute/runtime/TensorAllocator.h"
+#include "tests/NEON/Accessor.h"
+#include "tests/PaddingCalculator.h"
+#include "tests/datasets/ConvertPolicyDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/CastFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+// Tolerance
+constexpr AbsoluteTolerance<float> one_tolerance(1);
+constexpr AbsoluteTolerance<float> zero_tolerance(0);
+
+/*
+ * This function ignores the scale and zeroPoint of quantized tensors, so QASYMM8 input is treated as uint8 values.
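+ *
+ * For example, with a hypothetical QuantizationInfo(0.5f, 10), a QASYMM8 tensor storing
+ * the raw byte 140 is cast to the S32 value 140, not to the dequantized value
+ * (140 - 10) * 0.5f = 65.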
+*/ + +/** Input data sets **/ +// QASYMM8 +const auto CastQASYMM8toF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F16)); +const auto CastQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32)); +const auto CastQASYMM8toS32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::S32)); + +// U8 +const auto CastU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); +const auto CastU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); +const auto CastU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); +const auto CastU8toF32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F32)); + +// U16 +const auto CastU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); +const auto CastU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); + +// S16 +const auto CastS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); +const auto CastS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); + +//S32 +const auto CastS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16)); +const auto CastS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8)); +const auto CastS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32)); +const auto CastS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8)); + +// F16 +const auto CastF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); +const auto CastF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::S32)); +const auto CastF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8)); + + +// F32 +const auto CastF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8)); +const auto CastF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); +const auto CastF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); +const auto CastF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); + +} // namespace + +TEST_SUITE(NEON) +TEST_SUITE(Cast) +template +using NECastToU8Fixture = CastValidationFixture; +template +using 
NECastToU16Fixture = CastValidationFixture; +template +using NECastToS16Fixture = CastValidationFixture; +template +using NECastToU32Fixture = CastValidationFixture; +template +using NECastToS32Fixture = CastValidationFixture; +template +using NECastToF16Fixture = CastValidationFixture; +template +using NECastToF32Fixture = CastValidationFixture; +template +using NECastToQASYMM8Fixture = CastValidationFixture; + +#define CAST_SUITE(NAME, idt, odt, type, dataset, tolerance) \ + TEST_SUITE(NAME) \ + DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), datasets::ConvertPolicies()), \ + shape, policy) \ + { \ + Tensor src = create_tensor(shape, idt, 1); \ + Tensor dst = create_tensor(shape, odt, 1); \ + \ + NECast cast; \ + cast.configure(&src, &dst, policy); \ + \ + const ValidRegion valid_region = shape_to_valid_region(shape); \ + validate(dst.info()->valid_region(), valid_region); \ + \ + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); \ + validate(src.info()->padding(), padding); \ + validate(dst.info()->padding(), padding); \ + } \ + FIXTURE_DATA_TEST_CASE(RunSmall, type, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallShapes(), dataset), \ + datasets::ConvertPolicies())) \ + { \ + validate(Accessor(_target), _reference, tolerance); \ + } \ + TEST_SUITE_END() + +//QASYMM8 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +CAST_SUITE(QASYMM8_to_F16, DataType::QASYMM8, DataType::F16, NECastToF16Fixture, CastQASYMM8toF16Dataset, one_tolerance) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +CAST_SUITE(QASYMM8_to_F32, DataType::QASYMM8, DataType::F32, NECastToF32Fixture, CastQASYMM8toF32Dataset, one_tolerance) +CAST_SUITE(QASYMM8_to_S32, DataType::QASYMM8, DataType::S32, NECastToS32Fixture, CastQASYMM8toS32Dataset, one_tolerance) + +// U8 +CAST_SUITE(U8_to_U16, DataType::U8, DataType::U16, NECastToU16Fixture, CastU8toU16Dataset, zero_tolerance) +CAST_SUITE(U8_to_S16, DataType::U8, DataType::S16, NECastToS16Fixture, CastU8toS16Dataset, zero_tolerance) +CAST_SUITE(U8_to_S32, DataType::U8, DataType::S32, NECastToS32Fixture, CastU8toS32Dataset, zero_tolerance) +CAST_SUITE(U8_to_F32, DataType::U8, DataType::F32, NECastToF32Fixture, CastU8toF32Dataset, zero_tolerance) + +// U16 +CAST_SUITE(U16_to_U8, DataType::U16, DataType::U8, NECastToU8Fixture, CastU16toU8Dataset, zero_tolerance) +CAST_SUITE(U16_to_U32, DataType::U16, DataType::U32, NECastToU32Fixture, CastU16toU32Dataset, zero_tolerance) + +// S16 +CAST_SUITE(S16_to_U8, DataType::S16, DataType::U8, NECastToU8Fixture, CastS16toU8Dataset, zero_tolerance) +CAST_SUITE(S16_to_S32, DataType::S16, DataType::S32, NECastToS32Fixture, CastS16toS32Dataset, zero_tolerance) + +// S32 +CAST_SUITE(S32_to_QASYMM8, DataType::S32, DataType::QASYMM8, NECastToQASYMM8Fixture, CastS32toQASYMM8Dataset, one_tolerance) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +CAST_SUITE(S32_to_F16, DataType::S32, DataType::F16, NECastToF16Fixture, CastS32toF16Dataset, zero_tolerance) +#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +CAST_SUITE(S32_to_F32, DataType::S32, DataType::F32, NECastToF32Fixture, CastS32toF32Dataset, one_tolerance) +CAST_SUITE(S32_to_U8, DataType::S32, DataType::U8, NECastToU8Fixture, CastS32toU8Dataset, one_tolerance) + +// F16 +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +CAST_SUITE(F16_to_QASYMM8, DataType::F16, DataType::QASYMM8, NECastToQASYMM8Fixture, CastF16toQASYMM8Dataset, one_tolerance) +CAST_SUITE(F16_to_F32, DataType::F16, DataType::F32, NECastToF32Fixture, 
CastF16toF32Dataset, zero_tolerance)
+CAST_SUITE(F16_to_S32, DataType::F16, DataType::S32, NECastToS32Fixture, CastF16toS32Dataset, one_tolerance)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+// F32
+CAST_SUITE(F32_to_QASYMM8, DataType::F32, DataType::QASYMM8, NECastToQASYMM8Fixture, CastF32toQASYMM8Dataset, one_tolerance)
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+CAST_SUITE(F32_to_F16, DataType::F32, DataType::F16, NECastToF16Fixture, CastF32toF16Dataset, zero_tolerance)
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+CAST_SUITE(F32_to_S32, DataType::F32, DataType::S32, NECastToS32Fixture, CastF32toS32Dataset, one_tolerance)
+CAST_SUITE(F32_to_U8, DataType::F32, DataType::U8, NECastToU8Fixture, CastF32toU8Dataset, one_tolerance)
+
+TEST_SUITE_END() // Cast
+TEST_SUITE_END() // NEON
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
index cee7bdedc2..2592c5d5a1 100644
--- a/tests/validation/NEON/DepthConvertLayer.cpp
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -46,15 +46,28 @@ namespace
 /** Input data sets **/
 const auto DepthConvertLayerQASYMM8toF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F16));
 const auto DepthConvertLayerQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerQASYMM8toS32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::S32));
 const auto DepthConvertLayerU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16));
 const auto DepthConvertLayerU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16));
 const auto DepthConvertLayerU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerU8toF16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F16));
+const auto DepthConvertLayerU8toF32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::F32));
 const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8));
 const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
 const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
 const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerF16toU8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::U8));
 const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerF16toS32Dataset = combine(framework::dataset::make("DataType", DataType::F16),
framework::dataset::make("DataType", DataType::S32)); const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); +const auto DepthConvertLayerF32toS32Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerF32toU8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::U8)); + +const auto DepthConvertLayerS32toF32Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerS32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto DepthConvertLayerS32toF16Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::F16)); +const auto DepthConvertLayerS32toU8Dataset = combine(framework::dataset::make("DataType", DataType::S32), framework::dataset::make("DataType", DataType::U8)); + const auto DepthConvertLayerF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8)); const auto DepthConvertLayerF32toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); const auto DepthConvertLayerShiftDatasetNightly = framework::dataset::make("Shift", 0, 7); @@ -62,6 +75,8 @@ const auto DepthConvertLayerShiftDatasetPrecommit = framework::dataset::make("Sh const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shift", 0); constexpr AbsoluteTolerance tolerance_qasymm8(1); +constexpr AbsoluteTolerance tolerance_one_int32(1); +constexpr AbsoluteTolerance tolerance_one_uint8(1); } // namespace TEST_SUITE(NEON) @@ -70,23 +85,15 @@ TEST_SUITE(DepthConvertLayer) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Invalid data type combination - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U16), // Invalid data type combination + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U16), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Invalid data type combination - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Mismatching shapes TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Invalid shift TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Valid }), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - 
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), })), framework::dataset::make("Policy",{ ConvertPolicy::WRAP, @@ -99,7 +106,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( })), framework::dataset::make("Shift",{ 1, 1, 1, 1, 1, 1, 8, 1, })), - framework::dataset::make("Expected", { false, false, false, false, false, false, false, true})), + framework::dataset::make("Expected", { false, false, false, false, true})), input_info, output_info, policy, shift, expected) { ARM_COMPUTE_EXPECT(bool(NEDepthConvertLayer::validate(&input_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false), policy, shift)) == expected, framework::LogLevel::ERRORS); @@ -127,6 +134,8 @@ template using NEDepthConvertLayerQuantizedToF16Fixture = DepthConvertLayerValidationQuantizedFixture; template using NEDepthConvertLayerQuantizedToF32Fixture = DepthConvertLayerValidationQuantizedFixture; +template +using NEDepthConvertLayerQuantizedToS32Fixture = DepthConvertLayerValidationQuantizedFixture; TEST_SUITE(QASYMM8_to_F32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ -171,6 +180,49 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerQuantizedToF32Fixture(shape, DataType::QASYMM8, 1); + Tensor dst = create_tensor(shape, DataType::S32, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerQuantizedToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerQASYMM8toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerQuantizedToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerQASYMM8toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // QASYMM8_to_S32 + TEST_SUITE(U8_to_U16) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerShiftDatasetNightly), @@ -290,6 +342,90 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, frame } TEST_SUITE_END() // U8_to_S32 + +TEST_SUITE(U8_to_F32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), 
framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDatasetNightly),
+ shape, policy, shift)
+{
+ // Create tensors
+ Tensor src = create_tensor(shape, DataType::U8, 1);
+ Tensor dst = create_tensor(shape, DataType::F32, 1);
+
+ // Create and Configure function
+ NEDepthConvertLayer depth_convert;
+ depth_convert.configure(&src, &dst, policy, shift);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ validate(src.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toF32Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDatasetPrecommit))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toF32Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDatasetNightly))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // U8_to_F32
+
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(U8_to_F16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDatasetNightly),
+ shape, policy, shift)
+{
+ // Create tensors
+ Tensor src = create_tensor(shape, DataType::U8, 1);
+ Tensor dst = create_tensor(shape, DataType::F16, 1);
+
+ // Create and Configure function
+ NEDepthConvertLayer depth_convert;
+ depth_convert.configure(&src, &dst, policy, shift);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ validate(src.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerU8toF16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDatasetPrecommit))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerU8toF16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDatasetNightly))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // U8_to_F16
+#endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+
+
 TEST_SUITE(U16_to_U8)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy",
{ ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerShiftDatasetNightly), @@ -490,6 +626,48 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture, fram } TEST_SUITE_END() // F16_to_QASYMM8 + +TEST_SUITE(F16_to_U8) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::F16, 1); + Tensor dst = create_tensor(shape, DataType::U8, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_one_uint8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toU8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_one_uint8); +} +TEST_SUITE_END() // F16_to_U8 + + + TEST_SUITE(F16_to_F32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset), @@ -529,6 +707,46 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture, framewor } TEST_SUITE_END() // F16_to_F32 +TEST_SUITE(F16_to_S32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::F16, 1); + Tensor dst = create_tensor(shape, DataType::S32, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + 
DepthConvertLayerZeroShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_one_int32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toS32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_one_int32); +} + +TEST_SUITE_END() // F16_to_S32 + TEST_SUITE(QASYMM8_to_F16) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset), @@ -610,8 +828,128 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture, framewo validate(Accessor(_target), _reference); } TEST_SUITE_END() // F32_to_F16 + +TEST_SUITE(S32_to_F16) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::S32, 1); + Tensor dst = create_tensor(shape, DataType::F16, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS32toF16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS32toF16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset)) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // S32_to_F16 + #endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +TEST_SUITE(F32_to_S32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::F32, 1); + Tensor dst = create_tensor(shape, DataType::S32, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + 
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToS32Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toS32Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_one_int32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toS32Dataset),
+                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                  DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_one_int32);
+}
+TEST_SUITE_END() // F32_to_S32
+
+TEST_SUITE(F32_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerZeroShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::F32, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::U8, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toU8Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                   DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_one_int32);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toU8Dataset),
+                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                 DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_one_int32);
+}
+TEST_SUITE_END() // F32_to_U8
+
+
+
 TEST_SUITE(F32_to_QASYMM8)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
                                                                    DepthConvertLayerZeroShiftDataset),
@@ -654,6 +992,132 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture<float>, fra
     validate(Accessor(_target), _reference, tolerance_qasymm8);
 }
 TEST_SUITE_END() // F32_to_QASYMM8
+
+
+TEST_SUITE(S32_to_F32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerZeroShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::S32, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::F32, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS32toF32Dataset),
+                                                                                                                      framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                      DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS32toF32Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // S32_to_F32
+
+TEST_SUITE(S32_to_QASYMM8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerZeroShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::S32, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::QASYMM8, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(),
+                                                                                                                   DepthConvertLayerS32toQASYMM8Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                                                                                                   DepthConvertLayerZeroShiftDataset),
+                                                                                                                   framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(),
+                                                                                                                 DepthConvertLayerS32toQASYMM8Dataset),
+                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })),
+                                                                                                                 DepthConvertLayerZeroShiftDataset),
+                                                                                                                 framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })))
+{
+    // Validate output
+    validate(Accessor(_target), _reference, tolerance_qasymm8);
+}
+TEST_SUITE_END() // S32_to_QASYMM8
+
+TEST_SUITE(S32_to_U8)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(datasets::SmallShapes(), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerZeroShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::S32, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::U8, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToU8Fixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerS32toU8Dataset),
+                                                                                                                     framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                     DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture<int32_t>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerS32toU8Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                   DepthConvertLayerZeroShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END() // S32_to_U8
+
+
+
+
 TEST_SUITE_END() // DepthConvertLayer
 TEST_SUITE_END() // NEON
 } // namespace validation
diff --git a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h
index a3d379eede..3fe12709c8 100644
--- a/tests/validation/fixtures/DepthConvertLayerFixture.h
+++ b/tests/validation/fixtures/DepthConvertLayerFixture.h
@@ -40,6 +40,7 @@ namespace test
 {
 namespace validation
 {
+/* This function ignores the scale and zeroPoint of quantized tensors, i.e. QASYMM8 input is treated as uint8 values. */
 template <typename TensorType, typename AccessorType, typename FunctionType, typename T1, typename T2>
 class DepthConvertLayerValidationBaseFixture : public framework::Fixture
 {
@@ -55,7 +56,7 @@ public:
 
 protected:
     template <typename U>
-    void fill(U &&tensor, int i)
+    void fill(U &&tensor, int i, DataType dt_in, DataType dt_out)
     {
         if(is_data_type_quantized(tensor.data_type()))
         {
@@ -66,7 +67,16 @@ protected:
         }
         else
         {
-            library->fill_tensor_uniform(tensor, i);
+            // When converting S32 to F16, values outside the F16 range become + or - infinity
+            // in both the reference and the NEON implementation, so keep the inputs within the
+            // largest finite F16 value.
+            if(dt_in == DataType::S32 && dt_out == DataType::F16)
+            {
+                std::uniform_int_distribution<int32_t> distribution_s32(-65504, 65504);
+                library->fill(tensor, distribution_s32, i);
+            }
+            else
+            {
+                library->fill_tensor_uniform(tensor, i);
+            }
         }
     }
 
@@ -91,7 +101,7 @@ protected:
         ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS);
 
         // Fill tensors
-        fill(AccessorType(src), 0);
+        fill(AccessorType(src), 0, dt_in, dt_out);
 
         // Compute function
         depth_convert.run();
@@ -105,7 +115,7 @@
         SimpleTensor<T1> src{ shape, dt_in, 1, _quantization_info };
 
         // Fill reference
-        fill(src, 0);
+        fill(src, 0, dt_in, dt_out);
 
         return reference::depth_convert<T1, T2>(src, dt_out, policy, shift);
     }
diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp
index 6d9f98db5a..7da0011fbb 100644
--- a/tests/validation/reference/DepthConvertLayer.cpp
+++ b/tests/validation/reference/DepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
 *
 * SPDX-License-Identifier: MIT
 *
@@ -48,14 +48,7 @@ SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, Con
 {
     for(int i = 0; i < src.num_elements(); ++i)
     {
-        if(is_data_type_quantized(src.data_type()))
-        {
-            result[i] = scvt_f32_qasymm8(src[i], src.quantization_info().scale, src.quantization_info().offset);
-        }
-        else
-        {
-            result[i] = src[i] << shift;
-        }
+        result[i] = src[i] << shift;
     }
 }
 // Down-casting
@@ -82,16 +75,8 @@ SimpleTensor<T2> depth_convert(const SimpleTensor<T1> &src, DataType dt_out, Con
     // Always saturate on floats
     for(int i = 0; i < src.num_elements(); ++i)
     {
-        if(is_data_type_quantized(dt_out))
-        {
-            T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
-            result[i] = sqcvt_qasymm8_f32(val, src.quantization_info().scale, src.quantization_info().offset);
-        }
-        else
-        {
-            T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
-            result[i] = utils::cast::saturate_cast<T2>(val);
-        }
+        T1 val    = utils::rounding::round_half_away_from_zero(src[i]);
+        result[i] = utils::cast::saturate_cast<T2>(val);
     }
 }
 else
-- 
cgit v1.2.1
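
For readers who want to try the new function, a minimal usage sketch follows. It is illustrative only, not part of the patch: the 32x32 shape and the QASYMM8-to-F32 pairing are arbitrary choices, while the configure() signature matches the NECast header introduced above.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/NEFunctions.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative 32x32 tensors: cast QASYMM8 to F32. The scale and
        // zeroPoint of the quantized input are ignored, i.e. the QASYMM8
        // values are converted as raw uint8.
        Tensor src{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::QASYMM8));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));

        NECast cast;
        cast.configure(&src, &dst, ConvertPolicy::SATURATE);

        // Allocate backing memory once the function is configured.
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src ...

        cast.run();
        return 0;
    }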
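The reference down-cast path above rounds half away from zero and then saturates to the destination type. A self-contained sketch of that semantic is shown below; round_and_saturate is a hypothetical stand-in for the library's utils::rounding/utils::cast helpers, not their actual implementation.

    #include <cmath>
    #include <cstdint>
    #include <iostream>
    #include <limits>

    // Round half away from zero, then saturate to the destination integer type T2.
    template <typename T2>
    T2 round_and_saturate(float v)
    {
        const float r = std::round(v); // std::round rounds halfway cases away from zero
        if(r >= static_cast<float>(std::numeric_limits<T2>::max()))
        {
            return std::numeric_limits<T2>::max();
        }
        if(r <= static_cast<float>(std::numeric_limits<T2>::min()))
        {
            return std::numeric_limits<T2>::min();
        }
        return static_cast<T2>(r);
    }

    int main()
    {
        std::cout << static_cast<int>(round_and_saturate<uint8_t>(300.4f)) << "\n"; // 255 (saturated)
        std::cout << static_cast<int>(round_and_saturate<int8_t>(-1.5f)) << "\n";   // -2 (half away from zero)
        return 0;
    }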