From e2588184240b4850f62859ca9f3c5e95c9d8e129 Mon Sep 17 00:00:00 2001 From: Michalis Spyrou Date: Thu, 13 Dec 2018 18:31:18 +0000 Subject: COMPMID-1755 NEON: Extend DepthConvert to support Cast Change-Id: I8e2ed9e97cbe86d8caf162bd84ecfd9b43b0bd3b Reviewed-on: https://review.mlplatform.org/401 Tested-by: Arm Jenkins Reviewed-by: Giuseppe Rossini Reviewed-by: Georgios Pinitas --- .../core/NEON/kernels/NEDepthConvertLayerKernel.h | 13 +- .../runtime/NEON/functions/NEDepthConvertLayer.h | 20 +- .../NEON/kernels/NEDepthConvertLayerKernel.cpp | 147 ++++++++++++- tests/validation/NEON/DepthConvertLayer.cpp | 229 ++++++++++++++++++--- .../validation/fixtures/DepthConvertLayerFixture.h | 54 ++++- tests/validation/reference/DepthConvertLayer.cpp | 21 +- 6 files changed, 427 insertions(+), 57 deletions(-) diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h index 6840b1adcd..16b8e4276f 100644 --- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h +++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h @@ -52,22 +52,23 @@ public: * * Valid conversions Input -> Output : * + * - QASYMM8 -> F16, F32 * - U8 -> U16, S16, S32 * - U16 -> U8, U32 * - S16 -> U8, S32 - * - F16 -> F32 - * - F32 -> F16 + * - F16 -> QASYMM8, F32 + * - F32 -> QASYMM8, F16 * - * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/F16/F32. - * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32. + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/F16/F32. + * @param[out] output The output tensor. Data types supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. */ void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel * - * @param[in] input Source tensor info. Data types supported: U8/U16/S16/F16/F32. - * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/U8/U16/S16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. * diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h index ebb9530c71..8ee5a123ae 100644 --- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h @@ -46,22 +46,24 @@ public: /** Initialize the function's source, destination * * Valid conversions Input -> Output : - * U8 -> U16, S16, S32 - * U16 -> U8, U32 - * S16 -> U8, S32 - * F16 -> F32 - * F32 -> F16 * - * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/F32. - * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F32. + * - QASYMM8 -> F16, F32 + * - U8 -> U16, S16, S32 + * - U16 -> U8, U32 + * - S16 -> U8, S32 + * - F16 -> QASYMM8, F32 + * - F32 -> QASYMM8, F16 + * + * @param[in] input The input tensor to convert. Data types supported: QASYMM8/U8/U16/S16/F16/F32. + * @param[out] output The output tensor. 
Data types supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. */ void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0); /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayer * - * @param[in] input Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32. - * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32. + * @param[in] input Source tensor info. Data types supported: QASYMM8/U8/U16/S16/F16/F32. + * @param[in] output Destination tensor info. Data type supported: QASYMM8/U8/U16/S16/U32/S32/F16/F32. * @param[in] policy Conversion policy. * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8. * diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp index 158f401084..54337551a7 100644 --- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp +++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp @@ -28,6 +28,7 @@ #include "arm_compute/core/Helpers.h" #include "arm_compute/core/ITensor.h" #include "arm_compute/core/NEON/NEFixedPoint.h" +#include "arm_compute/core/NEON/NEMath.h" #include "arm_compute/core/TensorInfo.h" #include "arm_compute/core/Validate.h" @@ -43,10 +44,13 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output); ARM_COMPUTE_UNUSED(policy); ARM_COMPUTE_RETURN_ERROR_ON(input == output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QASYMM8, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::QASYMM8 && (output->data_type() != DataType::F16 && output->data_type() != DataType::F32), + "Only data_types supported [in] QASYMM8 -> [out] F16, F32"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16 && output->data_type() != DataType::S32), "Only data_types supported [in] U8 -> [out] U16, S16, S32"); @@ -57,11 +61,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, C ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::S32), "Only data_types supported [in] S16 -> [out] U8, S32"); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32, - "Only data_types supported [in] F16 -> [out] F32"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F32), + "Only data_types supported [in] F16 -> [out] QASYMM8, F32"); - 
ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16, - "Only data_types supported [in] F32 -> [out] F16"); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && (output->data_type() != DataType::QASYMM8 && output->data_type() != DataType::F16), + "Only data_types supported [in] F32 -> [out] QASYMM8, F16"); // Validate in case of configured output if(output->total_size() > 0) @@ -134,6 +138,75 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info switch(_input->info()->data_type()) { + case DataType::QASYMM8: + { + switch(_output->info()->data_type()) + { + /* Up-conversion QASYMM8 -> F32 */ + case DataType::F32: + { + const float32x4_t scale = vdupq_n_f32(_input->info()->quantization_info().scale); + const int32x4_t offset = vdupq_n_s32(_input->info()->quantization_info().offset); + + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + const uint16x8x2_t texels_u16 = + { + { + vmovl_u8(vget_low_u8(texels_u8)), + vmovl_u8(vget_high_u8(texels_u8)) + } + }; + + const int32x4x4_t texels_s32 = + { + { + vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(texels_u16.val[0]))), + vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(texels_u16.val[0]))), + vreinterpretq_s32_u32(vmovl_u16(vget_low_u16(texels_u16.val[1]))), + vreinterpretq_s32_u32(vmovl_u16(vget_high_u16(texels_u16.val[1]))) + } + }; + + vst1q_f32(reinterpret_cast(output.ptr()), vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[0], offset)), scale)); + vst1q_f32(reinterpret_cast(output.ptr()) + 4, vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[1], offset)), scale)); + vst1q_f32(reinterpret_cast(output.ptr()) + 8, vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[2], offset)), scale)); + vst1q_f32(reinterpret_cast(output.ptr()) + 12, vmulq_f32(vcvtq_f32_s32(vsubq_s32(texels_s32.val[3], offset)), scale)); + }, + input, output); + break; + } +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + /* Up-conversion QASYMM8 -> F16 */ + case DataType::F16: + { + const float16x8_t scale = vdupq_n_f16(static_cast(_input->info()->quantization_info().scale)); + const int16x8_t offset = vdupq_n_s16(static_cast(_input->info()->quantization_info().offset)); + + execute_window_loop(window, [&](const Coordinates & id) + { + const uint8x16_t texels_u8 = vld1q_u8(input.ptr()); + const int16x8x2_t texels_s16 = + { + { + vreinterpretq_s16_u16(vmovl_u8(vget_low_u8(texels_u8))), + vreinterpretq_s16_u16(vmovl_u8(vget_high_u8(texels_u8))) + } + }; + + vst1q_f16(reinterpret_cast(output.ptr()), vmulq_f16(vcvtq_f16_s16(vsubq_s16(texels_s16.val[0], offset)), scale)); + vst1q_f16(reinterpret_cast(output.ptr()) + 8, vmulq_f16(vcvtq_f16_s16(vsubq_s16(texels_s16.val[1], offset)), scale)); + }, + input, output); + break; + } +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + default: + ARM_COMPUTE_ERROR("Output data type not supported"); + } + break; + } case DataType::U8: { const int16x8_t b = vdupq_n_s16(_shift); @@ -367,6 +440,31 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info case DataType::F16: switch(_output->info()->data_type()) { + case DataType::QASYMM8: + { + const float16x8_t scale = vinvq_f16(vdupq_n_f16(static_cast(_output->info()->quantization_info().scale))); + const int16x8_t offset = vdupq_n_s16(static_cast(_output->info()->quantization_info().offset)); + const int16x8_t max_val_vec = vdupq_n_s16(255); + const int16x8_t zero_val_vec = vdupq_n_s16(0); + + /* 
Down-conversion F16 -> QASYMM8 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const float16x8x2_t texels = + { + { + vmulq_f16(vld1q_f16(reinterpret_cast(input.ptr())), scale), + vmulq_f16(vld1q_f16(reinterpret_cast(input.ptr()) + 8), scale), + } + }; + + const auto texel_quantized_0 = vmaxq_s16(vminq_s16(vaddq_s16(vcvtq_s16_f16(texels.val[0]), offset), max_val_vec), zero_val_vec); + const auto texel_quantized_1 = vmaxq_s16(vminq_s16(vaddq_s16(vcvtq_s16_f16(texels.val[1]), offset), max_val_vec), zero_val_vec); + vst1q_u8(reinterpret_cast(output.ptr()), vcombine_u8(vqmovun_s16(texel_quantized_0), vqmovun_s16(texel_quantized_1))); + }, + input, output); + break; + } case DataType::F32: { const float32x4_t scale = vdupq_n_f32(1 << _shift); @@ -394,9 +492,44 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info ARM_COMPUTE_ERROR("Output data type not supported"); } break; +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ case DataType::F32: switch(_output->info()->data_type()) { + case DataType::QASYMM8: + { + const float32x4_t scale = vinvq_f32(vdupq_n_f32(_output->info()->quantization_info().scale)); + const int32x4_t offset = vdupq_n_s32(_output->info()->quantization_info().offset); + const int32x4_t max_val_vec = vdupq_n_s32(255); + const int32x4_t zero_val_vec = vdupq_n_s32(0); + + /* Down-conversion F32 -> QASYMM8 */ + execute_window_loop(window, [&](const Coordinates & id) + { + const float32x4x4_t texels = + { + { + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr())), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 4), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 8), scale), + vmulq_f32(vld1q_f32(reinterpret_cast(input.ptr()) + 12), scale) + } + }; + + const auto texel_quantized_0 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[0]), offset), max_val_vec), zero_val_vec); + const auto texel_quantized_1 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[1]), offset), max_val_vec), zero_val_vec); + const auto texel_quantized_2 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[2]), offset), max_val_vec), zero_val_vec); + const auto texel_quantized_3 = vmaxq_s32(vminq_s32(vaddq_s32(vcvtq_s32_f32(texels.val[3]), offset), max_val_vec), zero_val_vec); + + const auto converted_0 = vqmovn_u16(vcombine_u16(vqmovun_s32(texel_quantized_0), vqmovun_s32(texel_quantized_1))); + const auto converted_1 = vqmovn_u16(vcombine_u16(vqmovun_s32(texel_quantized_2), vqmovun_s32(texel_quantized_3))); + + vst1q_u8(reinterpret_cast(output.ptr()), vcombine_u8(converted_0, converted_1)); + }, + input, output); + break; + } +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC case DataType::F16: { const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift)); @@ -420,11 +553,11 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info input, output); break; } +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Output data type not supported"); } break; -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ default: ARM_COMPUTE_ERROR("Not supported"); } diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp index 133ff4f735..0916f633a3 100644 --- a/tests/validation/NEON/DepthConvertLayer.cpp +++ b/tests/validation/NEON/DepthConvertLayer.cpp @@ -44,17 +44,23 @@ namespace validation namespace { /** Input data sets **/ -const auto DepthConvertLayerU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), 
framework::dataset::make("DataType", DataType::U16)); -const auto DepthConvertLayerU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); -const auto DepthConvertLayerU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); -const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); -const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); -const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); -const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); -const auto DepthConvertLayerShiftDataset = framework::dataset::make("Shift", 0, 7); -const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shift", 0); +const auto DepthConvertLayerQASYMM8toF16Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F16)); +const auto DepthConvertLayerQASYMM8toF32Dataset = combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerU8toU16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U16)); +const auto DepthConvertLayerU8toS16Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S16)); +const auto DepthConvertLayerU8toS32Dataset = combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32)); +const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8)); +const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32)); +const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32)); +const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16)); +const auto DepthConvertLayerF16toQASYMM8Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto DepthConvertLayerF32toQASYMM8Dataset = 
combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::QASYMM8)); +const auto DepthConvertLayerShiftDataset = framework::dataset::make("Shift", 0, 7); +const auto DepthConvertLayerZeroShiftDataset = framework::dataset::make("Shift", 0); + +constexpr AbsoluteTolerance tolerance_qasymm8(1); } // namespace TEST_SUITE(NEON) @@ -73,6 +79,55 @@ template using NEDepthConvertLayerToF16Fixture = DepthConvertLayerValidationFixture; template using NEDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture; +template +using NEDepthConvertLayerToQASYMM8Fixture = DepthConvertLayerValidationQuantizedFixture; +template +using NEDepthConvertLayerQuantizedToF16Fixture = DepthConvertLayerValidationQuantizedFixture; +template +using NEDepthConvertLayerQuantizedToF32Fixture = DepthConvertLayerValidationQuantizedFixture; + +TEST_SUITE(QASYMM8_to_F32) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QASYMM8, 1); + Tensor dst = create_tensor(shape, DataType::F32, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerQuantizedToF32Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerQASYMM8toF32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerQuantizedToF32Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerQASYMM8toF32Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // QASYMM8_to_F32 TEST_SUITE(U8_to_U16) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ -112,7 +167,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU16Fixture, frame // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // U8_to_U16 TEST_SUITE(U8_to_S16) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ 
-152,7 +207,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS16Fixture, frame // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // U8_to_S16 TEST_SUITE(U8_to_S32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerShiftDataset), @@ -191,7 +246,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, frame // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // U8_to_S32 TEST_SUITE(U16_to_U8) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ -230,7 +285,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, frame // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // U16_to_U8 TEST_SUITE(U16_to_U32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ -269,7 +324,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU32Fixture, fram // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // U16_to_U32 TEST_SUITE(S16_to_U8) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ -308,7 +363,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToU8Fixture, framew // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // S16_to_U8 TEST_SUITE(S16_to_S32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ -347,9 +402,52 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture, frame // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // S16_to_S32 #ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(F16_to_QASYMM8) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::F16, 1); + Tensor dst = create_tensor(shape, DataType::QASYMM8, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), 
padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerF16toQASYMM8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerF16toQASYMM8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // F16_to_QASYMM8 + TEST_SUITE(F16_to_F32) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), DepthConvertLayerZeroShiftDataset), @@ -387,7 +485,50 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture, framewor // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() +TEST_SUITE_END() // F16_to_F32 + +TEST_SUITE(QASYMM8_to_F16) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::QASYMM8, 1); + Tensor dst = create_tensor(shape, DataType::F16, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerQuantizedToF16Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerQASYMM8toF16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerQuantizedToF16Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerQASYMM8toF16Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference); +} +TEST_SUITE_END() // QASYMM8_to_F16 TEST_SUITE(F32_to_F16) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, 
combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), @@ -426,11 +567,53 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture, framewo // Validate output validate(Accessor(_target), _reference); } -TEST_SUITE_END() -#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +TEST_SUITE_END() // F32_to_F16 +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ -TEST_SUITE_END() -TEST_SUITE_END() +TEST_SUITE(F32_to_QASYMM8) +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })), + DepthConvertLayerZeroShiftDataset), + shape, policy, shift) +{ + // Create tensors + Tensor src = create_tensor(shape, DataType::F32, 1); + Tensor dst = create_tensor(shape, DataType::QASYMM8, 1); + + // Create and Configure function + NEDepthConvertLayer depth_convert; + depth_convert.configure(&src, &dst, policy, shift); + + // Validate valid region + const ValidRegion valid_region = shape_to_valid_region(shape); + validate(dst.info()->valid_region(), valid_region); + + // Validate padding + const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding(); + validate(src.info()->padding(), padding); + validate(dst.info()->padding(), padding); +} + +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToQASYMM8Fixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), + DepthConvertLayerF32toQASYMM8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToQASYMM8Fixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeShapes(), + DepthConvertLayerF32toQASYMM8Dataset), + framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE })), + DepthConvertLayerZeroShiftDataset), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() // F32_to_QASYMM8 +TEST_SUITE_END() // DepthConvertLayer +TEST_SUITE_END() // NEON } // namespace validation } // namespace test } // namespace arm_compute diff --git a/tests/validation/fixtures/DepthConvertLayerFixture.h b/tests/validation/fixtures/DepthConvertLayerFixture.h index 29034c5334..a3d379eede 100644 --- a/tests/validation/fixtures/DepthConvertLayerFixture.h +++ b/tests/validation/fixtures/DepthConvertLayerFixture.h @@ -41,29 +41,40 @@ namespace test namespace validation { template -class DepthConvertLayerValidationFixture : public framework::Fixture +class DepthConvertLayerValidationBaseFixture : public framework::Fixture { public: template - void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) + void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info) { - _shift = shift; - _target = compute_target(shape, dt_in, dt_out, policy, shift); - _reference = compute_reference(shape, dt_in, dt_out, policy, shift); + _shift = shift; + 
_quantization_info = quantization_info; + _target = compute_target(shape, dt_in, dt_out, policy, shift); + _reference = compute_reference(shape, dt_in, dt_out, policy, shift); } protected: template void fill(U &&tensor, int i) { - library->fill_tensor_uniform(tensor, i); + if(is_data_type_quantized(tensor.data_type())) + { + std::pair bounds = get_quantized_bounds(tensor.quantization_info(), -1.0f, 1.0f); + std::uniform_int_distribution distribution(bounds.first, bounds.second); + + library->fill(tensor, distribution, i); + } + else + { + library->fill_tensor_uniform(tensor, i); + } } TensorType compute_target(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) { // Create tensors - TensorType src = create_tensor(shape, dt_in, 1); - TensorType dst = create_tensor(shape, dt_out, 1); + TensorType src = create_tensor(shape, dt_in, 1, _quantization_info); + TensorType dst = create_tensor(shape, dt_out, 1, _quantization_info); // Create and configure function FunctionType depth_convert; @@ -91,7 +102,7 @@ protected: SimpleTensor compute_reference(const TensorShape &shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) { // Create reference - SimpleTensor src{ shape, dt_in, 1 }; + SimpleTensor src{ shape, dt_in, 1, _quantization_info }; // Fill reference fill(src, 0); @@ -102,6 +113,31 @@ protected: TensorType _target{}; SimpleTensor _reference{}; int _shift{}; + QuantizationInfo _quantization_info{}; +}; + +template +class DepthConvertLayerValidationFixture : public DepthConvertLayerValidationBaseFixture +{ +public: + template + void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift) + { + DepthConvertLayerValidationBaseFixture::setup(shape, dt_in, dt_out, policy, + shift, QuantizationInfo()); + } +}; + +template +class DepthConvertLayerValidationQuantizedFixture : public DepthConvertLayerValidationBaseFixture +{ +public: + template + void setup(TensorShape shape, DataType dt_in, DataType dt_out, ConvertPolicy policy, uint32_t shift, QuantizationInfo quantization_info) + { + DepthConvertLayerValidationBaseFixture::setup(shape, dt_in, dt_out, policy, + shift, quantization_info); + } }; } // namespace validation } // namespace test diff --git a/tests/validation/reference/DepthConvertLayer.cpp b/tests/validation/reference/DepthConvertLayer.cpp index c1fd9c51f7..6d9f98db5a 100644 --- a/tests/validation/reference/DepthConvertLayer.cpp +++ b/tests/validation/reference/DepthConvertLayer.cpp @@ -48,7 +48,14 @@ SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, Con { for(int i = 0; i < src.num_elements(); ++i) { - result[i] = src[i] << shift; + if(is_data_type_quantized(src.data_type())) + { + result[i] = scvt_f32_qasymm8(src[i], src.quantization_info().scale, src.quantization_info().offset); + } + else + { + result[i] = src[i] << shift; + } } } // Down-casting @@ -75,8 +82,16 @@ SimpleTensor depth_convert(const SimpleTensor &src, DataType dt_out, Con // Always saturate on floats for(int i = 0; i < src.num_elements(); ++i) { - T1 val = utils::rounding::round_half_away_from_zero(src[i]); - result[i] = utils::cast::saturate_cast(val); + if(is_data_type_quantized(dt_out)) + { + T1 val = utils::rounding::round_half_away_from_zero(src[i]); + result[i] = sqcvt_qasymm8_f32(val, src.quantization_info().scale, src.quantization_info().offset); + } + else + { + T1 val = utils::rounding::round_half_away_from_zero(src[i]); + result[i] = utils::cast::saturate_cast(val); + } } } else -- 
cgit v1.2.1
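
The QASYMM8 paths introduced above all reduce to the standard asymmetric quantization formulas: dequantization computes real = scale * (q - offset), and quantization computes q = clamp(round(real / scale) + offset, 0, 255). A minimal scalar sketch of both directions follows (the helper names are illustrative, not library functions; note the vectorized kernel and the reference implementation may round differently by one LSB, which is why the QASYMM8 tests accept an absolute tolerance of 1):

#include <algorithm>
#include <cmath>
#include <cstdint>

// QASYMM8 -> float: real_value = scale * (quantized - offset)
inline float dequantize_qasymm8(uint8_t q, float scale, int offset)
{
    return scale * static_cast<float>(static_cast<int>(q) - offset);
}

// float -> QASYMM8: quantized = clamp(round(real / scale) + offset, 0, 255)
inline uint8_t quantize_qasymm8(float value, float scale, int offset)
{
    const int q = static_cast<int>(std::lround(value / scale)) + offset;
    return static_cast<uint8_t>(std::min(255, std::max(0, q)));
}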
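
A minimal usage sketch of the extended function for a QASYMM8 -> F32 cast, assuming the configure(input, output, policy, shift) signature documented in this patch; the tensor shape and quantization parameters are illustrative (the same scale/offset pair the new tests use), and shift is 0 as required for the quantized casts:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

void cast_qasymm8_to_f32_example()
{
    Tensor src;
    Tensor dst;

    // QASYMM8 input with scale 0.5 and offset 10; F32 output of the same shape.
    src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::QASYMM8, QuantizationInfo(0.5f, 10)));
    dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

    // Configure the cast, then allocate backing memory.
    NEDepthConvertLayer depth_convert;
    depth_convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0 /* shift */);

    src.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src with quantized data ...

    depth_convert.run();
}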
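
Callers can also gate a conversion on the static validate() overload documented above before configuring anything; a sketch for the new F32 -> QASYMM8 down-conversion (the wrapper function is hypothetical):

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"

using namespace arm_compute;

bool can_cast_f32_to_qasymm8(const TensorShape &shape, const QuantizationInfo &qinfo)
{
    const TensorInfo src(shape, 1, DataType::F32);
    const TensorInfo dst(shape, 1, DataType::QASYMM8, qinfo);

    // Shift must be 0 for the quantized conversions added by this patch.
    const Status status = NEDepthConvertLayer::validate(&src, &dst, ConvertPolicy::SATURATE, 0);
    return status.error_code() == ErrorCode::OK;
}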