From 3e570dbdb0cbcbc3314e8f3e4daf2cf385caf325 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio
Date: Fri, 24 Aug 2018 18:28:48 +0100
Subject: COMPMID-1304: NEDepthConvert : Add support for FP32 -> FP16 and
 FP16 -> FP32 + validate() function

Change-Id: I12e4696a454744f6d493ab3a53520d3acf3a1a26
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145719
Tested-by: Jenkins
Reviewed-by: Georgios Pinitas
---
 .../core/NEON/kernels/NEDepthConvertLayerKernel.h  |  34 ++--
 .../runtime/NEON/functions/NEDepthConvertLayer.h   |  22 ++-
 .../NEON/kernels/NEDepthConvertLayerKernel.cpp     | 180 +++++++++++++++------
 src/runtime/NEON/functions/NEDepthConvertLayer.cpp |   9 +-
 tests/validation/NEON/DepthConvertLayer.cpp        |  86 ++++++++++
 5 files changed, 263 insertions(+), 68 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
index 77bb0413ca..6840b1adcd 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -25,9 +25,6 @@
 #define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
 
 #include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
 
 namespace arm_compute
 {
@@ -58,23 +55,34 @@ public:
      *   - U8 -> U16, S16, S32
      *   - U16 -> U8, U32
      *   - S16 -> U8, S32
+     *   - F16 -> F32
+     *   - F32 -> F16
+     *
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/U16/S16/F16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     */
+    void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel
+     *
+     * @param[in] input  Source tensor info. Data types supported: U8/U16/S16/F16/F32.
+     * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in] policy Conversion policy
+     * @param[in] shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
      *
-     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16.
-     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
-     * @param[in]      policy Conversion policy.
-     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
-     *                 In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
+     * @return a status
      */
-    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
 
     // Inherited methods overridden:
     void run(const Window &window, const ThreadInfo &info) override;
 
 private:
-    ITensor      *_input;
-    ITensor      *_output;
-    ConvertPolicy _policy;
-    uint32_t      _shift;
+    const ITensor *_input;
+    ITensor       *_output;
+    ConvertPolicy  _policy;
+    uint32_t       _shift;
 };
 } // namespace arm_compute
 #endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index eedadc242d..1fdad30115 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -49,13 +49,25 @@ public:
      *    U8 -> U16, S16, S32
      *    U16 -> U8, U32
      *    S16 -> U8, S32
+     *    F16 -> F32
+     *    F32 -> F16
      *
-     * @param[in, out] input  The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16/F32.
-     * @param[out]     output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
-     * @param[in]      policy Conversion policy.
-     * @param[in]      shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     * @param[in]  input  The input tensor to convert. Data types supported: U8/U16/S16/F32.
+     * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F32.
+     * @param[in]  policy Conversion policy.
+     * @param[in]  shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
      */
-    void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+    /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayer
+     *
+     * @param[in] input  Source tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in] output Destination tensor info. Data type supported: U8/U16/S16/U32/S32/F16/F32.
+     * @param[in] policy Conversion policy.
+     * @param[in] shift  (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
 };
 }
 #endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index 8280b52fcb..158f401084 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -23,6 +23,7 @@
  */
 #include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
 
+#include "arm_compute/core/CPP/Validate.h"
 #include "arm_compute/core/Error.h"
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensor.h"
@@ -34,68 +35,90 @@
 
 using namespace arm_compute;
 
-namespace arm_compute
+namespace
 {
-class Coordinates;
-} // namespace arm_compute
-
-NEDepthConvertLayerKernel::NEDepthConvertLayerKernel()
-    : _input(nullptr), _output(nullptr), _policy(), _shift(0)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
 {
-}
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+    ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
+    ARM_COMPUTE_UNUSED(policy);
+    ARM_COMPUTE_RETURN_ERROR_ON(input == output);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8);
 
-void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
-{
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16
+                                                                           && output->data_type() != DataType::S32),
+                                    "Only data_types supported [in] U8 -> [out] U16, S16, S32");
 
-    _input  = input;
-    _output = input;
-    _policy = policy;
-    _shift  = shift;
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32),
+                                    "Only data_types supported [in] U16 -> [out] U8, U32");
 
-    if(output != nullptr)
-    {
-        // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
-        set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::S32),
+                                    "Only data_types supported [in] S16 -> [out] U8, S32");
+
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32,
+                                    "Only data_types supported [in] F16 -> [out] F32");
 
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F32);
-        ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16,
+                                    "Only data_types supported [in] F32 -> [out] F16");
 
-        // Set output
-        _output = output;
+    // Validate in case of configured output
+    if(output->total_size() > 0)
+    {
+        ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
     }
 
-    ARM_COMPUTE_ERROR_ON(shift >= 8);
-    ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type())));
+    return Status{};
+}
 
-    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
-                             && output->info()->data_type() != DataType::S32),
-                             "Only data_types supported [in] U8 -> [out] U16, S16, S32");
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+    constexpr unsigned int num_elems_processed_per_iteration = 16;
 
-    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
-                             "Only data_types supported [in] U16 -> [out] U8, U32");
+    Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
 
-    ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
-                             "Only data_types supported [in] S16 -> [out] U8, S32");
+    AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+    AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+    bool                   window_changed = update_window_and_padding(win, input_access, output_access);
+    output_access.set_valid_region(win, output->valid_region());
 
-    constexpr unsigned int num_elems_processed_per_iteration = 16;
+    Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+    return std::make_pair(err, win);
+}
+} // namespace
+
+NEDepthConvertLayerKernel::NEDepthConvertLayerKernel()
+    : _input(nullptr), _output(nullptr), _policy(), _shift(0)
+{
+}
+
+void NEDepthConvertLayerKernel::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
+{
+    ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+    // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
+    set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+
+    _input  = input;
+    _output = output;
+    _policy = policy;
+    _shift  = shift;
+
+    ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), policy, shift));
 
     // Configure kernel window
-    Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+    auto win_config = validate_and_configure_window(input->info(), output->info());
+    ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+    ICPPKernel::configure(win_config.second);
+}
 
-    AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
-    if(output != nullptr)
-    {
-        AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
-        update_window_and_padding(win, input_access, output_access);
-        output_access.set_valid_region(win, input->info()->valid_region());
-    }
-    else
-    {
-        // In-place computation
-        update_window_and_padding(win, input_access);
-    }
-    ICPPKernel::configure(win);
+Status NEDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
+{
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, policy, shift));
+    ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+
+    return Status{};
 }
 
 void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info)
@@ -103,8 +126,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
     ARM_COMPUTE_UNUSED(info);
     ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
     ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-    ARM_COMPUTE_ERROR_ON(nullptr == _input);
-    ARM_COMPUTE_ERROR_ON(nullptr == _output);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_input, _output);
     ARM_COMPUTE_ERROR_ON(_input == _output);
 
     Iterator input(_input, window);
@@ -341,6 +363,68 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
             }
             break;
         }
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+        case DataType::F16:
+            switch(_output->info()->data_type())
+            {
+                case DataType::F32:
+                {
+                    const float32x4_t scale = vdupq_n_f32(1 << _shift);
+
+                    /* Up-conversion F16 -> F32 */
+                    execute_window_loop(window, [&](const Coordinates & id)
+                    {
+                        const float16x8x2_t texels =
+                        {
+                            {
+                                vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr())),
+                                vld1q_f16(reinterpret_cast<const float16_t *>(input.ptr()) + 8)
+                            }
+                        };
+
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()), vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[0])), scale));
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[0])), scale));
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[1])), scale));
+                        vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[1])), scale));
+                    },
+                    input, output);
+                    break;
+                }
+                default:
+                    ARM_COMPUTE_ERROR("Output data type not supported");
+            }
+            break;
+        case DataType::F32:
+            switch(_output->info()->data_type())
+            {
+                case DataType::F16:
+                {
+                    const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift));
+
+                    /* Down-conversion F32 -> F16 */
+                    execute_window_loop(window, [&](const Coordinates & id)
+                    {
+                        const float32x4x4_t texels =
+                        {
+                            {
+                                vmulq_f32(vld1q_f32(reinterpret_cast<const float *>(input.ptr())), scale),
+                                vmulq_f32(vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 4), scale),
+                                vmulq_f32(vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 8), scale),
+                                vmulq_f32(vld1q_f32(reinterpret_cast<const float *>(input.ptr()) + 12), scale)
+                            }
+                        };
+
+                        vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vcombine_f16(vcvt_f16_f32(texels.val[0]), vcvt_f16_f32(texels.val[1])));
+                        vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()) + 8, vcombine_f16(vcvt_f16_f32(texels.val[2]), vcvt_f16_f32(texels.val[3])));
+                    },
+                    input, output);
+                    break;
+                }
+                default:
+                    ARM_COMPUTE_ERROR("Output data type not supported");
+            }
+            break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
         default:
             ARM_COMPUTE_ERROR("Not supported");
     }
diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
index 9a75404fcd..0041c1f62e 100644
--- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -30,9 +30,14 @@
 
 using namespace arm_compute;
 
-void NEDepthConvertLayer::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
+void NEDepthConvertLayer::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
 {
     auto k = arm_compute::support::cpp14::make_unique<NEDepthConvertLayerKernel>();
     k->configure(input, output, policy, shift);
     _kernel = std::move(k);
 }
+
+Status NEDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
+{
+    return NEDepthConvertLayerKernel::validate(input, output, policy, shift);
+}
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
index 78070d004e..40700f85bb 100644
--- a/tests/validation/NEON/DepthConvertLayer.cpp
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -51,6 +51,8 @@ const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("
 const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
 const auto DepthConvertLayerS16toU8Dataset  = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
 const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
 const auto DepthConvertLayerShiftDataset    = framework::dataset::make("Shift", 0, 7);
 } // namespace
 
@@ -66,6 +68,10 @@ template <typename T>
 using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint8_t>;
 template <typename T>
 using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint32_t>;
+template <typename T>
+using NEDepthConvertLayerToF16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, half>;
+template <typename T>
+using NEDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, float>;
 
 TEST_SUITE(U8_to_U16)
 DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -342,6 +348,86 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<int16_t>, frame
 }
 TEST_SUITE_END()
 
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16_to_F32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::F16, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::F32, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toF32Dataset),
+                                                                                                                   framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                   DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset),
+                                                                                                                 framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                 DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(F32_to_F16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                   DepthConvertLayerShiftDataset),
+               shape, policy, shift)
+{
+    // Create tensors
+    Tensor src = create_tensor<Tensor>(shape, DataType::F32, 1);
+    Tensor dst = create_tensor<Tensor>(shape, DataType::F16, 1);
+
+    // Create and Configure function
+    NEDepthConvertLayer depth_convert;
+    depth_convert.configure(&src, &dst, policy, shift);
+
+    // Validate valid region
+    const ValidRegion valid_region = shape_to_valid_region(shape);
+    validate(dst.info()->valid_region(), valid_region);
+
+    // Validate padding
+    const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+    validate(src.info()->padding(), padding);
+    validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toF16Dataset),
+                                                                                                                    framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                    DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset),
+                                                                                                                  framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+                                                                                                                  DepthConvertLayerShiftDataset))
+{
+    // Validate output
+    validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
 TEST_SUITE_END()
 TEST_SUITE_END()
 } // namespace validation
-- 
cgit v1.2.1
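
Usage sketch (illustrative only, not part of the commit): with this change a
caller can query NEDepthConvertLayer::validate() before configuring anything,
instead of relying on asserts inside configure(). The sketch below assumes an
arm_compute build with FP16 support (e.g. armv8.2-a) and uses hypothetical
tensor names and a made-up 1D shape; the shift parameter keeps its documented
meaning, so shift = 0 is a plain F16 -> F32 conversion (the kernel scales by
1 << shift on the way up and by its reciprocal on the way down).

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Hypothetical tensors: 16 F16 elements converted to F32.
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F16));
        dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F32));

        // New in this patch: check the configuration up front and get a
        // Status back rather than triggering an assert later.
        const Status status = NEDepthConvertLayer::validate(src.info(), dst.info(),
                                                            ConvertPolicy::SATURATE, 0 /* shift */);
        if(status.error_code() != ErrorCode::OK)
        {
            return 1; // conversion not supported on this target/build
        }

        NEDepthConvertLayer convert;
        convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0);

        // Allocate after configure() so any padding requested by the kernel
        // window (16 elements per iteration) is applied to the tensors.
        src.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src ...
        convert.run();
        return 0;
    }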