author    Michele Di Giorgio <michele.digiorgio@arm.com>    2018-08-24 18:28:48 +0100
committer Anthony Barbier <anthony.barbier@arm.com>         2018-11-02 16:54:54 +0000
commit    3e570dbdb0cbcbc3314e8f3e4daf2cf385caf325 (patch)
tree      5cb9a4279a4b6b456a53ff8f918bb2aa6cd68dbe
parent    daa38559bb01eb3c2985f503e7b2b4dc850a34d3 (diff)
COMPMID-1304: NEDepthConvert : Add support for FP32 -> FP16 and FP16 -> FP32 + validate() function
Change-Id: I12e4696a454744f6d493ab3a53520d3acf3a1a26
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/145719
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
-rw-r--r--  arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h  |  34
-rw-r--r--  arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h   |  22
-rw-r--r--  src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp        | 180
-rw-r--r--  src/runtime/NEON/functions/NEDepthConvertLayer.cpp         |   9
-rw-r--r--  tests/validation/NEON/DepthConvertLayer.cpp                 |  86
5 files changed, 263 insertions(+), 68 deletions(-)
diff --git a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
index 77bb0413ca..6840b1adcd 100644
--- a/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
+++ b/arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h
@@ -25,9 +25,6 @@
#define __ARM_COMPUTE_DEPTHCONVERTKERNEL_H__
#include "arm_compute/core/NEON/INEKernel.h"
-#include "arm_compute/core/Types.h"
-
-#include <cstdint>
namespace arm_compute
{
@@ -58,23 +55,34 @@ public:
* - U8 -> U16, S16, S32
* - U16 -> U8, U32
* - S16 -> U8, S32
+ * - F16 -> F32
+ * - F32 -> F16
+ *
+ * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+ */
+ void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayerKernel
+ *
+ * @param[in] input Source tensor info. Data types supported: U8/U16/S16/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
*
- * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16.
- * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
- * @param[in] policy Conversion policy.
- * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
- * In case of fixed point position conversion, it specifies the new fixed point position, if operation is in-place.
+ * @return a status
*/
- void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
// Inherited methods overridden:
void run(const Window &window, const ThreadInfo &info) override;
private:
- ITensor *_input;
- ITensor *_output;
- ConvertPolicy _policy;
- uint32_t _shift;
+ const ITensor *_input;
+ ITensor *_output;
+ ConvertPolicy _policy;
+ uint32_t _shift;
};
} // namespace arm_compute
#endif /*__ARM_COMPUTE_NEDEPTHCONVERTKERNEL_H__ */
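The header change above replaces the in-place configure() overload with a const-input one and adds a static validate(). A minimal sketch of the resulting validate-then-configure idiom at the kernel level, assuming the library's standard headers; the shapes and function name are illustrative, not taken from this patch:

    // Hypothetical usage sketch; TensorInfo values are illustrative.
    #include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"

    using namespace arm_compute;

    void check_conversion()
    {
        const TensorInfo src_info(TensorShape(16U, 16U), 1, DataType::F16);
        const TensorInfo dst_info(TensorShape(16U, 16U), 1, DataType::F32);

        // validate() returns an error Status instead of asserting at configure() time.
        const Status st = NEDepthConvertLayerKernel::validate(&src_info, &dst_info, ConvertPolicy::SATURATE, 0);
        if(bool(st))
        {
            // Safe to create tensors and call configure() on a kernel instance.
        }
    }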
diff --git a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
index eedadc242d..1fdad30115 100644
--- a/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
+++ b/arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h
@@ -49,13 +49,25 @@ public:
* U8 -> U16, S16, S32
* U16 -> U8, U32
* S16 -> U8, S32
+ * F16 -> F32
+ * F32 -> F16
*
- * @param[in, out] input The input tensor to convert (Written in case of in-place computation). Data types supported: U8/U16/S16/F32.
- * @param[out] output The output tensor. Can be null in case of in-place computation. Data types supported: U8/U16/S16/U32/S32/F32.
- * @param[in] policy Conversion policy.
- * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+ * @param[in] input The input tensor to convert. Data types supported: U8/U16/S16/F16/F32.
+ * @param[out] output The output tensor. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
*/
- void configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+ void configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift = 0);
+ /** Static function to check if given info will lead to a valid configuration of @ref NEDepthConvertLayer
+ *
+ * @param[in] input Source tensor info. Data types supported: U8/U16/S16/F16/F32.
+ * @param[in] output Destination tensor info. Data types supported: U8/U16/S16/U32/S32/F16/F32.
+ * @param[in] policy Conversion policy.
+ * @param[in] shift (Optional) Value for down/up conversions. Must be 0 <= shift < 8.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift = 0);
};
}
#endif /*__ARM_COMPUTE_NEDEPTHCONVERT_H__*/
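At the function level the same pattern applies. A hedged usage sketch of NEDepthConvertLayer for an F16 -> F32 up-conversion; the tensor shape and fill step are illustrative:

    // Sketch only: assumes the NEON runtime headers; shapes chosen for illustration.
    #include "arm_compute/core/Error.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void convert_f16_to_f32()
    {
        Tensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F16));
        dst.allocator()->init(TensorInfo(TensorShape(32U, 32U), 1, DataType::F32));

        // Reject unsupported type pairs before any allocation happens.
        ARM_COMPUTE_ERROR_THROW_ON(NEDepthConvertLayer::validate(src.info(), dst.info(), ConvertPolicy::SATURATE, 0));

        NEDepthConvertLayer convert;
        convert.configure(&src, &dst, ConvertPolicy::SATURATE, 0);

        src.allocator()->allocate();
        dst.allocator()->allocate();
        // ... fill src ...
        convert.run();
    }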
diff --git a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
index 8280b52fcb..158f401084 100644
--- a/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
+++ b/src/core/NEON/kernels/NEDepthConvertLayerKernel.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/core/NEON/kernels/NEDepthConvertLayerKernel.h"
+#include "arm_compute/core/CPP/Validate.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Helpers.h"
#include "arm_compute/core/ITensor.h"
@@ -34,68 +35,90 @@
using namespace arm_compute;
-namespace arm_compute
+namespace
{
-class Coordinates;
-} // namespace arm_compute
-
-NEDepthConvertLayerKernel::NEDepthConvertLayerKernel()
- : _input(nullptr), _output(nullptr), _policy(), _shift(0)
+Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
{
-}
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(input);
+ ARM_COMPUTE_RETURN_ERROR_ON_CPU_F16_UNSUPPORTED(output);
+ ARM_COMPUTE_UNUSED(policy);
+ ARM_COMPUTE_RETURN_ERROR_ON(input == output);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON(shift >= 8);
-void NEDepthConvertLayerKernel::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
-{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::U8, DataType::S16, DataType::U16);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U8 && (output->data_type() != DataType::S16 && output->data_type() != DataType::U16
+ && output->data_type() != DataType::S32),
+ "Only data_types supported [in] U8 -> [out] U16, S16, S32");
- _input = input;
- _output = input;
- _policy = policy;
- _shift = shift;
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::U16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::U32),
+ "Only data_types supported [in] U16 -> [out] U8, U32");
- if(output != nullptr)
- {
- // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
- set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::S16 && (output->data_type() != DataType::U8 && output->data_type() != DataType::S32),
+ "Only data_types supported [in] S16 -> [out] U8, S32");
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F16 && output->data_type() != DataType::F32,
+ "Only data_types supported [in] F16 -> [out] F32");
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::U8, DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F32);
- ARM_COMPUTE_ERROR_ON_MISMATCHING_SHAPES(input, output);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(input->data_type() == DataType::F32 && output->data_type() != DataType::F16,
+ "Only data_types supported [in] F32 -> [out] F16");
- // Set output
- _output = output;
+ // Validate in case of configured output
+ if(output->total_size() > 0)
+ {
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(input, output);
}
- ARM_COMPUTE_ERROR_ON(shift >= 8);
- ARM_COMPUTE_ERROR_ON(input == output && (data_size_from_type(input->info()->data_type()) != data_size_from_type(output->info()->data_type())));
+ return Status{};
+}
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U8 && (output->info()->data_type() != DataType::S16 && output->info()->data_type() != DataType::U16
- && output->info()->data_type() != DataType::S32),
- "Only data_types supported [in] U8 -> [out] U16, S16, S32");
+std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
+{
+ constexpr unsigned int num_elems_processed_per_iteration = 16;
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::U16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::U32),
- "Only data_types supported [in] U16 -> [out] U8, U32");
+ Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- ARM_COMPUTE_ERROR_ON_MSG(input->info()->data_type() == DataType::S16 && (output->info()->data_type() != DataType::U8 && output->info()->data_type() != DataType::S32),
- "Only data_types supported [in] S16 -> [out] U8, S32");
+ AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
+ AccessWindowHorizontal output_access(output, 0, num_elems_processed_per_iteration);
+ bool window_changed = update_window_and_padding(win, input_access, output_access);
+ output_access.set_valid_region(win, output->valid_region());
- constexpr unsigned int num_elems_processed_per_iteration = 16;
+ Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
+ return std::make_pair(err, win);
+}
+} // namespace
+
+NEDepthConvertLayerKernel::NEDepthConvertLayerKernel()
+ : _input(nullptr), _output(nullptr), _policy(), _shift(0)
+{
+}
+
+void NEDepthConvertLayerKernel::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
+{
+ ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
+
+ // Auto initialize output shape if not initialized (We can only auto-configure the shape, datatype must be given)
+ set_shape_if_empty(*output->info(), input->info()->tensor_shape());
+
+ _input = input;
+ _output = output;
+ _policy = policy;
+ _shift = shift;
+
+ ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), policy, shift));
// Configure kernel window
- Window win = calculate_max_window(*input->info(), Steps(num_elems_processed_per_iteration));
+ auto win_config = validate_and_configure_window(input->info(), output->info());
+ ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
+ ICPPKernel::configure(win_config.second);
+}
- AccessWindowHorizontal input_access(input->info(), 0, num_elems_processed_per_iteration);
- if(output != nullptr)
- {
- AccessWindowHorizontal output_access(output->info(), 0, num_elems_processed_per_iteration);
- update_window_and_padding(win, input_access, output_access);
- output_access.set_valid_region(win, input->info()->valid_region());
- }
- else
- {
- // In-place computation
- update_window_and_padding(win, input_access);
- }
- ICPPKernel::configure(win);
+Status NEDepthConvertLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
+{
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, policy, shift));
+ ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window(input->clone().get(), output->clone().get()).first);
+
+ return Status{};
}
void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info)
@@ -103,8 +126,7 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
ARM_COMPUTE_UNUSED(info);
ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
- ARM_COMPUTE_ERROR_ON(nullptr == _input);
- ARM_COMPUTE_ERROR_ON(nullptr == _output);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_input, _output);
ARM_COMPUTE_ERROR_ON(_input == _output);
Iterator input(_input, window);
@@ -341,6 +363,68 @@ void NEDepthConvertLayerKernel::run(const Window &window, const ThreadInfo &info
}
break;
}
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+ case DataType::F16:
+ switch(_output->info()->data_type())
+ {
+ case DataType::F32:
+ {
+ const float32x4_t scale = vdupq_n_f32(1 << _shift);
+
+ /* Up-conversion F16 -> F32 */
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const float16x8x2_t texels =
+ {
+ {
+ vld1q_f16(reinterpret_cast<float16_t *>(input.ptr())),
+ vld1q_f16(reinterpret_cast<float16_t *>(input.ptr()) + 8)
+ }
+ };
+
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()), vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[0])), scale));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 4, vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[0])), scale));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 8, vmulq_f32(vcvt_f32_f16(vget_low_f16(texels.val[1])), scale));
+ vst1q_f32(reinterpret_cast<float *>(output.ptr()) + 12, vmulq_f32(vcvt_f32_f16(vget_high_f16(texels.val[1])), scale));
+ },
+ input, output);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Output data type not supported");
+ }
+ break;
+ case DataType::F32:
+ switch(_output->info()->data_type())
+ {
+ case DataType::F16:
+ {
+ const float32x4_t scale = vdupq_n_f32(1.f / (1 << _shift));
+
+ /* Down-conversion F32 -> F16 */
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const float32x4x4_t texels =
+ {
+ {
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr())), scale),
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 4), scale),
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 8), scale),
+ vmulq_f32(vld1q_f32(reinterpret_cast<float *>(input.ptr()) + 12), scale)
+ }
+ };
+
+ vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()), vcombine_f16(vcvt_f16_f32(texels.val[0]), vcvt_f16_f32(texels.val[1])));
+ vst1q_f16(reinterpret_cast<float16_t *>(output.ptr()) + 8, vcombine_f16(vcvt_f16_f32(texels.val[2]), vcvt_f16_f32(texels.val[3])));
+ },
+ input, output);
+ break;
+ }
+ default:
+ ARM_COMPUTE_ERROR("Output data type not supported");
+ }
+ break;
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
default:
ARM_COMPUTE_ERROR("Not supported");
}
diff --git a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
index 9a75404fcd..0041c1f62e 100644
--- a/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
+++ b/src/runtime/NEON/functions/NEDepthConvertLayer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016, 2017 ARM Limited.
+ * Copyright (c) 2016-2018 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -30,9 +30,14 @@
using namespace arm_compute;
-void NEDepthConvertLayer::configure(ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
+void NEDepthConvertLayer::configure(const ITensor *input, ITensor *output, ConvertPolicy policy, uint32_t shift)
{
auto k = arm_compute::support::cpp14::make_unique<NEDepthConvertLayerKernel>();
k->configure(input, output, policy, shift);
_kernel = std::move(k);
}
+
+Status NEDepthConvertLayer::validate(const ITensorInfo *input, const ITensorInfo *output, ConvertPolicy policy, uint32_t shift)
+{
+ return NEDepthConvertLayerKernel::validate(input, output, policy, shift);
+}
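Since the function-level validate() forwards straight to the kernel, callers can probe an unsupported pair without configuring anything. A hedged sketch, assuming Status exposes error_description() as in the library's Error.h; the expected message is the kernel's own, from validate_arguments above:

    // Sketch: probe an unsupported F16 -> U8 conversion; names are illustrative.
    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEDepthConvertLayer.h"
    #include <iostream>

    using namespace arm_compute;

    void probe()
    {
        const TensorInfo src_info(TensorShape(8U, 8U), 1, DataType::F16);
        const TensorInfo dst_info(TensorShape(8U, 8U), 1, DataType::U8);

        const Status st = NEDepthConvertLayer::validate(&src_info, &dst_info, ConvertPolicy::SATURATE, 0);
        if(!bool(st))
        {
            // Expected: "Only data_types supported [in] F16 -> [out] F32"
            std::cout << st.error_description() << std::endl;
        }
    }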
diff --git a/tests/validation/NEON/DepthConvertLayer.cpp b/tests/validation/NEON/DepthConvertLayer.cpp
index 78070d004e..40700f85bb 100644
--- a/tests/validation/NEON/DepthConvertLayer.cpp
+++ b/tests/validation/NEON/DepthConvertLayer.cpp
@@ -51,6 +51,8 @@ const auto DepthConvertLayerU16toU8Dataset = combine(framework::dataset::make("
const auto DepthConvertLayerU16toU32Dataset = combine(framework::dataset::make("DataType", DataType::U16), framework::dataset::make("DataType", DataType::U32));
const auto DepthConvertLayerS16toU8Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::U8));
const auto DepthConvertLayerS16toS32Dataset = combine(framework::dataset::make("DataType", DataType::S16), framework::dataset::make("DataType", DataType::S32));
+const auto DepthConvertLayerF16toF32Dataset = combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F32));
+const auto DepthConvertLayerF32toF16Dataset = combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F16));
const auto DepthConvertLayerShiftDataset = framework::dataset::make("Shift", 0, 7);
} // namespace
@@ -66,6 +68,10 @@ template <typename T>
using NEDepthConvertLayerToU8Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint8_t>;
template <typename T>
using NEDepthConvertLayerToU32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, uint32_t>;
+template <typename T>
+using NEDepthConvertLayerToF16Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, half>;
+template <typename T>
+using NEDepthConvertLayerToF32Fixture = DepthConvertLayerValidationFixture<Tensor, Accessor, NEDepthConvertLayer, T, float>;
TEST_SUITE(U8_to_U16)
DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
@@ -342,6 +348,86 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToS32Fixture<int16_t>, frame
}
TEST_SUITE_END()
+#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC
+TEST_SUITE(F16_to_F32)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDataset),
+ shape, policy, shift)
+{
+ // Create tensors
+ Tensor src = create_tensor<Tensor>(shape, DataType::F16, 1);
+ Tensor dst = create_tensor<Tensor>(shape, DataType::F32, 1);
+
+ // Create and Configure function
+ NEDepthConvertLayer depth_convert;
+ depth_convert.configure(&src, &dst, policy, shift);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ validate(src.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF16toF32Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF32Fixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF16toF32Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+
+TEST_SUITE(F32_to_F16)
+DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDataset),
+ shape, policy, shift)
+{
+ // Create tensors
+ Tensor src = create_tensor<Tensor>(shape, DataType::F32, 1);
+ Tensor dst = create_tensor<Tensor>(shape, DataType::F16, 1);
+
+ // Create and Configure function
+ NEDepthConvertLayer depth_convert;
+ depth_convert.configure(&src, &dst, policy, shift);
+
+ // Validate valid region
+ const ValidRegion valid_region = shape_to_valid_region(shape);
+ validate(dst.info()->valid_region(), valid_region);
+
+ // Validate padding
+ const PaddingSize padding = PaddingCalculator(shape.x(), 16).required_padding();
+ validate(src.info()->padding(), padding);
+ validate(dst.info()->padding(), padding);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallShapes(), DepthConvertLayerF32toF16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthConvertLayerToF16Fixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeShapes(), DepthConvertLayerF32toF16Dataset),
+ framework::dataset::make("ConvertPolicy", { ConvertPolicy::SATURATE, ConvertPolicy::WRAP })),
+ DepthConvertLayerShiftDataset))
+{
+ // Validate output
+ validate(Accessor(_target), _reference);
+}
+TEST_SUITE_END()
+#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */
+
TEST_SUITE_END()
TEST_SUITE_END()
} // namespace validation