From f391fff0336ae84387dd3ebc683ef85649de9eb5 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Wed, 15 May 2019 13:01:26 +0100 Subject: COMPMID-2302 NEDeconvolution: support for FP16 Change-Id: I9fef05abdcafbc97607613a88f7997dd012e0d80 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/1142 Tested-by: Arm Jenkins Reviewed-by: Giuseppe Rossini Comments-Addressed: Arm Jenkins --- arm_compute/core/CPP/kernels/CPPUpsampleKernel.h | 2 +- arm_compute/runtime/CPP/functions/CPPUpsample.h | 4 +- .../runtime/NEON/functions/NEDeconvolutionLayer.h | 16 ++++---- .../NEON/functions/NEDeconvolutionLayer.cpp | 8 ++-- tests/validation/NEON/DeconvolutionLayer.cpp | 48 ++++++++++++++++++---- 5 files changed, 57 insertions(+), 21 deletions(-) diff --git a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h index 4e61356760..fedbb54d35 100644 --- a/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h +++ b/arm_compute/core/CPP/kernels/CPPUpsampleKernel.h @@ -55,7 +55,7 @@ public: /** Set the input and output of the kernel. * - * @param[in] input The input tensor to upsample. Data types supported: F32/QASYMM8 + * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8 * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] info Padding info. * @param[in] inner_border_right The number of zeros added to right edge of the input. diff --git a/arm_compute/runtime/CPP/functions/CPPUpsample.h b/arm_compute/runtime/CPP/functions/CPPUpsample.h index 06df866349..fd7d9c24bf 100644 --- a/arm_compute/runtime/CPP/functions/CPPUpsample.h +++ b/arm_compute/runtime/CPP/functions/CPPUpsample.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ class CPPUpsample : public ICPPSimpleFunction public: /** Configure the upsample CPP kernel * - * @param[in] input The input tensor to upsample. Data types supported: F32 + * @param[in] input The input tensor to upsample. Data types supported: F32/F16/QASYMM8 * @param[out] output The output tensor. Data types supported: Same as @p input * @param[in] info Padding information * @param[in] inner_border_right The number of zeros added to right edge of the input. diff --git a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h index 25512fa147..4eb684b9aa 100644 --- a/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEDeconvolutionLayer.h @@ -90,9 +90,9 @@ public: * * @note This method will be deprecated in the next release. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. * @param[in] inner_border_right The number of zeros added to right edge of the input. @@ -105,9 +105,9 @@ public: * * @note This method will be deprecated in the next release. * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. * @param[in] inner_border_right The number of zeros added to right edge of the input. @@ -120,9 +120,9 @@ public: /** Set the input, weights, biases and output tensors. * - * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8. + * @param[in,out] input Input tensor. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. * @param[in] weights The 4d weights with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input. + * @param[in] bias Optional, ignored if NULL. The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. * @param[out] output Output tensor. The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. * @@ -130,9 +130,9 @@ public: void configure(ITensor *input, const ITensor *weights, const ITensor *bias, ITensor *output, const PadStrideInfo &info); /** Static function to check if given info will lead to a valid configuration of @ref NEDeconvolutionLayer * - * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/QASYMM8. + * @param[in] input Input tensor info. 3 lower dimensions represent a single input, and an optional 4th dimension for batch of inputs. Data types supported: F32/F16/QASYMM8. * @param[in] weights The 4d weights info with dimensions [width, height, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input. + * @param[in] bias (Optional) The biases have one dimension. Data type supported: Data types supported: S32 for QASYMM8 input, F32 for F32 input, F16 for F16 input. * @param[in] output Output tensor info. The output has the same number of dimensions as the @p input. * @param[in] info Contains padding and policies to be used in the deconvolution, this is decribed in @ref PadStrideInfo. * diff --git a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp index aff335e5e3..c3d6b94d8d 100644 --- a/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEDeconvolutionLayer.cpp @@ -29,9 +29,10 @@ #include "arm_compute/core/utils/misc/ShapeCalculator.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -using namespace arm_compute; using namespace arm_compute::misc::shape_calculator; +namespace arm_compute +{ NEDeconvolutionLayer::NEDeconvolutionLayer(std::shared_ptr memory_manager) // NOLINT : _memory_group(std::move(memory_manager)), _conv_f(), @@ -51,8 +52,8 @@ Status NEDeconvolutionLayer::validate(const ITensorInfo *input, const ITensorInf unsigned int inner_border_right, unsigned int inner_border_top) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32, DataType::QASYMM8); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QASYMM8); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(weights, input); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != weights->dimension(1)); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) < 1); ARM_COMPUTE_RETURN_ERROR_ON(!info.padding_is_symmetric()); @@ -192,3 +193,4 @@ void NEDeconvolutionLayer::prepare() _is_prepared = true; } } +} // namespace arm_compute \ No newline at end of file diff --git a/tests/validation/NEON/DeconvolutionLayer.cpp b/tests/validation/NEON/DeconvolutionLayer.cpp index fc37c02279..8860a9f974 100644 --- a/tests/validation/NEON/DeconvolutionLayer.cpp +++ b/tests/validation/NEON/DeconvolutionLayer.cpp @@ -45,7 +45,10 @@ namespace { constexpr AbsoluteTolerance tolerance_fp32(0.001f); /**< Tolerance for floating point tests */ constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ -constexpr float tolerance_num = 0.07f; /**< Tolerance number */ +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +const RelativeTolerance tolerance_fp16(half_float::half(0.2f)); /**< Relative tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC*/ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ const auto data4x4 = datasets::SmallDeconvolutionShapes() * framework::dataset::make("StrideX", 1, 4) * framework::dataset::make("StrideY", 1, 4) * framework::dataset::make("PadX", 0, 3) * framework::dataset::make("PadY", 0, 3) * framework::dataset::make("NumKernels", { 3 }); @@ -175,10 +178,8 @@ template using NEDeconvolutionLayerFixture1x1 = DeconvolutionValidationFixture; TEST_SUITE(Float) - TEST_SUITE(FP32) TEST_SUITE(W4x4) - FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4, framework::DatasetMode::NIGHTLY, combine(combine(data4x4, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { @@ -186,9 +187,7 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4, framework::Da validate(Accessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W4x4 - TEST_SUITE(W3x3) - FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3, framework::DatasetMode::PRECOMMIT, combine(combine(data3x3_precommit, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) { @@ -202,7 +201,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3, framewor validate(Accessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W3x3 - TEST_SUITE(W1x1) FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1, framework::DatasetMode::NIGHTLY, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F32)), data_layouts_dataset)) @@ -211,8 +209,44 @@ FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1, framework::Da validate(Accessor(_target), _reference, tolerance_fp32); } TEST_SUITE_END() // W1x1 - TEST_SUITE_END() // FP32 + +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +TEST_SUITE(W4x4) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture4x4, framework::DatasetMode::NIGHTLY, combine(combine(data4x4, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); +} +TEST_SUITE_END() // W4x4 +TEST_SUITE(W3x3) +FIXTURE_DATA_TEST_CASE(RunSmall, NEDeconvolutionLayerFixture3x3, framework::DatasetMode::PRECOMMIT, combine(combine(data3x3_precommit, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEDeconvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(data3x3, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); +} +TEST_SUITE_END() // W3x3 +TEST_SUITE(W1x1) +FIXTURE_DATA_TEST_CASE(Run, NEDeconvolutionLayerFixture1x1, framework::DatasetMode::NIGHTLY, combine(combine(data1x1, framework::dataset::make("DataType", DataType::F16)), + data_layouts_dataset)) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_fp16); +} +TEST_SUITE_END() // W1x1 +TEST_SUITE_END() // FP16 +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + + TEST_SUITE_END() // Float template -- cgit v1.2.1