From 659abc022b42ad8d11f58b3f3b7b4175f3cf1038 Mon Sep 17 00:00:00 2001
From: Pablo Tello
Date: Thu, 22 Jun 2017 16:00:16 +0100
Subject: COMPMID-421: Added FP16 support in Convolutional layer (Neon)

The test suite for FP16 is conditionally compiled in when the target
platform is arch=arm64-8.2-a

Change-Id: I1686157e83809a00a91058bff80dbecf692fb356
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/78740
Tested-by: Kaizen
Reviewed-by: Anthony Barbier
---
 arm_compute/core/NEON/kernels/NEIm2ColKernel.h    |  2 +-
 src/core/NEON/kernels/NEIm2ColKernel.cpp          | 14 ++++++++++++--
 src/core/NEON/kernels/NEWeightsReshapeKernel.cpp  | 11 ++++++++---
 src/runtime/NEON/functions/NEConvolutionLayer.cpp | 14 +++++++-------
 tests/validation/NEON/ConvolutionLayer.cpp        | 19 +++++++++++++++++++
 5 files changed, 47 insertions(+), 13 deletions(-)

diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
index ebaafb467f..e219ce2e0e 100644
--- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
+++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h
@@ -72,7 +72,7 @@ public:
     /** Set the input and output of the kernel.
      *
      * @param[in]  input          The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
-     *                            while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/F32
+     *                            while every optional dimension from 4 and above represent a batch of inputs. Data types supported: QS8/F16/F32
      * @param[out] output         The output tensor. Data types supported: Same as @p input
      * @param[in]  convolved_dims The convolved output dimensions.
      * @param[in]  conv_info      Contains padding and stride information described in @ref PadStrideInfo.
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index c7c23d5d06..875c08ed42 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -266,8 +266,8 @@ NEIm2ColKernel::NEIm2ColKernel()
 
 void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, std::pair<unsigned int, unsigned int> convolved_dims, const PadStrideInfo &conv_info, bool has_bias)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32, DataType::QS8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QS8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32, DataType::QS8);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
 
     _input = input;
@@ -296,6 +296,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, std::pair<
             case DataType::F32:
                 _func = &NEIm2ColKernel::run_reduced<float>;
                 break;
+#ifdef ARM_COMPUTE_ENABLE_FP16
+            case DataType::F16:
+                _func = &NEIm2ColKernel::run_reduced<float16_t>;
+                break;
+#endif
             case DataType::QS8:
                 _func = &NEIm2ColKernel::run_reduced<qint8_t>;
                 break;
@@ -311,6 +316,11 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, std::pair<
             case DataType::F32:
                 _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<float, false> : &NEIm2ColKernel::run_generic<float, true>;
                 break;
+#ifdef ARM_COMPUTE_ENABLE_FP16
+            case DataType::F16:
+                _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<float16_t, false> : &NEIm2ColKernel::run_generic<float16_t, true>;
+                break;
+#endif
             case DataType::QS8:
                 _func = ((pad_x == 0) && (pad_y == 0)) ? &NEIm2ColKernel::run_generic<qint8_t, false> : &NEIm2ColKernel::run_generic<qint8_t, true>;
                 break;
diff --git a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
index f31cde719a..4f52bf6279 100644
--- a/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
+++ b/src/core/NEON/kernels/NEWeightsReshapeKernel.cpp
@@ -95,7 +95,7 @@ NEWeightsReshapeKernel::NEWeightsReshapeKernel()
 
 void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias, ITensor *output)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::QS8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32, DataType::F16, DataType::QS8);
     ARM_COMPUTE_ERROR_ON_NULLPTR(output);
     ARM_COMPUTE_ERROR_ON(input->info()->dimension(0) != input->info()->dimension(1));
 
@@ -113,7 +113,7 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias
     set_fixed_point_position_if_zero(*output->info(), fixed_point_position);
 
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(output->info()->tensor_shape(), output_shape);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32, DataType::QS8);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F16, DataType::F32, DataType::QS8);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
 
@@ -127,7 +127,7 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias
         set_shape_if_empty(*bias->info(), bias_shape);
 
         ARM_COMPUTE_ERROR_ON_MISMATCHING_DIMENSIONS(bias->info()->tensor_shape(), bias_shape);
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::F32, DataType::QS8);
+        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(bias, 1, DataType::F16, DataType::F32, DataType::QS8);
         ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, bias);
         ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output);
     }
@@ -143,6 +143,11 @@ void NEWeightsReshapeKernel::configure(const ITensor *input, const ITensor *bias
             _func = &weights_reshape<float>;
             break;
         }
+        case DataType::F16:
+        {
+            _func = &weights_reshape<float16_t>;
+            break;
+        }
         case DataType::QS8:
         {
             _func = &weights_reshape<qint8_t>;
diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
index bd688cffb6..82c33d54bb 100644
--- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp
+++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp
@@ -40,15 +40,15 @@ NEConvolutionLayerReshapeWeights::NEConvolutionLayerReshapeWeights()
 
 void NEConvolutionLayerReshapeWeights::configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, output);
     ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() > 4);
 
     if(biases != nullptr)
     {
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F32);
+        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F16, DataType::F32);
         ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases);
         ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(weights, biases);
         ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(3));
@@ -96,9 +96,9 @@ NEConvolutionLayer::NEConvolutionLayer()
 
 void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F32);
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::QS8, DataType::F16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::QS8, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, weights, output);
     ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && weights->info()->dimension(2) != input->info()->dimension(2));
@@ -106,7 +106,7 @@ void NEConvolutionLayer::configure(const ITensor *input, const ITensor *weights,
 
     if(biases != nullptr)
     {
-        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F32);
+        ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(biases, 1, DataType::QS8, DataType::F16, DataType::F32);
         ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases);
         ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases);
         ARM_COMPUTE_ERROR_ON(!weights_info.are_reshaped() && biases->info()->dimension(0) != weights->info()->dimension(3));
diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp
index ee2b24db96..40c1e2e969 100644
--- a/tests/validation/NEON/ConvolutionLayer.cpp
+++ b/tests/validation/NEON/ConvolutionLayer.cpp
@@ -130,6 +130,25 @@ BOOST_DATA_TEST_CASE(Configuration,
     validate(dst.info()->valid_region(), dst_valid_region);
 }
 
+#ifdef ARM_COMPUTE_ENABLE_FP16
+BOOST_AUTO_TEST_SUITE(Float16)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(SmallConvolutionLayer,
+                     SmallConvolutionLayerDataset() * boost::unit_test::data::make(DataType::F16),
+                     conv_set, dt)
+{
+    // Compute function
+    Tensor dst = compute_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_convolution_layer(conv_set.src_shape, conv_set.weights_shape, conv_set.bias_shape, conv_set.dst_shape, dt, conv_set.info, 0);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, tolerance_f32);
+}
+BOOST_AUTO_TEST_SUITE_END()
+#endif
+
 BOOST_AUTO_TEST_SUITE(Float)
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
 BOOST_DATA_TEST_CASE(SmallConvolutionLayer,
-- 
cgit v1.2.1
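
For context (not part of the patch): a minimal sketch of how the FP16 path enabled by this change might be exercised from user code. It assumes a library built with ARM_COMPUTE_ENABLE_FP16 defined (per the commit message, an arch=arm64-8.2-a target); the tensor shapes and variable names below are illustrative only, not taken from the patch.

    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    int main()
    {
        // Illustrative shapes: an 8x8 input with 3 channels, 16 kernels of size 3x3.
        Tensor src{}, weights{}, biases{}, dst{};
        src.allocator()->init(TensorInfo(TensorShape(8U, 8U, 3U), 1, DataType::F16));
        weights.allocator()->init(TensorInfo(TensorShape(3U, 3U, 3U, 16U), 1, DataType::F16));
        biases.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F16));
        dst.allocator()->init(TensorInfo(TensorShape(8U, 8U, 16U), 1, DataType::F16));

        // Stride 1 with padding 1 keeps the spatial dimensions at 8x8.
        NEConvolutionLayer conv{};
        conv.configure(&src, &weights, &biases, &dst, PadStrideInfo(1, 1, 1, 1));

        // Allocate backing memory once the function has been configured.
        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src, weights and biases with F16 data here ...

        conv.run();
        return 0;
    }

With the validation added by this patch, passing F16 tensors to configure() no longer trips the ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN checks; on targets without FP16 support the F16 cases fall through to the existing "unsupported data type" error paths.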