From dcdc85ef876e854749db58ecd60c37f64a627536 Mon Sep 17 00:00:00 2001
From: Pablo Tello
Date: Wed, 28 Jun 2017 10:05:29 +0100
Subject: COMPMID-421: Added F16 support in FC Layer.

Change-Id: I9c3ab51ae024be69c7b1d83803b1a8f60a0cdbfd
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79326
Reviewed-by: Moritz Pflanzer
Tested-by: Kaizen
Reviewed-by: Georgios Pinitas
---
 .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 23 +++++++++++++-
 .../NEON/functions/NEFullyConnectedLayer.cpp       |  4 +--
 tests/validation/NEON/FullyConnectedLayer.cpp      | 24 ++++++++++++++-
 tests/validation/Reference.cpp                     | 36 ++++++++++++++--------
 4 files changed, 71 insertions(+), 16 deletions(-)

diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index 826a386557..f3d06ed481 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -45,7 +45,7 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
 
 void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
     ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1);
@@ -109,6 +109,27 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window)
                                in0_out, in1);
             break;
         }
+#ifdef ARM_COMPUTE_ENABLE_FP16
+        case DataType::F16:
+        {
+            execute_window_loop(window, [&](const Coordinates & id)
+            {
+                const float16x8x2_t accum  = vld2q_f16(reinterpret_cast<const float16_t *>(in0_out.ptr()));
+                const float16x8x2_t biases = vld2q_f16(reinterpret_cast<const float16_t *>(in1.ptr()));
+                const float16x8x2_t res    =
+                {
+                    {
+                        vaddq_f16(accum.val[0], biases.val[0]),
+                        vaddq_f16(accum.val[1], biases.val[1])
+                    }
+                };
+
+                vst2q_f16(reinterpret_cast<float16_t *>(in0_out.ptr()), res);
+            },
+            in0_out, in1);
+            break;
+        }
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
         case DataType::QS8:
         {
             execute_window_loop(window, [&](const Coordinates & id)
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index eb84ccaddc..4d9ee85f9b 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -39,7 +39,7 @@ NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights()
 
 void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON(output == nullptr);
     ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != 2);
     ARM_COMPUTE_ERROR_ON((transpose_weights == false) && (is_batched_fc_layer == false));
@@ -196,7 +196,7 @@ void NEFullyConnectedLayer::configure_fc_fc_nb(const ITensor *input, const ITens
 
 void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights, bool are_weights_reshaped)
 {
-    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
+    ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
     ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, weights, output);
     ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2);
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index 87e0071007..fa962787d1 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -45,6 +45,9 @@ namespace
 {
 const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
 const float tolerance_q   = 1.0f;   /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+#ifdef ARM_COMPUTE_ENABLE_FP16
+const float tolerance_f16 = 0.01f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+#endif /*ARM_COMPUTE_ENABLE_FP16*/
 
 Tensor compute_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
                                      bool transpose_weights, int fixed_point_position)
@@ -82,7 +85,7 @@ Tensor compute_fully_connected_layer(const TensorShape &input_shape, const Tenso
     BOOST_TEST(!dst.info()->is_resizable());
 
     // Fill tensors
-    if(dt == DataType::F32)
+    if(dt == DataType::F16 || dt == DataType::F32)
     {
         std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
         library->fill(NEAccessor(src), distribution, 0);
@@ -153,6 +156,25 @@ BOOST_DATA_TEST_CASE(Configuration,
     validate(dst.info()->valid_region(), dst_valid_region);
 }
 
+#ifdef ARM_COMPUTE_ENABLE_FP16
+BOOST_AUTO_TEST_SUITE(Float16)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall,
+                     SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F16 }),
+                     fc_set, dt)
+{
+    // Compute function
+    Tensor dst = compute_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0);
+
+    // Compute reference
+    RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0);
+
+    // Validate output
+    validate(NEAccessor(dst), ref_dst, tolerance_f16);
+}
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
 BOOST_AUTO_TEST_SUITE(Float)
 BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
 BOOST_DATA_TEST_CASE(RunSmall,
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index 62dfcba37e..04362f0dc1 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -506,18 +506,30 @@ RawTensor Reference::compute_reference_convolution_layer(const TensorShape &inpu
     RawTensor ref_dst     = library->get(output_shape, dt, 1, fixed_point_position);
 
     // Fill reference
-    if(dt == DataType::F16 || dt == DataType::F32)
-    {
-        std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
-        library->fill(ref_src, distribution, 0);
-        library->fill(ref_weights, distribution, 1);
-        library->fill(ref_bias, distribution, 2);
-    }
-    else
+    switch(dt)
     {
-        library->fill_tensor_uniform(ref_src, 0);
-        library->fill_tensor_uniform(ref_weights, 1);
-        library->fill_tensor_uniform(ref_bias, 2);
+        case DataType::F32:
+        case DataType::F16:
+        {
+            std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+            library->fill(ref_src, distribution, 0);
+            library->fill(ref_weights, distribution, 1);
+            library->fill(ref_bias, distribution, 2);
+            break;
+        }
+        case DataType::QS16:
+        case DataType::QS8:
+        {
+            library->fill_tensor_uniform(ref_src, 0);
+            library->fill_tensor_uniform(ref_weights, 1);
+            library->fill_tensor_uniform(ref_bias, 2);
+            break;
+        }
+        default:
+        {
+            ARM_COMPUTE_ERROR("Not supported");
+            break;
+        }
     }
 
     // Compute reference
@@ -546,7 +558,7 @@ RawTensor Reference::compute_reference_fully_connected_layer(const TensorShape &
     RawTensor ref_weights = library->get(ws, dt, 1, fixed_point_position);
 
     // Fill reference
-    if(dt == DataType::F32)
+    if(dt == DataType::F16 || dt == DataType::F32)
     {
         std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
         library->fill(ref_src, distribution, 0);
-- 
cgit v1.2.1
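
Usage note (not part of the patch): the sketch below shows one way the new F16 fully connected path can be driven through the public API. It is a minimal sketch, assuming a build with ARM_COMPUTE_ENABLE_FP16 on an FP16-capable CPU; the tensor shapes and the transpose_weights/are_weights_reshaped flag values are illustrative choices, not mandated by the patch, and only the configure() signature comes from the code above.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
#include "arm_compute/runtime/Tensor.h"

using namespace arm_compute;

int main()
{
    // Illustrative shapes: 128 inputs -> 16 outputs, single batch.
    Tensor src{}, weights{}, bias{}, dst{};
    src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F16));
    // Weights laid out as (inputs, outputs); transpose_weights=true below asks
    // the function to reshape/transpose them internally before the GEMM.
    weights.allocator()->init(TensorInfo(TensorShape(128U, 16U), 1, DataType::F16));
    bias.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F16));
    dst.allocator()->init(TensorInfo(TensorShape(16U), 1, DataType::F16));

    // Signature touched by this patch:
    // configure(input, weights, biases, output, transpose_weights, are_weights_reshaped)
    NEFullyConnectedLayer fc{};
    fc.configure(&src, &weights, &bias, &dst, true /* transpose_weights */, false /* are_weights_reshaped */);

    src.allocator()->allocate();
    weights.allocator()->allocate();
    bias.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill src/weights/bias with F16 data here ...

    fc.run();
    return 0;
}

On the kernel side, the F16 bias-accumulation path added above uses interleaving vld2q_f16/vst2q_f16 loads and stores, so each loop iteration adds the bias to 16 half-precision accumulator values with two vaddq_f16 operations.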