-rw-r--r--  src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 23
-rw-r--r--  src/runtime/NEON/functions/NEFullyConnectedLayer.cpp         |  4
-rw-r--r--  tests/validation/NEON/FullyConnectedLayer.cpp                 | 24
-rw-r--r--  tests/validation/Reference.cpp                                | 36
4 files changed, 71 insertions(+), 16 deletions(-)
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index 826a386557..f3d06ed481 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -45,7 +45,7 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1);
@@ -109,6 +109,27 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window)
in0_out, in1);
break;
}
+#ifdef ARM_COMPUTE_ENABLE_FP16
+ case DataType::F16:
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const float16x8x2_t accum = vld2q_f16(reinterpret_cast<const float16_t *>(in0_out.ptr()));
+ const float16x8x2_t biases = vld2q_f16(reinterpret_cast<const float16_t *>(in1.ptr()));
+ const float16x8x2_t res =
+ {
+ {
+ vaddq_f16(accum.val[0], biases.val[0]),
+ vaddq_f16(accum.val[1], biases.val[1])
+ }
+ };
+
+ vst2q_f16(reinterpret_cast<float16_t *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
case DataType::QS8:
{
execute_window_loop(window, [&](const Coordinates & id)
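
The F16 case added above follows the same structure as the surrounding data-type paths: each window iteration de-interleaves two float16x8 vectors (16 half-precision lanes) from the accumulator row, adds the corresponding bias lanes, and stores the sums back in place. A minimal standalone sketch of the same intrinsic sequence, with hypothetical flat buffers and assuming a toolchain with half-precision vector arithmetic enabled (e.g. -march=armv8.2-a+fp16):

    #include <arm_neon.h>
    #include <cstddef>

    // Accumulate 'bias' into 'accum' in place, 16 fp16 lanes per iteration.
    // 'len' is assumed to be a multiple of 16 for brevity; the kernel itself
    // relies on the window/padding machinery instead of this restriction.
    void accumulate_biases_f16(float16_t *accum, const float16_t *bias, size_t len)
    {
        for(size_t i = 0; i < len; i += 16)
        {
            const float16x8x2_t a = vld2q_f16(accum + i);
            const float16x8x2_t b = vld2q_f16(bias + i);
            const float16x8x2_t r =
            {
                {
                    vaddq_f16(a.val[0], b.val[0]),
                    vaddq_f16(a.val[1], b.val[1])
                }
            };
            vst2q_f16(accum + i, r);
        }
    }
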
diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
index eb84ccaddc..4d9ee85f9b 100644
--- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
+++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp
@@ -39,7 +39,7 @@ NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights()
void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON(output == nullptr);
ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != 2);
ARM_COMPUTE_ERROR_ON((transpose_weights == false) && (is_batched_fc_layer == false));
@@ -196,7 +196,7 @@ void NEFullyConnectedLayer::configure_fc_fc_nb(const ITensor *input, const ITens
void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights, bool are_weights_reshaped)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, weights, output);
ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2);
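
At the function level these two validation-macro changes are all that NEFullyConnectedLayer needs; the per-element work lives in the kernels, including the bias-accumulation kernel patched above. With F16 accepted, an FP16 fully connected layer is configured like any other data type. A hypothetical usage sketch (the shapes and the 128-to-64 layer size are invented for illustration):

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/runtime/NEON/functions/NEFullyConnectedLayer.h"
    #include "arm_compute/runtime/Tensor.h"

    using namespace arm_compute;

    void run_fp16_fc()
    {
        Tensor src, weights, biases, dst;
        src.allocator()->init(TensorInfo(TensorShape(128U), 1, DataType::F16));
        weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F16));
        biases.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F16));
        dst.allocator()->init(TensorInfo(TensorShape(64U), 1, DataType::F16));

        NEFullyConnectedLayer fc;
        fc.configure(&src, &weights, &biases, &dst,
                     /* transpose_weights = */ true, /* are_weights_reshaped = */ false);

        // Allocate backing memory only after configure(), as usual for NEON functions.
        src.allocator()->allocate();
        weights.allocator()->allocate();
        biases.allocator()->allocate();
        dst.allocator()->allocate();

        // ... fill src/weights/biases with fp16 data, then:
        fc.run();
    }
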
diff --git a/tests/validation/NEON/FullyConnectedLayer.cpp b/tests/validation/NEON/FullyConnectedLayer.cpp
index 87e0071007..fa962787d1 100644
--- a/tests/validation/NEON/FullyConnectedLayer.cpp
+++ b/tests/validation/NEON/FullyConnectedLayer.cpp
@@ -45,6 +45,9 @@ namespace
{
const float tolerance_f32 = 1e-03f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
const float tolerance_q = 1.0f; /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */
+#ifdef ARM_COMPUTE_ENABLE_FP16
+const float tolerance_f16 = 0.01f; /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
Tensor compute_fully_connected_layer(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, DataType dt,
bool transpose_weights, int fixed_point_position)
@@ -82,7 +85,7 @@ Tensor compute_fully_connected_layer(const TensorShape &input_shape, const Tenso
BOOST_TEST(!dst.info()->is_resizable());
// Fill tensors
- if(dt == DataType::F32)
+ if(dt == DataType::F16 || dt == DataType::F32)
{
std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
library->fill(NEAccessor(src), distribution, 0);
@@ -153,6 +156,25 @@ BOOST_DATA_TEST_CASE(Configuration,
validate(dst.info()->valid_region(), dst_valid_region);
}
+#ifdef ARM_COMPUTE_ENABLE_FP16
+BOOST_AUTO_TEST_SUITE(Float16)
+BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
+BOOST_DATA_TEST_CASE(RunSmall,
+ SmallFullyConnectedLayerDataset() * boost::unit_test::data::make({ DataType::F16 }),
+ fc_set, dt)
+{
+ // Compute function
+ Tensor dst = compute_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0);
+
+ // Compute reference
+ RawTensor ref_dst = Reference::compute_reference_fully_connected_layer(fc_set.src_shape, fc_set.weights_shape, fc_set.bias_shape, fc_set.dst_shape, dt, fc_set.transpose_weights, 0);
+
+ // Validate output
+ validate(NEAccessor(dst), ref_dst, tolerance_f16);
+}
+BOOST_AUTO_TEST_SUITE_END()
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
+
BOOST_AUTO_TEST_SUITE(Float)
BOOST_TEST_DECORATOR(*boost::unit_test::label("precommit"))
BOOST_DATA_TEST_CASE(RunSmall,
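
The looser F16 tolerance introduced above (0.01 versus 1e-03 for F32) is expected: half precision has a 10-bit mantissa, so its machine epsilon is 2^-10 ≈ 9.8e-4, and rounding error accumulates across each dot product in the layer; 0.01 leaves roughly ten ulps of headroom at the unit scale where the [-1, 1] fill keeps the values. A minimal sketch of an absolute-tolerance element-wise check in the spirit of the validate() call (a simplified, hypothetical helper, not the test framework's actual implementation):

    #include <cmath>
    #include <cstddef>

    // Returns true if every element of 'out' lies within 'tol' of 'ref'.
    bool within_tolerance(const float *out, const float *ref, size_t n, float tol)
    {
        for(size_t i = 0; i < n; ++i)
        {
            if(std::fabs(out[i] - ref[i]) > tol)
            {
                return false;
            }
        }
        return true;
    }
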
diff --git a/tests/validation/Reference.cpp b/tests/validation/Reference.cpp
index 62dfcba37e..04362f0dc1 100644
--- a/tests/validation/Reference.cpp
+++ b/tests/validation/Reference.cpp
@@ -506,18 +506,30 @@ RawTensor Reference::compute_reference_convolution_layer(const TensorShape &inpu
RawTensor ref_dst = library->get(output_shape, dt, 1, fixed_point_position);
// Fill reference
- if(dt == DataType::F16 || dt == DataType::F32)
- {
- std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
- library->fill(ref_src, distribution, 0);
- library->fill(ref_weights, distribution, 1);
- library->fill(ref_bias, distribution, 2);
- }
- else
+ switch(dt)
{
- library->fill_tensor_uniform(ref_src, 0);
- library->fill_tensor_uniform(ref_weights, 1);
- library->fill_tensor_uniform(ref_bias, 2);
+ case DataType::F32:
+ case DataType::F16:
+ {
+ std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
+ library->fill(ref_src, distribution, 0);
+ library->fill(ref_weights, distribution, 1);
+ library->fill(ref_bias, distribution, 2);
+ break;
+ }
+ case DataType::QS16:
+ case DataType::QS8:
+ {
+ library->fill_tensor_uniform(ref_src, 0);
+ library->fill_tensor_uniform(ref_weights, 1);
+ library->fill_tensor_uniform(ref_bias, 2);
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Not supported");
+ break;
+ }
}
// Compute reference
@@ -546,7 +558,7 @@ RawTensor Reference::compute_reference_fully_connected_layer(const TensorShape &
RawTensor ref_weights = library->get(ws, dt, 1, fixed_point_position);
// Fill reference
- if(dt == DataType::F32)
+ if(dt == DataType::F16 || dt == DataType::F32)
{
std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
library->fill(ref_src, distribution, 0);
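
As above, the reference fills floating-point tensors from a uniform [-1, 1) distribution with a fixed per-tensor seed offset (0 for the source, 1 for the weights, 2 for the bias), so reference and implementation see identical input data. A standalone sketch of the same fill pattern (a hypothetical helper, assuming the library's fill() behaves like a seeded loop of this kind):

    #include <cstddef>
    #include <random>
    #include <vector>

    // Deterministically fill a buffer with uniform values in [-1, 1),
    // seeded per tensor (0, 1, 2, ...) as in the tests above.
    std::vector<float> make_uniform_fill(size_t n, unsigned seed)
    {
        std::mt19937 gen(seed);
        std::uniform_real_distribution<> distribution(-1.0f, 1.0f);
        std::vector<float> values(n);
        for(auto &v : values)
        {
            v = static_cast<float>(distribution(gen));
        }
        return values;
    }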