From dcdc85ef876e854749db58ecd60c37f64a627536 Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 28 Jun 2017 10:05:29 +0100 Subject: COMPMID-421: Added F16 support in FC Layer. Change-Id: I9c3ab51ae024be69c7b1d83803b1a8f60a0cdbfd Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79326 Reviewed-by: Moritz Pflanzer Tested-by: Kaizen Reviewed-by: Georgios Pinitas --- .../kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 23 +++++++++++++++++++++- .../NEON/functions/NEFullyConnectedLayer.cpp | 4 ++-- 2 files changed, 24 insertions(+), 3 deletions(-) (limited to 'src') diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp index 826a386557..f3d06ed481 100644 --- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp +++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp @@ -45,7 +45,7 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel() void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum); ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1); @@ -109,6 +109,27 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window) in0_out, in1); break; } +#ifdef ARM_COMPUTE_ENABLE_FP16 + case DataType::F16: + { + execute_window_loop(window, [&](const Coordinates & id) + { + const float16x8x2_t accum = vld2q_f16(reinterpret_cast<const float16_t *>(in0_out.ptr())); + const float16x8x2_t biases = vld2q_f16(reinterpret_cast<const float16_t *>(in1.ptr())); + const float16x8x2_t res = + { + { + vaddq_f16(accum.val[0], biases.val[0]), + vaddq_f16(accum.val[1], biases.val[1]) + } + }; + + 
vst2q_f16(reinterpret_cast<float16_t *>(in0_out.ptr()), res); }, in0_out, in1); break; } +#endif /* ARM_COMPUTE_ENABLE_FP16 */ case DataType::QS8: { execute_window_loop(window, [&](const Coordinates & id) diff --git a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp index eb84ccaddc..4d9ee85f9b 100644 --- a/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp +++ b/src/runtime/NEON/functions/NEFullyConnectedLayer.cpp @@ -39,7 +39,7 @@ NEFullyConnectedLayerReshapeWeights::NEFullyConnectedLayerReshapeWeights() void NEFullyConnectedLayerReshapeWeights::configure(const ITensor *input, ITensor *output, bool transpose_weights, bool is_batched_fc_layer) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON(output == nullptr); ARM_COMPUTE_ERROR_ON(input->info()->num_dimensions() != 2); ARM_COMPUTE_ERROR_ON((transpose_weights == false) && (is_batched_fc_layer == false)); @@ -196,7 +196,7 @@ void NEFullyConnectedLayer::configure_fc_fc_nb(const ITensor *input, const ITens void NEFullyConnectedLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose_weights, bool are_weights_reshaped) { - ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F32); + ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, weights, output); ARM_COMPUTE_ERROR_ON(weights->info()->num_dimensions() != 2); -- cgit v1.2.1