aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
diff options
context:
space:
mode:
author: Pablo Tello <pablo.tello@arm.com> 2017-06-28 10:05:29 +0100
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-09-17 14:15:39 +0100
commit: dcdc85ef876e854749db58ecd60c37f64a627536 (patch)
tree: 8af2006f14a8b32eb1aced14def96afab89c5981 /src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
parent: fc95ed2b9900471922d93c963b263f1f506da167 (diff)
download: ComputeLibrary-dcdc85ef876e854749db58ecd60c37f64a627536.tar.gz
COMPMID-421: Added F16 support in FC Layer.
Change-Id: I9c3ab51ae024be69c7b1d83803b1a8f60a0cdbfd
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/79326
Reviewed-by: Moritz Pflanzer <moritz.pflanzer@arm.com>
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp')
-rw-r--r-- src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp | 23
1 file changed, 22 insertions, 1 deletion
diff --git a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
index 826a386557..f3d06ed481 100644
--- a/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/NEON/kernels/NEGEMMMatrixAccumulateBiasesKernel.cpp
@@ -45,7 +45,7 @@ NEGEMMMatrixAccumulateBiasesKernel::NEGEMMMatrixAccumulateBiasesKernel()
void NEGEMMMatrixAccumulateBiasesKernel::configure(ITensor *accum, const ITensor *biases)
{
- ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F32);
+ ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(accum, 1, DataType::QS8, DataType::QS16, DataType::F16, DataType::F32);
ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(biases, accum);
ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(biases, accum);
ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() != 1);
@@ -109,6 +109,27 @@ void NEGEMMMatrixAccumulateBiasesKernel::run(const Window &window)
in0_out, in1);
break;
}
+#ifdef ARM_COMPUTE_ENABLE_FP16
+ case DataType::F16:
+ {
+ execute_window_loop(window, [&](const Coordinates & id)
+ {
+ const float16x8x2_t accum = vld2q_f16(reinterpret_cast<const float16_t *>(in0_out.ptr()));
+ const float16x8x2_t biases = vld2q_f16(reinterpret_cast<const float16_t *>(in1.ptr()));
+ const float16x8x2_t res =
+ {
+ {
+ vaddq_f16(accum.val[0], biases.val[0]),
+ vaddq_f16(accum.val[1], biases.val[1])
+ }
+ };
+
+ vst2q_f16(reinterpret_cast<float16_t *>(in0_out.ptr()), res);
+ },
+ in0_out, in1);
+ break;
+ }
+#endif /* ARM_COMPUTE_ENABLE_FP16 */
case DataType::QS8:
{
execute_window_loop(window, [&](const Coordinates & id)