From 4ee8b1599dbaf7634d25607fa5ac96ba3dc6b0f2 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Fri, 16 Jul 2021 16:16:43 +0100
Subject: Update GEMM assembly kernels

- Introduce Fp32 kernels with internal calculations in Bfloat16 when
fast_mode is enabled
- Improve kernel selection heuristics

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I68a9e7e862b6fd2721b46e0d7cc791091c4ab279
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5965
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 ++
 1 file changed, 2 insertions(+)

(limited to 'src/runtime/CL/functions/CLFullyConnectedLayer.cpp')

diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index bc9a3056e8..0647a473e2 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -101,6 +101,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
                                          fc_info.retain_internal_weights, // retain_internal_weights
                                          gemmlowp_output_stage,           // gemmlowp_output_stage
                                          fc_info.fp_mixed_precision,      // fp_mixed_precision
+                                         false,                           // fast_math
                                          true,                            // broadcast_bias
                                          ActivationLayerInfo());          // activation_info
 
@@ -151,6 +152,7 @@ void CLFullyConnectedLayer::configure_mm(const CLCompileContext &compile_context
                                          fc_info.retain_internal_weights, // retain_internal_weights
                                          gemmlowp_output_stage,           // gemmlowp_output_stage
                                          fc_info.fp_mixed_precision,      // fp_mixed_precision
+                                         false,                           // fast_math
                                          true,                            // broadcast_bias
                                          fc_info.activation_info,         // activation_info
                                          fc_info.constant_weights);       // constant_weights
-- 
cgit v1.2.1