From 4ee8b1599dbaf7634d25607fa5ac96ba3dc6b0f2 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Fri, 16 Jul 2021 16:16:43 +0100 Subject: Update GEMM assembly kernels - Introduce Fp32 kernels with internal calculations in Bfloat16 when fast_mode is enabled - Improve kernel selection heuristics Signed-off-by: Georgios Pinitas Change-Id: I68a9e7e862b6fd2721b46e0d7cc791091c4ab279 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5965 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 2 ++ 1 file changed, 2 insertions(+) (limited to 'src/runtime/CL/functions/CLFullyConnectedLayer.cpp') diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp index bc9a3056e8..0647a473e2 100644 --- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp +++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp @@ -101,6 +101,7 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I fc_info.retain_internal_weights, // retain_internal_weights gemmlowp_output_stage, // gemmlowp_output_stage fc_info.fp_mixed_precision, // fp_mixed_precision + false, // fast_math true, // broadcast_bias ActivationLayerInfo()); // activation_info @@ -151,6 +152,7 @@ void CLFullyConnectedLayer::configure_mm(const CLCompileContext &compile_context fc_info.retain_internal_weights, // retain_internal_weights gemmlowp_output_stage, // gemmlowp_output_stage fc_info.fp_mixed_precision, // fp_mixed_precision + false, // fast_math true, // broadcast_bias fc_info.activation_info, // activation_info fc_info.constant_weights); // constant_weights -- cgit v1.2.1