From 60ab4e66ea3cb85042035fd1aafbfea666bb4ea7 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Fri, 28 Apr 2023 10:40:07 +0100 Subject: Fix export_to_cl_image issue in the fp16 GeMM implementation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - The issue affects Fp16 GeMM on Arm® Mali™-G78 - The issue was caused by a missing fallback implementation for the case when export_to_cl_image cannot be used - The new implementation fixes this issue and makes the GeMM implementation for M=1 also faster (4-5% on various networks with fully connected at the end of the model) - This patch also enables the H0=0 case in the GeMM examples Resolves COMPMID-5812, COMPMID-5688, and COMPMID-6147 Signed-off-by: Gian Marco Iodice Change-Id: Ib7b355ae25337962598dd2ba21665b1a6b48686f Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/514664 Tested-by: bsgcomp Reviewed-by: Viet-Hoa Do Comments-Addressed: bsgcomp Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9526 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- ...cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp') diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp index 0e0917daa9..94f3c93166 100644 --- a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp +++ b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020-2021 Arm Limited. + * Copyright (c) 2020-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -220,6 +220,11 @@ public: rhs_info.transpose = configs.transpose_rhs; rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet + if(rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U); + } + rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); rhs_reshaped.info()->set_quantization_info(q_info); if(rhs_info.export_to_cl_image) -- cgit v1.2.1