author    Gian Marco Iodice <gianmarco.iodice@arm.com>  2023-04-28 10:40:07 +0100
committer Gian Marco Iodice <gianmarco.iodice@arm.com>  2023-05-02 15:53:28 +0000
commit    60ab4e66ea3cb85042035fd1aafbfea666bb4ea7 (patch)
tree      05818f7fafb0cf02d337b201756548152090436f /examples
parent    d7113e4af5b5497d3a3a62dc9cf6b147e2a024cd (diff)
download  ComputeLibrary-60ab4e66ea3cb85042035fd1aafbfea666bb4ea7.tar.gz
Fix export_to_cl_image issue in the fp16 GeMM implementation
- The issue affects Fp16 GeMM on Arm® Mali™-G78
- The issue was caused by a missing fallback implementation for the case when export_to_cl_image cannot be used
- The new implementation fixes this issue and also makes the GeMM implementation for M=1 faster (4-5% on various networks with a fully connected layer at the end of the model)
- This patch also enables the H0=0 case in the GeMM examples

Resolves COMPMID-5812, COMPMID-5688, and COMPMID-6147

Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Change-Id: Ib7b355ae25337962598dd2ba21665b1a6b48686f
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/514664
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9526
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'examples')
-rw-r--r--  examples/gemm_tuner/cl_gemm_reshaped.cpp | 7
-rw-r--r--  examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp | 7
-rw-r--r--  examples/gemm_tuner/cl_gemmlowp_reshaped.cpp | 7
-rw-r--r--  examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp | 7
4 files changed, 24 insertions, 4 deletions
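For context on the H0=0 case mentioned in the commit message: each hunk below lets the tuner examples derive the number of RHS horizontal blocks (h0) from the GEMM N dimension and the block width n0 when the user leaves h0 at 0, clamping it to at least 1. The following is a minimal standalone sketch of that fallback; the pick_h0 helper and the sample values are hypothetical and only mirror the expression used in the diff (std::max(n / n0, 1U)).

#include <algorithm>
#include <cstdio>

// Hypothetical helper mirroring the fallback added in the gemm_tuner examples:
// when h0 (number of horizontal blocks used to reshape the RHS matrix) is left
// as 0, derive it from N / n0 and clamp it to at least 1 so the reshaped
// RHS shape remains valid.
static unsigned int pick_h0(unsigned int h0, unsigned int n, unsigned int n0)
{
    if(h0 == 0)
    {
        h0 = std::max(n / n0, 1U);
    }
    return h0;
}

int main()
{
    // Example values only: N = 77, n0 = 4 -> h0 = 19; N = 2, n0 = 4 -> clamped to 1.
    std::printf("%u\n", pick_h0(0U, 77U, 4U));
    std::printf("%u\n", pick_h0(0U, 2U, 4U));
    return 0;
}

Note that the fp16 examples read the N dimension from kernel_info.n, while the quantized examples cast params.N to unsigned int, as shown in the hunks below.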
diff --git a/examples/gemm_tuner/cl_gemm_reshaped.cpp b/examples/gemm_tuner/cl_gemm_reshaped.cpp
index bbc42a7e55..59044477bf 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -256,6 +256,11 @@ public:
kernel_info.broadcast_bias = true;
kernel_info.activation_info = act_info;
+ if(rhs_info.h0 == 0)
+ {
+ rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U);
+ }
+
// Initialise lhs_reshaped tensor info
lhs_reshaped.allocator()->init(TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
diff --git a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
index 79f251ae7d..0ad2a65dc2 100644
--- a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
+++ b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2019-2021 Arm Limited.
+ * Copyright (c) 2019-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -225,6 +225,11 @@ public:
kernel_info.broadcast_bias = true;
kernel_info.activation_info = act_info;
+ if(rhs_info.h0 == 0)
+ {
+ rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U);
+ }
+
// Initialise rhs_reshaped tensor info
rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
index 115ce6ba23..9cf9c9fed0 100644
--- a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
+++ b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -240,6 +240,11 @@ public:
rhs_info.transpose = configs.transpose_rhs;
rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet
+ if(rhs_info.h0 == 0)
+ {
+ rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U);
+ }
+
lhs_reshaped.allocator()->init(TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type));
rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
lhs_reshaped.info()->set_quantization_info(q_info);
diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
index 0e0917daa9..94f3c93166 100644
--- a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
+++ b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2020-2021 Arm Limited.
+ * Copyright (c) 2020-2021, 2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -220,6 +220,11 @@ public:
rhs_info.transpose = configs.transpose_rhs;
rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet
+ if(rhs_info.h0 == 0)
+ {
+ rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U);
+ }
+
rhs_reshaped.allocator()->init(TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type));
rhs_reshaped.info()->set_quantization_info(q_info);
if(rhs_info.export_to_cl_image)