From eb65f6da695ac0d3e495817145cceb1c4de4f048 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice <gianmarco.iodice@arm.com>
Date: Wed, 15 Apr 2020 11:42:15 +0100
Subject: COMPMID-3304: Update OpenCL GEMM heuristic for Int8

Change-Id: I6b7ff678d8d0437a1639db2ff602ea1cdb155464
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3056
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/runtime/CL/CLTypes.h                         | 12 +++++++-----
 .../runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h  | 16 +---------------
 2 files changed, 8 insertions(+), 20 deletions(-)

(limited to 'arm_compute/runtime')

diff --git a/arm_compute/runtime/CL/CLTypes.h b/arm_compute/runtime/CL/CLTypes.h
index f7b4ebd9b4..48697af35f 100644
--- a/arm_compute/runtime/CL/CLTypes.h
+++ b/arm_compute/runtime/CL/CLTypes.h
@@ -34,6 +34,8 @@ enum class CLGEMMKernelType
      * @note This variant will be deprecated in favor of a new and configurable NATIVE variant
      */
     NATIVE_V1,
+    /** Native GEMM kernel with configurable block size. */
+    NATIVE,
     /** Reshaped GEMM kernel where both lhs and rhs matrices are reshaped. Fixed block size fixed.
      * @note Temporary variant to keep compatibility with the old implementation.
      * @note This variant will be deprecated in favor of RESHAPED
@@ -48,11 +50,11 @@ enum class CLGEMMKernelType
 /** OpenCL GEMM kernel selection parameters. These information are retrieved to select the GEMM kernel on OpenCL */
 struct CLGEMMKernelSelectionParams
 {
-    unsigned int m{ 0 };                        /**< Number of rows for the lhs matrix. Lhs matrix NOT transposed */
-    unsigned int n{ 0 };                        /**< Number of columns for the rhs matrix. Rhs matrix NOT transposed */
-    unsigned int k{ 0 };                        /**< Number of rows for the rhs matrix. Rhs matrix NOT transposed */
-    bool         is_rhs_constant{ false };      /**< True if the content of the rhs matrix is constant */
-    DataType     data_type{DataType::UNKNOWN};  /**< Data type */
+    unsigned int m{ 0 };                         /**< Number of rows for the lhs matrix. Lhs matrix NOT transposed */
+    unsigned int n{ 0 };                         /**< Number of columns for the rhs matrix. Rhs matrix NOT transposed */
+    unsigned int k{ 0 };                         /**< Number of rows for the rhs matrix. Rhs matrix NOT transposed */
+    bool         is_rhs_constant{ false };       /**< True if the content of the rhs matrix is constant */
+    DataType     data_type{ DataType::UNKNOWN }; /**< Data type */
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_RUNTIME_CLTYPES_H */
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index c9b1b70c54..b147001820 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -25,7 +25,6 @@
 #define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H
 
 #include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
@@ -41,18 +40,7 @@ namespace arm_compute
 class IMemoryManager;
 class ICLTensor;
 
-/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the following OpenCL kernels:
- *
- *  -# @ref CLGEMMReshapeRHSMatrixKernel  (if the output tensor is a matrix)
- *  -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is FALSE)
- *  -# @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is TRUE)
- *  -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0)
- *  -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
- *  -# @ref CLGEMMLowpOffsetContributionKernel (if gemm_info.gemmlowp_output_stage == NONE)
- *  -# @ref CLGEMMLowpOffsetContributionOutputStageKernel (if gemm_info.gemmlowp_output_stage != NONE)
- *  -# @ref CLDepthConvertLayerKernel
- *
-*/
+/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
 class CLGEMMLowpMatrixMultiplyCore : public IFunction
 {
 public:
@@ -106,7 +94,6 @@ private:
 
     // Kernels used
     CLDepthConvertLayerKernel                     _weights_to_qasymm8;
-    CLGEMMLowpMatrixMultiplyKernel                _mm_midgard_kernel;
     CLGEMMLowpMatrixMultiplyNativeKernel          _mm_native_kernel;
     CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
     CLGEMMReshapeRHSMatrixKernel                  _mtx_b_reshape_kernel;
@@ -132,7 +119,6 @@ private:
     int32_t _a_offset;
     int32_t _b_offset;
     bool    _is_gemm_reshaped;
-    bool    _is_midgard;
     bool    _reshape_b_only_on_first_run;
     bool    _is_prepared;
     bool    _run_output_stage;
-- 
cgit v1.2.1