From eb65f6da695ac0d3e495817145cceb1c4de4f048 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Wed, 15 Apr 2020 11:42:15 +0100 Subject: COMPMID-3304: Update OpenCL GEMM heuristic for Int8 Change-Id: I6b7ff678d8d0437a1639db2ff602ea1cdb155464 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3056 Tested-by: Arm Jenkins Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins --- arm_compute/runtime/CL/CLTypes.h | 12 +++++++----- .../runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 16 +--------------- 2 files changed, 8 insertions(+), 20 deletions(-) (limited to 'arm_compute/runtime') diff --git a/arm_compute/runtime/CL/CLTypes.h b/arm_compute/runtime/CL/CLTypes.h index f7b4ebd9b4..48697af35f 100644 --- a/arm_compute/runtime/CL/CLTypes.h +++ b/arm_compute/runtime/CL/CLTypes.h @@ -34,6 +34,8 @@ enum class CLGEMMKernelType * @note This variant will be deprecated in favor of a new and configurable NATIVE variant */ NATIVE_V1, + /** Native GEMM kernel with configurable block size.*/ + NATIVE, /** Reshaped GEMM kernel where both lhs and rhs matrices are reshaped. Fixed block size fixed. * @note Temporary variant to keep compatibility with the old implementation. * @note This variant will be deprecated in favor of RESHAPED @@ -48,11 +50,11 @@ enum class CLGEMMKernelType /** OpenCL GEMM kernel selection parameters. These information are retrieved to select the GEMM kernel on OpenCL */ struct CLGEMMKernelSelectionParams { - unsigned int m{ 0 }; /**< Number of rows for the lhs matrix. Lhs matrix NOT transposed */ - unsigned int n{ 0 }; /**< Number of columns for the rhs matrix. Rhs matrix NOT transposed */ - unsigned int k{ 0 }; /**< Number of rows for the rhs matrix. Rhs matrix NOT transposed */ - bool is_rhs_constant{ false }; /**< True if the content of the rhs matrix is constant */ - DataType data_type{DataType::UNKNOWN}; /**< Data type */ + unsigned int m{ 0 }; /**< Number of rows for the lhs matrix. Lhs matrix NOT transposed */ + unsigned int n{ 0 }; /**< Number of columns for the rhs matrix. Rhs matrix NOT transposed */ + unsigned int k{ 0 }; /**< Number of rows for the rhs matrix. Rhs matrix NOT transposed */ + bool is_rhs_constant{ false }; /**< True if the content of the rhs matrix is constant */ + DataType data_type{ DataType::UNKNOWN }; /**< Data type */ }; } // namespace arm_compute #endif /* ARM_COMPUTE_RUNTIME_CLTYPES_H */ diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h index c9b1b70c54..b147001820 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h @@ -25,7 +25,6 @@ #define ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H #include "arm_compute/core/CL/kernels/CLDepthConvertLayerKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyNativeKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" @@ -41,18 +40,7 @@ namespace arm_compute class IMemoryManager; class ICLTensor; -/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the following OpenCL kernels: - * - * -# @ref CLGEMMReshapeRHSMatrixKernel (if the output tensor is a matrix) - * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is FALSE) - * -# @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is TRUE) - * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0) - * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0) - * -# @ref CLGEMMLowpOffsetContributionKernel (if gemm_info.gemmlowp_output_stage == NONE) - * -# @ref CLGEMMLowpOffsetContributionOutputStageKernel (if gemm_info.gemmlowp_output_stage != NONE) - * -# @ref CLDepthConvertLayerKernel - * -*/ +/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */ class CLGEMMLowpMatrixMultiplyCore : public IFunction { public: @@ -106,7 +94,6 @@ private: // Kernels used CLDepthConvertLayerKernel _weights_to_qasymm8; - CLGEMMLowpMatrixMultiplyKernel _mm_midgard_kernel; CLGEMMLowpMatrixMultiplyNativeKernel _mm_native_kernel; CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel; CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel; @@ -132,7 +119,6 @@ private: int32_t _a_offset; int32_t _b_offset; bool _is_gemm_reshaped; - bool _is_midgard; bool _reshape_b_only_on_first_run; bool _is_prepared; bool _run_output_stage; -- cgit v1.2.1