From 2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 9 Apr 2019 12:03:05 +0100 Subject: COMPMID-2110: Enable CLGEMMLowpMatrixMultiplyReshapeOnlyRHSKernel in CLGEMMLowp Change-Id: Ic32c803c3e2a067de10a7e46c85c962a970957b6 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/969 Reviewed-by: Georgios Pinitas Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- .../runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h index 67b22821da..a07101c020 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h +++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h @@ -25,11 +25,10 @@ #define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__ #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h" +#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h" #include "arm_compute/runtime/CL/CLMemoryGroup.h" #include "arm_compute/runtime/CL/CLTensor.h" @@ -42,10 +41,9 @@ class ICLTensor; /** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the following OpenCL kernels: * - * -# @ref CLGEMMReshapeLHSMatrixKernel (if the output tensor is a matrix) * -# @ref CLGEMMReshapeRHSMatrixKernel (if the output tensor is a matrix) - * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the input matrix is a vector or for Midgard architectures) - * -# @ref CLGEMMLowpMatrixMultiplyReshapedKernel (if the input matrix is not a vector and if the GPU architecture is not Midgard) + * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is FALSE) + * -# @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is TRUE) * -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0) * -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0) * -# @ref CLGEMMLowpOffsetContributionKernel (if gemm_info.gemmlowp_output_stage == NONE) @@ -103,8 +101,7 @@ public: private: CLMemoryGroup _memory_group; CLGEMMLowpMatrixMultiplyKernel _mm_kernel; - CLGEMMLowpMatrixMultiplyReshapedKernel _mm_reshaped_kernel; - CLGEMMReshapeLHSMatrixKernel _mtx_a_reshape_kernel; + CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel; CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel; CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel; CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel; @@ -112,7 +109,6 @@ private: CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel; CLTensor _vector_sum_col; CLTensor _vector_sum_row; - CLTensor _tmp_a; CLTensor _tmp_b; CLTensor _mm_result_s32; const ICLTensor *_original_b; -- cgit v1.2.1