COMPMID-2110: Enable CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel in CLGEMMLowp

Change-Id: Ic32c803c3e2a067de10a7e46c85c962a970957b6
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/969
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-04-09 12:03:05 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-04-16 12:32:21 +0000
commit: 2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d (patch)
tree: 5406927e4b3f073084bb4b82a6836f9753c32d8e /arm_compute
parent: a851bbaaa8f48d6716eff3375668f5d2b910104b (diff)
download: ComputeLibrary-2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d.tar.gz
1 files changed, 4 insertions, 8 deletions
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 67b22821da..a07101c020 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -25,11 +25,10 @@
 #define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__
 
 #include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
 #include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
 #include "arm_compute/runtime/CL/CLMemoryGroup.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
@@ -42,10 +41,9 @@ class ICLTensor;
 
 /** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the following OpenCL kernels:
  *
- *  -# @ref CLGEMMReshapeLHSMatrixKernel  (if the output tensor is a matrix)
  *  -# @ref CLGEMMReshapeRHSMatrixKernel  (if the output tensor is a matrix)
- *  -# @ref CLGEMMLowpMatrixMultiplyKernel (if the input matrix is a vector or for Midgard architectures)
- *  -# @ref CLGEMMLowpMatrixMultiplyReshapedKernel (if the input matrix is not a vector and if the GPU architecture is not Midgard)
+ *  -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is FALSE)
+ *  -# @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is TRUE)
  *  -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0)
  *  -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
  *  -# @ref CLGEMMLowpOffsetContributionKernel (if gemm_info.gemmlowp_output_stage == NONE)
@@ -103,8 +101,7 @@ public:
 private:
     CLMemoryGroup                                 _memory_group;
     CLGEMMLowpMatrixMultiplyKernel                _mm_kernel;
-    CLGEMMLowpMatrixMultiplyReshapedKernel        _mm_reshaped_kernel;
-    CLGEMMReshapeLHSMatrixKernel                  _mtx_a_reshape_kernel;
+    CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
     CLGEMMReshapeRHSMatrixKernel                  _mtx_b_reshape_kernel;
     CLGEMMLowpMatrixAReductionKernel              _mtx_a_reduction_kernel;
     CLGEMMLowpMatrixBReductionKernel              _mtx_b_reduction_kernel;
@@ -112,7 +109,6 @@ private:
     CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
     CLTensor                                      _vector_sum_col;
     CLTensor                                      _vector_sum_row;
-    CLTensor                                      _tmp_a;
     CLTensor                                      _tmp_b;
     CLTensor                                      _mm_result_s32;
     const ICLTensor                              *_original_b;
author	Gian Marco Iodice <gianmarco.iodice@arm.com>	2019-04-09 12:03:05 +0100
committer	Gian Marco Iodice <gianmarco.iodice@arm.com>	2019-04-16 12:32:21 +0000
commit	2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d (patch)
tree	5406927e4b3f073084bb4b82a6836f9753c32d8e /arm_compute
parent	a851bbaaa8f48d6716eff3375668f5d2b910104b (diff)
download	ComputeLibrary-2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d.tar.gz