about | summary | refs | log | tree | commit | diff
path: root/arm_compute
diff options
context:
space:
mode:
author: Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-04-09 12:03:05 +0100
committer: Gian Marco Iodice <gianmarco.iodice@arm.com> 2019-04-16 12:32:21 +0000
commit: 2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d (patch)
tree: 5406927e4b3f073084bb4b82a6836f9753c32d8e /arm_compute
parent: a851bbaaa8f48d6716eff3375668f5d2b910104b (diff)
download: ComputeLibrary-2ec6c1eb6ee77b79e8ab6b97b8cd70bcc4c5589d.tar.gz
COMPMID-2110: Enable CLGEMMLowpMatrixMultiplyReshapeOnlyRHSKernel in CLGEMMLowp
Change-Id: Ic32c803c3e2a067de10a7e46c85c962a970957b6
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-on: https://review.mlplatform.org/c/969
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r-- arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h | 12
1 files changed, 4 insertions, 8 deletions
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index 67b22821da..a07101c020 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -25,11 +25,10 @@
#define __ARM_COMPUTE_CLGEMMLOWPMATRIXMULTIPLYCORE_H__
#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedKernel.h"
+#include "arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpOffsetContributionOutputStageKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMLowpReductionKernel.h"
-#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h"
#include "arm_compute/core/CL/kernels/CLGEMMReshapeRHSMatrixKernel.h"
#include "arm_compute/runtime/CL/CLMemoryGroup.h"
#include "arm_compute/runtime/CL/CLTensor.h"
@@ -42,10 +41,9 @@ class ICLTensor;
/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. This function calls the following OpenCL kernels:
*
- * -# @ref CLGEMMReshapeLHSMatrixKernel (if the output tensor is a matrix)
* -# @ref CLGEMMReshapeRHSMatrixKernel (if the output tensor is a matrix)
- * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the input matrix is a vector or for Midgard architectures)
- * -# @ref CLGEMMLowpMatrixMultiplyReshapedKernel (if the input matrix is not a vector and if the GPU architecture is not Midgard)
+ * -# @ref CLGEMMLowpMatrixMultiplyKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is FALSE)
+ * -# @ref CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel (if the parameter "reshape_b_only_on_first_run" of GEMMInfo is TRUE)
* -# @ref CLGEMMLowpMatrixAReductionKernel (if the offset of matrix B is not 0)
* -# @ref CLGEMMLowpMatrixBReductionKernel (if the offset of matrix A is not 0)
* -# @ref CLGEMMLowpOffsetContributionKernel (if gemm_info.gemmlowp_output_stage == NONE)
@@ -103,8 +101,7 @@ public:
private:
CLMemoryGroup _memory_group;
CLGEMMLowpMatrixMultiplyKernel _mm_kernel;
- CLGEMMLowpMatrixMultiplyReshapedKernel _mm_reshaped_kernel;
- CLGEMMReshapeLHSMatrixKernel _mtx_a_reshape_kernel;
+ CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel _mm_reshaped_only_rhs_kernel;
CLGEMMReshapeRHSMatrixKernel _mtx_b_reshape_kernel;
CLGEMMLowpMatrixAReductionKernel _mtx_a_reduction_kernel;
CLGEMMLowpMatrixBReductionKernel _mtx_b_reduction_kernel;
@@ -112,7 +109,6 @@ private:
CLGEMMLowpOffsetContributionOutputStageKernel _offset_contribution_output_stage_kernel;
CLTensor _vector_sum_col;
CLTensor _vector_sum_row;
- CLTensor _tmp_a;
CLTensor _tmp_b;
CLTensor _mm_result_s32;
const ICLTensor *_original_b;