COMPMID-882 - Optimizing GEMMLowp on OpenCL reshaping matrices

This new optimization achieves 36.3% MAC utilisation on Mate 9 @ 1GHz. The performance results have been reported here: https://confluence.arm.com/display/MLENG/GEMMLowp+performance%3A+ACL+18.02 Change-Id: I71b6a217068763dfdc11bbf3574ee0eb94f93679 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/118531 Reviewed-by: Anthony Barbier <anthony.barbier@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
author: Gian Marco <gianmarco.iodice@arm.com> 2018-01-30 13:35:54 +0000
committer: Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:47:18 +0000
commit: 19835e591cb0b66a0f5000ae1505bf299e50337d (patch)
tree: 525ee8b233a2cefe3b2734d76fdb91093b8c2d50 /arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
parent: 6fa009e05ae32e64f397f54087885c3eb68f0b4b (diff)
download: ComputeLibrary-19835e591cb0b66a0f5000ae1505bf299e50337d.tar.gz
1 files changed, 6 insertions, 4 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
index 3ad3ced003..b96e978b66 100644
--- a/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -59,18 +59,20 @@ public:
      * @param[in]  input1                    Input tensor containing the transposed1xW Matrix B. Data type supported: same as @p input0
      * @param[out] output                    Output tensor to store the result of matrix multiplication. Data type supported: S32
      * @param[in]  is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
+     * @param[in]  reshape_info              (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
      */
-    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, bool is_interleaved_transposed = true);
+    void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMLowpMatrixMultiplyKernel
      *
      * @param[in] input0                    Input tensor info containing the interleaved Matrix A. Data type supported: QASYMM8
      * @param[in] input1                    Input tensor info containing the transposed Matrix B. Data type supported: same as @p input0
      * @param[in] output                    Output tensor info to store the result of matrix multiplication. Data type supported: S32
-     * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
+     * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel
+     * @param[in] reshape_info              GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped
      *
      * @return a status
      */
-    static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed = true);
+    static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
author	Gian Marco <gianmarco.iodice@arm.com>	2018-01-30 13:35:54 +0000
committer	Anthony Barbier <anthony.barbier@arm.com>	2018-11-02 16:47:18 +0000
commit	19835e591cb0b66a0f5000ae1505bf299e50337d (patch)
tree	525ee8b233a2cefe3b2734d76fdb91093b8c2d50 /arm_compute/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.h
parent	6fa009e05ae32e64f397f54087885c3eb68f0b4b (diff)
download	ComputeLibrary-19835e591cb0b66a0f5000ae1505bf299e50337d.tar.gz