From e9b3ee2badebf91188c1cd0e59d6aaa30ed60985 Mon Sep 17 00:00:00 2001
From: Jakub Sujak <jakub.sujak@arm.com>
Date: Mon, 17 Apr 2023 12:08:48 +0100
Subject: Connect CLMatMul function to quantized kernels and resolve NE
 BatchMatMul int_8 failures

* Adapt the CLMatMul function and ClMatMul operator to use quantized kernels.
* Add function-level tests.

Resolves: COMPMID-5929 and COMPMID-5811

Change-Id: I5348cdcf07b8074c138e04dfef0a73399377accd
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Signed-off-by: Omar Al Khatib <omar.alkhatib@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9575
Reviewed-by: Mohmun02 <MohammedSuhail.Munshi@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/runtime/CL/functions/CLMatMul.h   | 29 +++++++++++++++------------
 arm_compute/runtime/NEON/functions/NEMatMul.h | 22 +++++++++++---------
 2 files changed, 28 insertions(+), 23 deletions(-)

(limited to 'arm_compute')
diff --git a/arm_compute/runtime/CL/functions/CLMatMul.h b/arm_compute/runtime/CL/functions/CLMatMul.h
index 712bac06bf..2af9a4a9a6 100644
--- a/arm_compute/runtime/CL/functions/CLMatMul.h
+++ b/arm_compute/runtime/CL/functions/CLMatMul.h
@@ -21,11 +21,12 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
-#define ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
+#ifndef ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
+#define ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
 
 #include "arm_compute/runtime/IFunction.h"
 #include <memory>
+
 namespace arm_compute
 {
 // Forward declarations for used types instead of including their header, that could minimize compile time
@@ -64,10 +65,12 @@ public:
      * - All
      *
      * Valid data type configurations:
-     * |lhs          |rhs          |output         |
-     * |:------------|:------------|:--------------|
-     * |F32          |F32          |F32            |
-     * |F16          |F16          |F16            |
+     * |lhs            |rhs            |dst            |
+     * |:--------------|:--------------|:--------------|
+     * |F32            |F32            |F32            |
+     * |F16            |F16            |F16            |
+     * |QASYMM8_SIGNED |QASYMM8_SIGNED |QASYMM8_SIGNED |
+     * |QASYMM8        |QASYMM8        |QASYMM8        |
      *
      * @note BatchMatMul: Batched Matrix Multiply - [A * B], Multiplies all slices (slice is an element of a batch) of Tensors A and B
      *                    and stores the result in the dst tensor of the same batch size.
@@ -76,18 +79,18 @@ public:
      * @note All tensors must have the same data type.
      *
      * @param[in]  compile_context The compile context to be used.
-     * @param[in]  lhs             LHS input tensor (Matrix or Vector A). Data types supported: F16/F32
-     * @param[in]  rhs             RHS input tensor (Matrix B). Data type supported: same as @p lhs.
-     * @param[out] output          Output tensor. Data type supported: same as @p lhs.
-     * @param[in]  matmul_info     Attributes for MatMul
+     * @param[in]  lhs             Left-hand side tensor containing the input activations as Matrix A. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+     * @param[in]  rhs             Right-hand side tensor containing the input weights as Matrix B. Data types supported: same as @p lhs.
+     * @param[out] dst             Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs.
+     * @param[in]  matmul_info     Contains MatMul operation information described in @ref MatMulInfo.
      * @param[in]  settings        Class containing flags for function level settings
      */
-    void configure(const CLCompileContext &compile_context, ICLTensor *rhs, ICLTensor *lhs, ICLTensor *output, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{});
+    void configure(const CLCompileContext &compile_context, ICLTensor *lhs, ICLTensor *rhs, ICLTensor *dst, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{});
     /** Initialise the kernel's inputs and output
      *
      * Similar to @ref CLMatMul::configure()
      */
-    void configure(ICLTensor *lhs, ICLTensor *rhs, ICLTensor *output, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{});
+    void configure(ICLTensor *lhs, ICLTensor *rhs, ICLTensor *dst, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{});
     /** Static function to check if given info will lead to a valid configuration of @ref CLMatMul.
      *
      * Similar to @ref CLMatMul::configure()
@@ -104,4 +107,4 @@ private:
 };
 } // namespace arm_compute
 
-#endif /* ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL */
+#endif /* ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL */
diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h
index 0f3e3adacc..a331c55a98 100644
--- a/arm_compute/runtime/NEON/functions/NEMatMul.h
+++ b/arm_compute/runtime/NEON/functions/NEMatMul.h
@@ -21,8 +21,8 @@
  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
-#ifndef ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
-#define ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
+#ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
+#define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
 
 #include "arm_compute/runtime/IFunction.h"
 #include <memory>
@@ -80,25 +80,27 @@ public:
      * - Any
      *
      * Valid data type configurations:
-     * |src0           |src1               |dst            |
+     * |lhs            |rhs                |dst            |
      * |:--------------|:------------------|:--------------|
      * |F32            |F32                |F32            |
      * |F16            |F16                |F16            |
+     * |QASYMM8_SIGNED |QASYMM8_SIGNED     |QASYMM8_SIGNED |
+     * |QASYMM8        |QASYMM8            |QASYMM8        |
      *
-     * @param[in]  lhs      Input source tensor.
-     * @param[in]  rhs      Input source tensor.
-     * @param[out] output   Output tensor. Data type supported: same as @p lhs/rhs
-     * @param[in]  info     Class containing flags to transpose lhs/rhs
+     * @param[in]  lhs      Left-hand side tensor. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+     * @param[in]  rhs      Right-hand side tensor. Data types supported: same as @p lhs.
+     * @param[out] dst      Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+     * @param[in]  info     Contains MatMul operation information described in @ref MatMulInfo.
      * @param[in]  settings Class containing flags for function level settings i.e fast math
      */
-    void configure(ITensor *lhs, ITensor *rhs, ITensor *output, const MatMulInfo &info, const CpuMatMulSettings &settings);
+    void configure(ITensor *lhs, ITensor *rhs, ITensor *dst, const MatMulInfo &info, const CpuMatMulSettings &settings);
     /** Static function to check if given info will lead to a valid configuration of @ref NEMatMul
      *
      * Parameters are similar to @ref NEMatMul::configure()
      *
      * @return Status
      */
-    static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &info, const CpuMatMulSettings &settings);
+    static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &info, const CpuMatMulSettings &settings);
 
     // Inherited methods overridden
     void run() override;
@@ -108,4 +110,4 @@ private:
     std::unique_ptr<Impl> _impl;
 };
 }
-#endif /* ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL */
+#endif /* ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL */
-- 
cgit v1.2.1