From 94abde4f4e98f6f1adb5c46b194527f34a8ea07d Mon Sep 17 00:00:00 2001
From: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Date: Thu, 25 May 2023 16:48:43 +0100
Subject: Add Fused Activation to OpenCL MatMul

- Added fused activation to MatMul function interface
- Added fused activation to CL backend
- Includes tests for supported Activation Functions in MatMul

Resolves: [COMPMID-6192]
Signed-off-by: Mohammed Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: Ie103212b600b60699eaf6a6394d609e6e1f5aba6
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/522465
Comments-Addressed: bsgcomp <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Tested-by: bsgcomp <bsgcomp@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9714
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/core/MatMulInfo.h                 | 12 ------------
 arm_compute/runtime/CL/functions/CLMatMul.h   | 22 ++++++++++++++++------
 arm_compute/runtime/NEON/functions/NEMatMul.h | 17 +++++++++++++----
 3 files changed, 29 insertions(+), 22 deletions(-)

(limited to 'arm_compute')
diff --git a/arm_compute/core/MatMulInfo.h b/arm_compute/core/MatMulInfo.h
index 62d782215b..01b9b47761 100644
--- a/arm_compute/core/MatMulInfo.h
+++ b/arm_compute/core/MatMulInfo.h
@@ -58,11 +58,6 @@ public:
     {
         return _adj_rhs;
     }
-    /* Get Fused Activation Layer Info */
-    ActivationLayerInfo fused_activation() const
-    {
-        return _fused_act;
-    }
     /* Set Adjoint LHS flag */
     MatMulInfo &adj_lhs(bool adj_lhs)
     {
@@ -75,17 +70,10 @@ public:
         _adj_rhs = adj_rhs;
         return *this;
     }
-    /* Set Fused Activation Layer Info */
-    MatMulInfo &fused_activation(const ActivationLayerInfo &act_info)
-    {
-        _fused_act = act_info;
-        return *this;
-    }
 
 private:
     bool                _adj_lhs{ false };
     bool                _adj_rhs{ false };
-    ActivationLayerInfo _fused_act{}; // disabled by default
 };
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_MATMULINFO_H */
diff --git a/arm_compute/runtime/CL/functions/CLMatMul.h b/arm_compute/runtime/CL/functions/CLMatMul.h
index 2af9a4a9a6..a11c1ed6a2 100644
--- a/arm_compute/runtime/CL/functions/CLMatMul.h
+++ b/arm_compute/runtime/CL/functions/CLMatMul.h
@@ -24,6 +24,8 @@
 #ifndef ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
 #define ACL_ARM_COMPUTE_RUNTIME_CL_FUNCTIONS_CLMATMUL
 
+#include "arm_compute/core/ActivationLayerInfo.h"
+#include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 #include <memory>
 
@@ -83,21 +85,29 @@ public:
      * @param[in]  rhs             Right-hand side tensor info containing the input weights as Matrix B. Data types supported: same as @p lhs.
      * @param[out] dst             Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs.
      * @param[in]  matmul_info     Contains MatMul operation information described in @ref MatMulInfo.
-     * @param[in]  settings        Class containing flags for function level settings
+     * @param[in]  settings        Contains flags for function level settings
+     * @param[in]  act_info        (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
      */
-    void configure(const CLCompileContext &compile_context, ICLTensor *rhs, ICLTensor *lhs, ICLTensor *dst, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{});
+    void configure(const CLCompileContext &compile_context, ICLTensor *lhs, ICLTensor *rhs, ICLTensor *dst, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{},
+                   const ActivationLayerInfo &act_info = ActivationLayerInfo{});
     /** Initialise the kernel's inputs and output
      *
      * Similar to @ref CLMatMul::configure()
      */
-    void configure(ICLTensor *lhs, ICLTensor *rhs, ICLTensor *dst, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{});
+    void configure(ICLTensor *lhs, ICLTensor *rhs, ICLTensor *dst, const MatMulInfo &matmul_info, const GpuMatMulSettings &settings = GpuMatMulSettings{}, const ActivationLayerInfo &act_info =
+                       ActivationLayerInfo{});
     /** Static function to check if given info will lead to a valid configuration of @ref CLMatMul.
      *
-     * Similar to @ref CLMatMul::configure()
      *
-     * @return a status
+     * @note All tensors must have the same data type.
+     *
+     * @param[in]  lhs         Left-hand side (Matrix A) tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+     * @param[in]  rhs         Right-hand side (Matrix B) tensor info. Data types supported: same as @p lhs.
+     * @param[out] output      Output tensor info to store the result of the batched matrix multiplication. Data types supported: same as @p lhs.
+     * @param[in]  matmul_info Contains MatMul operation information described in @ref MatMulInfo.
+     * @param[in]  act_info    (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
      */
-    static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info);
+    static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *output, const MatMulInfo &matmul_info, const ActivationLayerInfo &act_info = ActivationLayerInfo{});
     // Inherited methods overridden:
     void run() override;
 
diff --git a/arm_compute/runtime/NEON/functions/NEMatMul.h b/arm_compute/runtime/NEON/functions/NEMatMul.h
index a331c55a98..81fec19f86 100644
--- a/arm_compute/runtime/NEON/functions/NEMatMul.h
+++ b/arm_compute/runtime/NEON/functions/NEMatMul.h
@@ -24,6 +24,8 @@
 #ifndef ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
 #define ACL_ARM_COMPUTE_RUNTIME_NEON_FUNCTIONS_NEMATMUL
 
+#include "arm_compute/core/ActivationLayerInfo.h"
+#include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/IFunction.h"
 #include <memory>
 
@@ -91,16 +93,23 @@ public:
      * @param[in]  rhs      Right-hand side tensor info. Data types supported: same as @p lhs.
      * @param[out] dst      Output tensor to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
      * @param[in]  info     Contains MatMul operation information described in @ref MatMulInfo.
-     * @param[in]  settings Class containing flags for function level settings i.e fast math
+     * @param[in]  settings Contains flags for function level settings, e.g. fast math
+     * @param[in]  act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
      */
-    void configure(ITensor *lhs, ITensor *rhs, ITensor *dst, const MatMulInfo &info, const CpuMatMulSettings &settings);
+    void configure(ITensor *lhs, ITensor *rhs, ITensor *dst, const MatMulInfo &info, const CpuMatMulSettings &settings, const ActivationLayerInfo &act_info = ActivationLayerInfo());
     /** Static function to check if given info will lead to a valid configuration of @ref NEMatMul
      *
-     * Parameters are similar to @ref NEMatMul::configure()
+     * @param[in]  lhs      Left-hand side tensor info. Data types supported: F16/F32/QASYMM8_SIGNED/QASYMM8.
+     * @param[in]  rhs      Right-hand side tensor info. Data types supported: same as @p lhs.
+     * @param[out] dst      Output tensor info to store the result of the batched matrix multiplication. Data types supported: same as @p lhs / @p rhs.
+     * @param[in]  info     Contains MatMul operation information described in @ref MatMulInfo.
+     * @param[in]  settings Contains flags for function level settings, e.g. fast math
+     * @param[in]  act_info (Optional) Contains activation function and lower and upper bound values for bounded activation functions.
      *
      * @return Status
      */
-    static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &info, const CpuMatMulSettings &settings);
+    static Status validate(const ITensorInfo *lhs, const ITensorInfo *rhs, const ITensorInfo *dst, const MatMulInfo &info, const CpuMatMulSettings &settings,
+                           const ActivationLayerInfo &act_info = ActivationLayerInfo());
 
     // Inherited methods overridden
     void run() override;
-- 
cgit v1.2.1