From 7026b303d636e7639f8877ae8d5eff54f39c1121 Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Wed, 26 Jun 2019 17:18:11 +0100
Subject: COMPMID-1979: Fuse Activation Function in CLGEMM - part 1

Implementing a new struct to contain the information for the OpenCL GEMM
kernels

Change-Id: I6c641c312f9c3b025a7c69dd0df3b730d2d2c2cb
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/1434
Tested-by: Arm Jenkins
Reviewed-by: Giuseppe Rossini
---
 .../CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h |  6 ++--
 .../kernels/CLGEMMMatrixMultiplyReshapedKernel.h  |  6 ++--
 .../CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h   |  6 ++--
 arm_compute/core/KernelDescriptors.h              | 11 ++++++
 arm_compute/core/utils/misc/ShapeCalculator.h     | 40 ++++++++++++++++++++++
 5 files changed, 63 insertions(+), 6 deletions(-)

(limited to 'arm_compute/core')

diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
index 79689a2894..96f412c6a5 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h
@@ -26,6 +26,8 @@
 
 #include "arm_compute/core/CL/ICLKernel.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
+
 namespace arm_compute
 {
 class ICLTensor;
@@ -62,7 +64,7 @@ public:
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                    const GEMMRHSMatrixInfo &rhs_info,
-                   const GEMMReshapeInfo &gemm_info);
+                   const GEMMKernelInfo &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyNativeKernel
      *
      * @param[in] input0    Input tensor info for the LHS matrix. Data type supported: F32/F16. The number of dimensions for the LHS matrix must be less or equal than 4.
@@ -83,7 +85,7 @@ public:
      */
     static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                            const GEMMRHSMatrixInfo &rhs_info,
-                           const GEMMReshapeInfo &gemm_info);
+                           const GEMMKernelInfo &gemm_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
index 68ab94a31d..47916b3019 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h
@@ -26,6 +26,8 @@
 
 #include "arm_compute/core/CL/ICLKernel.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
+
 namespace arm_compute
 {
 class ICLTensor;
@@ -69,7 +71,7 @@ public:
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                    const GEMMRHSMatrixInfo &rhs_info,
-                   const GEMMReshapeInfo &gemm_info);
+                   const GEMMKernelInfo &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedKernel
      *
      * @param[in] input0    Input tensor containing the LHS reshaped matrix. Data type supported: F32/F16. The number of dimensions for the LHS matrix must be less or equal than 4
@@ -94,7 +96,7 @@ public:
      */
     static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                            const GEMMRHSMatrixInfo &rhs_info,
-                           const GEMMReshapeInfo &gemm_info);
+                           const GEMMKernelInfo &gemm_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
index e3b3880a37..3315331e87 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h
@@ -26,6 +26,8 @@
 
 #include "arm_compute/core/CL/ICLKernel.h"
 
+#include "arm_compute/core/KernelDescriptors.h"
+
 namespace arm_compute
 {
 class ICLTensor;
@@ -65,7 +67,7 @@ public:
      */
     void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                    const GEMMRHSMatrixInfo &rhs_info,
-                   const GEMMReshapeInfo &gemm_info);
+                   const GEMMKernelInfo &gemm_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
      *
      * @param[in] input0    Input tensor info for the LHS matrix. Data type supported: F32/F16. The number of dimensions for the LHS matrix must be less or equal than 4.
@@ -86,7 +88,7 @@ public:
      */
     static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, const GEMMLHSMatrixInfo &lhs_info,
                            const GEMMRHSMatrixInfo &rhs_info,
-                           const GEMMReshapeInfo &gemm_info);
+                           const GEMMKernelInfo &gemm_info);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
diff --git a/arm_compute/core/KernelDescriptors.h b/arm_compute/core/KernelDescriptors.h
index 83131f4296..fe59365d06 100644
--- a/arm_compute/core/KernelDescriptors.h
+++ b/arm_compute/core/KernelDescriptors.h
@@ -48,5 +48,16 @@ struct FFTRadixStageKernelInfo
     unsigned int Nx{ 0 };                 /**< Nx coefficient. */
     bool         is_first_stage{ false }; /**< Flags if the FFT kernels is the first stage of a decomposed FFT. */
 };
+
+/** Descriptor used by the GEMM kernels */
+struct GEMMKernelInfo
+{
+    unsigned int m{ 0 };
+    unsigned int n{ 0 };
+    unsigned int k{ 0 };
+    unsigned int depth_output_gemm3d{ 0 };
+    bool         reinterpret_input_as_3d{ false };
+    bool         broadcast_bias{ false };
+};
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_CORE_KERNEL_DESCRIPTORS_H__ */
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index 7eab17ba11..010501454f 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -26,6 +26,7 @@
 
 #include "arm_compute/core/Helpers.h"
 #include "arm_compute/core/ITensorInfo.h"
+#include "arm_compute/core/KernelDescriptors.h"
 #include "arm_compute/core/Utils.h"
 #include "arm_compute/core/utils/helpers/tensor_transform.h"
 
@@ -850,6 +851,8 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
 }
 
 /** Calculate the matrix multiplication output shape of two tensors
+ *
+ * @note Deprecated. Remove when GEMMReshapeInfo is not used anymore by any other kernels
  *
  * @param[in] input0    First input tensor info
 * @param[in] input1    Second input tensor info
@@ -886,6 +889,43 @@ inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo
     return output_shape;
 }
 
+/** Calculate the matrix multiplication output shape of two tensors
+ *
+ * @param[in] input0    First input tensor info
+ * @param[in] input1    Second input tensor info
+ * @param[in] gemm_info GEMM kernel info used to retrieve the original dimensions of the input matrices
+ *
+ * @return the calculated shape
+ */
+inline TensorShape compute_mm_shape(const ITensorInfo &input0, const ITensorInfo &input1, const GEMMKernelInfo &gemm_info)
+{
+    ARM_COMPUTE_ERROR_ON_MSG(input0.num_dimensions() > 4, "The number of dimensions for the matrix A must be <= 4");
+
+    const bool         reinterpret_input_as_3d  = gemm_info.reinterpret_input_as_3d;
+    const bool         reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0;
+    const unsigned int depth_output_gemm3d      = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d : 1;
+
+    TensorShape output_shape{ input0.tensor_shape() };
+
+    if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
+    {
+        output_shape.set(0, gemm_info.n);
+        output_shape.set(1, gemm_info.m);
+    }
+    else
+    {
+        // If the output of GEMM has to be reinterpreted as 3D, the number of input0 rows (M) is obtained collapsing the second and third
+        // dimension of the output tensor
+        const unsigned int batch_size = reinterpret_input_as_3d ? input0.tensor_shape()[3] : input0.tensor_shape()[2];
+        output_shape.set(0, gemm_info.n);
+        output_shape.set(1, gemm_info.m / depth_output_gemm3d);
+        output_shape.set(2, reinterpret_output_as_3d ? depth_output_gemm3d : batch_size);
+        output_shape.set(3, reinterpret_output_as_3d ? batch_size : 1);
+    }
+
+    return output_shape;
+}
+
 /** Calculate the matrix multiplication output shape of two tensors
  *
  * @param[in] input Input tensor info
--
cgit v1.2.1
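As a quick illustration of what the new GEMMKernelInfo descriptor drives, below is a minimal standalone C++ sketch of the output-shape logic added by the compute_mm_shape(input0, input1, gemm_info) overload in the patch above. It deliberately does not call the Compute Library API: MiniShape, the helper function, the field comments and the example dimensions are illustrative assumptions; only the GEMMKernelInfo field names and the branching on reinterpret_input_as_3d / depth_output_gemm3d mirror the patch.

#include <array>
#include <cstdio>

// Stand-in for the descriptor added in arm_compute/core/KernelDescriptors.h.
// The field meanings in the comments are an interpretation, not taken from the patch.
struct GEMMKernelInfo
{
    unsigned int m{ 0 };                           // Number of output rows (LHS rows)
    unsigned int n{ 0 };                           // Number of output columns (RHS columns)
    unsigned int k{ 0 };                           // Accumulation depth (LHS columns / RHS rows)
    unsigned int depth_output_gemm3d{ 0 };         // Non-zero: reinterpret the 2D output as 3D with this depth
    bool         reinterpret_input_as_3d{ false }; // LHS comes from a collapsed 3D tensor
    bool         broadcast_bias{ false };          // Bias (input2) is broadcast across the M dimension
};

// Shape as [dim0, dim1, dim2, dim3]; dim0 is the innermost dimension, as in TensorShape.
using MiniShape = std::array<unsigned int, 4>;

// Mirrors the branch structure of the new compute_mm_shape() overload in ShapeCalculator.h.
MiniShape compute_mm_shape(const MiniShape &input0_shape, const GEMMKernelInfo &gemm_info)
{
    const bool         reinterpret_input_as_3d  = gemm_info.reinterpret_input_as_3d;
    const bool         reinterpret_output_as_3d = gemm_info.depth_output_gemm3d != 0;
    const unsigned int depth_output_gemm3d      = reinterpret_output_as_3d ? gemm_info.depth_output_gemm3d : 1;

    MiniShape output_shape = input0_shape;

    if(!reinterpret_input_as_3d && !reinterpret_output_as_3d)
    {
        // Plain 2D GEMM: output is N x M per batch
        output_shape[0] = gemm_info.n;
        output_shape[1] = gemm_info.m;
    }
    else
    {
        // 3D case: M collapses the second and third output dimensions, so the batch index
        // of input0 moves up by one dimension when the input is reinterpreted as 3D
        const unsigned int batch_size = reinterpret_input_as_3d ? input0_shape[3] : input0_shape[2];
        output_shape[0] = gemm_info.n;
        output_shape[1] = gemm_info.m / depth_output_gemm3d;
        output_shape[2] = reinterpret_output_as_3d ? depth_output_gemm3d : batch_size;
        output_shape[3] = reinterpret_output_as_3d ? batch_size : 1;
    }

    return output_shape;
}

int main()
{
    // Hypothetical numbers: a 24x24 spatial output flattened into M = 576 rows, two batches
    GEMMKernelInfo info;
    info.m                   = 24 * 24;
    info.n                   = 64;
    info.k                   = 128;
    info.depth_output_gemm3d = 24; // reinterpret the output as 3D with depth 24

    const MiniShape lhs_shape = { 128, 24 * 24, 2, 1 }; // [K, M, batches, 1]
    const MiniShape out       = compute_mm_shape(lhs_shape, info);
    std::printf("output shape: [%u, %u, %u, %u]\n", out[0], out[1], out[2], out[3]);
    return 0;
}

With these numbers the sketch prints "output shape: [64, 24, 24, 2]", i.e. N x (M / depth_output_gemm3d) x depth_output_gemm3d x batch, matching the reinterpret_output_as_3d branch of the new overload.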