From ebc3a90721fe4a41b8e141466894d4d7185c01b7 Mon Sep 17 00:00:00 2001
From: Michele Di Giorgio <michele.digiorgio@arm.com>
Date: Fri, 16 Nov 2018 16:04:25 +0000
Subject: COMPMID-1706: Fuse the bias addition within CLGEMM

Change-Id: I378f2023f4fa010f195f76716ac07aa86279bfae
Signed-off-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Reviewed-on: https://review.mlplatform.org/280
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
---
 arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h  |  4 ++--
 arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h | 12 +++++++-----
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'arm_compute/runtime/CL')

diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index d6d88cec55..e800dd7cbb 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -136,7 +136,7 @@ private:
     CLGEMM                                              _mm_gemm;
     CLGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
     CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
-    CLGEMMMatrixAccumulateBiasesKernel                  _accumulate_biases_kernel;
+    CLGEMMMatrixAccumulateBiasesKernel                  _accumulate_biases_kernel; // TODO(COMPMID-1889): Use CLGEMM to add bias in CLFullyConnectedLayer
     CLTensor                                            _flatten_output;
     CLTensor                                            _gemmlowp_output;
     CLTensor                                            _converted_weights_output;
diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index d7694a8328..b304576f33 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2018 ARM Limited.
+ * Copyright (c) 2017-2019 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -163,7 +163,7 @@ private:
      * @param[in, out] output                Output tensor. Data types supported: Same as @p input,
      *                                       except for input of QASYMM8 type where output should be of S32 type.
      * @param[in]      gemmlowp_output_stage GEMMLowp output stage info
-     * @param[in]      gemm_3d_depth         (Optional) Depth of GEMM 3D (Defaults to 1)
+     * @param[in]      gemm_3d_depth         (Optional) Depth of GEMM 3D (Defaults to 1)
      */
     void configure_mm(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage, int gemm_3d_depth = 1);
     /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
@@ -175,13 +175,14 @@ private:
      * @param[in] biases                Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
      *                                  Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
      * @param[in] gemmlowp_output_stage GEMMLowp output stage info
-     * @param[in] gemm_3d_depth         (Optional) Depth of GEMM 3D (Defaults to 1)
-     * @param[in] skip_im2col           (Optional) Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout. (Default to false)
+     * @param[in] gemm_3d_depth         Depth of GEMM 3D
+     * @param[in] skip_im2col           Flag which specifies if im2col has to be skipped. i.e. 1x1 convolution with NHWC data layout.
+     * @param[in] run_addition          Flag which specifies whether @ref CLGEMMMatrixMatrixMultiplyAddition has to be run.
      *
      * @return a status
      */
     static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const GEMMLowpOutputStageInfo &gemmlowp_output_stage,
-                              int gemm_3d_depth = 1, bool skip_im2col = false);
+                              int gemm_3d_depth, bool skip_im2col, bool run_addition);
 
 private:
     CLMemoryGroup                        _memory_group;
@@ -207,6 +208,7 @@ private:
     bool _is_quantized;
     bool _is_activationlayer_enabled;
     bool _is_prepared;
+    bool _run_addition;
 };
 } // namespace arm_compute
 #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */
-- 
cgit v1.2.1