From 68508897deafe26b5d50566a6ca3ba70c728dd12 Mon Sep 17 00:00:00 2001
From: Teresa Charlin
Date: Wed, 7 Apr 2021 19:18:08 +0100
Subject: Substitute CLFullyConnectedLayerReshapeWeights by CLTranspose

Resolves partially: COMPMID-4359 (1/2)

Signed-off-by: Teresa Charlin
Change-Id: Id1859f3cd530eb05f027226e2004cf518778147e
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5377
Tested-by: Arm Jenkins
Reviewed-by: Georgios Pinitas
Comments-Addressed: Arm Jenkins
---
 .../runtime/CL/functions/CLFullyConnectedLayer.h   | 68 +++-------------------
 docs/06_functions_list.dox                         |  1 -
 src/runtime/CL/functions/CLFullyConnectedLayer.cpp | 42 +-------------
 3 files changed, 10 insertions(+), 101 deletions(-)

diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index a640e344d4..75cb2dc1fa 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -31,65 +31,15 @@
 #include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGEMM.h"
 #include "arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h"
+#include "arm_compute/runtime/CL/functions/CLTranspose.h"
 #include "arm_compute/runtime/IWeightsManager.h"
 #include "arm_compute/runtime/MemoryGroup.h"
 
 namespace arm_compute
 {
-/** Function to reshape the weights of Fully Connected layer with OpenCL by transposing input tensor. This function calls the following kernel:
- *
- * -# @ref opencl::kernels::ClTransposeKernel
- *
- * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
- */
-class CLFullyConnectedLayerReshapeWeights : public IFunction
-{
-public:
-    /** Constructor */
-    CLFullyConnectedLayerReshapeWeights();
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLFullyConnectedLayerReshapeWeights(const CLFullyConnectedLayerReshapeWeights &) = delete;
-    /** Default move constructor */
-    CLFullyConnectedLayerReshapeWeights(CLFullyConnectedLayerReshapeWeights &&) = default;
-    /** Prevent instances of this class from being copied (As this class contains pointers) */
-    CLFullyConnectedLayerReshapeWeights &operator=(const CLFullyConnectedLayerReshapeWeights &) = delete;
-    /** Default move assignment operator */
-    CLFullyConnectedLayerReshapeWeights &operator=(CLFullyConnectedLayerReshapeWeights &&) = default;
-    /** Default destructor */
-    ~CLFullyConnectedLayerReshapeWeights();
-    /** Set the input and output tensors.
-     *
-     * @param[in]  input  Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[out] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
-     */
-    void configure(const ICLTensor *input, ICLTensor *output);
-    /** Set the input and output tensors.
-     *
-     * @param[in]  compile_context The compile context to be used.
-     * @param[in]  input           Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[out] output          Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
-     */
-    void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
-    /** Static function to check if given info will lead to a valid configuration of @ref CLFullyConnectedLayerReshapeWeights
-     *
-     * @param[in] input  Weights tensor. The weights must be 2 dimensional. Data types supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
-     * @param[in] output Destination tensor which stores the transposed input tensor. Data type supported: Same as @p input.
-     *
-     * @return a status
-     */
-    static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
-    // Inherited methods overridden
-    void run() override;
-
-private:
-    struct Impl;
-    std::unique_ptr<Impl> _impl;
-};
-
 namespace weights_transformations
 {
-/** Basic function to manage the reshape weights generated from @ref CLFullyConnectedLayerReshapeWeights */
+/** Basic function to manage the reshape weights generated from @ref CLTranspose */
 class CLFullyConnectedLayerReshapeWeightsManaged : public ITransformWeights
 {
 public:
@@ -119,7 +69,7 @@ public:
         return _uid;
     }
 
-    /** Configures the @ref CLFullyConnectedLayerReshapeWeights function
+    /** Configures the @ref CLTranspose function
      *
      * @param[in] input Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
      */
@@ -127,7 +77,7 @@ public:
     {
         configure(CLKernelLibrary::get().get_compile_context(), input);
     }
-    /** Configures the @ref CLFullyConnectedLayerReshapeWeights function
+    /** Configures the @ref CLTranspose function
      *
      * @param[in] compile_context The compile context to be used.
      * @param[in] input           Source tensor. Data type supported: QASYMM8/QASYMM8_SIGNED/F16/F32.
@@ -138,16 +88,16 @@ public:
     }
 
 private:
-    static constexpr uint32_t           _uid = 0x0;
-    CLTensor                            _output{};
-    CLFullyConnectedLayerReshapeWeights _func{};
+    static constexpr uint32_t _uid = 0x0;
+    CLTensor                  _output{};
+    CLTranspose               _func{};
 };
 } // namespace weights_transformations
 
 /** Basic function to compute a Fully Connected layer on OpenCL. This function calls the following OpenCL kernels:
  *
  * -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
- * -# @ref CLFullyConnectedLayerReshapeWeights (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
+ * -# @ref CLTranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
  * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
  *
  * @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
@@ -232,7 +182,7 @@ private:
     weights_transformations::CLConvertFullyConnectedWeightsManaged      _convert_weights_managed;
     weights_transformations::CLFullyConnectedLayerReshapeWeightsManaged _reshape_weights_managed_function;
     CLFlattenLayer                                                       _flatten_layer;
-    CLFullyConnectedLayerReshapeWeights                                  _reshape_weights_function;
+    CLTranspose                                                          _reshape_weights_function;
     CLGEMM                                                               _mm_gemm;
     CLGEMMLowpMatrixMultiplyCore                                         _mm_gemmlowp;
     CLTensor                                                             _flatten_output;
diff --git a/docs/06_functions_list.dox b/docs/06_functions_list.dox
index 0b2263555d..de43a0cee7 100644
--- a/docs/06_functions_list.dox
+++ b/docs/06_functions_list.dox
@@ -205,7 +205,6 @@ namespace arm_compute
     - @ref CLFillBorder
     - @ref CLFlattenLayer
     - @ref CLFloor
-    - @ref CLFullyConnectedLayerReshapeWeights
     - @ref CLGather
     - @ref CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint
     - @ref CLGEMMLowpQuantizeDownInt32ToInt8ScaleByFixedPoint
diff --git a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
index 176c5bebd5..945675f4dd 100644
--- a/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
+++ b/src/runtime/CL/functions/CLFullyConnectedLayer.cpp
@@ -141,46 +141,6 @@ Status validate_mm(const ITensorInfo &input, const ITensorInfo &weights, const I
     }
 }
 } // namespace
-struct CLFullyConnectedLayerReshapeWeights::Impl
-{
-    const ITensor                                       *src{ nullptr };
-    ITensor                                             *dst{ nullptr };
-    std::unique_ptr<opencl::kernels::ClTransposeKernel>  op{ nullptr };
-};
-
-CLFullyConnectedLayerReshapeWeights::CLFullyConnectedLayerReshapeWeights()
-    : _impl(std::make_unique<Impl>())
-{
-}
-
-CLFullyConnectedLayerReshapeWeights::~CLFullyConnectedLayerReshapeWeights() = default;
-
-void CLFullyConnectedLayerReshapeWeights::configure(const ICLTensor *input, ICLTensor *output)
-{
-    configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLFullyConnectedLayerReshapeWeights::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
-    _impl->src = input;
-    _impl->dst = output;
-    _impl->op  = std::make_unique<opencl::kernels::ClTransposeKernel>();
-    _impl->op->configure(compile_context, _impl->src->info(), _impl->dst->info());
-}
-
-Status CLFullyConnectedLayerReshapeWeights::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
-    return opencl::kernels::ClTransposeKernel::validate(input, output);
-}
-
-void CLFullyConnectedLayerReshapeWeights::run()
-{
-    ITensorPack pack{};
-    pack.add_tensor(TensorType::ACL_SRC, _impl->src);
-    pack.add_tensor(TensorType::ACL_DST, _impl->dst);
-    CLScheduler::get().enqueue_op(*_impl->op.get(), pack, false);
-}
-
 CLFullyConnectedLayer::CLFullyConnectedLayer(std::shared_ptr<IMemoryManager> memory_manager, IWeightsManager *weights_manager)
     : _memory_group(memory_manager), _weights_manager(weights_manager), _convert_weights(), _convert_weights_managed(), _reshape_weights_managed_function(), _flatten_layer(), _reshape_weights_function(),
       _mm_gemm(memory_manager, weights_manager), _mm_gemmlowp(memory_manager), _flatten_output(), _converted_weights_output(), _reshape_weights_output(), _are_weights_converted(true),
@@ -404,7 +364,7 @@ Status CLFullyConnectedLayer::validate(const ITensorInfo *input, const ITensorIn
     if(!weights_reshaped)
     {
         // Validate reshape weights kernel
-        ARM_COMPUTE_RETURN_ON_ERROR(CLFullyConnectedLayerReshapeWeights::validate(weights, &reshaped_weights));
+        ARM_COMPUTE_RETURN_ON_ERROR(CLTranspose::validate(weights, &reshaped_weights));
         weights_to_use = &reshaped_weights;
     }
 
-- 
cgit v1.2.1
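
Usage note (not part of the patch): after this change the fully connected layer's weights "reshape" is just a plain 2-D transpose performed by CLTranspose, which is a public function in its own right. The sketch below shows that call sequence in isolation, assuming the usual CL runtime setup via CLScheduler::default_init(); the tensor names and the 128x64 F32 shape are illustrative only, and the validate() call mirrors the check now done in CLFullyConnectedLayer::validate() above.

#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/CL/CLTensor.h"
#include "arm_compute/runtime/CL/functions/CLTranspose.h"

using namespace arm_compute;

int main()
{
    // Create the OpenCL context, queue and kernel library used by all CL functions.
    CLScheduler::get().default_init();

    // 2-D fully connected weights (illustrative shape: 128 x 64, F32).
    CLTensor weights;
    CLTensor weights_t;
    weights.allocator()->init(TensorInfo(TensorShape(128U, 64U), 1, DataType::F32));

    // Same static check the layer performs on its reshape path.
    ARM_COMPUTE_ERROR_THROW_ON(CLTranspose::validate(weights.info(), weights_t.info()));

    // configure() deduces and initialises the transposed output shape,
    // which is what CLFullyConnectedLayerReshapeWeights used to do via ClTransposeKernel.
    CLTranspose reshape_weights;
    reshape_weights.configure(&weights, &weights_t);

    weights.allocator()->allocate();
    weights_t.allocator()->allocate();

    // ... map `weights` and fill it with data here ...

    reshape_weights.run();
    CLScheduler::get().sync();
    return 0;
}

Inside the layer this is driven through the private _reshape_weights_function member, so the public CLFullyConnectedLayer interface is unchanged by the substitution.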