From ab23dd0fbc632063235a6ad408241dc79a35d3e4 Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Mon, 6 Jul 2020 14:57:36 +0100
Subject: COMPMID-3387: Support memory injection in CLActivationLayer

Signed-off-by: Georgios Pinitas
Change-Id: I31f9620607b372fc3340c71e748a5ea177d9da62
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/3520
Reviewed-by: Michele Di Giorgio
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../core/CL/kernels/CLActivationLayerKernel.h  | 19 ++-------
 .../runtime/CL/functions/CLActivationLayer.h   | 47 ++++++++++++++++++++--
 arm_compute/runtime/CL/functions/CLLSTMLayer.h | 16 ++++----
 arm_compute/runtime/CL/functions/CLRNNLayer.h  |  4 +-
 4 files changed, 58 insertions(+), 28 deletions(-)

(limited to 'arm_compute')

diff --git a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
index 1e83a689cd..d8b556a120 100644
--- a/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
+++ b/arm_compute/core/CL/kernels/CLActivationLayerKernel.h
@@ -45,16 +45,6 @@ public:
     CLActivationLayerKernel &operator=(CLActivationLayerKernel &&) = default;
     /** Default destructor */
     ~CLActivationLayerKernel() = default;
-    /** Set the input and output tensor.
-     *
-     * @note If the output tensor is a nullptr, the activation function will be performed in-place
-     *
-     * @param[in, out] input    Source tensor. In case of @p output tensor = nullptr, this tensor will store the result
-     *                          of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
-     * @param[out]     output   Destination tensor. Data type supported: same as @p input
-     * @param[in]      act_info Activation layer information.
-     */
-    void configure(ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
     /** Set the input and output tensor.
      *
      * @note If the output tensor is a nullptr, the activation function will be performed in-place
@@ -65,7 +55,7 @@ public:
      * @param[out]     output          Destination tensor. Data type supported: same as @p input
      * @param[in]      act_info        Activation layer information.
      */
-    void configure(const CLCompileContext &compile_context, ICLTensor *input, ICLTensor *output, ActivationLayerInfo act_info);
+    void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info);
     /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayerKernel
      *
      * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
@@ -78,12 +68,11 @@ public:
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);

     // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
+    void run_op(const InputTensorMap &inputs, const OutputTensorMap &outputs,
+                const Window &window, cl::CommandQueue &queue) override;

 private:
-    ICLTensor *_input;
-    ICLTensor *_output;
-    bool       _run_in_place;
+    bool _run_in_place;
 };
 } // namespace arm_compute
 #endif /*ARM_COMPUTE_CLACTIVATIONLAYERKERNEL_H */
diff --git a/arm_compute/runtime/CL/functions/CLActivationLayer.h b/arm_compute/runtime/CL/functions/CLActivationLayer.h
index fbb34e5fb9..7353789e4a 100644
--- a/arm_compute/runtime/CL/functions/CLActivationLayer.h
+++ b/arm_compute/runtime/CL/functions/CLActivationLayer.h
@@ -24,6 +24,7 @@
 #ifndef ARM_COMPUTE_CLACTIVATIONLAYER_H
 #define ARM_COMPUTE_CLACTIVATIONLAYER_H

+#include "arm_compute/runtime/CL/ICLOperator.h"
 #include "arm_compute/runtime/CL/ICLSimpleFunction.h"
 #include "arm_compute/core/Types.h"

@@ -36,7 +37,7 @@ class ICLTensor;
  *
  * @note The function simulates an activation layer with the specified activation function.
  */
-class CLActivationLayer : public ICLSimpleFunction
+class CLActivationLayer : public IFunction
 {
 public:
     /** Constructor
@@ -44,14 +45,16 @@ public:
      * @param[in] ctx Runtime context to be used by the function
      */
     CLActivationLayer(CLRuntimeContext *ctx = nullptr);
+    /** Destructor */
+    ~CLActivationLayer();
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLActivationLayer(const CLActivationLayer &) = delete;
     /** Default move constructor */
-    CLActivationLayer(CLActivationLayer &&) = default;
+    CLActivationLayer(CLActivationLayer &&);
     /** Prevent instances of this class from being copied (As this class contains pointers) */
     CLActivationLayer &operator=(const CLActivationLayer &) = delete;
     /** Default move assignment operator */
-    CLActivationLayer &operator=(CLActivationLayer &&) = default;
+    CLActivationLayer &operator=(CLActivationLayer &&);
     /** Set the input and output tensor.
      *
      * @note If the output tensor is a nullptr or is equal to the input, the activation function will be performed in-place
@@ -83,6 +86,44 @@ public:
      * @return a status
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+
+    // Inherited methods overridden:
+    void run() override;
+
+private:
+    struct Impl;
+    std::unique_ptr<Impl> _impl;
+};
+
+namespace experimental
+{
+/** Basic function to run @ref CLActivationLayerKernel */
+class CLActivationLayer : public ICLOperator
+{
+public:
+    /** Set the input and output tensor.
+     *
+     * @param[in]      compile_context The compile context to be used.
+     * @param[in, out] input           Source tensor info. In case of @p output tensor = nullptr, this tensor will store the result
+     *                                 of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+     * @param[out]     output          Destination tensor info. Data type supported: same as @p input
+     * @param[in]      act_info        Activation layer parameters.
+     */
+    void configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *output, ActivationLayerInfo act_info);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLActivationLayer
+     *
+     * @param[in] input    Source tensor info. In case of @p output tensor info = nullptr, this tensor will store the result
+     *                     of the activation function. Data types supported: QASYMM8/QASYMM8_SIGNED/QSYMM16/F16/F32.
+     * @param[in] output   Destination tensor info. Data type supported: same as @p input
+     * @param[in] act_info Activation layer information.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const ActivationLayerInfo &act_info);
+
+    // Inherited methods overridden:
+    MemoryRequirements workspace() const override;
 };
+} // namespace experimental
 } // namespace arm_compute
 #endif /* ARM_COMPUTE_CLACTIVATIONLAYER_H */
diff --git a/arm_compute/runtime/CL/functions/CLLSTMLayer.h b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
index a29513aaae..7b8b5135da 100644
--- a/arm_compute/runtime/CL/functions/CLLSTMLayer.h
+++ b/arm_compute/runtime/CL/functions/CLLSTMLayer.h
@@ -26,7 +26,6 @@

 #include "arm_compute/runtime/IFunction.h"

-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLCopyKernel.h"
 #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
 #include "arm_compute/core/CL/kernels/CLMemsetKernel.h"
@@ -34,6 +33,7 @@
 #include "arm_compute/core/CL/kernels/CLWidthConcatenate2TensorsKernel.h"
 #include "arm_compute/core/Types.h"
 #include "arm_compute/runtime/CL/CLTensor.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLConcatenateLayer.h"
 #include "arm_compute/runtime/CL/functions/CLElementwiseOperations.h"
 #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
@@ -207,28 +207,28 @@ private:
     CLArithmeticAddition                 _accum_input_gate1;
     CLSaturatedArithmeticOperationKernel _subtract_input_gate;
     CLPixelWiseMultiplicationKernel      _pixelwise_mul_input_gate;
-    CLActivationLayerKernel              _activation_input_gate;
+    CLActivationLayer                    _activation_input_gate;
     CLFullyConnectedLayer                _fully_connected_forget_gate;
     CLArithmeticAddition                 _accum_forget_gate1;
     CLPixelWiseMultiplicationKernel      _pixelwise_mul_forget_gate;
-    CLActivationLayerKernel              _activation_forget_gate;
+    CLActivationLayer                    _activation_forget_gate;
     CLFullyConnectedLayer                _fully_connected_cell_state;
     CLGEMM                               _gemm_cell_state1;
     CLTransposeKernel                    _transpose_cell_state;
     CLSaturatedArithmeticOperationKernel _accum_cell_state1;
     CLSaturatedArithmeticOperationKernel _accum_cell_state2;
     CLPixelWiseMultiplicationKernel      _pixelwise_mul_cell_state1;
-    CLActivationLayerKernel              _activation_cell_state;
-    CLActivationLayerKernel              _cell_clip;
+    CLActivationLayer                    _activation_cell_state;
+    CLActivationLayer                    _cell_clip;
     CLPixelWiseMultiplicationKernel      _pixelwise_mul_cell_state2;
     CLFullyConnectedLayer                _fully_connected_output;
     CLPixelWiseMultiplicationKernel      _pixelwise_mul_output_state1;
     CLArithmeticAddition                 _accum_output1;
-    CLActivationLayerKernel              _activation_output;
-    CLActivationLayerKernel              _activation_output_state;
+    CLActivationLayer                    _activation_output;
+    CLActivationLayer                    _activation_output_state;
     CLPixelWiseMultiplicationKernel      _pixelwise_mul_output_state2;
     CLFullyConnectedLayer                _fully_connected_output_state;
-    CLActivationLayerKernel              _projection_clip;
+    CLActivationLayer                    _projection_clip;
     CLCopyKernel                         _copy_cell_state;
     CLCopyKernel                         _copy_output;
     CLConcatenateLayer                   _concat_scratch_buffer;
diff --git a/arm_compute/runtime/CL/functions/CLRNNLayer.h b/arm_compute/runtime/CL/functions/CLRNNLayer.h
index 0291eb17a9..bd9de2b0c9 100644
--- a/arm_compute/runtime/CL/functions/CLRNNLayer.h
+++ b/arm_compute/runtime/CL/functions/CLRNNLayer.h
@@ -24,10 +24,10 @@
 #ifndef ARM_COMPUTE_CLRNN_LAYER_H
 #define ARM_COMPUTE_CLRNN_LAYER_H

-#include "arm_compute/core/CL/kernels/CLActivationLayerKernel.h"
 #include "arm_compute/core/CL/kernels/CLCopyKernel.h"
 #include "arm_compute/core/CL/kernels/CLElementwiseOperationKernel.h"
 #include "arm_compute/runtime/CL/ICLSimpleFunction.h"
+#include "arm_compute/runtime/CL/functions/CLActivationLayer.h"
 #include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
 #include "arm_compute/runtime/CL/functions/CLGEMM.h"

@@ -88,7 +88,7 @@ private:
     MemoryGroup                          _memory_group;
     CLGEMM                               _gemm_state_f;
     CLSaturatedArithmeticOperationKernel _add_kernel;
-    CLActivationLayerKernel              _activation_kernel;
+    CLActivationLayer                    _activation;
     CLFullyConnectedLayer                _fully_connected_kernel;
     CLCopyKernel                         _copy_kernel;
     CLTensor                             _fully_connected_out;
--
cgit v1.2.1
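
Note (not part of the patch): a minimal usage sketch of the function-level API after
this change. The tensor shape, data type and activation function below are arbitrary
illustrations, and the experimental operator's run-time tensor-map interface is omitted
because its exact signature is not shown in this header diff. As the header changes
suggest, CLActivationLayer now keeps an internal Impl that configures the kernel on the
tensors' ITensorInfo and supplies the actual CL buffers when run() is called.

    #include "arm_compute/core/TensorInfo.h"
    #include "arm_compute/core/Types.h"
    #include "arm_compute/runtime/CL/CLScheduler.h"
    #include "arm_compute/runtime/CL/CLTensor.h"
    #include "arm_compute/runtime/CL/functions/CLActivationLayer.h"

    using namespace arm_compute;

    int main()
    {
        // Initialise the OpenCL scheduler (context, command queue, kernel cache).
        CLScheduler::get().default_init();

        // Arbitrary 16x16 F32 tensors, purely for illustration.
        CLTensor src, dst;
        src.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));
        dst.allocator()->init(TensorInfo(TensorShape(16U, 16U), 1, DataType::F32));

        // Configure a RELU activation. The public function still takes ICLTensor
        // pointers; the memory-injection machinery lives underneath it.
        CLActivationLayer act;
        act.configure(&src, &dst, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        // Allocate the backing CL buffers and execute.
        src.allocator()->allocate();
        dst.allocator()->allocate();
        act.run();
        CLScheduler::get().sync();
        return 0;
    }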